From 78f2083e8ea71af313f2b875bd78af1877d67c3d Mon Sep 17 00:00:00 2001 From: "james.p.turk" Date: Thu, 17 Jan 2008 19:05:20 +0000 Subject: [PATCH] geocoder.us changes git-svn-id: https://polipoly.googlecode.com/svn/trunk@11 1885ebd5-0a40-0410-88a4-770918bee656 --- examples/address_to_district.py | 21 ++--- examples/csvtest.py | 42 +++++----- polipoly.py | 135 +++++++++++++++++++------------- 3 files changed, 114 insertions(+), 84 deletions(-) diff --git a/examples/address_to_district.py b/examples/address_to_district.py index fa6c507..707c8b9 100755 --- a/examples/address_to_district.py +++ b/examples/address_to_district.py @@ -18,14 +18,15 @@ import cgi import re from polipoly import AddressToDistrictService, GeocodingError +PATH_TO_CDFILES = 'congdist/cd99_110' +GEOCODER = AddressToDistrictService.GEOCODER_US GMAPS_API_KEY = 'define-me' -PATH_TO_CDFILES = 'cd99_110' class ApiException(Exception): # these codes are in the 300s to fit the sunlight API STATUS_CODES = { - 300: 'Google returned a server error when attempting to geocode', + 300: 'Geocoder returned a server error when attempting to geocode', 301: 'Empty address string', 302: 'Unknown address', 303: 'Prohibited address', @@ -36,7 +37,7 @@ class ApiException(Exception): def __init__(self, code): self.code = code - + def __str__(self): return '%d: %s' % (self.code, self.STATUS_CODES[self.code]) @@ -45,32 +46,32 @@ def main(): fields = cgi.FieldStorage() addr = fields.getvalue('address') or '' output = fields.getvalue('output') - + # discard blank addresses as error 301 if re.match('^\s*$', addr): raise ApiException(301) - + # discard PO Box addresses as error 305 pobox = re.compile('[Pp]\.?[Oo]\.?\s*(?:box|Box|BOX)') if pobox.search(addr): raise ApiException(305) # create service and get a district - service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES) + service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY) try: lat, lng, districts = service.address_to_district(addr) except GeocodingError, ge: # convert GeocodingError to API error code (300-303 and 320) err_dict = {500: 300, 601: 301, 602: 302, 603: 303, 620: 320} raise ApiException(err_dict.get(ge.code,304)) - + # 306: address did not fall within congressional district if len(districts) == 0: raise ApiException(306) # XML output if output == 'xml': - dist_str = '\n'.join([' %s' % dist + dist_str = '\n'.join([' %s' % dist for dist in districts]) print 'Content-type: text/xml\n' print ''' @@ -81,10 +82,10 @@ def main(): %s ''' % (addr, lat, lng, dist_str) - + # JSON output (default) else: - dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist + dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist for dist in districts]) print 'Content-type: application/json\n' print '''{"address":"%s", "latitude":"%s", "longitude":"%s", diff --git a/examples/csvtest.py b/examples/csvtest.py index 29e2c1b..7f8e09b 100755 --- a/examples/csvtest.py +++ b/examples/csvtest.py @@ -11,33 +11,33 @@ Be sure to set GMAPS_API_KEY and PATH_TO_CDFILES appropriately: import csv from polipoly import AddressToDistrictService -GMAPS_API_KEY = 'define-me' -PATH_TO_CDFILES = '' +PATH_TO_CDFILES = 'congdist/cd99_110' +GEOCODER = AddressToDistrictService.GEOCODER_US +GMAPS_API_KEY = None # This sample data was collected from Project Vote Smart -SAMPLE_DATA = '''representative,state,district,address -Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609" -Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467" -Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117" -Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501" -Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674" -Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243" -Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732" -Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222" -Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308" -John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012" -Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004" -Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251" -Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204" -Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364" -Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"''' +SAMPLE_DATA = ['representative,state,district,address', +'Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"', +'Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"', +'Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"', +'Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"', +'Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"', +'Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"', +'Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"', +'Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"', +'Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"', +'John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"', +'Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"', +'Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"', +'Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"', +'Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"', +'Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"'] -service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES) +service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY) records = csv.DictReader(SAMPLE_DATA) for rec in records: lat, lng, dists = service.address_to_district(rec['address']) - print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'], + print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'], rec['state'], rec['district'], dists[0][0], dists[0][1]) - diff --git a/polipoly.py b/polipoly.py index a9188bd..5935b11 100644 --- a/polipoly.py +++ b/polipoly.py @@ -1,16 +1,16 @@ """Python library for working with political boundaries Political boundaries are defined by one or more polygons and obtained -from census.gov shapefiles. Census boundary shapefiles are available at -http://www.census.gov/geo/www/cob/bdy_files.html. +from census.gov shapefiles. Census boundary shapefiles are available at +http://www.census.gov/geo/www/cob/bdy_files.html. At the moment this library has only been used with State and Congressional District boundaries. """ __author__ = "James Turk (james.p.turk@gmail.com)" -__version__ = "0.1.0" -__copyright__ = "Copyright (c) 2007 Sunlight Labs" +__version__ = "0.1.5" +__copyright__ = "Copyright (c) 2007-2008 Sunlight Labs" __license__ = "BSD" import urllib @@ -22,14 +22,14 @@ from dbflib import DBFFile # internally used helper function def left_of_edge(point, ep1, ep2): """Determine if point is left of infinite line touching ep1 and ep2""" - return ((ep1[0]-point[0])*(ep2[1]-point[1]) - + return ((ep1[0]-point[0])*(ep2[1]-point[1]) - (ep2[0]-point[0])*(ep1[1]-point[1])) < 0 - - + + class Polygon(object): ''' Simple polygon class used for point containment testing and conversion - - Allows for testing if a polygon contains a point as well as conversion + + Allows for testing if a polygon contains a point as well as conversion to various portable representations ''' def __init__(self, vertices): @@ -45,7 +45,7 @@ class Polygon(object): for i in xrange(len(self.vertices)-1): # add wind if edge crosses point going up and point is to left - if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and + if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and left_of_edge(point, self.vertices[i], self.vertices[i+1])): winds += 1 # end wind if edge crosses point going down and point is to right @@ -58,7 +58,7 @@ class Polygon(object): def to_kml(self): ''' get KML polygon representation ''' - + coordstr = ' '.join("%.15f,%.15f" % v for v in self.vertices) return ''' @@ -70,10 +70,10 @@ class Polygon(object): class GeocodingError(Exception): """Custom exception which maps possible google geocoder error codes to human readable strings. - + See http://www.google.com/apis/maps/documentation/reference.html#GGeoStatusCode """ - + STATUS_CODES = {500: 'Unknown Geocoding Server Error', 601: 'Empty Address', 602: 'Unknown Address', @@ -81,21 +81,26 @@ class GeocodingError(Exception): 610: 'Bad API Key', 620: 'Too Many Requests'} - def __init__(self, code): + def __init__(self, code, extra=None): Exception.__init__(self) self.code = int(code) - + self.extra = extra + def __str__(self): - return 'GeocodingError: %d - %s' % (self.code, + desc = 'GeocodingError: %d - %s' % (self.code, self.STATUS_CODES[self.code]) + if self.extra: + desc += ' (%s)' % self.extra + + class ShapefileError(Exception): """ Exception for problems with census shapefiles.""" def __init__(self, message): Exception.__init__(self, message) - + def __str__(self): return 'ShapefileError: %s' % (self.message) - + ### Census Shapefiles ### @@ -115,7 +120,7 @@ FIPS_TO_STATE = { class Entity(object): """ A named list of polygons associated with a political boundary. - + eg. a state, congressional district, or school district""" def __init__(self, name, entity, vertices, extents): @@ -187,10 +192,10 @@ class State(Entity): def from_shapefile(obj, rec): """ Construct a State from a census.gov shapefile """ return State(obj.vertices(), obj.extents(), rec['NAME']) - + def read_census_shapefile(filename): """Read census shapefile and return list of entity-derived objects. - + Given the base name of a census .shp/.dbf file returns a list of all Entity-derived objects described by the the file. """ @@ -219,42 +224,40 @@ def read_census_shapefile(filename): # shp.info()[0] is the number of objects return [Entity.from_shapefile(shp.read_object(i), dbf.read_record(i)) for i in xrange(shape_count)] - -### Geocoding ### - + +### Geocoding ### + class AddressToDistrictService(object): - """Reusable service which maps addresses to districts using the census - data and the google geocoder. - - Usage: - service = AddressToDistrictService('google-maps-apikey','path-to-cd99') - lat,lng,district = service.address_to_district('address') - """ + """Abstract base class for service which maps addresses to districts using + the census data and a geocoder.""" - def __init__(self, apikey, census_file): + GEOCODER_GMAPS = 1 + GEOCODER_US = 2 + + def __init__(self, census_file, geocoder=GEOCODER_US, apikey=None): """AddressToDistrictService constructor - - Initialize given a google maps API key and a path to a census.gov - all congressional districts (cd99) dataset. - - Google maps API keys are available from: - http://www.google.com/apis/maps/signup.html - - The cd99_110 dataset is available from: + + Initialize given a path to a census.gov all congressional districts + (cd99) dataset. + + The cd99_110 dataset is available from: http://www.census.gov/geo/www/cob/cd110.html """ + if geocoder == self.GEOCODER_GMAPS and not apikey: + raise GeocodingError(610) # bad api key - self.apikey = apikey self.boundaries = read_census_shapefile(census_file) + self.geocoder = geocoder + self.apikey = apikey - def geocode(self, address): + def _google_geocode(self, address): """Convert an address into a latitude/longitude via google maps""" - + url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \ (urllib.quote(address), self.apikey) # returns status,level-of-detail,lat,long - status, lat, lat, lng = urllib.urlopen(url).read().split(',') + status, _, lat, lng = urllib.urlopen(url).read().split(',') # 200 - OK if status == '200': @@ -262,15 +265,41 @@ class AddressToDistrictService(object): else: raise GeocodingError(status) + def _geocoderus_geocode(self, address): + """Convert an address into a latitude/longitude via geocoder.us""" + + if not address: + raise GeocodingError(601) # empty address + + url = 'http://rpc.geocoder.us/service/csv?address=%s' % \ + urllib.quote(address) + data = urllib.urlopen(url).readline() # only get first line for now + + # returns lat,long,street,city,state,zip or #: errmsg + if data.startswith('2:'): + raise GeocodingError(602) # address not found + + try: + lat, lng, _, _, _, _ = data.split(',') + return lat, lng + except ValueError: + raise GeocodingError(500, data) # unmapped error + + def lat_long_to_district(self, lat, lng): + """ Obtain the district containing a given latitude and longitude.""" + flat, flng = float(lat), float(lng) + return lat, lng, [(cb.state, cb.district) for cb in self.boundaries + if cb.contains((flng,flat))] + def address_to_district(self, address): """Given an address returns the congressional district it lies within. - - This function works by geocoding the address and then finding the point - that the returned lat/long returned lie within. - """ - lat, lng = self.geocode(address) - flat, flng = float(lat), float(lng) - return lat, lng, [(cb.state, cb.district) for cb in self.boundaries - if cb.contains((flng,flat))] - + This function works by geocoding the address and then finding the point + that the returned lat/long returned lie within. + """ + if self.geocoder == self.GEOCODER_GMAPS: + lat, lng = self._google_geocode(address) + elif self.geocoder == self.GEOCODER_US: + lat, lng = self._geocoderus_geocode(address) + + return self.lat_long_to_district(lat, lng)