geocoder.us changes

git-svn-id: https://polipoly.googlecode.com/svn/trunk@11 1885ebd5-0a40-0410-88a4-770918bee656
This commit is contained in:
james.p.turk 2008-01-17 19:05:20 +00:00
parent 9f77360de1
commit 78f2083e8e
3 changed files with 114 additions and 84 deletions

View File

@ -18,14 +18,15 @@ import cgi
import re import re
from polipoly import AddressToDistrictService, GeocodingError from polipoly import AddressToDistrictService, GeocodingError
PATH_TO_CDFILES = 'congdist/cd99_110'
GEOCODER = AddressToDistrictService.GEOCODER_US
GMAPS_API_KEY = 'define-me' GMAPS_API_KEY = 'define-me'
PATH_TO_CDFILES = 'cd99_110'
class ApiException(Exception): class ApiException(Exception):
# these codes are in the 300s to fit the sunlight API # these codes are in the 300s to fit the sunlight API
STATUS_CODES = { STATUS_CODES = {
300: 'Google returned a server error when attempting to geocode', 300: 'Geocoder returned a server error when attempting to geocode',
301: 'Empty address string', 301: 'Empty address string',
302: 'Unknown address', 302: 'Unknown address',
303: 'Prohibited address', 303: 'Prohibited address',
@ -36,7 +37,7 @@ class ApiException(Exception):
def __init__(self, code): def __init__(self, code):
self.code = code self.code = code
def __str__(self): def __str__(self):
return '%d: %s' % (self.code, self.STATUS_CODES[self.code]) return '%d: %s' % (self.code, self.STATUS_CODES[self.code])
@ -45,32 +46,32 @@ def main():
fields = cgi.FieldStorage() fields = cgi.FieldStorage()
addr = fields.getvalue('address') or '' addr = fields.getvalue('address') or ''
output = fields.getvalue('output') output = fields.getvalue('output')
# discard blank addresses as error 301 # discard blank addresses as error 301
if re.match('^\s*$', addr): if re.match('^\s*$', addr):
raise ApiException(301) raise ApiException(301)
# discard PO Box addresses as error 305 # discard PO Box addresses as error 305
pobox = re.compile('[Pp]\.?[Oo]\.?\s*(?:box|Box|BOX)') pobox = re.compile('[Pp]\.?[Oo]\.?\s*(?:box|Box|BOX)')
if pobox.search(addr): if pobox.search(addr):
raise ApiException(305) raise ApiException(305)
# create service and get a district # create service and get a district
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES) service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
try: try:
lat, lng, districts = service.address_to_district(addr) lat, lng, districts = service.address_to_district(addr)
except GeocodingError, ge: except GeocodingError, ge:
# convert GeocodingError to API error code (300-303 and 320) # convert GeocodingError to API error code (300-303 and 320)
err_dict = {500: 300, 601: 301, 602: 302, 603: 303, 620: 320} err_dict = {500: 300, 601: 301, 602: 302, 603: 303, 620: 320}
raise ApiException(err_dict.get(ge.code,304)) raise ApiException(err_dict.get(ge.code,304))
# 306: address did not fall within congressional district # 306: address did not fall within congressional district
if len(districts) == 0: if len(districts) == 0:
raise ApiException(306) raise ApiException(306)
# XML output # XML output
if output == 'xml': if output == 'xml':
dist_str = '\n'.join([' <district state="%s">%s</district>' % dist dist_str = '\n'.join([' <district state="%s">%s</district>' % dist
for dist in districts]) for dist in districts])
print 'Content-type: text/xml\n' print 'Content-type: text/xml\n'
print '''<results> print '''<results>
@ -81,10 +82,10 @@ def main():
%s %s
</districts> </districts>
</results>''' % (addr, lat, lng, dist_str) </results>''' % (addr, lat, lng, dist_str)
# JSON output (default) # JSON output (default)
else: else:
dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist
for dist in districts]) for dist in districts])
print 'Content-type: application/json\n' print 'Content-type: application/json\n'
print '''{"address":"%s", "latitude":"%s", "longitude":"%s", print '''{"address":"%s", "latitude":"%s", "longitude":"%s",

View File

@ -11,33 +11,33 @@ Be sure to set GMAPS_API_KEY and PATH_TO_CDFILES appropriately:
import csv import csv
from polipoly import AddressToDistrictService from polipoly import AddressToDistrictService
GMAPS_API_KEY = 'define-me' PATH_TO_CDFILES = 'congdist/cd99_110'
PATH_TO_CDFILES = '' GEOCODER = AddressToDistrictService.GEOCODER_US
GMAPS_API_KEY = None
# This sample data was collected from Project Vote Smart # This sample data was collected from Project Vote Smart
SAMPLE_DATA = '''representative,state,district,address SAMPLE_DATA = ['representative,state,district,address',
Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609" 'Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"',
Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467" 'Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"',
Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117" 'Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"',
Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501" 'Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"',
Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674" 'Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"',
Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243" 'Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"',
Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732" 'Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"',
Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222" 'Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"',
Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308" 'Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"',
John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012" 'John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"',
Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004" 'Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"',
Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251" 'Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"',
Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204" 'Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"',
Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364" 'Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"',
Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"''' 'Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"']
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES) service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
records = csv.DictReader(SAMPLE_DATA) records = csv.DictReader(SAMPLE_DATA)
for rec in records: for rec in records:
lat, lng, dists = service.address_to_district(rec['address']) lat, lng, dists = service.address_to_district(rec['address'])
print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'], print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'],
rec['state'], rec['district'], dists[0][0], dists[0][1]) rec['state'], rec['district'], dists[0][0], dists[0][1])

View File

@ -1,16 +1,16 @@
"""Python library for working with political boundaries """Python library for working with political boundaries
Political boundaries are defined by one or more polygons and obtained Political boundaries are defined by one or more polygons and obtained
from census.gov shapefiles. Census boundary shapefiles are available at from census.gov shapefiles. Census boundary shapefiles are available at
http://www.census.gov/geo/www/cob/bdy_files.html. http://www.census.gov/geo/www/cob/bdy_files.html.
At the moment this library has only been used with State and Congressional At the moment this library has only been used with State and Congressional
District boundaries. District boundaries.
""" """
__author__ = "James Turk (james.p.turk@gmail.com)" __author__ = "James Turk (james.p.turk@gmail.com)"
__version__ = "0.1.0" __version__ = "0.1.5"
__copyright__ = "Copyright (c) 2007 Sunlight Labs" __copyright__ = "Copyright (c) 2007-2008 Sunlight Labs"
__license__ = "BSD" __license__ = "BSD"
import urllib import urllib
@ -22,14 +22,14 @@ from dbflib import DBFFile
# internally used helper function # internally used helper function
def left_of_edge(point, ep1, ep2): def left_of_edge(point, ep1, ep2):
"""Determine if point is left of infinite line touching ep1 and ep2""" """Determine if point is left of infinite line touching ep1 and ep2"""
return ((ep1[0]-point[0])*(ep2[1]-point[1]) - return ((ep1[0]-point[0])*(ep2[1]-point[1]) -
(ep2[0]-point[0])*(ep1[1]-point[1])) < 0 (ep2[0]-point[0])*(ep1[1]-point[1])) < 0
class Polygon(object): class Polygon(object):
''' Simple polygon class used for point containment testing and conversion ''' Simple polygon class used for point containment testing and conversion
Allows for testing if a polygon contains a point as well as conversion Allows for testing if a polygon contains a point as well as conversion
to various portable representations ''' to various portable representations '''
def __init__(self, vertices): def __init__(self, vertices):
@ -45,7 +45,7 @@ class Polygon(object):
for i in xrange(len(self.vertices)-1): for i in xrange(len(self.vertices)-1):
# add wind if edge crosses point going up and point is to left # add wind if edge crosses point going up and point is to left
if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and
left_of_edge(point, self.vertices[i], self.vertices[i+1])): left_of_edge(point, self.vertices[i], self.vertices[i+1])):
winds += 1 winds += 1
# end wind if edge crosses point going down and point is to right # end wind if edge crosses point going down and point is to right
@ -58,7 +58,7 @@ class Polygon(object):
def to_kml(self): def to_kml(self):
''' get KML polygon representation ''' ''' get KML polygon representation '''
coordstr = ' '.join("%.15f,%.15f" % v for v in self.vertices) coordstr = ' '.join("%.15f,%.15f" % v for v in self.vertices)
return '''<Polygon><outerBoundaryIs><LinearRing> return '''<Polygon><outerBoundaryIs><LinearRing>
@ -70,10 +70,10 @@ class Polygon(object):
class GeocodingError(Exception): class GeocodingError(Exception):
"""Custom exception which maps possible google geocoder error codes to """Custom exception which maps possible google geocoder error codes to
human readable strings. human readable strings.
See http://www.google.com/apis/maps/documentation/reference.html#GGeoStatusCode See http://www.google.com/apis/maps/documentation/reference.html#GGeoStatusCode
""" """
STATUS_CODES = {500: 'Unknown Geocoding Server Error', STATUS_CODES = {500: 'Unknown Geocoding Server Error',
601: 'Empty Address', 601: 'Empty Address',
602: 'Unknown Address', 602: 'Unknown Address',
@ -81,21 +81,26 @@ class GeocodingError(Exception):
610: 'Bad API Key', 610: 'Bad API Key',
620: 'Too Many Requests'} 620: 'Too Many Requests'}
def __init__(self, code): def __init__(self, code, extra=None):
Exception.__init__(self) Exception.__init__(self)
self.code = int(code) self.code = int(code)
self.extra = extra
def __str__(self): def __str__(self):
return 'GeocodingError: %d - %s' % (self.code, desc = 'GeocodingError: %d - %s' % (self.code,
self.STATUS_CODES[self.code]) self.STATUS_CODES[self.code])
if self.extra:
desc += ' (%s)' % self.extra
class ShapefileError(Exception): class ShapefileError(Exception):
""" Exception for problems with census shapefiles.""" """ Exception for problems with census shapefiles."""
def __init__(self, message): def __init__(self, message):
Exception.__init__(self, message) Exception.__init__(self, message)
def __str__(self): def __str__(self):
return 'ShapefileError: %s' % (self.message) return 'ShapefileError: %s' % (self.message)
### Census Shapefiles ### ### Census Shapefiles ###
@ -115,7 +120,7 @@ FIPS_TO_STATE = {
class Entity(object): class Entity(object):
""" A named list of polygons associated with a political boundary. """ A named list of polygons associated with a political boundary.
eg. a state, congressional district, or school district""" eg. a state, congressional district, or school district"""
def __init__(self, name, entity, vertices, extents): def __init__(self, name, entity, vertices, extents):
@ -187,10 +192,10 @@ class State(Entity):
def from_shapefile(obj, rec): def from_shapefile(obj, rec):
""" Construct a State from a census.gov shapefile """ """ Construct a State from a census.gov shapefile """
return State(obj.vertices(), obj.extents(), rec['NAME']) return State(obj.vertices(), obj.extents(), rec['NAME'])
def read_census_shapefile(filename): def read_census_shapefile(filename):
"""Read census shapefile and return list of entity-derived objects. """Read census shapefile and return list of entity-derived objects.
Given the base name of a census .shp/.dbf file returns a list of all Given the base name of a census .shp/.dbf file returns a list of all
Entity-derived objects described by the file. Entity-derived objects described by the file.
""" """
@ -219,42 +224,40 @@ def read_census_shapefile(filename):
# shp.info()[0] is the number of objects # shp.info()[0] is the number of objects
return [Entity.from_shapefile(shp.read_object(i), dbf.read_record(i)) return [Entity.from_shapefile(shp.read_object(i), dbf.read_record(i))
for i in xrange(shape_count)] for i in xrange(shape_count)]
### Geocoding ###
### Geocoding ###
class AddressToDistrictService(object): class AddressToDistrictService(object):
"""Reusable service which maps addresses to districts using the census """Abstract base class for service which maps addresses to districts using
data and the google geocoder. the census data and a geocoder."""
Usage:
service = AddressToDistrictService('google-maps-apikey','path-to-cd99')
lat,lng,district = service.address_to_district('address')
"""
def __init__(self, apikey, census_file): GEOCODER_GMAPS = 1
GEOCODER_US = 2
def __init__(self, census_file, geocoder=GEOCODER_US, apikey=None):
"""AddressToDistrictService constructor """AddressToDistrictService constructor
Initialize given a google maps API key and a path to a census.gov Initialize given a path to a census.gov all congressional districts
all congressional districts (cd99) dataset. (cd99) dataset.
Google maps API keys are available from: The cd99_110 dataset is available from:
http://www.google.com/apis/maps/signup.html
The cd99_110 dataset is available from:
http://www.census.gov/geo/www/cob/cd110.html http://www.census.gov/geo/www/cob/cd110.html
""" """
if geocoder == self.GEOCODER_GMAPS and not apikey:
raise GeocodingError(610) # bad api key
self.apikey = apikey
self.boundaries = read_census_shapefile(census_file) self.boundaries = read_census_shapefile(census_file)
self.geocoder = geocoder
self.apikey = apikey
def geocode(self, address): def _google_geocode(self, address):
"""Convert an address into a latitude/longitude via google maps""" """Convert an address into a latitude/longitude via google maps"""
url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \ url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \
(urllib.quote(address), self.apikey) (urllib.quote(address), self.apikey)
# returns status,level-of-detail,lat,long # returns status,level-of-detail,lat,long
status, lat, lat, lng = urllib.urlopen(url).read().split(',') status, _, lat, lng = urllib.urlopen(url).read().split(',')
# 200 - OK # 200 - OK
if status == '200': if status == '200':
@ -262,15 +265,41 @@ class AddressToDistrictService(object):
else: else:
raise GeocodingError(status) raise GeocodingError(status)
def _geocoderus_geocode(self, address):
"""Convert an address into a latitude/longitude via geocoder.us"""
if not address:
raise GeocodingError(601) # empty address
url = 'http://rpc.geocoder.us/service/csv?address=%s' % \
urllib.quote(address)
data = urllib.urlopen(url).readline() # only get first line for now
# returns lat,long,street,city,state,zip or #: errmsg
if data.startswith('2:'):
raise GeocodingError(602) # address not found
try:
lat, lng, _, _, _, _ = data.split(',')
return lat, lng
except ValueError:
raise GeocodingError(500, data) # unmapped error
def lat_long_to_district(self, lat, lng):
""" Obtain the district containing a given latitude and longitude."""
flat, flng = float(lat), float(lng)
return lat, lng, [(cb.state, cb.district) for cb in self.boundaries
if cb.contains((flng,flat))]
def address_to_district(self, address): def address_to_district(self, address):
"""Given an address returns the congressional district it lies within. """Given an address returns the congressional district it lies within.
This function works by geocoding the address and then finding the district
that the returned lat/long lies within.
"""
lat, lng = self.geocode(address) This function works by geocoding the address and then finding the district
flat, flng = float(lat), float(lng) that the returned lat/long lies within.
return lat, lng, [(cb.state, cb.district) for cb in self.boundaries """
if cb.contains((flng,flat))] if self.geocoder == self.GEOCODER_GMAPS:
lat, lng = self._google_geocode(address)
elif self.geocoder == self.GEOCODER_US:
lat, lng = self._geocoderus_geocode(address)
return self.lat_long_to_district(lat, lng)