geocoder.us changes

git-svn-id: https://polipoly.googlecode.com/svn/trunk@11 1885ebd5-0a40-0410-88a4-770918bee656
This commit is contained in:
james.p.turk 2008-01-17 19:05:20 +00:00
parent 9f77360de1
commit 78f2083e8e
3 changed files with 114 additions and 84 deletions

View File

@ -18,14 +18,15 @@ import cgi
import re
from polipoly import AddressToDistrictService, GeocodingError
PATH_TO_CDFILES = 'congdist/cd99_110'
GEOCODER = AddressToDistrictService.GEOCODER_US
GMAPS_API_KEY = 'define-me'
PATH_TO_CDFILES = 'cd99_110'
class ApiException(Exception):
# these codes are in the 300s to fit the sunlight API
STATUS_CODES = {
300: 'Google returned a server error when attempting to geocode',
300: 'Geocoder returned a server error when attempting to geocode',
301: 'Empty address string',
302: 'Unknown address',
303: 'Prohibited address',
@ -36,7 +37,7 @@ class ApiException(Exception):
def __init__(self, code):
self.code = code
def __str__(self):
return '%d: %s' % (self.code, self.STATUS_CODES[self.code])
@ -45,32 +46,32 @@ def main():
fields = cgi.FieldStorage()
addr = fields.getvalue('address') or ''
output = fields.getvalue('output')
# discard blank addresses as error 301
if re.match('^\s*$', addr):
raise ApiException(301)
# discard PO Box addresses as error 305
pobox = re.compile('[Pp]\.?[Oo]\.?\s*(?:box|Box|BOX)')
if pobox.search(addr):
raise ApiException(305)
# create service and get a district
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES)
service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
try:
lat, lng, districts = service.address_to_district(addr)
except GeocodingError, ge:
# convert GeocodingError to API error code (300-303 and 320)
err_dict = {500: 300, 601: 301, 602: 302, 603: 303, 620: 320}
raise ApiException(err_dict.get(ge.code,304))
# 306: address did not fall within congressional district
if len(districts) == 0:
raise ApiException(306)
# XML output
if output == 'xml':
dist_str = '\n'.join([' <district state="%s">%s</district>' % dist
dist_str = '\n'.join([' <district state="%s">%s</district>' % dist
for dist in districts])
print 'Content-type: text/xml\n'
print '''<results>
@ -81,10 +82,10 @@ def main():
%s
</districts>
</results>''' % (addr, lat, lng, dist_str)
# JSON output (default)
else:
dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist
dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist
for dist in districts])
print 'Content-type: application/json\n'
print '''{"address":"%s", "latitude":"%s", "longitude":"%s",

View File

@ -11,33 +11,33 @@ Be sure to set GMAPS_API_KEY and PATH_TO_CDFILES appropriately:
import csv
from polipoly import AddressToDistrictService
GMAPS_API_KEY = 'define-me'
PATH_TO_CDFILES = ''
PATH_TO_CDFILES = 'congdist/cd99_110'
GEOCODER = AddressToDistrictService.GEOCODER_US
GMAPS_API_KEY = None
# This sample data was collected from Project Vote Smart
SAMPLE_DATA = '''representative,state,district,address
Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"
Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"
Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"
Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"
Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"
Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"
Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"
Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"
Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"
John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"
Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"
Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"
Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"
Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"
Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"'''
SAMPLE_DATA = ['representative,state,district,address',
'Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"',
'Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"',
'Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"',
'Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"',
'Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"',
'Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"',
'Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"',
'Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"',
'Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"',
'John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"',
'Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"',
'Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"',
'Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"',
'Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"',
'Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"']
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES)
service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
records = csv.DictReader(SAMPLE_DATA)
for rec in records:
lat, lng, dists = service.address_to_district(rec['address'])
print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'],
print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'],
rec['state'], rec['district'], dists[0][0], dists[0][1])

View File

@ -1,16 +1,16 @@
"""Python library for working with political boundaries
Political boundaries are defined by one or more polygons and obtained
from census.gov shapefiles. Census boundary shapefiles are available at
http://www.census.gov/geo/www/cob/bdy_files.html.
from census.gov shapefiles. Census boundary shapefiles are available at
http://www.census.gov/geo/www/cob/bdy_files.html.
At the moment this library has only been used with State and Congressional
District boundaries.
"""
__author__ = "James Turk (james.p.turk@gmail.com)"
__version__ = "0.1.0"
__copyright__ = "Copyright (c) 2007 Sunlight Labs"
__version__ = "0.1.5"
__copyright__ = "Copyright (c) 2007-2008 Sunlight Labs"
__license__ = "BSD"
import urllib
@ -22,14 +22,14 @@ from dbflib import DBFFile
# internally used helper function
def left_of_edge(point, ep1, ep2):
    """Determine if point is left of infinite line touching ep1 and ep2.

    point, ep1 and ep2 are indexable pairs; index 0 and index 1 are the two
    coordinates.  The result is True when the 2D cross product of the
    vectors point->ep1 and point->ep2 is strictly negative, so a point that
    lies exactly on the line yields False.

    NOTE(review): with x to the right and y up, a negative product puts the
    point clockwise of the directed edge ep1->ep2 -- confirm that this is
    the orientation Polygon.contains expects.
    """
    cross = ((ep1[0] - point[0]) * (ep2[1] - point[1]) -
             (ep2[0] - point[0]) * (ep1[1] - point[1]))
    return cross < 0
class Polygon(object):
''' Simple polygon class used for point containment testing and conversion
Allows for testing if a polygon contains a point as well as conversion
Allows for testing if a polygon contains a point as well as conversion
to various portable representations '''
def __init__(self, vertices):
@ -45,7 +45,7 @@ class Polygon(object):
for i in xrange(len(self.vertices)-1):
# add wind if edge crosses point going up and point is to left
if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and
if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and
left_of_edge(point, self.vertices[i], self.vertices[i+1])):
winds += 1
# end wind if edge crosses point going down and point is to right
@ -58,7 +58,7 @@ class Polygon(object):
def to_kml(self):
''' get KML polygon representation '''
coordstr = ' '.join("%.15f,%.15f" % v for v in self.vertices)
return '''<Polygon><outerBoundaryIs><LinearRing>
@ -70,10 +70,10 @@ class Polygon(object):
class GeocodingError(Exception):
"""Custom exception which maps possible google geocoder error codes to
human readable strings.
See http://www.google.com/apis/maps/documentation/reference.html#GGeoStatusCode
"""
STATUS_CODES = {500: 'Unknown Geocoding Server Error',
601: 'Empty Address',
602: 'Unknown Address',
@ -81,21 +81,26 @@ class GeocodingError(Exception):
610: 'Bad API Key',
620: 'Too Many Requests'}
def __init__(self, code):
def __init__(self, code, extra=None):
Exception.__init__(self)
self.code = int(code)
self.extra = extra
def __str__(self):
return 'GeocodingError: %d - %s' % (self.code,
desc = 'GeocodingError: %d - %s' % (self.code,
self.STATUS_CODES[self.code])
if self.extra:
desc += ' (%s)' % self.extra
class ShapefileError(Exception):
    """Exception for problems with census shapefiles."""

    def __init__(self, message):
        """Store message, the human readable description of the problem."""
        Exception.__init__(self, message)
        # Kept explicitly: the inherited BaseException.message attribute is
        # deprecated since Python 2.6 and absent in Python 3.
        self.message = message

    def __str__(self):
        """Return the message prefixed with 'ShapefileError: '."""
        # one-element tuple so a tuple-valued message formats correctly
        return 'ShapefileError: %s' % (self.message,)
### Census Shapefiles ###
@ -115,7 +120,7 @@ FIPS_TO_STATE = {
class Entity(object):
""" A named list of polygons associated with a political boundary.
eg. a state, congressional district, or school district"""
def __init__(self, name, entity, vertices, extents):
@ -187,10 +192,10 @@ class State(Entity):
def from_shapefile(obj, rec):
""" Construct a State from a census.gov shapefile """
return State(obj.vertices(), obj.extents(), rec['NAME'])
def read_census_shapefile(filename):
"""Read census shapefile and return list of entity-derived objects.
Given the base name of a census .shp/.dbf file returns a list of all
Entity-derived objects described by the file.
"""
@ -219,42 +224,40 @@ def read_census_shapefile(filename):
# shp.info()[0] is the number of objects
return [Entity.from_shapefile(shp.read_object(i), dbf.read_record(i))
for i in xrange(shape_count)]
### Geocoding ###
### Geocoding ###
class AddressToDistrictService(object):
"""Reusable service which maps addresses to districts using the census
data and the google geocoder.
Usage:
service = AddressToDistrictService('google-maps-apikey','path-to-cd99')
lat,lng,district = service.address_to_district('address')
"""
"""Abstract base class for service which maps addresses to districts using
the census data and a geocoder."""
def __init__(self, apikey, census_file):
GEOCODER_GMAPS = 1
GEOCODER_US = 2
def __init__(self, census_file, geocoder=GEOCODER_US, apikey=None):
"""AddressToDistrictService constructor
Initialize given a google maps API key and a path to a census.gov
all congressional districts (cd99) dataset.
Google maps API keys are available from:
http://www.google.com/apis/maps/signup.html
The cd99_110 dataset is available from:
Initialize given a path to a census.gov all congressional districts
(cd99) dataset.
The cd99_110 dataset is available from:
http://www.census.gov/geo/www/cob/cd110.html
"""
if geocoder == self.GEOCODER_GMAPS and not apikey:
raise GeocodingError(610) # bad api key
self.apikey = apikey
self.boundaries = read_census_shapefile(census_file)
self.geocoder = geocoder
self.apikey = apikey
def geocode(self, address):
def _google_geocode(self, address):
"""Convert an address into a latitude/longitude via google maps"""
url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \
(urllib.quote(address), self.apikey)
# returns status,level-of-detail,lat,long
status, lat, lat, lng = urllib.urlopen(url).read().split(',')
status, _, lat, lng = urllib.urlopen(url).read().split(',')
# 200 - OK
if status == '200':
@ -262,15 +265,41 @@ class AddressToDistrictService(object):
else:
raise GeocodingError(status)
def _geocoderus_geocode(self, address):
    """Convert an address into a latitude/longitude via geocoder.us

    Returns a (lat, lng) tuple of strings taken from the first two fields
    of the first line of the reply.

    Raises GeocodingError:
        601 -- address is empty
        602 -- the reply starts with '2:' (address not found)
        500 -- the reply does not split into the six expected fields; the
               reply line is attached as the error's extra detail
    """
    if not address:
        raise GeocodingError(601)   # empty address
    url = 'http://rpc.geocoder.us/service/csv?address=%s' % \
        urllib.quote(address)
    response = urllib.urlopen(url)
    try:
        # only get first line for now; strip the line terminator so it
        # does not end up in the error detail
        data = response.readline().strip()
    finally:
        # release the connection even if reading fails
        response.close()
    # returns lat,long,street,city,state,zip or #: errmsg
    if data.startswith('2:'):
        raise GeocodingError(602)   # address not found
    try:
        lat, lng, _, _, _, _ = data.split(',')
    except ValueError:
        raise GeocodingError(500, data)   # unmapped error
    return lat, lng
def lat_long_to_district(self, lat, lng):
""" Obtain the district containing a given latitude and longitude."""
flat, flng = float(lat), float(lng)
return lat, lng, [(cb.state, cb.district) for cb in self.boundaries
if cb.contains((flng,flat))]
def address_to_district(self, address):
"""Given an address returns the congressional district it lies within.
This function works by geocoding the address and then finding the point
that the returned lat/long returned lie within.
"""
lat, lng = self.geocode(address)
flat, flng = float(lat), float(lng)
return lat, lng, [(cb.state, cb.district) for cb in self.boundaries
if cb.contains((flng,flat))]
This function works by geocoding the address and then finding the point
that the returned lat/long returned lie within.
"""
if self.geocoder == self.GEOCODER_GMAPS:
lat, lng = self._google_geocode(address)
elif self.geocoder == self.GEOCODER_US:
lat, lng = self._geocoderus_geocode(address)
return self.lat_long_to_district(lat, lng)