geocoder.us changes
git-svn-id: https://polipoly.googlecode.com/svn/trunk@11 1885ebd5-0a40-0410-88a4-770918bee656
This commit is contained in:
parent
9f77360de1
commit
78f2083e8e
@ -18,14 +18,15 @@ import cgi
|
||||
import re
|
||||
from polipoly import AddressToDistrictService, GeocodingError
|
||||
|
||||
PATH_TO_CDFILES = 'congdist/cd99_110'
|
||||
GEOCODER = AddressToDistrictService.GEOCODER_US
|
||||
GMAPS_API_KEY = 'define-me'
|
||||
PATH_TO_CDFILES = 'cd99_110'
|
||||
|
||||
class ApiException(Exception):
|
||||
|
||||
# these codes are in the 300s to fit the sunlight API
|
||||
STATUS_CODES = {
|
||||
300: 'Google returned a server error when attempting to geocode',
|
||||
300: 'Geocoder returned a server error when attempting to geocode',
|
||||
301: 'Empty address string',
|
||||
302: 'Unknown address',
|
||||
303: 'Prohibited address',
|
||||
@ -36,7 +37,7 @@ class ApiException(Exception):
|
||||
|
||||
def __init__(self, code):
|
||||
self.code = code
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return '%d: %s' % (self.code, self.STATUS_CODES[self.code])
|
||||
|
||||
@ -45,32 +46,32 @@ def main():
|
||||
fields = cgi.FieldStorage()
|
||||
addr = fields.getvalue('address') or ''
|
||||
output = fields.getvalue('output')
|
||||
|
||||
|
||||
# discard blank addresses as error 301
|
||||
if re.match('^\s*$', addr):
|
||||
raise ApiException(301)
|
||||
|
||||
|
||||
# discard PO Box addresses as error 305
|
||||
pobox = re.compile('[Pp]\.?[Oo]\.?\s*(?:box|Box|BOX)')
|
||||
if pobox.search(addr):
|
||||
raise ApiException(305)
|
||||
|
||||
# create service and get a district
|
||||
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES)
|
||||
service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
|
||||
try:
|
||||
lat, lng, districts = service.address_to_district(addr)
|
||||
except GeocodingError, ge:
|
||||
# convert GeocodingError to API error code (300-303 and 320)
|
||||
err_dict = {500: 300, 601: 301, 602: 302, 603: 303, 620: 320}
|
||||
raise ApiException(err_dict.get(ge.code,304))
|
||||
|
||||
|
||||
# 306: address did not fall within congressional district
|
||||
if len(districts) == 0:
|
||||
raise ApiException(306)
|
||||
|
||||
# XML output
|
||||
if output == 'xml':
|
||||
dist_str = '\n'.join([' <district state="%s">%s</district>' % dist
|
||||
dist_str = '\n'.join([' <district state="%s">%s</district>' % dist
|
||||
for dist in districts])
|
||||
print 'Content-type: text/xml\n'
|
||||
print '''<results>
|
||||
@ -81,10 +82,10 @@ def main():
|
||||
%s
|
||||
</districts>
|
||||
</results>''' % (addr, lat, lng, dist_str)
|
||||
|
||||
|
||||
# JSON output (default)
|
||||
else:
|
||||
dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist
|
||||
dist_str = ','.join(['{"state":"%s", "district":"%s"}' % dist
|
||||
for dist in districts])
|
||||
print 'Content-type: application/json\n'
|
||||
print '''{"address":"%s", "latitude":"%s", "longitude":"%s",
|
||||
|
@ -11,33 +11,33 @@ Be sure to set GMAPS_API_KEY and PATH_TO_CDFILES appropriately:
|
||||
import csv
|
||||
from polipoly import AddressToDistrictService
|
||||
|
||||
GMAPS_API_KEY = 'define-me'
|
||||
PATH_TO_CDFILES = ''
|
||||
PATH_TO_CDFILES = 'congdist/cd99_110'
|
||||
GEOCODER = AddressToDistrictService.GEOCODER_US
|
||||
GMAPS_API_KEY = None
|
||||
|
||||
# This sample data was collected from Project Vote Smart
|
||||
|
||||
SAMPLE_DATA = '''representative,state,district,address
|
||||
Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"
|
||||
Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"
|
||||
Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"
|
||||
Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"
|
||||
Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"
|
||||
Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"
|
||||
Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"
|
||||
Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"
|
||||
Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"
|
||||
John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"
|
||||
Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"
|
||||
Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"
|
||||
Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"
|
||||
Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"
|
||||
Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"'''
|
||||
SAMPLE_DATA = ['representative,state,district,address',
|
||||
'Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"',
|
||||
'Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"',
|
||||
'Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"',
|
||||
'Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"',
|
||||
'Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"',
|
||||
'Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"',
|
||||
'Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"',
|
||||
'Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"',
|
||||
'Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"',
|
||||
'John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"',
|
||||
'Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"',
|
||||
'Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"',
|
||||
'Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"',
|
||||
'Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"',
|
||||
'Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"']
|
||||
|
||||
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES)
|
||||
service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
|
||||
|
||||
records = csv.DictReader(SAMPLE_DATA)
|
||||
for rec in records:
|
||||
lat, lng, dists = service.address_to_district(rec['address'])
|
||||
print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'],
|
||||
print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'],
|
||||
rec['state'], rec['district'], dists[0][0], dists[0][1])
|
||||
|
||||
|
135
polipoly.py
135
polipoly.py
@ -1,16 +1,16 @@
|
||||
"""Python library for working with political boundaries
|
||||
|
||||
Political boundaries are defined by one or more polygons and obtained
|
||||
from census.gov shapefiles. Census boundary shapefiles are available at
|
||||
http://www.census.gov/geo/www/cob/bdy_files.html.
|
||||
from census.gov shapefiles. Census boundary shapefiles are available at
|
||||
http://www.census.gov/geo/www/cob/bdy_files.html.
|
||||
|
||||
At the moment this library has only been used with State and Congressional
|
||||
District boundaries.
|
||||
"""
|
||||
|
||||
__author__ = "James Turk (james.p.turk@gmail.com)"
|
||||
__version__ = "0.1.0"
|
||||
__copyright__ = "Copyright (c) 2007 Sunlight Labs"
|
||||
__version__ = "0.1.5"
|
||||
__copyright__ = "Copyright (c) 2007-2008 Sunlight Labs"
|
||||
__license__ = "BSD"
|
||||
|
||||
import urllib
|
||||
@ -22,14 +22,14 @@ from dbflib import DBFFile
|
||||
# internally used helper function
|
||||
def left_of_edge(point, ep1, ep2):
    """Report whether point lies left of the infinite line through ep1 and ep2.

    All three arguments are indexable (x, y) pairs. The answer is the sign
    test of the 2D cross product of the vectors point->ep1 and point->ep2:
    True only when that product is strictly negative.
    """
    px, py = point[0], point[1]
    to_first = (ep1[0] - px, ep1[1] - py)
    to_second = (ep2[0] - px, ep2[1] - py)
    cross = to_first[0] * to_second[1] - to_second[0] * to_first[1]
    return cross < 0
|
||||
|
||||
|
||||
|
||||
|
||||
class Polygon(object):
|
||||
''' Simple polygon class used for point containment testing and conversion
|
||||
|
||||
Allows for testing if a polygon contains a point as well as conversion
|
||||
|
||||
Allows for testing if a polygon contains a point as well as conversion
|
||||
to various portable representations '''
|
||||
|
||||
def __init__(self, vertices):
|
||||
@ -45,7 +45,7 @@ class Polygon(object):
|
||||
for i in xrange(len(self.vertices)-1):
|
||||
|
||||
# add wind if edge crosses point going up and point is to left
|
||||
if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and
|
||||
if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and
|
||||
left_of_edge(point, self.vertices[i], self.vertices[i+1])):
|
||||
winds += 1
|
||||
# end wind if edge crosses point going down and point is to right
|
||||
@ -58,7 +58,7 @@ class Polygon(object):
|
||||
|
||||
def to_kml(self):
|
||||
''' get KML polygon representation '''
|
||||
|
||||
|
||||
coordstr = ' '.join("%.15f,%.15f" % v for v in self.vertices)
|
||||
|
||||
return '''<Polygon><outerBoundaryIs><LinearRing>
|
||||
@ -70,10 +70,10 @@ class Polygon(object):
|
||||
class GeocodingError(Exception):
|
||||
"""Custom exception which maps possible google geocoder error codes to
|
||||
human readable strings.
|
||||
|
||||
|
||||
See http://www.google.com/apis/maps/documentation/reference.html#GGeoStatusCode
|
||||
"""
|
||||
|
||||
|
||||
STATUS_CODES = {500: 'Unknown Geocoding Server Error',
|
||||
601: 'Empty Address',
|
||||
602: 'Unknown Address',
|
||||
@ -81,21 +81,26 @@ class GeocodingError(Exception):
|
||||
610: 'Bad API Key',
|
||||
620: 'Too Many Requests'}
|
||||
|
||||
def __init__(self, code):
|
||||
def __init__(self, code, extra=None):
|
||||
Exception.__init__(self)
|
||||
self.code = int(code)
|
||||
|
||||
self.extra = extra
|
||||
|
||||
def __str__(self):
|
||||
return 'GeocodingError: %d - %s' % (self.code,
|
||||
desc = 'GeocodingError: %d - %s' % (self.code,
|
||||
self.STATUS_CODES[self.code])
|
||||
if self.extra:
|
||||
desc += ' (%s)' % self.extra
|
||||
|
||||
|
||||
class ShapefileError(Exception):
    """Exception for problems with census shapefiles."""

    def __init__(self, message):
        """Store the human-readable problem description.

        message -- description of what went wrong with the shapefile
        """
        Exception.__init__(self, message)
        # BaseException.message was deprecated in Python 2.6 and removed in
        # Python 3, so keep an explicit copy instead of relying on it.
        self.message = message

    def __str__(self):
        """Return the message prefixed with 'ShapefileError: '."""
        # One-element tuple so a tuple-valued message is formatted as a
        # single value rather than unpacked as format arguments.
        return 'ShapefileError: %s' % (self.message,)
|
||||
|
||||
|
||||
|
||||
### Census Shapefiles ###
|
||||
|
||||
@ -115,7 +120,7 @@ FIPS_TO_STATE = {
|
||||
|
||||
class Entity(object):
|
||||
""" A named list of polygons associated with a political boundary.
|
||||
|
||||
|
||||
eg. a state, congressional district, or school district"""
|
||||
|
||||
def __init__(self, name, entity, vertices, extents):
|
||||
@ -187,10 +192,10 @@ class State(Entity):
|
||||
def from_shapefile(obj, rec):
|
||||
""" Construct a State from a census.gov shapefile """
|
||||
return State(obj.vertices(), obj.extents(), rec['NAME'])
|
||||
|
||||
|
||||
def read_census_shapefile(filename):
|
||||
"""Read census shapefile and return list of entity-derived objects.
|
||||
|
||||
|
||||
Given the base name of a census .shp/.dbf file returns a list of all
|
||||
Entity-derived objects described by the file.
|
||||
"""
|
||||
@ -219,42 +224,40 @@ def read_census_shapefile(filename):
|
||||
# shp.info()[0] is the number of objects
|
||||
return [Entity.from_shapefile(shp.read_object(i), dbf.read_record(i))
|
||||
for i in xrange(shape_count)]
|
||||
|
||||
|
||||
### Geocoding ###
|
||||
|
||||
|
||||
### Geocoding ###
|
||||
|
||||
class AddressToDistrictService(object):
|
||||
"""Reusable service which maps addresses to districts using the census
|
||||
data and the google geocoder.
|
||||
|
||||
Usage:
|
||||
service = AddressToDistrictService('google-maps-apikey','path-to-cd99')
|
||||
lat,lng,district = service.address_to_district('address')
|
||||
"""
|
||||
"""Abstract base class for service which maps addresses to districts using
|
||||
the census data and a geocoder."""
|
||||
|
||||
def __init__(self, apikey, census_file):
|
||||
GEOCODER_GMAPS = 1
|
||||
GEOCODER_US = 2
|
||||
|
||||
def __init__(self, census_file, geocoder=GEOCODER_US, apikey=None):
    """AddressToDistrictService constructor

    Initialize given a path to a census.gov all congressional districts
    (cd99) dataset.

    The cd99_110 dataset is available from:
        http://www.census.gov/geo/www/cob/cd110.html

    census_file -- path passed to read_census_shapefile
    geocoder    -- GEOCODER_US (default) or GEOCODER_GMAPS
    apikey      -- API key; must be non-empty when geocoder is
                   GEOCODER_GMAPS, otherwise GeocodingError(610) is raised
    """
    # Refuse a keyless Google setup before the shapefile is read.
    gmaps_selected = (geocoder == self.GEOCODER_GMAPS)
    if gmaps_selected and not apikey:
        raise GeocodingError(610)   # bad api key

    self.boundaries = read_census_shapefile(census_file)
    self.geocoder = geocoder
    self.apikey = apikey
|
||||
|
||||
def geocode(self, address):
|
||||
def _google_geocode(self, address):
|
||||
"""Convert an address into a latitude/longitude via google maps"""
|
||||
|
||||
|
||||
url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \
|
||||
(urllib.quote(address), self.apikey)
|
||||
# returns status,level-of-detail,lat,long
|
||||
status, lat, lat, lng = urllib.urlopen(url).read().split(',')
|
||||
status, _, lat, lng = urllib.urlopen(url).read().split(',')
|
||||
|
||||
# 200 - OK
|
||||
if status == '200':
|
||||
@ -262,15 +265,41 @@ class AddressToDistrictService(object):
|
||||
else:
|
||||
raise GeocodingError(status)
|
||||
|
||||
def _geocoderus_geocode(self, address):
    """Convert an address into a latitude/longitude via geocoder.us.

    Returns a (lat, lng) tuple holding the first two fields of the
    service's CSV reply, as strings.

    Raises GeocodingError with code:
        601 -- address is empty or only whitespace
        602 -- the reply line starts with '2:' (address not found)
        500 -- any other reply that is not a six-field CSV line; the raw
               reply is attached as the exception's extra detail
    """
    if not address or not address.strip():
        raise GeocodingError(601)   # empty address

    url = 'http://rpc.geocoder.us/service/csv?address=%s' % \
        urllib.quote(address)

    # Always release the HTTP connection, even if reading fails.
    response = urllib.urlopen(url)
    try:
        # only get first line for now; strip the line terminator so it
        # does not leak into the last field or into error details
        data = response.readline().strip()
    finally:
        response.close()

    # returns lat,long,street,city,state,zip or #: errmsg
    if data.startswith('2:'):
        raise GeocodingError(602)   # address not found

    fields = data.split(',')
    if len(fields) != 6:
        raise GeocodingError(500, data)     # unmapped error
    return fields[0], fields[1]
|
||||
|
||||
def lat_long_to_district(self, lat, lng):
|
||||
""" Obtain the district containing a given latitude and longitude."""
|
||||
flat, flng = float(lat), float(lng)
|
||||
return lat, lng, [(cb.state, cb.district) for cb in self.boundaries
|
||||
if cb.contains((flng,flat))]
|
||||
|
||||
def address_to_district(self, address):
|
||||
"""Given an address returns the congressional district it lies within.
|
||||
|
||||
This function works by geocoding the address and then finding the point
|
||||
that the returned lat/long returned lie within.
|
||||
"""
|
||||
|
||||
lat, lng = self.geocode(address)
|
||||
flat, flng = float(lat), float(lng)
|
||||
return lat, lng, [(cb.state, cb.district) for cb in self.boundaries
|
||||
if cb.contains((flng,flat))]
|
||||
|
||||
This function works by geocoding the address and then finding the point
|
||||
that the returned lat/long returned lie within.
|
||||
"""
|
||||
if self.geocoder == self.GEOCODER_GMAPS:
|
||||
lat, lng = self._google_geocode(address)
|
||||
elif self.geocoder == self.GEOCODER_US:
|
||||
lat, lng = self._geocoderus_geocode(address)
|
||||
|
||||
return self.lat_long_to_district(lat, lng)
|
||||
|
Loading…
Reference in New Issue
Block a user