geocoder.us changes

git-svn-id: https://polipoly.googlecode.com/svn/trunk@11 1885ebd5-0a40-0410-88a4-770918bee656
This commit is contained in:
james.p.turk 2008-01-17 19:05:20 +00:00
parent 9f77360de1
commit 78f2083e8e
3 changed files with 114 additions and 84 deletions

View File

@ -18,14 +18,15 @@ import cgi
import re
from polipoly import AddressToDistrictService, GeocodingError
PATH_TO_CDFILES = 'congdist/cd99_110'
GEOCODER = AddressToDistrictService.GEOCODER_US
GMAPS_API_KEY = 'define-me'
PATH_TO_CDFILES = 'cd99_110'
class ApiException(Exception):
# these codes are in the 300s to fit the sunlight API
STATUS_CODES = {
300: 'Google returned a server error when attempting to geocode',
300: 'Geocoder returned a server error when attempting to geocode',
301: 'Empty address string',
302: 'Unknown address',
303: 'Prohibited address',
@ -56,7 +57,7 @@ def main():
raise ApiException(305)
# create service and get a district
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES)
service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
try:
lat, lng, districts = service.address_to_district(addr)
except GeocodingError, ge:

View File

@ -11,33 +11,33 @@ Be sure to set GMAPS_API_KEY and PATH_TO_CDFILES appropriately:
import csv
from polipoly import AddressToDistrictService
GMAPS_API_KEY = 'define-me'
PATH_TO_CDFILES = ''
PATH_TO_CDFILES = 'congdist/cd99_110'
GEOCODER = AddressToDistrictService.GEOCODER_US
GMAPS_API_KEY = None
# This sample data was collected from Project Vote Smart
SAMPLE_DATA = '''representative,state,district,address
Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"
Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"
Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"
Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"
Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"
Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"
Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"
Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"
Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"
John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"
Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"
Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"
Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"
Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"
Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"'''
SAMPLE_DATA = ['representative,state,district,address',
'Josiah Bonner,AL,01,"1141 Montlimar Drive, Suite 3010, Mobile, AL 36609"',
'Terry Everett,AL,02,"101 North Main Street, Opp, AL 36467"',
'Michael Rogers,AL,03,"7550 Halcyon Summit Drive, Montgomery, AL 36117"',
'Robert Aderholt,AL,04,"1710 Alabama Avenue, Jasper, AL 35501"',
'Robert Cramer,AL,05,"1011 George Wallace Boulevard, Tuscumbia, AL 35674"',
'Spencer Bachus,AL,06,"1900 International Park Drive, Suite 107, Birmingham, AL 35243"',
'Artur Davis,AL,07,"102 East Washington Street, Suite F, Demopolis, AL 36732"',
'Rick Renzi,AZ,01,"501 North Florence Street, Suite 102, Casa Grande, AZ 85222"',
'Trent Franks,AZ,02,"7121 West Bell Road, Suite 200, Glendale, AZ 85308"',
'John Shadegg,AZ,03,"301 East Bethany Home Road, Suite C-178, Phoenix, AZ 85012"',
'Ed Pastor,AZ,04,"411 North Central Avenue, Suite 150, Phoenix, AZ 85004"',
'Harry Mitchell,AZ,05,"7201 East Camelback Road, Suite 335, Scottsdale, AZ 85251"',
'Jeff Flake,AZ,06,"1640 South Stapley, Suite 215, Mesa, AZ 85204"',
'Raul Grijalva,AZ,07,"1455 South Fourth Avenue, Suite 4, Yuma, AZ 85364"',
'Gabrielle Giffords,AZ,08,"1661 North Swan, Suite 112, Tucson, AZ 85712"']
service = AddressToDistrictService(GMAPS_API_KEY, PATH_TO_CDFILES)
service = AddressToDistrictService(PATH_TO_CDFILES, GEOCODER, GMAPS_API_KEY)
records = csv.DictReader(SAMPLE_DATA)
for rec in records:
lat, lng, dists = service.address_to_district(rec['address'])
print '%s\tcorrect: %s-%s\tfound: %s-%s' % (rec['representative'],
rec['state'], rec['district'], dists[0][0], dists[0][1])

View File

@ -9,8 +9,8 @@ District boundaries.
"""
__author__ = "James Turk (james.p.turk@gmail.com)"
__version__ = "0.1.0"
__copyright__ = "Copyright (c) 2007 Sunlight Labs"
__version__ = "0.1.5"
__copyright__ = "Copyright (c) 2007-2008 Sunlight Labs"
__license__ = "BSD"
import urllib
@ -81,13 +81,18 @@ class GeocodingError(Exception):
610: 'Bad API Key',
620: 'Too Many Requests'}
def __init__(self, code, extra=None):
    """Create a geocoding error for a numeric status code.

    code  -- status code; anything int() accepts (e.g. 602 or '602').
    extra -- optional detail that __str__ appends to the description.
    """
    Exception.__init__(self)
    # Normalise the code to an int so it can be used as a STATUS_CODES key
    # even when the geocoder handed it back as a string.
    numeric_code = int(code)
    self.code = numeric_code
    self.extra = extra
def __str__(self):
    """Return a readable description of this geocoding error.

    The text has the form 'GeocodingError: <code> - <description>' and,
    when self.extra is set, ends with the extra detail in parentheses.
    """
    # Fall back to a generic description so str() never raises KeyError for
    # a code that is missing from STATUS_CODES.
    reason = self.STATUS_CODES.get(self.code, 'Unknown error')
    desc = 'GeocodingError: %d - %s' % (self.code, reason)
    if self.extra:
        desc += ' (%s)' % self.extra
    # __str__ must return the string; falling off the end returns None and
    # makes str(error) raise TypeError.
    return desc
class ShapefileError(Exception):
""" Exception for problems with census shapefiles."""
def __init__(self, message):
@ -224,37 +229,35 @@ def read_census_shapefile(filename):
### Geocoding ###
class AddressToDistrictService(object):
"""Reusable service which maps addresses to districts using the census
data and the google geocoder.
"""Abstract base class for service which maps addresses to districts using
the census data and a geocoder."""
Usage:
service = AddressToDistrictService('google-maps-apikey','path-to-cd99')
lat,lng,district = service.address_to_district('address')
"""
GEOCODER_GMAPS = 1
GEOCODER_US = 2
def __init__(self, census_file, geocoder=GEOCODER_US, apikey=None):
    """AddressToDistrictService constructor

    Initialize given a path to a census.gov all congressional districts
    (cd99) dataset.

    The cd99_110 dataset is available from:
        http://www.census.gov/geo/www/cob/cd110.html

    geocoder selects the geocoding backend and should be GEOCODER_US
    (the default) or GEOCODER_GMAPS.

    apikey is the Google maps API key; it is required when geocoder is
    GEOCODER_GMAPS. Google maps API keys are available from:
        http://www.google.com/apis/maps/signup.html

    Raises GeocodingError(610) when GEOCODER_GMAPS is selected without
    an apikey.
    """
    # Fail before loading the shapefile if the google geocoder cannot work.
    if geocoder == self.GEOCODER_GMAPS and not apikey:
        raise GeocodingError(610)   # bad api key

    self.boundaries = read_census_shapefile(census_file)
    self.geocoder = geocoder
    self.apikey = apikey
def geocode(self, address):
def _google_geocode(self, address):
"""Convert an address into a latitude/longitude via google maps"""
url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \
(urllib.quote(address), self.apikey)
# returns status,level-of-detail,lat,long
status, lat, lat, lng = urllib.urlopen(url).read().split(',')
status, _, lat, lng = urllib.urlopen(url).read().split(',')
# 200 - OK
if status == '200':
@ -262,15 +265,41 @@ class AddressToDistrictService(object):
else:
raise GeocodingError(status)
def _geocoderus_geocode(self, address):
    """Convert an address into a latitude/longitude via geocoder.us.

    Returns a (lat, lng) tuple of strings taken from the first line of
    the service's CSV reply.

    Raises GeocodingError 601 for an empty address, 602 when the reply
    starts with '2:', and 500 (with the raw reply attached as extra) when
    the reply does not split into the six expected fields.
    """
    if not address:
        raise GeocodingError(601)   # empty address

    url = 'http://rpc.geocoder.us/service/csv?address=%s' % \
        urllib.quote(address)

    # Always release the HTTP handle, even if reading the reply fails.
    response = urllib.urlopen(url)
    try:
        data = response.readline()  # only get first line for now
    finally:
        response.close()

    # returns lat,long,street,city,state,zip or #: errmsg
    if data.startswith('2:'):
        raise GeocodingError(602)   # address not found

    fields = data.split(',')
    if len(fields) != 6:
        raise GeocodingError(500, data)     # unmapped error
    return fields[0], fields[1]
def lat_long_to_district(self, lat, lng):
    """Find the district(s) whose boundary contains a latitude/longitude.

    lat and lng may be anything float() accepts. Returns a tuple
    (lat, lng, matches) where lat and lng are handed back unchanged and
    matches is a list of (state, district) pairs, one for every entry of
    self.boundaries whose contains() accepts the point.
    """
    latitude = float(lat)
    longitude = float(lng)
    point = (longitude, latitude)   # boundaries are queried as (lng, lat)

    matches = []
    for boundary in self.boundaries:
        if boundary.contains(point):
            matches.append((boundary.state, boundary.district))
    return lat, lng, matches
def address_to_district(self, address):
    """Given an address returns the congressional district it lies within.

    This function works by geocoding the address with the configured
    geocoder and then finding the district(s) that contain the returned
    lat/long.

    Returns the (lat, lng, districts) result of lat_long_to_district for
    the geocoded point. Raises ValueError if self.geocoder is neither
    GEOCODER_GMAPS nor GEOCODER_US.
    """
    if self.geocoder == self.GEOCODER_GMAPS:
        lat, lng = self._google_geocode(address)
    elif self.geocoder == self.GEOCODER_US:
        lat, lng = self._geocoderus_geocode(address)
    else:
        # Without this branch lat/lng would be unbound and the failure
        # would surface as a confusing UnboundLocalError.
        raise ValueError('unknown geocoder: %r' % (self.geocoder,))
    return self.lat_long_to_district(lat, lng)