2007-12-10 09:27:49 +00:00
|
|
|
"""Python library for working with political boundaries
|
2007-11-29 06:40:47 +00:00
|
|
|
|
|
|
|
Political boundaries are defined by one or more polygons and obtained
|
2008-01-17 19:05:20 +00:00
|
|
|
from census.gov shapefiles. Census boundary shapefiles are available at
|
|
|
|
http://www.census.gov/geo/www/cob/bdy_files.html.
|
2007-11-29 06:40:47 +00:00
|
|
|
|
|
|
|
At the moment this library has only been used with State and Congressional
|
|
|
|
District boundaries.
|
|
|
|
"""
|
|
|
|
|
2007-12-10 09:27:49 +00:00
|
|
|
__author__ = "James Turk (james.p.turk@gmail.com)"
|
2008-01-17 19:05:20 +00:00
|
|
|
__version__ = "0.1.5"
|
|
|
|
__copyright__ = "Copyright (c) 2007-2008 Sunlight Labs"
|
2007-12-10 09:27:49 +00:00
|
|
|
__license__ = "BSD"
|
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
import urllib
|
|
|
|
from shapelib import ShapeFile
|
|
|
|
from dbflib import DBFFile
|
|
|
|
|
|
|
|
### General Utilities ###
|
|
|
|
|
|
|
|
# internally used helper function
|
|
|
|
def left_of_edge(point, ep1, ep2):
|
|
|
|
"""Determine if point is left of infinite line touching ep1 and ep2"""
|
2008-01-17 19:05:20 +00:00
|
|
|
return ((ep1[0]-point[0])*(ep2[1]-point[1]) -
|
2007-11-29 06:40:47 +00:00
|
|
|
(ep2[0]-point[0])*(ep1[1]-point[1])) < 0
|
2008-01-17 19:05:20 +00:00
|
|
|
|
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
class Polygon(object):
|
|
|
|
''' Simple polygon class used for point containment testing and conversion
|
2008-01-17 19:05:20 +00:00
|
|
|
|
|
|
|
Allows for testing if a polygon contains a point as well as conversion
|
2007-11-29 06:40:47 +00:00
|
|
|
to various portable representations '''
|
|
|
|
|
|
|
|
def __init__(self, vertices):
|
|
|
|
self.vertices = vertices
|
|
|
|
|
|
|
|
def contains(self, point):
|
|
|
|
''' Determine if point lies within the polygon. '''
|
|
|
|
|
|
|
|
# initially winds is 0
|
|
|
|
winds = 0
|
|
|
|
|
|
|
|
# iterate over edges
|
|
|
|
for i in xrange(len(self.vertices)-1):
|
|
|
|
|
|
|
|
# add wind if edge crosses point going up and point is to left
|
2008-01-17 19:05:20 +00:00
|
|
|
if (self.vertices[i][1] < point[1] < self.vertices[i+1][1] and
|
2007-11-29 06:40:47 +00:00
|
|
|
left_of_edge(point, self.vertices[i], self.vertices[i+1])):
|
|
|
|
winds += 1
|
|
|
|
# end wind if edge crosses point going down and point is to right
|
|
|
|
elif (self.vertices[i][1] > point[1] > self.vertices[i+1][1] and
|
|
|
|
not left_of_edge(point, self.vertices[i], self.vertices[i+1])):
|
|
|
|
winds -= 1
|
|
|
|
|
|
|
|
# point is contained if net winds is not zero
|
|
|
|
return winds != 0
|
|
|
|
|
|
|
|
def to_kml(self):
|
|
|
|
''' get KML polygon representation '''
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
coordstr = ' '.join("%.15f,%.15f" % v for v in self.vertices)
|
|
|
|
|
|
|
|
return '''<Polygon><outerBoundaryIs><LinearRing>
|
|
|
|
<coordinates>%s</coordinates>
|
|
|
|
</LinearRing></outerBoundaryIs></Polygon>''' % coordstr
|
|
|
|
|
|
|
|
### Exceptions ###
|
|
|
|
|
|
|
|
class GeocodingError(Exception):
|
|
|
|
"""Custom exception which maps possible google geocoder error codes to
|
|
|
|
human readable strings.
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
See http://www.google.com/apis/maps/documentation/reference.html#GGeoStatusCode
|
|
|
|
"""
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-12-17 17:44:19 +00:00
|
|
|
STATUS_CODES = {500: 'Unknown Geocoding Server Error',
|
|
|
|
601: 'Empty Address',
|
|
|
|
602: 'Unknown Address',
|
|
|
|
603: 'Prohibited Address',
|
|
|
|
610: 'Bad API Key',
|
|
|
|
620: 'Too Many Requests'}
|
2007-11-29 06:40:47 +00:00
|
|
|
|
2008-01-17 19:05:20 +00:00
|
|
|
def __init__(self, code, extra=None):
|
2007-11-29 06:40:47 +00:00
|
|
|
Exception.__init__(self)
|
2007-12-17 17:44:19 +00:00
|
|
|
self.code = int(code)
|
2008-01-17 19:05:20 +00:00
|
|
|
self.extra = extra
|
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
def __str__(self):
|
2008-01-17 19:05:20 +00:00
|
|
|
desc = 'GeocodingError: %d - %s' % (self.code,
|
2007-11-29 06:40:47 +00:00
|
|
|
self.STATUS_CODES[self.code])
|
2008-01-17 19:05:20 +00:00
|
|
|
if self.extra:
|
|
|
|
desc += ' (%s)' % self.extra
|
|
|
|
|
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
class ShapefileError(Exception):
|
|
|
|
""" Exception for problems with census shapefiles."""
|
|
|
|
def __init__(self, message):
|
|
|
|
Exception.__init__(self, message)
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
def __str__(self):
|
|
|
|
return 'ShapefileError: %s' % (self.message)
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
|
|
|
|
### Census Shapefiles ###
|
|
|
|
|
|
|
|
# Mapping of FIPS codes to Postal Abbreviations
|
|
|
|
# obtained from http://www.itl.nist.gov/fipspubs/fip5-2.htm
|
|
|
|
FIPS_TO_STATE = {
|
|
|
|
'01':'AL', '02':'AK', '04':'AZ', '05':'AR', '06':'CA', '08':'CO', '09':'CT',
|
|
|
|
'10':'DE', '11':'DC', '12':'FL', '13':'GA', '15':'HI', '16':'ID', '17':'IL',
|
|
|
|
'18':'IN', '19':'IA', '20':'KS', '21':'KY', '22':'LA', '23':'ME', '24':'MD',
|
|
|
|
'25':'MA', '26':'MI', '27':'MN', '28':'MS', '29':'MO', '30':'MT', '31':'NE',
|
|
|
|
'32':'NV', '33':'NH', '34':'NJ', '35':'NM', '36':'NY', '37':'NC', '38':'ND',
|
|
|
|
'39':'OH', '40':'OK', '41':'OR', '42':'PA', '44':'RI', '45':'SC', '46':'SD',
|
|
|
|
'47':'TN', '48':'TX', '49':'UT', '50':'VT', '51':'VA', '53':'WA', '54':'WV',
|
|
|
|
'55':'WI', '56':'WY', '72':'PR' }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Entity(object):
|
|
|
|
""" A named list of polygons associated with a political boundary.
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
eg. a state, congressional district, or school district"""
|
|
|
|
|
|
|
|
def __init__(self, name, entity, vertices, extents):
|
|
|
|
self.name = name
|
|
|
|
self.entity = entity
|
|
|
|
self.polygons = [Polygon(vlist) for vlist in vertices]
|
|
|
|
self.extents = extents
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def from_shapefile(obj, rec):
|
|
|
|
""" Factory function that creates a Entity (or derived class) from
|
|
|
|
a census.gov shapefile. """
|
|
|
|
|
|
|
|
# by using the LSAD determine if a subclass is defined for this entity
|
|
|
|
lsad_mapping = {
|
|
|
|
('01'): State,
|
|
|
|
('C1', 'C2', 'C3', 'C4'): CongressDistrict
|
|
|
|
}
|
|
|
|
for lsads, cls in lsad_mapping.iteritems():
|
|
|
|
if rec['LSAD'] in lsads:
|
|
|
|
return cls.from_shapefile(obj, rec)
|
|
|
|
|
|
|
|
# if there is no mapping for the LSAD just construct a Entity
|
|
|
|
return Entity('', rec['LSAD_TRANS'], obj.vertices(), obj.extents())
|
|
|
|
|
|
|
|
def in_rect(self, point):
|
|
|
|
""" Check if a point lies within the bounding extents of the entity """
|
|
|
|
return self.extents[0][0] < point[0] < self.extents[1][0] and \
|
|
|
|
self.extents[0][1] < point[1] < self.extents[1][1]
|
|
|
|
|
|
|
|
def contains(self, point):
|
|
|
|
""" Check if a point lies within any of the entities polygons """
|
|
|
|
if self.in_rect(point):
|
|
|
|
for poly in self.polygons:
|
|
|
|
if poly.contains(point):
|
|
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
|
|
def to_kml(self):
|
|
|
|
""" Return a KML Placemark representing the entity """
|
|
|
|
return """<Placemark><name>%s</name>
|
|
|
|
<MultiGeometry>%s</MultiGeometry>
|
|
|
|
</Placemark>""" % (self.name, ''.join(poly.to_kml() for poly in self.polygons))
|
|
|
|
|
|
|
|
class CongressDistrict(Entity):
|
|
|
|
""" Entity with state and district properties. """
|
|
|
|
|
|
|
|
def __init__(self, entity, vertices, extents, state, district):
|
|
|
|
Entity.__init__(self, '%s-%s' % (state, district),
|
|
|
|
entity, vertices, extents)
|
|
|
|
self.state = state
|
|
|
|
self.district = district
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def from_shapefile(obj, rec):
|
|
|
|
""" Construct a CongressDistrict from a census.gov shapefile """
|
|
|
|
return CongressDistrict(rec['LSAD_TRANS'], obj.vertices(),
|
|
|
|
obj.extents(), FIPS_TO_STATE[rec['STATE']],
|
|
|
|
rec['CD'])
|
|
|
|
|
|
|
|
class State(Entity):
|
|
|
|
""" Entity for states, adds a state property """
|
|
|
|
|
|
|
|
def __init__(self, vertices, extents, state):
|
|
|
|
Entity.__init__(self, 'State', state, vertices, extents)
|
|
|
|
self.state = state
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def from_shapefile(obj, rec):
|
|
|
|
""" Construct a State from a census.gov shapefile """
|
|
|
|
return State(obj.vertices(), obj.extents(), rec['NAME'])
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
def read_census_shapefile(filename):
|
|
|
|
"""Read census shapefile and return list of entity-derived objects.
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
Given the base name of a census .shp/.dbf file returns a list of all
|
|
|
|
Entity-derived objects described by the the file.
|
|
|
|
"""
|
|
|
|
|
|
|
|
try:
|
|
|
|
shp = ShapeFile(filename)
|
|
|
|
except IOError:
|
|
|
|
raise ShapefileError('Could not open %s.shp' % filename)
|
|
|
|
|
|
|
|
try:
|
|
|
|
dbf = DBFFile(filename)
|
|
|
|
except IOError:
|
|
|
|
raise ShapefileError('Could not open %s.dbf' % filename)
|
|
|
|
|
|
|
|
shape_count = shp.info()[0]
|
|
|
|
|
|
|
|
# shape_count should always equal dbf.record_count()
|
|
|
|
if shape_count != dbf.record_count():
|
|
|
|
raise ShapefileError('SHP/DBF record count mismatch (SHP=%d, DBF=%d)' %
|
|
|
|
(shape_count, dbf.record_count()))
|
|
|
|
|
|
|
|
# generator version
|
|
|
|
#for i in xrange(shp.info()[0]):
|
|
|
|
# yield Entity.fromShapefile(shp.read_object(i), dbf.read_record(i))
|
|
|
|
|
|
|
|
# shp.info()[0] is the number of objects
|
|
|
|
return [Entity.from_shapefile(shp.read_object(i), dbf.read_record(i))
|
|
|
|
for i in xrange(shape_count)]
|
|
|
|
|
2008-01-17 19:05:20 +00:00
|
|
|
|
|
|
|
### Geocoding ###
|
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
class AddressToDistrictService(object):
|
2008-01-17 19:05:20 +00:00
|
|
|
"""Abstract base class for service which maps addresses to districts using
|
|
|
|
the census data and a geocoder."""
|
|
|
|
|
|
|
|
GEOCODER_GMAPS = 1
|
|
|
|
GEOCODER_US = 2
|
2007-11-29 06:40:47 +00:00
|
|
|
|
2008-01-17 19:05:20 +00:00
|
|
|
def __init__(self, census_file, geocoder=GEOCODER_US, apikey=None):
|
2007-11-29 06:40:47 +00:00
|
|
|
"""AddressToDistrictService constructor
|
2008-01-17 19:05:20 +00:00
|
|
|
|
|
|
|
Initialize given a path to a census.gov all congressional districts
|
|
|
|
(cd99) dataset.
|
|
|
|
|
|
|
|
The cd99_110 dataset is available from:
|
2007-11-29 06:40:47 +00:00
|
|
|
http://www.census.gov/geo/www/cob/cd110.html
|
|
|
|
"""
|
2008-01-17 19:05:20 +00:00
|
|
|
if geocoder == self.GEOCODER_GMAPS and not apikey:
|
|
|
|
raise GeocodingError(610) # bad api key
|
2007-11-29 06:40:47 +00:00
|
|
|
|
|
|
|
self.boundaries = read_census_shapefile(census_file)
|
2008-01-17 19:05:20 +00:00
|
|
|
self.geocoder = geocoder
|
|
|
|
self.apikey = apikey
|
2007-11-29 06:40:47 +00:00
|
|
|
|
2008-01-17 19:05:20 +00:00
|
|
|
def _google_geocode(self, address):
|
2007-11-29 06:40:47 +00:00
|
|
|
"""Convert an address into a latitude/longitude via google maps"""
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
url = 'http://maps.google.com/maps/geo?output=csv&q=%s&key=%s' % \
|
|
|
|
(urllib.quote(address), self.apikey)
|
|
|
|
# returns status,level-of-detail,lat,long
|
2008-01-17 19:05:20 +00:00
|
|
|
status, _, lat, lng = urllib.urlopen(url).read().split(',')
|
2007-11-29 06:40:47 +00:00
|
|
|
|
|
|
|
# 200 - OK
|
|
|
|
if status == '200':
|
|
|
|
return lat, lng
|
|
|
|
else:
|
|
|
|
raise GeocodingError(status)
|
|
|
|
|
2008-01-17 19:05:20 +00:00
|
|
|
def _geocoderus_geocode(self, address):
|
|
|
|
"""Convert an address into a latitude/longitude via geocoder.us"""
|
|
|
|
|
|
|
|
if not address:
|
|
|
|
raise GeocodingError(601) # empty address
|
|
|
|
|
|
|
|
url = 'http://rpc.geocoder.us/service/csv?address=%s' % \
|
|
|
|
urllib.quote(address)
|
|
|
|
data = urllib.urlopen(url).readline() # only get first line for now
|
|
|
|
|
|
|
|
# returns lat,long,street,city,state,zip or #: errmsg
|
|
|
|
if data.startswith('2:'):
|
|
|
|
raise GeocodingError(602) # address not found
|
|
|
|
|
|
|
|
try:
|
|
|
|
lat, lng, _, _, _, _ = data.split(',')
|
|
|
|
return lat, lng
|
|
|
|
except ValueError:
|
|
|
|
raise GeocodingError(500, data) # unmapped error
|
|
|
|
|
|
|
|
def lat_long_to_district(self, lat, lng):
|
|
|
|
""" Obtain the district containing a given latitude and longitude."""
|
2008-06-19 20:44:50 +00:00
|
|
|
flat, flng = float(lat), -abs(float(lng))
|
|
|
|
districts = []
|
|
|
|
for cb in self.boundaries:
|
|
|
|
if cb.contains((flng,flat)):
|
|
|
|
if cb.district == '98':
|
|
|
|
cb.district = '00'
|
|
|
|
elif cb.district[0] == '0':
|
|
|
|
cb.district = cb.district[1]
|
|
|
|
districts.append((cb.state, cb.district))
|
|
|
|
return lat, lng, districts
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
def address_to_district(self, address):
|
|
|
|
"""Given an address returns the congressional district it lies within.
|
2008-01-17 19:05:20 +00:00
|
|
|
|
2007-11-29 06:40:47 +00:00
|
|
|
This function works by geocoding the address and then finding the point
|
2008-01-17 19:05:20 +00:00
|
|
|
that the returned lat/long returned lie within.
|
2007-11-29 06:40:47 +00:00
|
|
|
"""
|
2008-01-17 19:05:20 +00:00
|
|
|
if self.geocoder == self.GEOCODER_GMAPS:
|
|
|
|
lat, lng = self._google_geocode(address)
|
|
|
|
elif self.geocoder == self.GEOCODER_US:
|
|
|
|
lat, lng = self._geocoderus_geocode(address)
|
2007-11-29 06:40:47 +00:00
|
|
|
|
2008-01-17 19:05:20 +00:00
|
|
|
return self.lat_long_to_district(lat, lng)
|