saucebrush/examples/fec_cobol.py
2008-11-04 17:20:10 +00:00

68 lines
3.3 KiB
Python

from saucebrush.sources import FixedWidthFileSource
from saucebrush.filters import FieldModifier, FieldRemover
from saucebrush.emitters import SqliteEmitter, SqlDumpEmitter
from saucebrush import run_recipe
CM_FIELDS = [('id',9),('name',90),('treasurer',38),('street_1',34),
('street_2',34),('city',18),('state',2),('zipcode',5),
('designation',1),('type',1),('party',3),('filing_frequency',1),
('interest_group_category',1),('connected_org_name',38),
('candidate_id',9)]
# party_1 and party_3 become party and party_2
CN_FIELDS = [('id',9), ('name',38), ('party',3), ('fillerA',3), ('party_2',3),
('seat_status',1), ('fillerB',1), ('status',1), ('street_1',34),
('street_2',34), ('city',18), ('state',2), ('zipcode',5),
('committee_id', 9), ('election_year',2), ('district', 2)]
# Combines year field (split in data for no apparent reason)
INDIV_FIELDS = [('filer_id',9), ('amendment',1), ('report_type', 3),
('primary_general',1), ('microfilm_loc',11),
('transaction_type',3), ('name',34), ('city',18), ('state', 2),
('zipcode', 5), ('occupation', 35), ('month', 2), ('day', 2),
('year',4), ('amount',7), ('other_id',9),
('fec_record_number',7)]
# Combines year field (split in data for no apparent reason)
PAS2_FIELDS = [('id',9), ('amendment',1), ('report_type', 3),
('primary_general',1), ('microfilm_loc',11),
('transaction_type',3), ('month', 2), ('day', 2),
('year',4), ('amount',7), ('other_id',9), ('candidate_id',9),
('fec_record_number', 7)]
def fix_cobol_number(number):
mapping = {']':'0', 'j':'1', 'k':'2', 'l':'3', 'm':'4', 'n':'5', 'o':'6', 'p':'7', 'q':'8', 'r':'9'}
number = number.lstrip('0')
if not number:
number = '0'
elif number[-1] in mapping.keys():
number = '-' + number[0:-1] + mapping[number[-1]]
return number
def process_fec_year(year):
# committees
source = FixedWidthFileSource(open('%s/foiacm.dta' % year), CM_FIELDS)
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
run_recipe(source, emit_mysql)
# candidate
source = FixedWidthFileSource(open('%s/foiacn.dta' % year), CN_FIELDS)
fieldremover = FieldRemover(('fillerA', 'fillerB'))
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'candidate', [f[0] for f in CN_FIELDS if f[0] != 'filler'])
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'candidate', [f[0] for f in CN_FIELDS if not f[0].startswith('filler')])
run_recipe(source, fieldremover, emit_mysql)
# contributions
source = FixedWidthFileSource(open('%s/itcont.dta' % year), INDIV_FIELDS)
decobolizer = FieldModifier(('amount', ), fix_cobol_number)
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
run_recipe(source, decobolizer, emit_mysql)
if __name__=='__main__':
process_fec_year(2008)
#for year in [2000,2002,2004,2006,2008]:
# process_fec_year(year)