68 lines
3.3 KiB
Python
68 lines
3.3 KiB
Python
from saucebrush.sources import FixedWidthFileSource
|
|
from saucebrush.filters import FieldModifier, FieldRemover
|
|
from saucebrush.emitters import SqliteEmitter, SqlDumpEmitter
|
|
from saucebrush import run_recipe
|
|
|
|
CM_FIELDS = [('id',9),('name',90),('treasurer',38),('street_1',34),
|
|
('street_2',34),('city',18),('state',2),('zipcode',5),
|
|
('designation',1),('type',1),('party',3),('filing_frequency',1),
|
|
('interest_group_category',1),('connected_org_name',38),
|
|
('candidate_id',9)]
|
|
|
|
# party_1 and party_3 become party and party_2
|
|
CN_FIELDS = [('id',9), ('name',38), ('party',3), ('fillerA',3), ('party_2',3),
|
|
('seat_status',1), ('fillerB',1), ('status',1), ('street_1',34),
|
|
('street_2',34), ('city',18), ('state',2), ('zipcode',5),
|
|
('committee_id', 9), ('election_year',2), ('district', 2)]
|
|
|
|
# Combines year field (split in data for no apparent reason)
|
|
INDIV_FIELDS = [('filer_id',9), ('amendment',1), ('report_type', 3),
|
|
('primary_general',1), ('microfilm_loc',11),
|
|
('transaction_type',3), ('name',34), ('city',18), ('state', 2),
|
|
('zipcode', 5), ('occupation', 35), ('month', 2), ('day', 2),
|
|
('year',4), ('amount',7), ('other_id',9),
|
|
('fec_record_number',7)]
|
|
|
|
# Combines year field (split in data for no apparent reason)
|
|
PAS2_FIELDS = [('id',9), ('amendment',1), ('report_type', 3),
|
|
('primary_general',1), ('microfilm_loc',11),
|
|
('transaction_type',3), ('month', 2), ('day', 2),
|
|
('year',4), ('amount',7), ('other_id',9), ('candidate_id',9),
|
|
('fec_record_number', 7)]
|
|
|
|
|
|
def fix_cobol_number(number):
|
|
mapping = {']':'0', 'j':'1', 'k':'2', 'l':'3', 'm':'4', 'n':'5', 'o':'6', 'p':'7', 'q':'8', 'r':'9'}
|
|
number = number.lstrip('0')
|
|
if not number:
|
|
number = '0'
|
|
elif number[-1] in mapping.keys():
|
|
number = '-' + number[0:-1] + mapping[number[-1]]
|
|
return number
|
|
|
|
def process_fec_year(year):
|
|
# committees
|
|
source = FixedWidthFileSource(open('%s/foiacm.dta' % year), CM_FIELDS)
|
|
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
|
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
|
run_recipe(source, emit_mysql)
|
|
|
|
# candidate
|
|
source = FixedWidthFileSource(open('%s/foiacn.dta' % year), CN_FIELDS)
|
|
fieldremover = FieldRemover(('fillerA', 'fillerB'))
|
|
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'candidate', [f[0] for f in CN_FIELDS if f[0] != 'filler'])
|
|
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'candidate', [f[0] for f in CN_FIELDS if not f[0].startswith('filler')])
|
|
run_recipe(source, fieldremover, emit_mysql)
|
|
|
|
# contributions
|
|
source = FixedWidthFileSource(open('%s/itcont.dta' % year), INDIV_FIELDS)
|
|
decobolizer = FieldModifier(('amount', ), fix_cobol_number)
|
|
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
|
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
|
run_recipe(source, decobolizer, emit_mysql)
|
|
|
|
if __name__=='__main__':
|
|
process_fec_year(2008)
|
|
#for year in [2000,2002,2004,2006,2008]:
|
|
# process_fec_year(year)
|