# saucebrush/examples/sopr_lobbyists.py
#
# 52 lines
# 2.1 KiB
# Python

"""
Import SOPR lobbyist filings using lobbyists.py
(http://github.com/dhess/lobbyists/tree/master)
"""
import saucebrush
from saucebrush.filters import *
from saucebrush.emitters import DjangoModelEmitter, DebugEmitter
import lobbyists
def process_sopr_filing(sopr_xml_file):
    """Run one SOPR XML filing document through the saucebrush import recipe.

    Records are produced by ``lobbyists.parse_filings(sopr_xml_file)`` and
    passed through the filters below in order.  The ``issues`` and
    ``lobbyists`` entries get their own filter chains (each ending in a
    Django model emitter) via a ``Splitter``; the remaining fields are then
    emitted as a ``filing``.

    ``sopr_xml_file`` is handed to ``lobbyists.parse_filings`` unchanged.
    """
    # Function-scope import: the settings module is only loaded when a
    # filing is actually processed.
    from sunlightapi import live_settings as DJ_SETTINGS
    DJ_APPLABEL = 'lobbyists'

    def drop_fractional_seconds(timestamp):
        # Keep only the text before the first '.', so the value matches the
        # '%Y-%m-%dT%H:%M:%S' format DateCleaner is given below.
        return timestamp.split('.')[0]

    # The record source is created first, then the filters in recipe order.
    filings = lobbyists.parse_filings(sopr_xml_file)

    # flatten non-list dictionaries & clean up some fields
    cleanup_stage = [
        DictFlattener(['filing', 'client', 'registrant']),
        FieldRemover(['govt_entities', 'affiliated_orgs', 'foreign_entities',
                      'client_state_or_local_gov', 'client_status',
                      'filing_affiliated_orgs_url']),
        FieldRenamer({'filing_date': 'filing_filing_date'}),
    ]

    # process names & dates
    # NOTE(review): confirm FieldAdder does not blank an existing
    # client_contact_name before NameCleaner sees it.
    name_and_date_stage = [
        FieldAdder('client_contact_name', ''),
        NameCleaner('client_contact_name',
                    prefix='client_contact_',
                    nomatch_name='client_raw_contact_name'),
        FieldModifier('filing_date', drop_fractional_seconds),
        DateCleaner('filing_date',
                    from_format='%Y-%m-%dT%H:%M:%S',
                    to_format='%Y-%m-%d'),
    ]

    # flatten lists
    list_stage = [
        Flattener(['issues', 'lobbyists']),
        FieldCopier({'issues.filing_id': 'filing_id',
                     'lobbyists.filing_id': 'filing_id'}),
    ]

    # handle lists: one filter chain per list-valued field
    issue_chain = [DjangoModelEmitter(DJ_SETTINGS, DJ_APPLABEL, 'issue')]
    lobbyist_chain = [
        FieldRemover(['indicator', 'status']),
        NameCleaner('name', nomatch_name='raw_name'),
        Unique(),   # remove some duplicate lobbyists on a form
        DjangoModelEmitter(DJ_SETTINGS, DJ_APPLABEL, 'lobbyist'),
    ]
    list_splitter = saucebrush.filters.Splitter({
        'issues': issue_chain,
        'lobbyists': lobbyist_chain,
    })

    # The list fields are removed before the filing itself is emitted.
    filing_stage = [
        FieldRemover(['issues', 'lobbyists']),
        DjangoModelEmitter(DJ_SETTINGS, DJ_APPLABEL, 'filing'),
    ]

    recipe = (cleanup_stage + name_and_date_stage + list_stage +
              [list_splitter] + filing_stage)
    saucebrush.run_recipe(filings, *recipe)
if __name__ == '__main__':
    # Script entry point: every command-line argument is treated as the path
    # of a SOPR XML file and imported in the order given.
    import sys

    for fname in sys.argv[1:]:
        # A single pre-formatted argument produces the same output whether
        # ``print`` is the Python 2 statement or the Python 3 function; the
        # original ``print 'processing', fname`` is a SyntaxError on Python 3.
        print('processing %s' % fname)
        process_sopr_filing(fname)