52 lines
2.1 KiB
Python
52 lines
2.1 KiB
Python
"""
|
|
Import SOPR lobbyist filings using lobbyists.py
(http://github.com/dhess/lobbyists/tree/master)
|
|
|
|
"""
|
|
import saucebrush
|
|
from saucebrush.filters import *
|
|
from saucebrush.emitters import DjangoModelEmitter, DebugEmitter
|
|
import lobbyists
|
|
|
|
def process_sopr_filing(sopr_xml_file):
    """Run one SOPR filings file through the saucebrush import recipe.

    The file is parsed with ``lobbyists.parse_filings`` and every record is
    passed through a chain of saucebrush filters before being handed to
    ``DjangoModelEmitter`` instances for the ``issue``, ``lobbyist`` and
    ``filing`` models of the ``lobbyists`` app.

    ``sopr_xml_file`` is forwarded to ``lobbyists.parse_filings`` unchanged.
    Nothing is returned.
    """
    # Settings are imported at call time, not at module import time.
    from sunlightapi import live_settings as settings

    app_label = 'lobbyists'

    def drop_after_first_dot(value):
        # Keep only the text before the first '.', so the value fits the
        # '%Y-%m-%dT%H:%M:%S' format expected by the DateCleaner below
        # (presumably this strips fractional seconds -- confirm with data).
        return value.split('.')[0]

    filings = lobbyists.parse_filings(sopr_xml_file)

    steps = [
        # flatten non-list dictionaries & clean up some fields
        DictFlattener(['filing', 'client', 'registrant']),
        FieldRemover([
            'govt_entities',
            'affiliated_orgs',
            'foreign_entities',
            'client_state_or_local_gov',
            'client_status',
            'filing_affiliated_orgs_url',
        ]),
        FieldRenamer({'filing_date': 'filing_filing_date'}),

        # process names & dates
        FieldAdder('client_contact_name', ''),
        NameCleaner(
            'client_contact_name',
            prefix='client_contact_',
            nomatch_name='client_raw_contact_name',
        ),
        FieldModifier('filing_date', drop_after_first_dot),
        DateCleaner(
            'filing_date',
            from_format='%Y-%m-%dT%H:%M:%S',
            to_format='%Y-%m-%d',
        ),

        # flatten lists
        Flattener(['issues', 'lobbyists']),
        FieldCopier({
            'issues.filing_id': 'filing_id',
            'lobbyists.filing_id': 'filing_id',
        }),
    ]

    # handle lists: each list field gets its own sub-pipeline and emitter
    issue_branch = [
        DjangoModelEmitter(settings, app_label, 'issue'),
    ]
    lobbyist_branch = [
        FieldRemover(['indicator', 'status']),
        NameCleaner('name', nomatch_name='raw_name'),
        Unique(),  # remove some duplicate lobbyists on a form
        DjangoModelEmitter(settings, app_label, 'lobbyist'),
    ]
    steps.append(saucebrush.filters.Splitter({
        'issues': issue_branch,
        'lobbyists': lobbyist_branch,
    }))

    # the list fields are dropped before the filing record itself is emitted
    steps.append(FieldRemover(['issues', 'lobbyists']))
    steps.append(DjangoModelEmitter(settings, app_label, 'filing'))

    saucebrush.run_recipe(filings, *steps)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: import every filings file named on the command line.
    import sys

    for fname in sys.argv[1:]:
        # A single-argument, parenthesised print emits the same text under
        # the Python 2 print statement and the Python 3 print function; the
        # original bare ``print 'processing', fname`` is a SyntaxError on
        # Python 3.
        print('processing %s' % fname)
        process_sopr_filing(fname)
|