added flatten & flattener
This commit is contained in:
parent
31ffeaff61
commit
8ead922b4c
@ -45,21 +45,21 @@ def process_fec_year(year):
|
||||
source = FixedWidthFileSource(open('%s/foiacm.dta' % year), CM_FIELDS)
|
||||
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
||||
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
||||
run_recipe(source, [emit_mysql])
|
||||
run_recipe(source, emit_mysql)
|
||||
|
||||
# candidate
|
||||
source = FixedWidthFileSource(open('%s/foiacn.dta' % year), CN_FIELDS)
|
||||
fieldremover = FieldRemover(('fillerA', 'fillerB'))
|
||||
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'candidate', [f[0] for f in CN_FIELDS if f[0] != 'filler'])
|
||||
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'candidate', [f[0] for f in CN_FIELDS if not f[0].startswith('filler')])
|
||||
run_recipe(source, [fieldremover, emit_mysql])
|
||||
run_recipe(source, fieldremover, emit_mysql)
|
||||
|
||||
# contributions
|
||||
source = FixedWidthFileSource(open('%s/itcont.dta' % year), INDIV_FIELDS)
|
||||
decobolizer = FieldModifier(('amount', ), fix_cobol_number)
|
||||
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
||||
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
||||
run_recipe(source, [decobolizer, emit_mysql])
|
||||
run_recipe(source, decobolizer, emit_mysql)
|
||||
|
||||
if __name__=='__main__':
|
||||
process_fec_year(2008)
|
||||
|
@ -8,6 +8,7 @@
|
||||
"""
|
||||
|
||||
from exceptions import NotImplementedError
|
||||
from saucebrush import utils
|
||||
|
||||
######################
|
||||
## Abstract Filters ##
|
||||
@ -156,6 +157,9 @@ class FieldAdder(Filter):
|
||||
|
||||
from itertools import count
|
||||
FieldAdder('id', count)
|
||||
|
||||
would yield a new column named id that uses the itertools count iterable
|
||||
to create sequentially numbered ids.
|
||||
"""
|
||||
|
||||
def __init__(self, field_name, field_value):
|
||||
@ -214,6 +218,35 @@ class Splitter(Filter):
|
||||
return record
|
||||
|
||||
|
||||
class Flattener(Filter):
    """ Flatten the nested dictionaries of each record into a single level.

        Every record is handed to utils.flatten, which joins the keys of
        nested dictionaries together with an underscore, so that

        {'name': 'jane', 'address': {'state': 'NC', 'street': '146 shirley drive'}}

        becomes

        {'name': 'jane', 'address_state': 'NC', 'address_street': '146 shirley drive'}

        The filter takes no arguments:

        flattener = Flattener()
    """

    def __init__(self):
        super(Flattener, self).__init__()

    def process_record(self, record):
        # all of the work is delegated to the shared helper in utils
        return utils.flatten(record)
|
||||
|
||||
|
||||
###########################
|
||||
## Commonly Used Filters ##
|
||||
|
@ -32,6 +32,28 @@ def string_dig(element, joiner=''):
|
||||
return joiner.join([string_dig(child) for child in element.findAll(True)])
|
||||
|
||||
|
||||
def flatten(item, prefix=''):
    """
    Flatten nested dictionary into one with its keys concatenated together.

    Keys of nested dictionaries are joined to their parent key with an
    underscore.  Lists and tuples are kept as lists under their own key;
    dictionaries found inside them are flattened independently (without
    the outer prefix) and any other member is left untouched.

    A value that is an empty dictionary contributes no keys to the result.
    A non-dictionary ``item`` is returned wrapped as ``{prefix: item}``.

    >>> flatten({'a':1, 'b':{'c':2}, 'd':[{'e':{'r':7}}, {'e':5}],
    ...          'f':{'g':{'h':6}}})
    {'a': 1, 'b_c': 2, 'd': [{'e_r': 7}, {'e': 5}], 'f_g_h': 6}
    >>> flatten({'tags': ['x', 'y']})
    {'tags': ['x', 'y']}
    """
    if isinstance(item, dict):
        # don't prepend a leading _
        if prefix != '':
            prefix += '_'
        retval = {}
        # items() rather than iteritems() so this runs on Python 2 and 3
        for key, value in item.items():
            retval.update(flatten(value, prefix + key))
        return retval
    elif isinstance(item, (tuple, list)):
        return {prefix: [_flatten_member(member) for member in item]}
    else:
        return {prefix: item}


def _flatten_member(member):
    """ Flatten a single member of a list without wrapping scalars. """
    if isinstance(member, dict):
        return flatten(member)
    if isinstance(member, (tuple, list)):
        return [_flatten_member(child) for child in member]
    # plain values stay as they are instead of becoming {'': value}
    return member
|
||||
|
||||
|
||||
def dotted_key_lookup(dict_, dotted_key, default=KeyError, separator='.'):
|
||||
"""
|
||||
Do a lookup within dict_ by the various elements of dotted_key.
|
||||
|
Loading…
Reference in New Issue
Block a user