added flatten & flattener
This commit is contained in:
parent
31ffeaff61
commit
8ead922b4c
@ -45,21 +45,21 @@ def process_fec_year(year):
|
|||||||
source = FixedWidthFileSource(open('%s/foiacm.dta' % year), CM_FIELDS)
|
source = FixedWidthFileSource(open('%s/foiacm.dta' % year), CM_FIELDS)
|
||||||
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
||||||
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'committee', [f[0] for f in CM_FIELDS if f[0] != 'filler'])
|
||||||
run_recipe(source, [emit_mysql])
|
run_recipe(source, emit_mysql)
|
||||||
|
|
||||||
# candidate
|
# candidate
|
||||||
source = FixedWidthFileSource(open('%s/foiacn.dta' % year), CN_FIELDS)
|
source = FixedWidthFileSource(open('%s/foiacn.dta' % year), CN_FIELDS)
|
||||||
fieldremover = FieldRemover(('fillerA', 'fillerB'))
|
fieldremover = FieldRemover(('fillerA', 'fillerB'))
|
||||||
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'candidate', [f[0] for f in CN_FIELDS if f[0] != 'filler'])
|
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'candidate', [f[0] for f in CN_FIELDS if f[0] != 'filler'])
|
||||||
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'candidate', [f[0] for f in CN_FIELDS if not f[0].startswith('filler')])
|
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'candidate', [f[0] for f in CN_FIELDS if not f[0].startswith('filler')])
|
||||||
run_recipe(source, [fieldremover, emit_mysql])
|
run_recipe(source, fieldremover, emit_mysql)
|
||||||
|
|
||||||
# contributions
|
# contributions
|
||||||
source = FixedWidthFileSource(open('%s/itcont.dta' % year), INDIV_FIELDS)
|
source = FixedWidthFileSource(open('%s/itcont.dta' % year), INDIV_FIELDS)
|
||||||
decobolizer = FieldModifier(('amount', ), fix_cobol_number)
|
decobolizer = FieldModifier(('amount', ), fix_cobol_number)
|
||||||
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
#sqlite = SqliteOutput('fec%s.sqlite' % year, 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
||||||
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
emit_mysql = SqlDumpEmitter(open('fec%s.sql' % year,'a'), 'contribution', [f[0] for f in INDIV_FIELDS if f[0] != 'filler'])
|
||||||
run_recipe(source, [decobolizer, emit_mysql])
|
run_recipe(source, decobolizer, emit_mysql)
|
||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
process_fec_year(2008)
|
process_fec_year(2008)
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from exceptions import NotImplementedError
|
from exceptions import NotImplementedError
|
||||||
|
from saucebrush import utils
|
||||||
|
|
||||||
######################
|
######################
|
||||||
## Abstract Filters ##
|
## Abstract Filters ##
|
||||||
@ -156,6 +157,9 @@ class FieldAdder(Filter):
|
|||||||
|
|
||||||
from itertools import count
|
from itertools import count
|
||||||
FieldAdder('id', count)
|
FieldAdder('id', count)
|
||||||
|
|
||||||
|
would yield a new column named id that uses the itertools count iterable
|
||||||
|
to create sequentially numbered ids.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, field_name, field_value):
|
def __init__(self, field_name, field_value):
|
||||||
@ -214,6 +218,35 @@ class Splitter(Filter):
|
|||||||
return record
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
class Flattener(Filter):
    """ Flatten the nested dictionaries of each record into a single level.

        Every record is passed through utils.flatten, which joins nested
        dictionary key names with an underscore.  Dictionaries found
        inside a list are flattened individually and stay in that list.

        The filter takes no arguments:

            flattener = Flattener()

        Given the record

            {'addresses': [{'address': {'state':'NC', 'street':'146 shirley drive'}},
                           {'address': {'state':'NY', 'street':'3000 Winton Rd'}}]}

        it yields

            {'addresses': [{'address_state':'NC', 'address_street':'146 shirley drive'},
                           {'address_state':'NY', 'address_street':'3000 Winton Rd'}]}
    """

    def __init__(self):
        super(Flattener, self).__init__()

    def process_record(self, record):
        """ Return the flattened form of record, as built by utils.flatten. """
        return utils.flatten(record)
|
||||||
|
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
## Commonly Used Filters ##
|
## Commonly Used Filters ##
|
||||||
|
@ -32,6 +32,28 @@ def string_dig(element, joiner=''):
|
|||||||
return joiner.join([string_dig(child) for child in element.findAll(True)])
|
return joiner.join([string_dig(child) for child in element.findAll(True)])
|
||||||
|
|
||||||
|
|
||||||
|
def flatten(item, prefix=''):
    """
    Flatten nested dictionary into one with its keys concatenated together.

    Nested dictionary keys are joined with ``_``.  Lists and tuples are
    stored as lists under their (concatenated) key: each dictionary inside
    one is flattened on its own, with key names starting afresh, while
    non-dictionary elements are kept unchanged.

    A nested dictionary that has no items contributes no keys to the result.

    :param item: the dictionary to flatten (nested values are handled
        recursively)
    :param prefix: key prefix accumulated during recursion; callers
        normally leave this at its default
    :returns: a new flat dictionary.  A non-dictionary ``item`` comes back
        wrapped as a single-entry dictionary keyed by ``prefix``.

    >>> flatten({'a':1, 'b':{'c':2}, 'd':[{'e':{'r':7}}, {'e':5}],
    ...          'f':{'g':{'h':6}}}) == {
    ...     'a': 1, 'b_c': 2, 'd': [{'e_r': 7}, {'e': 5}], 'f_g_h': 6}
    True
    >>> flatten({'tags': ['x', 'y']})
    {'tags': ['x', 'y']}
    """
    if isinstance(item, dict):
        # don't prepend a leading _
        if prefix != '':
            prefix += '_'
        retval = {}
        # items() rather than iteritems() so this runs on Python 2 and 3
        for key, value in item.items():
            retval.update(flatten(value, prefix + key))
        return retval
    elif isinstance(item, (tuple, list)):
        return {prefix: _flatten_sequence(item)}
    else:
        return {prefix: item}


def _flatten_sequence(sequence):
    """Return a list of the elements of sequence with each dict flattened."""
    flattened = []
    for element in sequence:
        if isinstance(element, dict):
            # key names restart inside list elements (no outer prefix)
            flattened.append(flatten(element))
        elif isinstance(element, (tuple, list)):
            flattened.append(_flatten_sequence(element))
        else:
            # keep scalars as they are instead of wrapping them as {'': value}
            flattened.append(element)
    return flattened
|
||||||
|
|
||||||
|
|
||||||
def dotted_key_lookup(dict_, dotted_key, default=KeyError, separator='.'):
|
def dotted_key_lookup(dict_, dotted_key, default=KeyError, separator='.'):
|
||||||
"""
|
"""
|
||||||
Do a lookup within dict_ by the various elements of dotted_key.
|
Do a lookup within dict_ by the various elements of dotted_key.
|
||||||
|
Loading…
Reference in New Issue
Block a user