113 lines
3.4 KiB
Python
113 lines
3.4 KiB
Python
import re
|
|
import exceptions
|
|
|
|
class FECSource:
|
|
|
|
SPLIT_CHAR = '\x1c'
|
|
FORM_FIELDS = {
|
|
'F56' : ['form_type', 'committee_id', 'transaction_id', 'entity_type',
|
|
'contributor_organization', 'contributor_lastname', 'contributor_firstname',
|
|
'contributor_middlename', 'contributor_prefix', 'contributor_suffix',
|
|
'contributor_street1', 'contributor_street2', 'contributor_city',
|
|
'contributor_state', 'contributor_zip', 'contributor_committee_id',
|
|
'date', 'amount', 'contributor_employer', 'contributor_occupation']
|
|
}
|
|
|
|
# use Regex to map forms to keys in FORM_FIELDS
|
|
FORM_MAPPING = (
|
|
('F1(A|N)', 'F1'),
|
|
('F1S', 'F1S'),
|
|
('F1M(A|N)', 'F1M'),
|
|
('F2(A|N)', 'F2'),
|
|
('F2S', 'F2S'),
|
|
('F24', 'F24'),
|
|
('F3(N|A|T)', 'F3'),
|
|
('F3S', 'F3S'),
|
|
('F3ZT?', 'F3Z'),
|
|
('F3P(N|A|T)', 'F3P'),
|
|
('F3PS', 'F3PS'),
|
|
('F3P31AL', 'F3P31AL'),
|
|
('F3X(N|A|T)', 'F3X'),
|
|
('F4(N|A|T)', 'F4'),
|
|
('F5(N|A|T)', 'F5'),
|
|
('F56', 'F56'),
|
|
('F57', 'F57'),
|
|
('F6', 'F6'),
|
|
('F65', 'F65'),
|
|
('F7(N|A|T)', 'F7'),
|
|
('F76', 'F76'),
|
|
('F8(N|A|T)', 'F8'),
|
|
('F82', 'F82'),
|
|
('F83', 'F83'),
|
|
('F9(A|N)', 'F9'),
|
|
('F91', 'F91'),
|
|
('F92', 'F92'),
|
|
('F93', 'F93'),
|
|
('F94', 'F94'),
|
|
('F10', 'F10'),
|
|
('F105', 'F105'),
|
|
('F13(A|N)', 'F13'),
|
|
('F132', 'F132'),
|
|
('F133', 'F133'),
|
|
('F99', 'F99'),
|
|
('SA.+', 'SA'),
|
|
('SB.+', 'SB'),
|
|
('SC/.+', 'SC'),
|
|
('SC1/.+', 'SC1'),
|
|
('SC2/.+', 'SC2'),
|
|
('SD.+', 'SD'),
|
|
('SE', 'SE'),
|
|
('SF', 'SF'),
|
|
('H1', 'H1'),
|
|
('H2', 'H2'),
|
|
('H3', 'H3'),
|
|
('H4', 'H4'),
|
|
('H5', 'H5'),
|
|
('H6', 'H6'),
|
|
('SI', 'SI'),
|
|
('SL', 'SL'),
|
|
('TEXT', 'TEXT'),
|
|
)
|
|
|
|
# compile regexes with optional quotes
|
|
FORM_MAPPING = dict( [(re.compile("(\")?%s(\")?" % pattern), form)
|
|
for pattern,form in FORM_MAPPING] )
|
|
|
|
def __init__(self, filename):
|
|
self.filename = filename
|
|
self.fecfile = open(filename)
|
|
self.header = self.fecfile.readline().split(self.SPLIT_CHAR)
|
|
if self.header[0] != "HDR":
|
|
print self.header
|
|
#assert self.header[2].startswith("6.2"), self.header
|
|
self._in_textblock = False
|
|
|
|
@staticmethod
|
|
def get_form_type(rectype):
|
|
for type_re, type in FECSource.FORM_MAPPING.items():
|
|
if type_re.match(rectype):
|
|
return type
|
|
|
|
def process_file(self):
|
|
begintext = re.compile('\[BEGINTEXT\]', re.IGNORECASE)
|
|
endtext = re.compile('\[ENDTEXT\]', re.IGNORECASE)
|
|
in_textblock = False
|
|
|
|
for line in self.fecfile:
|
|
|
|
# get fields from line
|
|
fields = line.split(self.SPLIT_CHAR)
|
|
|
|
# handle the BEGINTEXT/ENDTEXT blocks
|
|
if begintext.match(fields[0]):
|
|
in_textblock = True
|
|
elif begintext.match(fields[0]):
|
|
in_textblock = False
|
|
elif line != '\n' and not in_textblock:
|
|
type = self.get_form_type(fields[0])
|
|
if type in self.FORM_FIELDS:
|
|
yield dict(zip(self.FORM_FIELDS[type], fields))
|
|
|
|
def __iter__(self):
|
|
return self.process_file()
|