# saucebrush/examples/fec_electronic.py
# 2022-11-10 22:08:26 -06:00
#
# 113 lines
# 3.4 KiB
# Python

import re
import exceptions
class FECSource:
    """Iterable source of records read from an FEC electronic filing file.

    The first line of the file is read eagerly and stored, split into
    fields, as ``header``.  Iterating over the instance then yields one
    ``dict`` per remaining line whose record type maps to a key of
    ``FORM_FIELDS``; every other line is skipped, as is anything between a
    ``[BEGINTEXT]`` line and the following ``[ENDTEXT]`` line.

    The underlying file stays open until ``close()`` is called.  The
    instance can also be used as a context manager::

        with FECSource(path) as source:
            for record in source:
                ...
    """

    # Separator between the fields of a line (ASCII 0x1C).
    SPLIT_CHAR = '\x1c'

    # Field names, in file order, for each supported form.  Only the forms
    # listed here are yielded by process_file().
    FORM_FIELDS = {
        'F56': [
            'form_type', 'committee_id', 'transaction_id', 'entity_type',
            'contributor_organization', 'contributor_lastname',
            'contributor_firstname', 'contributor_middlename',
            'contributor_prefix', 'contributor_suffix',
            'contributor_street1', 'contributor_street2',
            'contributor_city', 'contributor_state', 'contributor_zip',
            'contributor_committee_id', 'date', 'amount',
            'contributor_employer', 'contributor_occupation',
        ],
    }

    # (regex, form) pairs used to map a record type to a key of FORM_FIELDS.
    FORM_MAPPING = (
        ('F1(A|N)', 'F1'),
        ('F1S', 'F1S'),
        ('F1M(A|N)', 'F1M'),
        ('F2(A|N)', 'F2'),
        ('F2S', 'F2S'),
        ('F24', 'F24'),
        ('F3(N|A|T)', 'F3'),
        ('F3S', 'F3S'),
        ('F3ZT?', 'F3Z'),
        ('F3P(N|A|T)', 'F3P'),
        ('F3PS', 'F3PS'),
        ('F3P31AL', 'F3P31AL'),
        ('F3X(N|A|T)', 'F3X'),
        ('F4(N|A|T)', 'F4'),
        ('F5(N|A|T)', 'F5'),
        ('F56', 'F56'),
        ('F57', 'F57'),
        ('F6', 'F6'),
        ('F65', 'F65'),
        ('F7(N|A|T)', 'F7'),
        ('F76', 'F76'),
        ('F8(N|A|T)', 'F8'),
        ('F82', 'F82'),
        ('F83', 'F83'),
        ('F9(A|N)', 'F9'),
        ('F91', 'F91'),
        ('F92', 'F92'),
        ('F93', 'F93'),
        ('F94', 'F94'),
        ('F10', 'F10'),
        ('F105', 'F105'),
        ('F13(A|N)', 'F13'),
        ('F132', 'F132'),
        ('F133', 'F133'),
        ('F99', 'F99'),
        ('SA.+', 'SA'),
        ('SB.+', 'SB'),
        ('SC/.+', 'SC'),
        ('SC1/.+', 'SC1'),
        ('SC2/.+', 'SC2'),
        ('SD.+', 'SD'),
        ('SE', 'SE'),
        ('SF', 'SF'),
        ('H1', 'H1'),
        ('H2', 'H2'),
        ('H3', 'H3'),
        ('H4', 'H4'),
        ('H5', 'H5'),
        ('H6', 'H6'),
        ('SI', 'SI'),
        ('SL', 'SL'),
        ('TEXT', 'TEXT'),
    )

    # Compile the patterns, allowing the record type to be wrapped in
    # optional double quotes.  Result: {compiled regex: form name}.
    FORM_MAPPING = {
        re.compile('(")?%s(")?' % pattern): form
        for pattern, form in FORM_MAPPING
    }

    # Markers that open and close a free-text block (matched at the start
    # of the first field, case-insensitively).
    _BEGINTEXT_RE = re.compile(r'\[BEGINTEXT\]', re.IGNORECASE)
    _ENDTEXT_RE = re.compile(r'\[ENDTEXT\]', re.IGNORECASE)

    def __init__(self, filename):
        """Open *filename* and read its header line.

        ``header`` is the first line with its line terminator removed,
        split on ``SPLIT_CHAR``.  If its first field is not ``"HDR"`` the
        header is printed; no exception is raised.
        """
        self.filename = filename
        self.fecfile = open(filename)
        try:
            first_line = self.fecfile.readline()
        except Exception:
            # Do not leak the handle if the header cannot be read.
            self.fecfile.close()
            raise
        self.header = first_line.rstrip('\r\n').split(self.SPLIT_CHAR)
        if self.header[0] != "HDR":
            print(self.header)
        #assert self.header[2].startswith("6.2"), self.header
        # Not read anywhere in this class (process_file() tracks text blocks
        # in a local); kept so existing attribute access keeps working.
        self._in_textblock = False

    def close(self):
        """Close the underlying file."""
        self.fecfile.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @staticmethod
    def get_form_type(rectype):
        """Return the form name for record type *rectype*, or ``None``.

        Patterns are matched at the start of *rectype* and are not anchored
        at the end, so several may match (``F6`` is a prefix of ``F65``).
        The pattern that consumes the most characters wins, which makes the
        result independent of dict iteration order.
        """
        best_form = None
        best_end = -1
        for type_re, form in FECSource.FORM_MAPPING.items():
            found = type_re.match(rectype)
            if found and found.end() > best_end:
                best_form = form
                best_end = found.end()
        return best_form

    def process_file(self):
        """Yield a dict for every supported record left in the file.

        Each dict pairs the names in ``FORM_FIELDS[form]`` with the fields
        of the line; ``zip`` truncates to the shorter of the two.  Blank
        lines, text blocks and records whose form is not in ``FORM_FIELDS``
        are skipped.
        """
        in_textblock = False
        for line in self.fecfile:
            # Drop the line terminator so it does not end up in the last
            # field of the record.
            fields = line.rstrip('\r\n').split(self.SPLIT_CHAR)
            marker = fields[0]

            # handle the BEGINTEXT/ENDTEXT blocks
            if self._BEGINTEXT_RE.match(marker):
                in_textblock = True
                continue
            if self._ENDTEXT_RE.match(marker):
                in_textblock = False
                continue
            if in_textblock or not line.strip():
                continue

            form = self.get_form_type(marker)
            if form in self.FORM_FIELDS:
                yield dict(zip(self.FORM_FIELDS[form], fields))

    def __iter__(self):
        """Iterate over the records produced by ``process_file()``."""
        return self.process_file()