whole bunch of changes to support new unicode-based strings in Py3
This commit is contained in:
parent
46c8ab3ae3
commit
d5b56b931b
@ -2,7 +2,7 @@
|
||||
Saucebrush Emitters are filters that, instead of modifying the record, output
|
||||
it in some manner.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from saucebrush.filters import Filter
|
||||
|
||||
class Emitter(Filter):
|
||||
@ -50,12 +50,12 @@ class DebugEmitter(Emitter):
|
||||
self._outfile = outfile
|
||||
|
||||
def emit_record(self, record):
|
||||
self._outfile.write(str(record) + '\n')
|
||||
self._outfile.write("{0}\n".format(record))
|
||||
|
||||
|
||||
class CountEmitter(Emitter):
|
||||
""" Emitter that writes the record count to a file-like object.
|
||||
|
||||
|
||||
CountEmitter() by default writes to stdout.
|
||||
CountEmitter(outfile=open('test', 'w')) would print to a file named test.
|
||||
CountEmitter(every=1000000) would write the count every 1,000,000 records.
|
||||
@ -63,36 +63,36 @@ class CountEmitter(Emitter):
|
||||
"""
|
||||
|
||||
def __init__(self, every=1000, of=None, outfile=None, format=None):
|
||||
|
||||
|
||||
super(CountEmitter, self).__init__()
|
||||
|
||||
|
||||
if not outfile:
|
||||
import sys
|
||||
self._outfile = sys.stdout
|
||||
else:
|
||||
self._outfile = outfile
|
||||
|
||||
|
||||
if format is None:
|
||||
if of is not None:
|
||||
format = "%(count)s of %(of)s\n"
|
||||
else:
|
||||
format = "%(count)s\n"
|
||||
|
||||
|
||||
self._format = format
|
||||
self._every = every
|
||||
self._of = of
|
||||
self.count = 0
|
||||
|
||||
def __str__(self):
|
||||
|
||||
def format(self):
|
||||
return self._format % {'count': self.count, 'of': self._of}
|
||||
|
||||
def emit_record(self, record):
|
||||
self.count += 1
|
||||
if self.count % self._every == 0:
|
||||
self._outfile.write(str(self))
|
||||
|
||||
self._outfile.write(self.format())
|
||||
|
||||
def done(self):
|
||||
self._outfile.write(str(self))
|
||||
self._outfile.write(self.format())
|
||||
|
||||
|
||||
class CSVEmitter(Emitter):
|
||||
@ -107,7 +107,9 @@ class CSVEmitter(Emitter):
|
||||
import csv
|
||||
self._dictwriter = csv.DictWriter(csvfile, fieldnames)
|
||||
# write header row
|
||||
self._dictwriter.writerow(dict(zip(fieldnames, fieldnames)))
|
||||
header_row = dict(zip(fieldnames, fieldnames))
|
||||
print(header_row)
|
||||
self._dictwriter.writerow(header_row)
|
||||
|
||||
def emit_record(self, record):
|
||||
self._dictwriter.writerow(record)
|
||||
|
@ -1,47 +1,53 @@
|
||||
from __future__ import unicode_literals
|
||||
from contextlib import closing
|
||||
from io import BytesIO, StringIO
|
||||
import unittest
|
||||
from cStringIO import StringIO
|
||||
|
||||
from saucebrush.emitters import DebugEmitter, CSVEmitter, CountEmitter
|
||||
|
||||
class EmitterTestCase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.output = StringIO()
|
||||
|
||||
def test_debug_emitter(self):
|
||||
de = DebugEmitter(self.output)
|
||||
data = de.attach([1,2,3])
|
||||
for _ in data:
|
||||
pass
|
||||
self.assertEquals(self.output.getvalue(), '1\n2\n3\n')
|
||||
with closing(StringIO()) as output:
|
||||
de = DebugEmitter(output)
|
||||
list(de.attach([1,2,3]))
|
||||
self.assertEqual(output.getvalue(), '1\n2\n3\n')
|
||||
|
||||
def test_csv_emitter(self):
|
||||
ce = CSVEmitter(self.output, ('x','y','z'))
|
||||
data = ce.attach([{'x':1,'y':2,'z':3}, {'x':5, 'y':5, 'z':5}])
|
||||
for _ in data:
|
||||
pass
|
||||
self.assertEquals(self.output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
|
||||
|
||||
|
||||
try:
|
||||
import cStringIO # if Python 2.x then use BytesIO
|
||||
io = BytesIO()
|
||||
except:
|
||||
io = StringIO() # if Python 3.x then use StringIO
|
||||
|
||||
with closing(io) as output:
|
||||
ce = CSVEmitter(output, ('x','y','z'))
|
||||
list(ce.attach([{'x':1, 'y':2, 'z':3}, {'x':5, 'y':5, 'z':5}]))
|
||||
self.assertEqual(output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
|
||||
|
||||
def test_count_emitter(self):
|
||||
|
||||
|
||||
# values for test
|
||||
values = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22]
|
||||
|
||||
# test without of parameter
|
||||
ce = CountEmitter(every=10, outfile=self.output, format="%(count)s records\n")
|
||||
list(ce.attach(values))
|
||||
self.assertEquals(self.output.getvalue(), '10 records\n20 records\n')
|
||||
ce.done()
|
||||
self.assertEquals(self.output.getvalue(), '10 records\n20 records\n22 records\n')
|
||||
|
||||
# reset output
|
||||
self.output.truncate(0)
|
||||
|
||||
# test with of parameter
|
||||
ce = CountEmitter(every=10, outfile=self.output, of=len(values))
|
||||
list(ce.attach(values))
|
||||
self.assertEquals(self.output.getvalue(), '10 of 22\n20 of 22\n')
|
||||
ce.done()
|
||||
self.assertEquals(self.output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n')
|
||||
|
||||
with closing(StringIO()) as output:
|
||||
|
||||
# test without of parameter
|
||||
ce = CountEmitter(every=10, outfile=output, format="%(count)s records\n")
|
||||
list(ce.attach(values))
|
||||
self.assertEqual(output.getvalue(), '10 records\n20 records\n')
|
||||
ce.done()
|
||||
self.assertEqual(output.getvalue(), '10 records\n20 records\n22 records\n')
|
||||
|
||||
with closing(StringIO()) as output:
|
||||
|
||||
# test with of parameter
|
||||
ce = CountEmitter(every=10, outfile=output, of=len(values))
|
||||
list(ce.attach(values))
|
||||
self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n')
|
||||
ce.done()
|
||||
self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -57,7 +57,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
|
||||
def assert_filter_result(self, filter_obj, expected_data):
|
||||
result = filter_obj.attach(self._simple_data())
|
||||
self.assertEquals(list(result), expected_data)
|
||||
self.assertEqual(list(result), expected_data)
|
||||
|
||||
def test_reject_record(self):
|
||||
recipe = DummyRecipe()
|
||||
@ -68,31 +68,31 @@ class FilterTestCase(unittest.TestCase):
|
||||
f.reject_record('bad', 'this one was bad')
|
||||
|
||||
# ensure that the rejection propagated to the recipe
|
||||
self.assertEquals('bad', recipe.rejected_record)
|
||||
self.assertEquals('this one was bad', recipe.rejected_msg)
|
||||
self.assertEqual('bad', recipe.rejected_record)
|
||||
self.assertEqual('this one was bad', recipe.rejected_msg)
|
||||
|
||||
def test_simple_filter(self):
|
||||
df = Doubler()
|
||||
result = df.attach([1,2,3])
|
||||
|
||||
# ensure we got a generator that yields 2,4,6
|
||||
self.assertEquals(type(result), types.GeneratorType)
|
||||
self.assertEquals(list(result), [2,4,6])
|
||||
self.assertEqual(type(result), types.GeneratorType)
|
||||
self.assertEqual(list(result), [2,4,6])
|
||||
|
||||
def test_simple_filter_return_none(self):
|
||||
cf = OddRemover()
|
||||
result = cf.attach(range(10))
|
||||
|
||||
# ensure only even numbers remain
|
||||
self.assertEquals(list(result), [0,2,4,6,8])
|
||||
self.assertEqual(list(result), [0,2,4,6,8])
|
||||
|
||||
def test_simple_yield_filter(self):
|
||||
lf = ListFlattener()
|
||||
result = lf.attach([[1],[2,3],[4,5,6]])
|
||||
|
||||
# ensure we got a generator that yields 1,2,3,4,5,6
|
||||
self.assertEquals(type(result), types.GeneratorType)
|
||||
self.assertEquals(list(result), [1,2,3,4,5,6])
|
||||
self.assertEqual(type(result), types.GeneratorType)
|
||||
self.assertEqual(list(result), [1,2,3,4,5,6])
|
||||
|
||||
def test_simple_field_filter(self):
|
||||
ff = FieldDoubler(['a', 'c'])
|
||||
@ -108,7 +108,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
result = cf.attach(range(10))
|
||||
|
||||
# ensure only even numbers remain
|
||||
self.assertEquals(list(result), [0,2,4,6,8])
|
||||
self.assertEqual(list(result), [0,2,4,6,8])
|
||||
|
||||
### Tests for Subrecord
|
||||
|
||||
@ -124,7 +124,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
sf = SubrecordFilter('a', NonModifyingFieldDoubler('b'))
|
||||
result = sf.attach(data)
|
||||
|
||||
self.assertEquals(list(result), expected)
|
||||
self.assertEqual(list(result), expected)
|
||||
|
||||
def test_subrecord_filter_deep(self):
|
||||
data = [{'a': {'d':[{'b': 2}, {'b': 4}]}},
|
||||
@ -138,7 +138,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
sf = SubrecordFilter('a.d', NonModifyingFieldDoubler('b'))
|
||||
result = sf.attach(data)
|
||||
|
||||
self.assertEquals(list(result), expected)
|
||||
self.assertEqual(list(result), expected)
|
||||
|
||||
def test_subrecord_filter_nonlist(self):
|
||||
data = [
|
||||
@ -156,7 +156,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c'))
|
||||
result = sf.attach(data)
|
||||
|
||||
self.assertEquals(list(result), expected)
|
||||
self.assertEqual(list(result), expected)
|
||||
|
||||
def test_subrecord_filter_list_in_path(self):
|
||||
data = [
|
||||
@ -174,7 +174,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c'))
|
||||
result = sf.attach(data)
|
||||
|
||||
self.assertEquals(list(result), expected)
|
||||
self.assertEqual(list(result), expected)
|
||||
|
||||
def test_conditional_path(self):
|
||||
|
||||
@ -296,7 +296,7 @@ class FilterTestCase(unittest.TestCase):
|
||||
expected_data = [{'a': 77}, {'a':33}]
|
||||
result = u.attach(in_data)
|
||||
|
||||
self.assertEquals(list(result), expected_data)
|
||||
self.assertEqual(list(result), expected_data)
|
||||
|
||||
# TODO: unicode & string filter tests
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
from io import BytesIO, StringIO
|
||||
import unittest
|
||||
import cStringIO
|
||||
|
||||
from saucebrush.sources import CSVSource, FixedWidthFileSource
|
||||
|
||||
class SourceTestCase(unittest.TestCase):
|
||||
@ -9,14 +11,14 @@ class SourceTestCase(unittest.TestCase):
|
||||
1,2,3
|
||||
5,5,5
|
||||
1,10,100'''
|
||||
return cStringIO.StringIO(data)
|
||||
return StringIO(data)
|
||||
|
||||
def test_csv_source_basic(self):
|
||||
source = CSVSource(self._get_csv())
|
||||
expected_data = [{'a':'1', 'b':'2', 'c':'3'},
|
||||
{'a':'5', 'b':'5', 'c':'5'},
|
||||
{'a':'1', 'b':'10', 'c':'100'}]
|
||||
self.assertEquals(list(source), expected_data)
|
||||
self.assertEqual(list(source), expected_data)
|
||||
|
||||
def test_csv_source_fieldnames(self):
|
||||
source = CSVSource(self._get_csv(), ['x','y','z'])
|
||||
@ -24,23 +26,23 @@ class SourceTestCase(unittest.TestCase):
|
||||
{'x':'1', 'y':'2', 'z':'3'},
|
||||
{'x':'5', 'y':'5', 'z':'5'},
|
||||
{'x':'1', 'y':'10', 'z':'100'}]
|
||||
self.assertEquals(list(source), expected_data)
|
||||
self.assertEqual(list(source), expected_data)
|
||||
|
||||
def test_csv_source_skiprows(self):
|
||||
source = CSVSource(self._get_csv(), skiprows=1)
|
||||
expected_data = [{'a':'5', 'b':'5', 'c':'5'},
|
||||
{'a':'1', 'b':'10', 'c':'100'}]
|
||||
self.assertEquals(list(source), expected_data)
|
||||
self.assertEqual(list(source), expected_data)
|
||||
|
||||
def test_fixed_width_source(self):
|
||||
data = cStringIO.StringIO('JamesNovember 3 1986\nTim September151999')
|
||||
data = StringIO('JamesNovember 3 1986\nTim September151999')
|
||||
fields = (('name',5), ('month',9), ('day',2), ('year',4))
|
||||
source = FixedWidthFileSource(data, fields)
|
||||
expected_data = [{'name':'James', 'month':'November', 'day':'3',
|
||||
'year':'1986'},
|
||||
{'name':'Tim', 'month':'September', 'day':'15',
|
||||
'year':'1999'}]
|
||||
self.assertEquals(list(source), expected_data)
|
||||
self.assertEqual(list(source), expected_data)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user