Reviewed patch: Python 2/3 compatibility changes for saucebrush emitters
and their tests (unicode_literals, io.StringIO, assertEqual).

Review changes relative to the patch as originally recorded:
  * saucebrush/emitters.py: removed the leftover debugging
    print(header_row) from CSVEmitter.__init__; that hunk is now +107,8.
  * saucebrush/tests/emitters.py: the Python 2 probe around
    "import cStringIO" catches ImportError instead of a bare except.
  * saucebrush/tests/sources.py: dropped the unused BytesIO import.

Notes for whoever applies this:
  * The post-image blob ids on the "index" lines are those of the original
    patch and no longer match the edited content of the three files above.
  * Line breaks, indentation, blank context lines and whitespace-only
    removed lines were reconstructed from the hunk line counts; run
    "git apply --check" (optionally with --ignore-whitespace) first.

diff --git a/saucebrush/emitters.py b/saucebrush/emitters.py
index a2a1f5e..31a9464 100644
--- a/saucebrush/emitters.py
+++ b/saucebrush/emitters.py
@@ -2,7 +2,7 @@
     Saucebrush Emitters are filters that instead of modifying the record, output
     it in some manner.
 """
-
+from __future__ import unicode_literals
 from saucebrush.filters import Filter
 
 class Emitter(Filter):
@@ -50,12 +50,12 @@ class DebugEmitter(Emitter):
             self._outfile = outfile
 
     def emit_record(self, record):
-        self._outfile.write(str(record) + '\n')
+        self._outfile.write("{0}\n".format(record))
 
 
 class CountEmitter(Emitter):
     """ Emitter that writes the record count to a file-like object.
-    
+
         CountEmitter() by default writes to stdout.
         CountEmitter(outfile=open('text', 'w')) would print to a file name test.
         CountEmitter(every=1000000) would write the count every 1,000,000 records.
@@ -63,36 +63,36 @@ class CountEmitter(Emitter):
     """
 
     def __init__(self, every=1000, of=None, outfile=None, format=None):
-        
+
         super(CountEmitter, self).__init__()
-        
+
         if not outfile:
             import sys
             self._outfile = sys.stdout
         else:
             self._outfile = outfile
-        
+
         if format is None:
             if of is not None:
                 format = "%(count)s of %(of)s\n"
             else:
                 format = "%(count)s\n"
-        
+
         self._format = format
         self._every = every
         self._of = of
         self.count = 0
-        
-    def __str__(self):
+
+    def format(self):
         return self._format % {'count': self.count, 'of': self._of}
 
     def emit_record(self, record):
         self.count += 1
         if self.count % self._every == 0:
-            self._outfile.write(str(self))
-            
+            self._outfile.write(self.format())
+
     def done(self):
-        self._outfile.write(str(self))
+        self._outfile.write(self.format())
 
 
 class CSVEmitter(Emitter):
@@ -107,7 +107,8 @@ class CSVEmitter(Emitter):
         import csv
         self._dictwriter = csv.DictWriter(csvfile, fieldnames)
         # write header row
-        self._dictwriter.writerow(dict(zip(fieldnames, fieldnames)))
+        header_row = dict(zip(fieldnames, fieldnames))
+        self._dictwriter.writerow(header_row)
 
     def emit_record(self, record):
         self._dictwriter.writerow(record)
diff --git a/saucebrush/tests/emitters.py b/saucebrush/tests/emitters.py
index 3660f9f..ea4e282 100644
--- a/saucebrush/tests/emitters.py
+++ b/saucebrush/tests/emitters.py
@@ -1,47 +1,53 @@
+from __future__ import unicode_literals
+from contextlib import closing
+from io import BytesIO, StringIO
 import unittest
-from cStringIO import StringIO
+
 from saucebrush.emitters import DebugEmitter, CSVEmitter, CountEmitter
 
 class EmitterTestCase(unittest.TestCase):
 
-    def setUp(self):
-        self.output = StringIO()
-
     def test_debug_emitter(self):
-        de = DebugEmitter(self.output)
-        data = de.attach([1,2,3])
-        for _ in data:
-            pass
-        self.assertEquals(self.output.getvalue(), '1\n2\n3\n')
+        with closing(StringIO()) as output:
+            de = DebugEmitter(output)
+            list(de.attach([1,2,3]))
+            self.assertEqual(output.getvalue(), '1\n2\n3\n')
 
     def test_csv_emitter(self):
-        ce = CSVEmitter(self.output, ('x','y','z'))
-        data = ce.attach([{'x':1,'y':2,'z':3}, {'x':5, 'y':5, 'z':5}])
-        for _ in data:
-            pass
-        self.assertEquals(self.output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
-        
+
+        try:
+            import cStringIO    # if Python 2.x then use BytesIO
+            io = BytesIO()
+        except ImportError:
+            io = StringIO()     # if Python 3.x then use StringIO
+
+        with closing(io) as output:
+            ce = CSVEmitter(output, ('x','y','z'))
+            list(ce.attach([{'x':1, 'y':2, 'z':3}, {'x':5, 'y':5, 'z':5}]))
+            self.assertEqual(output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
+
     def test_count_emitter(self):
-        
+
         # values for test
         values = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22]
-        
-        # test without of parameter
-        ce = CountEmitter(every=10, outfile=self.output, format="%(count)s records\n")
-        list(ce.attach(values))
-        self.assertEquals(self.output.getvalue(), '10 records\n20 records\n')
-        ce.done()
-        self.assertEquals(self.output.getvalue(), '10 records\n20 records\n22 records\n')
-        
-        # reset output
-        self.output.truncate(0)
-        
-        # test with of parameter
-        ce = CountEmitter(every=10, outfile=self.output, of=len(values))
-        list(ce.attach(values))
-        self.assertEquals(self.output.getvalue(), '10 of 22\n20 of 22\n')
-        ce.done()
-        self.assertEquals(self.output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n')
+
+        with closing(StringIO()) as output:
+
+            # test without of parameter
+            ce = CountEmitter(every=10, outfile=output, format="%(count)s records\n")
+            list(ce.attach(values))
+            self.assertEqual(output.getvalue(), '10 records\n20 records\n')
+            ce.done()
+            self.assertEqual(output.getvalue(), '10 records\n20 records\n22 records\n')
+
+        with closing(StringIO()) as output:
+
+            # test with of parameter
+            ce = CountEmitter(every=10, outfile=output, of=len(values))
+            list(ce.attach(values))
+            self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n')
+            ce.done()
+            self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n')
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/saucebrush/tests/filters.py b/saucebrush/tests/filters.py
index c7e4dc7..04ce0c4 100644
--- a/saucebrush/tests/filters.py
+++ b/saucebrush/tests/filters.py
@@ -57,7 +57,7 @@ class FilterTestCase(unittest.TestCase):
 
     def assert_filter_result(self, filter_obj, expected_data):
         result = filter_obj.attach(self._simple_data())
-        self.assertEquals(list(result), expected_data)
+        self.assertEqual(list(result), expected_data)
 
     def test_reject_record(self):
         recipe = DummyRecipe()
@@ -68,31 +68,31 @@ class FilterTestCase(unittest.TestCase):
         f.reject_record('bad', 'this one was bad')
 
         # ensure that the rejection propagated to the recipe
-        self.assertEquals('bad', recipe.rejected_record)
-        self.assertEquals('this one was bad', recipe.rejected_msg)
+        self.assertEqual('bad', recipe.rejected_record)
+        self.assertEqual('this one was bad', recipe.rejected_msg)
 
     def test_simple_filter(self):
         df = Doubler()
         result = df.attach([1,2,3])
 
         # ensure we got a generator that yields 2,4,6
-        self.assertEquals(type(result), types.GeneratorType)
-        self.assertEquals(list(result), [2,4,6])
+        self.assertEqual(type(result), types.GeneratorType)
+        self.assertEqual(list(result), [2,4,6])
 
     def test_simple_filter_return_none(self):
         cf = OddRemover()
         result = cf.attach(range(10))
 
         # ensure only even numbers remain
-        self.assertEquals(list(result), [0,2,4,6,8])
+        self.assertEqual(list(result), [0,2,4,6,8])
 
     def test_simple_yield_filter(self):
         lf = ListFlattener()
         result = lf.attach([[1],[2,3],[4,5,6]])
 
         # ensure we got a generator that yields 1,2,3,4,5,6
-        self.assertEquals(type(result), types.GeneratorType)
-        self.assertEquals(list(result), [1,2,3,4,5,6])
+        self.assertEqual(type(result), types.GeneratorType)
+        self.assertEqual(list(result), [1,2,3,4,5,6])
 
     def test_simple_field_filter(self):
         ff = FieldDoubler(['a', 'c'])
@@ -108,7 +108,7 @@ class FilterTestCase(unittest.TestCase):
         result = cf.attach(range(10))
 
         # ensure only even numbers remain
-        self.assertEquals(list(result), [0,2,4,6,8])
+        self.assertEqual(list(result), [0,2,4,6,8])
 
     ### Tests for Subrecord
 
@@ -124,7 +124,7 @@ class FilterTestCase(unittest.TestCase):
         sf = SubrecordFilter('a', NonModifyingFieldDoubler('b'))
         result = sf.attach(data)
 
-        self.assertEquals(list(result), expected)
+        self.assertEqual(list(result), expected)
 
     def test_subrecord_filter_deep(self):
         data = [{'a': {'d':[{'b': 2}, {'b': 4}]}},
@@ -138,7 +138,7 @@ class FilterTestCase(unittest.TestCase):
         sf = SubrecordFilter('a.d', NonModifyingFieldDoubler('b'))
         result = sf.attach(data)
 
-        self.assertEquals(list(result), expected)
+        self.assertEqual(list(result), expected)
 
     def test_subrecord_filter_nonlist(self):
         data = [
@@ -156,7 +156,7 @@ class FilterTestCase(unittest.TestCase):
         sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c'))
         result = sf.attach(data)
 
-        self.assertEquals(list(result), expected)
+        self.assertEqual(list(result), expected)
 
     def test_subrecord_filter_list_in_path(self):
         data = [
@@ -174,7 +174,7 @@ class FilterTestCase(unittest.TestCase):
         sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c'))
         result = sf.attach(data)
 
-        self.assertEquals(list(result), expected)
+        self.assertEqual(list(result), expected)
 
     def test_conditional_path(self):
 
@@ -296,7 +296,7 @@ class FilterTestCase(unittest.TestCase):
         expected_data = [{'a': 77}, {'a':33}]
         result = u.attach(in_data)
 
-        self.assertEquals(list(result), expected_data)
+        self.assertEqual(list(result), expected_data)
 
     # TODO: unicode & string filter tests
 
diff --git a/saucebrush/tests/sources.py b/saucebrush/tests/sources.py
index f2f670d..82c7086 100644
--- a/saucebrush/tests/sources.py
+++ b/saucebrush/tests/sources.py
@@ -1,5 +1,7 @@
+from __future__ import unicode_literals
+from io import StringIO
 import unittest
-import cStringIO
+
 from saucebrush.sources import CSVSource, FixedWidthFileSource
 
 class SourceTestCase(unittest.TestCase):
@@ -9,14 +11,14 @@ class SourceTestCase(unittest.TestCase):
 1,2,3
 5,5,5
 1,10,100'''
-        return cStringIO.StringIO(data)
+        return StringIO(data)
 
     def test_csv_source_basic(self):
         source = CSVSource(self._get_csv())
         expected_data = [{'a':'1', 'b':'2', 'c':'3'},
                          {'a':'5', 'b':'5', 'c':'5'},
                          {'a':'1', 'b':'10', 'c':'100'}]
-        self.assertEquals(list(source), expected_data)
+        self.assertEqual(list(source), expected_data)
 
     def test_csv_source_fieldnames(self):
         source = CSVSource(self._get_csv(), ['x','y','z'])
@@ -24,23 +26,23 @@ class SourceTestCase(unittest.TestCase):
                          {'x':'1', 'y':'2', 'z':'3'},
                          {'x':'5', 'y':'5', 'z':'5'},
                          {'x':'1', 'y':'10', 'z':'100'}]
-        self.assertEquals(list(source), expected_data)
+        self.assertEqual(list(source), expected_data)
 
     def test_csv_source_skiprows(self):
         source = CSVSource(self._get_csv(), skiprows=1)
         expected_data = [{'a':'5', 'b':'5', 'c':'5'},
                          {'a':'1', 'b':'10', 'c':'100'}]
-        self.assertEquals(list(source), expected_data)
+        self.assertEqual(list(source), expected_data)
 
     def test_fixed_width_source(self):
-        data = cStringIO.StringIO('JamesNovember 3 1986\nTim  September151999')
+        data = StringIO('JamesNovember 3 1986\nTim  September151999')
         fields = (('name',5), ('month',9), ('day',2), ('year',4))
         source = FixedWidthFileSource(data, fields)
         expected_data = [{'name':'James', 'month':'November', 'day':'3',
                           'year':'1986'},
                          {'name':'Tim', 'month':'September', 'day':'15',
                           'year':'1999'}]
-        self.assertEquals(list(source), expected_data)
+        self.assertEqual(list(source), expected_data)
 
 if __name__ == '__main__':
     unittest.main()