whole bunch of changes to support new unicode-based strings in Py3
This commit is contained in:
		
							parent
							
								
									46c8ab3ae3
								
							
						
					
					
						commit
						d5b56b931b
					
				
					 4 changed files with 77 additions and 67 deletions
				
			
		|  | @ -2,7 +2,7 @@ | |||
|     Saucebrush Emitters are filters that instead of modifying the record, output | ||||
|     it in some manner. | ||||
| """ | ||||
| 
 | ||||
| from __future__ import unicode_literals | ||||
| from saucebrush.filters import Filter | ||||
| 
 | ||||
| class Emitter(Filter): | ||||
|  | @ -50,12 +50,12 @@ class DebugEmitter(Emitter): | |||
|             self._outfile = outfile | ||||
| 
 | ||||
|     def emit_record(self, record): | ||||
|         self._outfile.write(str(record) + '\n') | ||||
|         self._outfile.write("{0}\n".format(record)) | ||||
| 
 | ||||
| 
 | ||||
| class CountEmitter(Emitter): | ||||
|     """ Emitter that writes the record count to a file-like object. | ||||
|      | ||||
| 
 | ||||
|         CountEmitter() by default writes to stdout. | ||||
|         CountEmitter(outfile=open('text', 'w')) would print to a file name test. | ||||
|         CountEmitter(every=1000000) would write the count every 1,000,000 records. | ||||
|  | @ -63,36 +63,36 @@ class CountEmitter(Emitter): | |||
|     """ | ||||
| 
 | ||||
|     def __init__(self, every=1000, of=None, outfile=None, format=None): | ||||
|          | ||||
| 
 | ||||
|         super(CountEmitter, self).__init__() | ||||
|          | ||||
| 
 | ||||
|         if not outfile: | ||||
|             import sys | ||||
|             self._outfile = sys.stdout | ||||
|         else: | ||||
|             self._outfile = outfile | ||||
|              | ||||
| 
 | ||||
|         if format is None: | ||||
|             if of is not None: | ||||
|                 format = "%(count)s of %(of)s\n" | ||||
|             else: | ||||
|                 format = "%(count)s\n" | ||||
|                  | ||||
| 
 | ||||
|         self._format = format | ||||
|         self._every = every | ||||
|         self._of = of | ||||
|         self.count = 0 | ||||
|      | ||||
|     def __str__(self): | ||||
| 
 | ||||
|     def format(self): | ||||
|         return self._format % {'count': self.count, 'of': self._of} | ||||
| 
 | ||||
|     def emit_record(self, record): | ||||
|         self.count += 1 | ||||
|         if self.count % self._every == 0: | ||||
|             self._outfile.write(str(self)) | ||||
|      | ||||
|             self._outfile.write(self.format()) | ||||
| 
 | ||||
|     def done(self): | ||||
|         self._outfile.write(str(self)) | ||||
|         self._outfile.write(self.format()) | ||||
| 
 | ||||
| 
 | ||||
| class CSVEmitter(Emitter): | ||||
|  | @ -107,7 +107,9 @@ class CSVEmitter(Emitter): | |||
|         import csv | ||||
|         self._dictwriter = csv.DictWriter(csvfile, fieldnames) | ||||
|         # write header row | ||||
|         self._dictwriter.writerow(dict(zip(fieldnames, fieldnames))) | ||||
|         header_row = dict(zip(fieldnames, fieldnames)) | ||||
|         print(header_row) | ||||
|         self._dictwriter.writerow(header_row) | ||||
| 
 | ||||
|     def emit_record(self, record): | ||||
|         self._dictwriter.writerow(record) | ||||
|  |  | |||
|  | @ -1,47 +1,53 @@ | |||
| from __future__ import unicode_literals | ||||
| from contextlib import closing | ||||
| from io import BytesIO, StringIO | ||||
| import unittest | ||||
| from cStringIO import StringIO | ||||
| 
 | ||||
| from saucebrush.emitters import DebugEmitter, CSVEmitter, CountEmitter | ||||
| 
 | ||||
| class EmitterTestCase(unittest.TestCase): | ||||
| 
 | ||||
|     def setUp(self): | ||||
|         self.output = StringIO() | ||||
| 
 | ||||
|     def test_debug_emitter(self): | ||||
|         de = DebugEmitter(self.output) | ||||
|         data = de.attach([1,2,3]) | ||||
|         for _ in data: | ||||
|             pass | ||||
|         self.assertEquals(self.output.getvalue(), '1\n2\n3\n') | ||||
|         with closing(StringIO()) as output: | ||||
|             de = DebugEmitter(output) | ||||
|             list(de.attach([1,2,3])) | ||||
|             self.assertEqual(output.getvalue(), '1\n2\n3\n') | ||||
| 
 | ||||
|     def test_csv_emitter(self): | ||||
|         ce = CSVEmitter(self.output, ('x','y','z')) | ||||
|         data = ce.attach([{'x':1,'y':2,'z':3}, {'x':5, 'y':5, 'z':5}]) | ||||
|         for _ in data: | ||||
|             pass | ||||
|         self.assertEquals(self.output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n') | ||||
|      | ||||
| 
 | ||||
|         try: | ||||
|             import cStringIO    # if Python 2.x then use BytesIO | ||||
|             io = BytesIO() | ||||
|         except: | ||||
|             io = StringIO()     # if Python 3.x then use StringIO | ||||
| 
 | ||||
|         with closing(io) as output: | ||||
|             ce = CSVEmitter(output, ('x','y','z')) | ||||
|             list(ce.attach([{'x':1, 'y':2, 'z':3}, {'x':5, 'y':5, 'z':5}])) | ||||
|             self.assertEqual(output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n') | ||||
| 
 | ||||
|     def test_count_emitter(self): | ||||
|          | ||||
| 
 | ||||
|         # values for test | ||||
|         values = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22] | ||||
|          | ||||
|         # test without of parameter | ||||
|         ce = CountEmitter(every=10, outfile=self.output, format="%(count)s records\n") | ||||
|         list(ce.attach(values)) | ||||
|         self.assertEquals(self.output.getvalue(), '10 records\n20 records\n') | ||||
|         ce.done() | ||||
|         self.assertEquals(self.output.getvalue(), '10 records\n20 records\n22 records\n') | ||||
|          | ||||
|         # reset output | ||||
|         self.output.truncate(0) | ||||
|          | ||||
|         # test with of parameter | ||||
|         ce = CountEmitter(every=10, outfile=self.output, of=len(values)) | ||||
|         list(ce.attach(values)) | ||||
|         self.assertEquals(self.output.getvalue(), '10 of 22\n20 of 22\n') | ||||
|         ce.done() | ||||
|         self.assertEquals(self.output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n') | ||||
| 
 | ||||
|         with closing(StringIO()) as output: | ||||
| 
 | ||||
|             # test without of parameter | ||||
|             ce = CountEmitter(every=10, outfile=output, format="%(count)s records\n") | ||||
|             list(ce.attach(values)) | ||||
|             self.assertEqual(output.getvalue(), '10 records\n20 records\n') | ||||
|             ce.done() | ||||
|             self.assertEqual(output.getvalue(), '10 records\n20 records\n22 records\n') | ||||
| 
 | ||||
|         with closing(StringIO()) as output: | ||||
| 
 | ||||
|             # test with of parameter | ||||
|             ce = CountEmitter(every=10, outfile=output, of=len(values)) | ||||
|             list(ce.attach(values)) | ||||
|             self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n') | ||||
|             ce.done() | ||||
|             self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n') | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|  |  | |||
|  | @ -57,7 +57,7 @@ class FilterTestCase(unittest.TestCase): | |||
| 
 | ||||
|     def assert_filter_result(self, filter_obj, expected_data): | ||||
|         result = filter_obj.attach(self._simple_data()) | ||||
|         self.assertEquals(list(result), expected_data) | ||||
|         self.assertEqual(list(result), expected_data) | ||||
| 
 | ||||
|     def test_reject_record(self): | ||||
|         recipe = DummyRecipe() | ||||
|  | @ -68,31 +68,31 @@ class FilterTestCase(unittest.TestCase): | |||
|         f.reject_record('bad', 'this one was bad') | ||||
| 
 | ||||
|         # ensure that the rejection propagated to the recipe | ||||
|         self.assertEquals('bad', recipe.rejected_record) | ||||
|         self.assertEquals('this one was bad', recipe.rejected_msg) | ||||
|         self.assertEqual('bad', recipe.rejected_record) | ||||
|         self.assertEqual('this one was bad', recipe.rejected_msg) | ||||
| 
 | ||||
|     def test_simple_filter(self): | ||||
|         df = Doubler() | ||||
|         result = df.attach([1,2,3]) | ||||
| 
 | ||||
|         # ensure we got a generator that yields 2,4,6 | ||||
|         self.assertEquals(type(result), types.GeneratorType) | ||||
|         self.assertEquals(list(result), [2,4,6]) | ||||
|         self.assertEqual(type(result), types.GeneratorType) | ||||
|         self.assertEqual(list(result), [2,4,6]) | ||||
| 
 | ||||
|     def test_simple_filter_return_none(self): | ||||
|         cf = OddRemover() | ||||
|         result = cf.attach(range(10)) | ||||
| 
 | ||||
|         # ensure only even numbers remain | ||||
|         self.assertEquals(list(result), [0,2,4,6,8]) | ||||
|         self.assertEqual(list(result), [0,2,4,6,8]) | ||||
| 
 | ||||
|     def test_simple_yield_filter(self): | ||||
|         lf = ListFlattener() | ||||
|         result = lf.attach([[1],[2,3],[4,5,6]]) | ||||
| 
 | ||||
|         # ensure we got a generator that yields 1,2,3,4,5,6 | ||||
|         self.assertEquals(type(result), types.GeneratorType) | ||||
|         self.assertEquals(list(result), [1,2,3,4,5,6]) | ||||
|         self.assertEqual(type(result), types.GeneratorType) | ||||
|         self.assertEqual(list(result), [1,2,3,4,5,6]) | ||||
| 
 | ||||
|     def test_simple_field_filter(self): | ||||
|         ff = FieldDoubler(['a', 'c']) | ||||
|  | @ -108,7 +108,7 @@ class FilterTestCase(unittest.TestCase): | |||
|         result = cf.attach(range(10)) | ||||
| 
 | ||||
|         # ensure only even numbers remain | ||||
|         self.assertEquals(list(result), [0,2,4,6,8]) | ||||
|         self.assertEqual(list(result), [0,2,4,6,8]) | ||||
| 
 | ||||
|     ### Tests for Subrecord | ||||
| 
 | ||||
|  | @ -124,7 +124,7 @@ class FilterTestCase(unittest.TestCase): | |||
|         sf = SubrecordFilter('a', NonModifyingFieldDoubler('b')) | ||||
|         result = sf.attach(data) | ||||
| 
 | ||||
|         self.assertEquals(list(result), expected) | ||||
|         self.assertEqual(list(result), expected) | ||||
| 
 | ||||
|     def test_subrecord_filter_deep(self): | ||||
|         data = [{'a': {'d':[{'b': 2}, {'b': 4}]}}, | ||||
|  | @ -138,7 +138,7 @@ class FilterTestCase(unittest.TestCase): | |||
|         sf = SubrecordFilter('a.d', NonModifyingFieldDoubler('b')) | ||||
|         result = sf.attach(data) | ||||
| 
 | ||||
|         self.assertEquals(list(result), expected) | ||||
|         self.assertEqual(list(result), expected) | ||||
| 
 | ||||
|     def test_subrecord_filter_nonlist(self): | ||||
|         data = [ | ||||
|  | @ -156,7 +156,7 @@ class FilterTestCase(unittest.TestCase): | |||
|         sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c')) | ||||
|         result = sf.attach(data) | ||||
| 
 | ||||
|         self.assertEquals(list(result), expected) | ||||
|         self.assertEqual(list(result), expected) | ||||
| 
 | ||||
|     def test_subrecord_filter_list_in_path(self): | ||||
|         data = [ | ||||
|  | @ -174,7 +174,7 @@ class FilterTestCase(unittest.TestCase): | |||
|         sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c')) | ||||
|         result = sf.attach(data) | ||||
| 
 | ||||
|         self.assertEquals(list(result), expected) | ||||
|         self.assertEqual(list(result), expected) | ||||
| 
 | ||||
|     def test_conditional_path(self): | ||||
| 
 | ||||
|  | @ -296,7 +296,7 @@ class FilterTestCase(unittest.TestCase): | |||
|         expected_data = [{'a': 77}, {'a':33}] | ||||
|         result = u.attach(in_data) | ||||
| 
 | ||||
|         self.assertEquals(list(result), expected_data) | ||||
|         self.assertEqual(list(result), expected_data) | ||||
| 
 | ||||
|     # TODO: unicode & string filter tests | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,5 +1,7 @@ | |||
| from __future__ import unicode_literals | ||||
| from io import BytesIO, StringIO | ||||
| import unittest | ||||
| import cStringIO | ||||
| 
 | ||||
| from saucebrush.sources import CSVSource, FixedWidthFileSource | ||||
| 
 | ||||
| class SourceTestCase(unittest.TestCase): | ||||
|  | @ -9,14 +11,14 @@ class SourceTestCase(unittest.TestCase): | |||
| 1,2,3 | ||||
| 5,5,5 | ||||
| 1,10,100''' | ||||
|         return cStringIO.StringIO(data) | ||||
|         return StringIO(data) | ||||
| 
 | ||||
|     def test_csv_source_basic(self): | ||||
|         source = CSVSource(self._get_csv()) | ||||
|         expected_data = [{'a':'1', 'b':'2', 'c':'3'}, | ||||
|                          {'a':'5', 'b':'5', 'c':'5'}, | ||||
|                          {'a':'1', 'b':'10', 'c':'100'}] | ||||
|         self.assertEquals(list(source), expected_data) | ||||
|         self.assertEqual(list(source), expected_data) | ||||
| 
 | ||||
|     def test_csv_source_fieldnames(self): | ||||
|         source = CSVSource(self._get_csv(), ['x','y','z']) | ||||
|  | @ -24,23 +26,23 @@ class SourceTestCase(unittest.TestCase): | |||
|                          {'x':'1', 'y':'2', 'z':'3'}, | ||||
|                          {'x':'5', 'y':'5', 'z':'5'}, | ||||
|                          {'x':'1', 'y':'10', 'z':'100'}] | ||||
|         self.assertEquals(list(source), expected_data) | ||||
|         self.assertEqual(list(source), expected_data) | ||||
| 
 | ||||
|     def test_csv_source_skiprows(self): | ||||
|         source = CSVSource(self._get_csv(), skiprows=1) | ||||
|         expected_data = [{'a':'5', 'b':'5', 'c':'5'}, | ||||
|                          {'a':'1', 'b':'10', 'c':'100'}] | ||||
|         self.assertEquals(list(source), expected_data) | ||||
|         self.assertEqual(list(source), expected_data) | ||||
| 
 | ||||
|     def test_fixed_width_source(self): | ||||
|         data = cStringIO.StringIO('JamesNovember 3 1986\nTim  September151999') | ||||
|         data = StringIO('JamesNovember 3 1986\nTim  September151999') | ||||
|         fields = (('name',5), ('month',9), ('day',2), ('year',4)) | ||||
|         source = FixedWidthFileSource(data, fields) | ||||
|         expected_data = [{'name':'James', 'month':'November', 'day':'3', | ||||
|                           'year':'1986'}, | ||||
|                          {'name':'Tim', 'month':'September', 'day':'15', | ||||
|                           'year':'1999'}] | ||||
|         self.assertEquals(list(source), expected_data) | ||||
|         self.assertEqual(list(source), expected_data) | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jeremy Carbaugh
						Jeremy Carbaugh