saucebrush/tests/test_filters.py

356 lines
10 KiB
Python
Raw Normal View History

import unittest
import types
2022-11-11 03:26:09 +00:00
from saucebrush.filters import (
Filter,
YieldFilter,
FieldFilter,
SubrecordFilter,
ConditionalPathFilter,
ConditionalFilter,
FieldModifier,
FieldKeeper,
FieldRemover,
FieldMerger,
FieldAdder,
FieldCopier,
FieldRenamer,
Unique,
)
class DummyRecipe(object):
    """Minimal recipe stand-in that remembers the last rejected record.

    Both attributes start out as ``None`` at class level and are shadowed on
    the instance the first time ``reject_record`` is called.
    """

    rejected_record = rejected_msg = None

    def reject_record(self, record, msg):
        """Store the rejected ``record`` and its ``msg`` on this instance."""
        self.rejected_record, self.rejected_msg = record, msg
2022-11-11 03:26:09 +00:00
class Doubler(Filter):
    """Filter whose output for each record is that record multiplied by two."""

    def process_record(self, record):
        """Return ``record * 2``."""
        doubled = record * 2
        return doubled
2010-02-21 17:01:04 +00:00
class OddRemover(Filter):
    """Filter that hands back even records and answers ``None`` for odd ones."""

    def process_record(self, record):
        """Return ``record`` when it is even, otherwise ``None``."""
        # The None for odd records is spelled out explicitly on purpose.
        return record if record % 2 == 0 else None
2010-02-21 17:01:04 +00:00
class ListFlattener(YieldFilter):
    """YieldFilter that emits every element of an iterable record in turn."""

    def process_record(self, record):
        """Yield the items contained in ``record`` one at a time."""
        yield from record
2022-11-11 03:26:09 +00:00
class FieldDoubler(FieldFilter):
    """FieldFilter whose per-field transformation is multiplication by two."""

    def process_field(self, item):
        """Return ``item * 2``."""
        doubled = item * 2
        return doubled
2010-06-28 19:47:25 +00:00
class NonModifyingFieldDoubler(Filter):
    """Filter that doubles one field on a shallow copy of each record."""

    def __init__(self, key):
        """Remember ``key``, the name of the field to double."""
        self.key = key

    def process_record(self, record):
        """Return a ``dict`` copy of ``record`` with ``self.key`` doubled."""
        # Build a new dict so the key in the caller's mapping is not rebound.
        copied = dict(record)
        copied[self.key] *= 2
        return copied
2022-11-11 03:26:09 +00:00
2010-02-21 17:01:04 +00:00
class ConditionalOddRemover(ConditionalFilter):
    """ConditionalFilter whose test passes for even records only."""

    def test_record(self, record):
        """Return ``True`` when ``record`` is even, ``False`` otherwise."""
        is_even = record % 2 == 0
        return is_even
2022-11-11 03:26:09 +00:00
class FilterTestCase(unittest.TestCase):
def _simple_data(self):
2022-11-11 03:26:09 +00:00
return [
{"a": 1, "b": 2, "c": 3},
{"a": 5, "b": 5, "c": 5},
{"a": 1, "b": 10, "c": 100},
]
def assert_filter_result(self, filter_obj, expected_data):
result = filter_obj.attach(self._simple_data())
self.assertEqual(list(result), expected_data)
def test_reject_record(self):
recipe = DummyRecipe()
f = Doubler()
2022-11-11 03:26:09 +00:00
result = f.attach([1, 2, 3], recipe=recipe)
# next has to be called for attach to take effect
next(result)
2022-11-11 03:26:09 +00:00
f.reject_record("bad", "this one was bad")
# ensure that the rejection propagated to the recipe
2022-11-11 03:26:09 +00:00
self.assertEqual("bad", recipe.rejected_record)
self.assertEqual("this one was bad", recipe.rejected_msg)
def test_simple_filter(self):
df = Doubler()
2022-11-11 03:26:09 +00:00
result = df.attach([1, 2, 3])
# ensure we got a generator that yields 2,4,6
self.assertEqual(type(result), types.GeneratorType)
2022-11-11 03:26:09 +00:00
self.assertEqual(list(result), [2, 4, 6])
2010-02-21 17:01:04 +00:00
def test_simple_filter_return_none(self):
cf = OddRemover()
result = cf.attach(range(10))
# ensure only even numbers remain
2022-11-11 03:26:09 +00:00
self.assertEqual(list(result), [0, 2, 4, 6, 8])
2010-02-21 17:01:04 +00:00
def test_simple_yield_filter(self):
lf = ListFlattener()
2022-11-11 03:26:09 +00:00
result = lf.attach([[1], [2, 3], [4, 5, 6]])
# ensure we got a generator that yields 1,2,3,4,5,6
self.assertEqual(type(result), types.GeneratorType)
2022-11-11 03:26:09 +00:00
self.assertEqual(list(result), [1, 2, 3, 4, 5, 6])
def test_simple_field_filter(self):
2022-11-11 03:26:09 +00:00
ff = FieldDoubler(["a", "c"])
# check against expected data
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 2, "b": 2, "c": 6},
{"a": 10, "b": 5, "c": 10},
{"a": 2, "b": 10, "c": 200},
]
self.assert_filter_result(ff, expected_data)
def test_conditional_filter(self):
2010-02-21 17:01:04 +00:00
cf = ConditionalOddRemover()
result = cf.attach(range(10))
# ensure only even numbers remain
2022-11-11 03:26:09 +00:00
self.assertEqual(list(result), [0, 2, 4, 6, 8])
2022-11-11 03:31:18 +00:00
# Tests for Subrecord
2010-06-28 14:43:19 +00:00
def test_subrecord_filter_list(self):
2022-11-11 03:26:09 +00:00
data = [
{"a": [{"b": 2}, {"b": 4}]},
{"a": [{"b": 5}]},
{"a": [{"b": 8}, {"b": 2}, {"b": 1}]},
]
2010-06-28 14:43:19 +00:00
2022-11-11 03:26:09 +00:00
expected = [
{"a": [{"b": 4}, {"b": 8}]},
{"a": [{"b": 10}]},
{"a": [{"b": 16}, {"b": 4}, {"b": 2}]},
]
2010-06-28 14:43:19 +00:00
2022-11-11 03:26:09 +00:00
sf = SubrecordFilter("a", NonModifyingFieldDoubler("b"))
2010-06-28 14:43:19 +00:00
result = sf.attach(data)
self.assertEqual(list(result), expected)
2010-06-28 14:43:19 +00:00
def test_subrecord_filter_deep(self):
2022-11-11 03:26:09 +00:00
data = [
{"a": {"d": [{"b": 2}, {"b": 4}]}},
{"a": {"d": [{"b": 5}]}},
{"a": {"d": [{"b": 8}, {"b": 2}, {"b": 1}]}},
]
2022-11-11 03:26:09 +00:00
expected = [
{"a": {"d": [{"b": 4}, {"b": 8}]}},
{"a": {"d": [{"b": 10}]}},
{"a": {"d": [{"b": 16}, {"b": 4}, {"b": 2}]}},
]
2022-11-11 03:26:09 +00:00
sf = SubrecordFilter("a.d", NonModifyingFieldDoubler("b"))
result = sf.attach(data)
self.assertEqual(list(result), expected)
def test_subrecord_filter_nonlist(self):
data = [
2022-11-11 03:26:09 +00:00
{"a": {"b": {"c": 1}}},
{"a": {"b": {"c": 2}}},
{"a": {"b": {"c": 3}}},
]
expected = [
2022-11-11 03:26:09 +00:00
{"a": {"b": {"c": 2}}},
{"a": {"b": {"c": 4}}},
{"a": {"b": {"c": 6}}},
]
2022-11-11 03:26:09 +00:00
sf = SubrecordFilter("a.b", NonModifyingFieldDoubler("c"))
result = sf.attach(data)
self.assertEqual(list(result), expected)
def test_subrecord_filter_list_in_path(self):
data = [
2022-11-11 03:26:09 +00:00
{"a": [{"b": {"c": 5}}, {"b": {"c": 6}}]},
{"a": [{"b": {"c": 1}}, {"b": {"c": 2}}, {"b": {"c": 3}}]},
{"a": [{"b": {"c": 2}}]},
]
expected = [
2022-11-11 03:26:09 +00:00
{"a": [{"b": {"c": 10}}, {"b": {"c": 12}}]},
{"a": [{"b": {"c": 2}}, {"b": {"c": 4}}, {"b": {"c": 6}}]},
{"a": [{"b": {"c": 4}}]},
]
2022-11-11 03:26:09 +00:00
sf = SubrecordFilter("a.b", NonModifyingFieldDoubler("c"))
result = sf.attach(data)
self.assertEqual(list(result), expected)
2010-06-29 01:03:30 +00:00
def test_conditional_path(self):
2022-11-11 03:31:18 +00:00
predicate = lambda r: r["a"] == 1 # noqa
2010-06-29 01:03:30 +00:00
# double b if a == 1, otherwise double c
2022-11-11 03:26:09 +00:00
cpf = ConditionalPathFilter(predicate, FieldDoubler("b"), FieldDoubler("c"))
expected_data = [
{"a": 1, "b": 4, "c": 3},
{"a": 5, "b": 5, "c": 10},
{"a": 1, "b": 20, "c": 100},
]
2010-06-29 01:03:30 +00:00
self.assert_filter_result(cpf, expected_data)
2022-11-11 03:31:18 +00:00
# Tests for Generic Filters
2010-06-28 14:43:19 +00:00
def test_field_modifier(self):
# another version of FieldDoubler
2022-11-11 03:26:09 +00:00
fm = FieldModifier(["a", "c"], lambda x: x * 2)
# check against expected data
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 2, "b": 2, "c": 6},
{"a": 10, "b": 5, "c": 10},
{"a": 2, "b": 10, "c": 200},
]
self.assert_filter_result(fm, expected_data)
2011-09-15 15:09:04 +00:00
def test_field_keeper(self):
2022-11-11 03:26:09 +00:00
fk = FieldKeeper(["c"])
2011-09-15 15:09:04 +00:00
# check against expected results
2022-11-11 03:26:09 +00:00
expected_data = [{"c": 3}, {"c": 5}, {"c": 100}]
2011-09-15 15:09:04 +00:00
self.assert_filter_result(fk, expected_data)
def test_field_remover(self):
2022-11-11 03:26:09 +00:00
fr = FieldRemover(["a", "b"])
# check against expected results
2022-11-11 03:26:09 +00:00
expected_data = [{"c": 3}, {"c": 5}, {"c": 100}]
self.assert_filter_result(fr, expected_data)
def test_field_merger(self):
2022-11-11 03:26:09 +00:00
fm = FieldMerger({"sum": ("a", "b", "c")}, lambda x, y, z: x + y + z)
# check against expected results
2022-11-11 03:26:09 +00:00
expected_data = [{"sum": 6}, {"sum": 15}, {"sum": 111}]
self.assert_filter_result(fm, expected_data)
def test_field_merger_keep_fields(self):
2022-11-11 03:26:09 +00:00
fm = FieldMerger(
{"sum": ("a", "b", "c")}, lambda x, y, z: x + y + z, keep_fields=True
)
# check against expected results
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 2, "c": 3, "sum": 6},
{"a": 5, "b": 5, "c": 5, "sum": 15},
{"a": 1, "b": 10, "c": 100, "sum": 111},
]
self.assert_filter_result(fm, expected_data)
def test_field_adder_scalar(self):
2022-11-11 03:26:09 +00:00
fa = FieldAdder("x", 7)
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 2, "c": 3, "x": 7},
{"a": 5, "b": 5, "c": 5, "x": 7},
{"a": 1, "b": 10, "c": 100, "x": 7},
]
self.assert_filter_result(fa, expected_data)
def test_field_adder_callable(self):
2022-11-11 03:26:09 +00:00
fa = FieldAdder("x", lambda: 7)
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 2, "c": 3, "x": 7},
{"a": 5, "b": 5, "c": 5, "x": 7},
{"a": 1, "b": 10, "c": 100, "x": 7},
]
self.assert_filter_result(fa, expected_data)
def test_field_adder_iterable(self):
2022-11-11 03:26:09 +00:00
fa = FieldAdder("x", [1, 2, 3])
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 2, "c": 3, "x": 1},
{"a": 5, "b": 5, "c": 5, "x": 2},
{"a": 1, "b": 10, "c": 100, "x": 3},
]
self.assert_filter_result(fa, expected_data)
def test_field_adder_replace(self):
2022-11-11 03:26:09 +00:00
fa = FieldAdder("b", lambda: 7)
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 7, "c": 3},
{"a": 5, "b": 7, "c": 5},
{"a": 1, "b": 7, "c": 100},
]
self.assert_filter_result(fa, expected_data)
def test_field_adder_no_replace(self):
2022-11-11 03:26:09 +00:00
fa = FieldAdder("b", lambda: 7, replace=False)
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 2, "c": 3},
{"a": 5, "b": 5, "c": 5},
{"a": 1, "b": 10, "c": 100},
]
self.assert_filter_result(fa, expected_data)
def test_field_copier(self):
2022-11-11 03:26:09 +00:00
fc = FieldCopier({"a2": "a", "b2": "b"})
2022-11-11 03:26:09 +00:00
expected_data = [
{"a": 1, "b": 2, "c": 3, "a2": 1, "b2": 2},
{"a": 5, "b": 5, "c": 5, "a2": 5, "b2": 5},
{"a": 1, "b": 10, "c": 100, "a2": 1, "b2": 10},
]
self.assert_filter_result(fc, expected_data)
def test_field_renamer(self):
2022-11-11 03:26:09 +00:00
fr = FieldRenamer({"x": "a", "y": "b"})
2022-11-11 03:26:09 +00:00
expected_data = [
{"x": 1, "y": 2, "c": 3},
{"x": 5, "y": 5, "c": 5},
{"x": 1, "y": 10, "c": 100},
]
self.assert_filter_result(fr, expected_data)
2010-02-21 17:01:04 +00:00
# TODO: splitter & flattner tests?
def test_unique_filter(self):
u = Unique()
2022-11-11 03:26:09 +00:00
in_data = [{"a": 77}, {"a": 33}, {"a": 77}]
expected_data = [{"a": 77}, {"a": 33}]
2010-02-21 17:01:04 +00:00
result = u.attach(in_data)
self.assertEqual(list(result), expected_data)
2010-02-21 17:01:04 +00:00
# TODO: unicode & string filter tests
2022-11-11 03:26:09 +00:00
# Run this module's tests via unittest when the file is executed directly.
if __name__ == "__main__":
    unittest.main()