diff --git a/saucebrush/filters.py b/saucebrush/filters.py
index c292dfa..d2bfee8 100644
--- a/saucebrush/filters.py
+++ b/saucebrush/filters.py
@@ -361,8 +361,26 @@ class UnicodeFilter(Filter):
             if isinstance(value, str):
                 record[key] = unicode(value, self._encoding, self._errors)
             elif isinstance(value, unicode):
+                record[key] = value.decode(self._encoding, self._errors)
+        return record
+
+class StringFilter(Filter):
+    """ Encode every unicode value in the record to a byte string using
+        the configured encoding and error handling (utf-8/ignore by
+        default); all other values are left untouched.
+    """
+
+    def __init__(self, encoding='utf-8', errors='ignore'):
+        super(StringFilter, self).__init__()
+        self._encoding = encoding
+        self._errors = errors
+
+    def process_record(self, record):
+        for key, value in record.iteritems():
+            if isinstance(value, unicode):
                 record[key] = value.encode(self._encoding, self._errors)
         return record
 
+
 ###########################
 ## Commonly Used Filters ##
diff --git a/saucebrush/utils.py b/saucebrush/utils.py
index 2085e7e..9f538f7 100644
--- a/saucebrush/utils.py
+++ b/saucebrush/utils.py
@@ -167,11 +167,11 @@ def dotted_key_set(dict_or_list, dotted_key, value, separator='.'):
 
 class Files(object):
 
-    def __init__(self, file_open_callback=None, *args):
+    def __init__(self, *args):
         self.paths = []
         for arg in args:
             self.add(arg)
-        self.file_open_callback = file_open_callback
+        self.file_open_callback = None
 
     def add(self, path):
         self.paths.append(path)
@@ -180,9 +180,11 @@ class Files(object):
         return self.linereader()
 
     def linereader(self):
+        import os
         for path in iter(self.paths):
             if os.path.exists(path):
-                self.file_open_callback(path)
+                if self.file_open_callback:
+                    self.file_open_callback(path)
                 f = open(path)
                 for line in f:
                     yield line