commit 40008d7b53

.github/FUNDING.yml (vendored, new file, +1)
@@ -0,0 +1 @@
github: [jamesturk]
.github/ISSUE_TEMPLATE/bug_report.md (vendored, new file, +17)
@@ -0,0 +1,17 @@
---
name: Bug report
about: Create a report to help us improve
title: ""
labels: bug
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**Environment**
Please provide output of `python -V` & `spatula --version`, as well as what operating system you're using, and any other details:

**Additional context**
Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md (vendored, new file, +20)
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context about the feature request here.
.github/workflows/test.yml (vendored, new file, +36)
@@ -0,0 +1,36 @@
name: Test & Lint

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 4
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]

    steps:
      # Python & dependency installation
      - uses: actions/checkout@v3
      - name: setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: install Poetry
        uses: snok/install-poetry@v1.2.1
      - name: set poetry config path
        run: poetry config virtualenvs.path ~/.virtualenvs
      - name: install dependencies
        run: poetry install

      # - name: lint with mypy
      #   run: poetry run mypy src
      - name: lint with flake8
        run: poetry run flake8 --show-source --statistics --ignore=E203,E501,W503 src
      - name: pytest
        run: poetry run pytest
@@ -1,9 +0,0 @@
language: python
python:
  - "2.7"
  - "3.5"
install: pip install nose
script: nosetests
notifications:
  email:
    - james.p.turk@gmail.com
@@ -1,7 +1,7 @@
import re
import exceptions

-class FECSource(object):
+class FECSource:

    SPLIT_CHAR = '\x1c'
    FORM_FIELDS = {
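The `SPLIT_CHAR` in the hunk above is ASCII 0x1C, the "file separator" control character, which newer FEC electronic-filing formats use as the field delimiter in place of commas. A tiny illustration of splitting on it; the sample line here is invented, not taken from the commit:

```python
# Hypothetical FEC-style header line: fields joined by the FS (0x1c)
# control character that FECSource splits records on.
line = "HDR" + "\x1c" + "FEC" + "\x1c" + "8.0"
print(line.split("\x1c"))  # ['HDR', 'FEC', '8.0']
```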
poetry.lock (generated, new file, +395)
@@ -0,0 +1,395 @@
[[package]]
name = "attrs"
version = "22.1.0"
description = "Classes Without Boilerplate"
category = "dev"
optional = false
python-versions = ">=3.5"

[package.extras]
dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"]
docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"]
tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"]

[[package]]
name = "black"
version = "22.10.0"
description = "The uncompromising code formatter."
category = "dev"
optional = false
python-versions = ">=3.7"

[package.dependencies]
click = ">=8.0.0"
mypy-extensions = ">=0.4.3"
pathspec = ">=0.9.0"
platformdirs = ">=2"
tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""}

[package.extras]
colorama = ["colorama (>=0.4.3)"]
d = ["aiohttp (>=3.7.4)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
uvloop = ["uvloop (>=0.15.2)"]

[[package]]
name = "click"
version = "8.1.3"
description = "Composable command line interface toolkit"
category = "dev"
optional = false
python-versions = ">=3.7"

[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}

[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"

[[package]]
name = "cssselect"
version = "1.2.0"
description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
category = "main"
optional = false
python-versions = ">=3.7"

[[package]]
name = "exceptiongroup"
version = "1.0.1"
description = "Backport of PEP 654 (exception groups)"
category = "dev"
optional = false
python-versions = ">=3.7"

[package.extras]
test = ["pytest (>=6)"]

[[package]]
name = "flake8"
version = "5.0.4"
description = "the modular source code checker: pep8 pyflakes and co"
category = "dev"
optional = false
python-versions = ">=3.6.1"

[package.dependencies]
mccabe = ">=0.7.0,<0.8.0"
pycodestyle = ">=2.9.0,<2.10.0"
pyflakes = ">=2.5.0,<2.6.0"

[[package]]
name = "iniconfig"
version = "1.1.1"
description = "iniconfig: brain-dead simple config-ini parsing"
category = "dev"
optional = false
python-versions = "*"

[[package]]
name = "lxml"
version = "4.9.1"
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"

[package.extras]
cssselect = ["cssselect (>=0.7)"]
html5 = ["html5lib"]
htmlsoup = ["BeautifulSoup4"]
source = ["Cython (>=0.29.7)"]

[[package]]
name = "mccabe"
version = "0.7.0"
description = "McCabe checker, plugin for flake8"
category = "dev"
optional = false
python-versions = ">=3.6"

[[package]]
name = "mypy-extensions"
version = "0.4.3"
description = "Experimental type system extensions for programs checked with the mypy typechecker."
category = "dev"
optional = false
python-versions = "*"

[[package]]
name = "packaging"
version = "21.3"
description = "Core utilities for Python packages"
category = "dev"
optional = false
python-versions = ">=3.6"

[package.dependencies]
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"

[[package]]
name = "pathspec"
version = "0.10.1"
description = "Utility library for gitignore style pattern matching of file paths."
category = "dev"
optional = false
python-versions = ">=3.7"

[[package]]
name = "platformdirs"
version = "2.5.3"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev"
optional = false
python-versions = ">=3.7"

[package.extras]
docs = ["furo (>=2022.9.29)", "proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.4)"]
test = ["appdirs (==1.4.4)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]

[[package]]
name = "pluggy"
version = "1.0.0"
description = "plugin and hook calling mechanisms for python"
category = "dev"
optional = false
python-versions = ">=3.6"

[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]

[[package]]
name = "pycodestyle"
version = "2.9.1"
description = "Python style guide checker"
category = "dev"
optional = false
python-versions = ">=3.6"

[[package]]
name = "pyflakes"
version = "2.5.0"
description = "passive checker of Python programs"
category = "dev"
optional = false
python-versions = ">=3.6"

[[package]]
name = "pyparsing"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
category = "dev"
optional = false
python-versions = ">=3.6.8"

[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]

[[package]]
name = "pytest"
version = "7.2.0"
description = "pytest: simple powerful testing with Python"
category = "dev"
optional = false
python-versions = ">=3.7"

[package.dependencies]
attrs = ">=19.2.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}

[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]

[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"

[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "765977e700b56e9b852f6ca6f5d54e2c1343b3a07b9220e83ef969a277f67866"

[metadata.files]
attrs = [
    {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"},
    {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"},
]
black = [
    {file = "black-22.10.0-1fixedarch-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:5cc42ca67989e9c3cf859e84c2bf014f6633db63d1cbdf8fdb666dcd9e77e3fa"},
    {file = "black-22.10.0-1fixedarch-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:5d8f74030e67087b219b032aa33a919fae8806d49c867846bfacde57f43972ef"},
    {file = "black-22.10.0-1fixedarch-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:197df8509263b0b8614e1df1756b1dd41be6738eed2ba9e9769f3880c2b9d7b6"},
    {file = "black-22.10.0-1fixedarch-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:2644b5d63633702bc2c5f3754b1b475378fbbfb481f62319388235d0cd104c2d"},
    {file = "black-22.10.0-1fixedarch-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:e41a86c6c650bcecc6633ee3180d80a025db041a8e2398dcc059b3afa8382cd4"},
    {file = "black-22.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2039230db3c6c639bd84efe3292ec7b06e9214a2992cd9beb293d639c6402edb"},
    {file = "black-22.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ff67aec0a47c424bc99b71005202045dc09270da44a27848d534600ac64fc7"},
    {file = "black-22.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:819dc789f4498ecc91438a7de64427c73b45035e2e3680c92e18795a839ebb66"},
    {file = "black-22.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b9b29da4f564ba8787c119f37d174f2b69cdfdf9015b7d8c5c16121ddc054ae"},
    {file = "black-22.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b49776299fece66bffaafe357d929ca9451450f5466e997a7285ab0fe28e3b"},
    {file = "black-22.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:21199526696b8f09c3997e2b4db8d0b108d801a348414264d2eb8eb2532e540d"},
    {file = "black-22.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e464456d24e23d11fced2bc8c47ef66d471f845c7b7a42f3bd77bf3d1789650"},
    {file = "black-22.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9311e99228ae10023300ecac05be5a296f60d2fd10fff31cf5c1fa4ca4b1988d"},
    {file = "black-22.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fba8a281e570adafb79f7755ac8721b6cf1bbf691186a287e990c7929c7692ff"},
    {file = "black-22.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:915ace4ff03fdfff953962fa672d44be269deb2eaf88499a0f8805221bc68c87"},
    {file = "black-22.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:444ebfb4e441254e87bad00c661fe32df9969b2bf224373a448d8aca2132b395"},
    {file = "black-22.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:974308c58d057a651d182208a484ce80a26dac0caef2895836a92dd6ebd725e0"},
    {file = "black-22.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72ef3925f30e12a184889aac03d77d031056860ccae8a1e519f6cbb742736383"},
    {file = "black-22.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:432247333090c8c5366e69627ccb363bc58514ae3e63f7fc75c54b1ea80fa7de"},
    {file = "black-22.10.0-py3-none-any.whl", hash = "sha256:c957b2b4ea88587b46cf49d1dc17681c1e672864fd7af32fc1e9664d572b3458"},
    {file = "black-22.10.0.tar.gz", hash = "sha256:f513588da599943e0cde4e32cc9879e825d58720d6557062d1098c5ad80080e1"},
]
click = [
    {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
    {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
]
colorama = [
    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
cssselect = [
    {file = "cssselect-1.2.0-py2.py3-none-any.whl", hash = "sha256:da1885f0c10b60c03ed5eccbb6b68d6eff248d91976fcde348f395d54c9fd35e"},
    {file = "cssselect-1.2.0.tar.gz", hash = "sha256:666b19839cfaddb9ce9d36bfe4c969132c647b92fc9088c4e23f786b30f1b3dc"},
]
exceptiongroup = [
    {file = "exceptiongroup-1.0.1-py3-none-any.whl", hash = "sha256:4d6c0aa6dd825810941c792f53d7b8d71da26f5e5f84f20f9508e8f2d33b140a"},
    {file = "exceptiongroup-1.0.1.tar.gz", hash = "sha256:73866f7f842ede6cb1daa42c4af078e2035e5f7607f0e2c762cc51bb31bbe7b2"},
]
flake8 = [
    {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
    {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
]
iniconfig = [
    {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
    {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
lxml = [
    {file = "lxml-4.9.1-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed"},
    {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc"},
    {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc"},
    {file = "lxml-4.9.1-cp27-cp27m-win32.whl", hash = "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3"},
    {file = "lxml-4.9.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627"},
    {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84"},
    {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837"},
    {file = "lxml-4.9.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad"},
    {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5"},
    {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8"},
    {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8"},
    {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d"},
    {file = "lxml-4.9.1-cp310-cp310-win32.whl", hash = "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7"},
    {file = "lxml-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b"},
    {file = "lxml-4.9.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d"},
    {file = "lxml-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3"},
    {file = "lxml-4.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29"},
    {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d"},
    {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318"},
    {file = "lxml-4.9.1-cp35-cp35m-win32.whl", hash = "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7"},
    {file = "lxml-4.9.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4"},
    {file = "lxml-4.9.1-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb"},
    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067"},
    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536"},
    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8"},
    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b"},
    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf"},
    {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3"},
    {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391"},
    {file = "lxml-4.9.1-cp36-cp36m-win32.whl", hash = "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e"},
    {file = "lxml-4.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7"},
    {file = "lxml-4.9.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2"},
    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc"},
    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c"},
    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4"},
    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3"},
    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca"},
    {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785"},
    {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785"},
    {file = "lxml-4.9.1-cp37-cp37m-win32.whl", hash = "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a"},
    {file = "lxml-4.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e"},
    {file = "lxml-4.9.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b"},
    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97"},
    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21"},
    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2"},
    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130"},
    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715"},
    {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036"},
    {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387"},
    {file = "lxml-4.9.1-cp38-cp38-win32.whl", hash = "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94"},
    {file = "lxml-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345"},
    {file = "lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67"},
    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb"},
    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448"},
    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7"},
    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91"},
    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000"},
    {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25"},
    {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd"},
    {file = "lxml-4.9.1-cp39-cp39-win32.whl", hash = "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb"},
    {file = "lxml-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d"},
    {file = "lxml-4.9.1-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c"},
    {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b"},
    {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc"},
    {file = "lxml-4.9.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b"},
    {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2"},
    {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73"},
    {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c"},
    {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9"},
    {file = "lxml-4.9.1.tar.gz", hash = "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f"},
]
mccabe = [
    {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
    {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
]
mypy-extensions = [
    {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
    {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
]
packaging = [
    {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
    {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
]
pathspec = [
    {file = "pathspec-0.10.1-py3-none-any.whl", hash = "sha256:46846318467efc4556ccfd27816e004270a9eeeeb4d062ce5e6fc7a87c573f93"},
    {file = "pathspec-0.10.1.tar.gz", hash = "sha256:7ace6161b621d31e7902eb6b5ae148d12cfd23f4a249b9ffb6b9fee12084323d"},
]
platformdirs = [
    {file = "platformdirs-2.5.3-py3-none-any.whl", hash = "sha256:0cb405749187a194f444c25c82ef7225232f11564721eabffc6ec70df83b11cb"},
    {file = "platformdirs-2.5.3.tar.gz", hash = "sha256:6e52c21afff35cb659c6e52d8b4d61b9bd544557180440538f255d9382c8cbe0"},
]
pluggy = [
    {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
    {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
pycodestyle = [
    {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
    {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"},
]
pyflakes = [
    {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"},
    {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"},
]
pyparsing = [
    {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
    {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
]
pytest = [
    {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"},
    {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"},
]
tomli = [
    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
pyproject.toml (new file, +22)
@@ -0,0 +1,22 @@
[tool.poetry]
name = "saucebrush"
version = "0.6.0"
description = ""
authors = ["James Turk <dev@jamesturk.net>"]
license = "MIT"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
lxml = "^4.9.1"
cssselect = "^1.2.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.2.0"
flake8 = "^5.0.4"
black = "^22.10.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
@@ -1,15 +0,0 @@
import unittest
from saucebrush.tests.filters import FilterTestCase
from saucebrush.tests.sources import SourceTestCase
from saucebrush.tests.emitters import EmitterTestCase
from saucebrush.tests.recipes import RecipeTestCase
from saucebrush.tests.stats import StatsTestCase

filter_suite = unittest.TestLoader().loadTestsFromTestCase(FilterTestCase)
source_suite = unittest.TestLoader().loadTestsFromTestCase(SourceTestCase)
emitter_suite = unittest.TestLoader().loadTestsFromTestCase(EmitterTestCase)
recipe_suite = unittest.TestLoader().loadTestsFromTestCase(RecipeTestCase)
stats_suite = unittest.TestLoader().loadTestsFromTestCase(StatsTestCase)

if __name__ == '__main__':
    unittest.main()
@@ -1,86 +0,0 @@
from __future__ import unicode_literals
from contextlib import closing
from io import StringIO
import os
import unittest

from saucebrush.emitters import (
    DebugEmitter, CSVEmitter, CountEmitter, SqliteEmitter, SqlDumpEmitter)

class EmitterTestCase(unittest.TestCase):

    def test_debug_emitter(self):
        with closing(StringIO()) as output:
            de = DebugEmitter(output)
            list(de.attach([1,2,3]))
            self.assertEqual(output.getvalue(), '1\n2\n3\n')

    def test_count_emitter(self):

        # values for test
        values = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22]

        with closing(StringIO()) as output:

            # test without of parameter
            ce = CountEmitter(every=10, outfile=output, format="%(count)s records\n")
            list(ce.attach(values))
            self.assertEqual(output.getvalue(), '10 records\n20 records\n')
            ce.done()
            self.assertEqual(output.getvalue(), '10 records\n20 records\n22 records\n')

        with closing(StringIO()) as output:

            # test with of parameter
            ce = CountEmitter(every=10, outfile=output, of=len(values))
            list(ce.attach(values))
            self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n')
            ce.done()
            self.assertEqual(output.getvalue(), '10 of 22\n20 of 22\n22 of 22\n')

    def test_csv_emitter(self):

        try:
            import cStringIO  # if Python 2.x then use old cStringIO
            io = cStringIO.StringIO()
        except:
            io = StringIO()  # if Python 3.x then use StringIO

        with closing(io) as output:
            ce = CSVEmitter(output, ('x','y','z'))
            list(ce.attach([{'x':1, 'y':2, 'z':3}, {'x':5, 'y':5, 'z':5}]))
            self.assertEqual(output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')

    def test_sqlite_emitter(self):

        import sqlite3, tempfile

        with closing(tempfile.NamedTemporaryFile(suffix='.db')) as f:
            db_path = f.name

        sle = SqliteEmitter(db_path, 'testtable', fieldnames=('a','b','c'))
        list(sle.attach([{'a': '1', 'b': '2', 'c': '3'}]))
        sle.done()

        with closing(sqlite3.connect(db_path)) as conn:
            cur = conn.cursor()
            cur.execute("""SELECT a, b, c FROM testtable""")
            results = cur.fetchall()

        os.unlink(db_path)

        self.assertEqual(results, [('1', '2', '3')])

    def test_sql_dump_emitter(self):

        with closing(StringIO()) as bffr:

            sde = SqlDumpEmitter(bffr, 'testtable', ('a', 'b'))
            list(sde.attach([{'a': 1, 'b': '2'}]))
            sde.done()

            self.assertEqual(bffr.getvalue(), "INSERT INTO `testtable` (`a`,`b`) VALUES (1,'2');\n")


if __name__ == '__main__':
    unittest.main()
@@ -1,304 +0,0 @@
import unittest
import operator
import types
from saucebrush.filters import (Filter, YieldFilter, FieldFilter,
                                SubrecordFilter, ConditionalPathFilter,
                                ConditionalFilter, FieldModifier, FieldKeeper,
                                FieldRemover, FieldMerger, FieldAdder,
                                FieldCopier, FieldRenamer, Unique)

class DummyRecipe(object):
    rejected_record = None
    rejected_msg = None
    def reject_record(self, record, msg):
        self.rejected_record = record
        self.rejected_msg = msg

class Doubler(Filter):
    def process_record(self, record):
        return record*2

class OddRemover(Filter):
    def process_record(self, record):
        if record % 2 == 0:
            return record
        else:
            return None  # explicitly return None

class ListFlattener(YieldFilter):
    def process_record(self, record):
        for item in record:
            yield item

class FieldDoubler(FieldFilter):
    def process_field(self, item):
        return item*2

class NonModifyingFieldDoubler(Filter):
    def __init__(self, key):
        self.key = key

    def process_record(self, record):
        record = dict(record)
        record[self.key] *= 2
        return record

class ConditionalOddRemover(ConditionalFilter):
    def test_record(self, record):
        # return True for even values
        return record % 2 == 0

class FilterTestCase(unittest.TestCase):

    def _simple_data(self):
        return [{'a':1, 'b':2, 'c':3},
                {'a':5, 'b':5, 'c':5},
                {'a':1, 'b':10, 'c':100}]

    def assert_filter_result(self, filter_obj, expected_data):
        result = filter_obj.attach(self._simple_data())
        self.assertEqual(list(result), expected_data)

    def test_reject_record(self):
        recipe = DummyRecipe()
        f = Doubler()
        result = f.attach([1,2,3], recipe=recipe)
        # next has to be called for attach to take effect
        next(result)
        f.reject_record('bad', 'this one was bad')

        # ensure that the rejection propagated to the recipe
        self.assertEqual('bad', recipe.rejected_record)
        self.assertEqual('this one was bad', recipe.rejected_msg)

    def test_simple_filter(self):
        df = Doubler()
        result = df.attach([1,2,3])

        # ensure we got a generator that yields 2,4,6
        self.assertEqual(type(result), types.GeneratorType)
        self.assertEqual(list(result), [2,4,6])

    def test_simple_filter_return_none(self):
        cf = OddRemover()
        result = cf.attach(range(10))

        # ensure only even numbers remain
        self.assertEqual(list(result), [0,2,4,6,8])

    def test_simple_yield_filter(self):
        lf = ListFlattener()
        result = lf.attach([[1],[2,3],[4,5,6]])

        # ensure we got a generator that yields 1,2,3,4,5,6
        self.assertEqual(type(result), types.GeneratorType)
        self.assertEqual(list(result), [1,2,3,4,5,6])

    def test_simple_field_filter(self):
        ff = FieldDoubler(['a', 'c'])

        # check against expected data
        expected_data = [{'a':2, 'b':2, 'c':6},
                         {'a':10, 'b':5, 'c':10},
                         {'a':2, 'b':10, 'c':200}]
        self.assert_filter_result(ff, expected_data)

    def test_conditional_filter(self):
        cf = ConditionalOddRemover()
        result = cf.attach(range(10))

        # ensure only even numbers remain
        self.assertEqual(list(result), [0,2,4,6,8])

    ### Tests for Subrecord

    def test_subrecord_filter_list(self):
        data = [{'a': [{'b': 2}, {'b': 4}]},
                {'a': [{'b': 5}]},
                {'a': [{'b': 8}, {'b':2}, {'b':1}]}]

        expected = [{'a': [{'b': 4}, {'b': 8}]},
                    {'a': [{'b': 10}]},
                    {'a': [{'b': 16}, {'b':4}, {'b':2}]}]

        sf = SubrecordFilter('a', NonModifyingFieldDoubler('b'))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_subrecord_filter_deep(self):
        data = [{'a': {'d':[{'b': 2}, {'b': 4}]}},
                {'a': {'d':[{'b': 5}]}},
                {'a': {'d':[{'b': 8}, {'b':2}, {'b':1}]}}]

        expected = [{'a': {'d':[{'b': 4}, {'b': 8}]}},
                    {'a': {'d':[{'b': 10}]}},
                    {'a': {'d':[{'b': 16}, {'b':4}, {'b':2}]}}]

        sf = SubrecordFilter('a.d', NonModifyingFieldDoubler('b'))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_subrecord_filter_nonlist(self):
        data = [
            {'a':{'b':{'c':1}}},
            {'a':{'b':{'c':2}}},
            {'a':{'b':{'c':3}}},
        ]

        expected = [
            {'a':{'b':{'c':2}}},
            {'a':{'b':{'c':4}}},
            {'a':{'b':{'c':6}}},
        ]

        sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c'))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_subrecord_filter_list_in_path(self):
        data = [
            {'a': [{'b': {'c': 5}}, {'b': {'c': 6}}]},
            {'a': [{'b': {'c': 1}}, {'b': {'c': 2}}, {'b': {'c': 3}}]},
            {'a': [{'b': {'c': 2}} ]}
        ]

        expected = [
            {'a': [{'b': {'c': 10}}, {'b': {'c': 12}}]},
            {'a': [{'b': {'c': 2}}, {'b': {'c': 4}}, {'b': {'c': 6}}]},
            {'a': [{'b': {'c': 4}} ]}
        ]

        sf = SubrecordFilter('a.b', NonModifyingFieldDoubler('c'))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_conditional_path(self):

        predicate = lambda r: r['a'] == 1

        # double b if a == 1, otherwise double c
        cpf = ConditionalPathFilter(predicate, FieldDoubler('b'),
                                    FieldDoubler('c'))
        expected_data = [{'a':1, 'b':4, 'c':3},
                         {'a':5, 'b':5, 'c':10},
                         {'a':1, 'b':20, 'c':100}]

        self.assert_filter_result(cpf, expected_data)

    ### Tests for Generic Filters

    def test_field_modifier(self):
        # another version of FieldDoubler
        fm = FieldModifier(['a', 'c'], lambda x: x*2)

        # check against expected data
        expected_data = [{'a':2, 'b':2, 'c':6},
                         {'a':10, 'b':5, 'c':10},
                         {'a':2, 'b':10, 'c':200}]
        self.assert_filter_result(fm, expected_data)

    def test_field_keeper(self):
        fk = FieldKeeper(['c'])

        # check against expected results
        expected_data = [{'c':3}, {'c':5}, {'c':100}]
        self.assert_filter_result(fk, expected_data)

    def test_field_remover(self):
        fr = FieldRemover(['a', 'b'])

        # check against expected results
        expected_data = [{'c':3}, {'c':5}, {'c':100}]
        self.assert_filter_result(fr, expected_data)

    def test_field_merger(self):
        fm = FieldMerger({'sum':('a','b','c')}, lambda x,y,z: x+y+z)

        # check against expected results
        expected_data = [{'sum':6}, {'sum':15}, {'sum':111}]
        self.assert_filter_result(fm, expected_data)

    def test_field_merger_keep_fields(self):
        fm = FieldMerger({'sum':('a','b','c')}, lambda x,y,z: x+y+z,
                         keep_fields=True)

        # check against expected results
        expected_data = [{'a':1, 'b':2, 'c':3, 'sum':6},
                         {'a':5, 'b':5, 'c':5, 'sum':15},
                         {'a':1, 'b':10, 'c':100, 'sum': 111}]
        self.assert_filter_result(fm, expected_data)

    def test_field_adder_scalar(self):
        fa = FieldAdder('x', 7)

        expected_data = [{'a':1, 'b':2, 'c':3, 'x':7},
                         {'a':5, 'b':5, 'c':5, 'x':7},
                         {'a':1, 'b':10, 'c':100, 'x': 7}]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_callable(self):
        fa = FieldAdder('x', lambda: 7)

        expected_data = [{'a':1, 'b':2, 'c':3, 'x':7},
                         {'a':5, 'b':5, 'c':5, 'x':7},
                         {'a':1, 'b':10, 'c':100, 'x': 7}]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_iterable(self):
        fa = FieldAdder('x', [1,2,3])

        expected_data = [{'a':1, 'b':2, 'c':3, 'x':1},
                         {'a':5, 'b':5, 'c':5, 'x':2},
                         {'a':1, 'b':10, 'c':100, 'x': 3}]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_replace(self):
        fa = FieldAdder('b', lambda: 7)

        expected_data = [{'a':1, 'b':7, 'c':3},
                         {'a':5, 'b':7, 'c':5},
                         {'a':1, 'b':7, 'c':100}]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_no_replace(self):
        fa = FieldAdder('b', lambda: 7, replace=False)

        expected_data = [{'a':1, 'b':2, 'c':3},
                         {'a':5, 'b':5, 'c':5},
                         {'a':1, 'b':10, 'c':100}]
        self.assert_filter_result(fa, expected_data)

    def test_field_copier(self):
        fc = FieldCopier({'a2':'a', 'b2':'b'})

        expected_data = [{'a':1, 'b':2, 'c':3, 'a2':1, 'b2':2},
                         {'a':5, 'b':5, 'c':5, 'a2':5, 'b2':5},
                         {'a':1, 'b':10, 'c':100, 'a2': 1, 'b2': 10}]
        self.assert_filter_result(fc, expected_data)

    def test_field_renamer(self):
        fr = FieldRenamer({'x':'a', 'y':'b'})

        expected_data = [{'x':1, 'y':2, 'c':3},
                         {'x':5, 'y':5, 'c':5},
                         {'x':1, 'y':10, 'c':100}]
        self.assert_filter_result(fr, expected_data)

    # TODO: splitter & flattner tests?

    def test_unique_filter(self):
        u = Unique()
        in_data = [{'a': 77}, {'a':33}, {'a': 77}]
        expected_data = [{'a': 77}, {'a':33}]
        result = u.attach(in_data)

        self.assertEqual(list(result), expected_data)

    # TODO: unicode & string filter tests

if __name__ == '__main__':
    unittest.main()
@@ -1,53 +0,0 @@
import doctest
import unittest
from saucebrush import Recipe, run_recipe, SaucebrushError, OvercookedError
from saucebrush.filters import Filter


class Raiser(Filter):
    def process_record(self, record):
        raise Exception("bad record")


class Saver(Filter):
    def __init__(self):
        self.saved = []

    def process_record(self, record):
        self.saved.append(record)
        return record


class RecipeTestCase(unittest.TestCase):
    def test_error_stream(self):
        saver = Saver()
        recipe = Recipe(Raiser(), error_stream=saver)
        recipe.run([{'a': 1}, {'b': 2}])
        recipe.done()

        self.assertEqual(saver.saved[0]['record'], {'a': 1})
        self.assertEqual(saver.saved[1]['record'], {'b': 2})

        # Must pass either a Recipe, a Filter or an iterable of Filters
        # as the error_stream argument
        self.assertRaises(SaucebrushError, Recipe, error_stream=5)

    def test_run_recipe(self):
        saver = Saver()
        run_recipe([1, 2], saver)

        self.assertEqual(saver.saved, [1, 2])

    def test_done(self):
        saver = Saver()
        recipe = Recipe(saver)
        recipe.run([1])
        recipe.done()

        self.assertRaises(OvercookedError, recipe.run, [2])
        self.assertRaises(OvercookedError, recipe.done)
        self.assertEqual(saver.saved, [1])


if __name__ == '__main__':
    unittest.main()
@@ -1,87 +0,0 @@
from __future__ import unicode_literals
from io import BytesIO, StringIO
import unittest

from saucebrush.sources import (
    CSVSource, FixedWidthFileSource, HtmlTableSource, JSONSource)

class SourceTestCase(unittest.TestCase):

    def _get_csv(self):
        data = '''a,b,c
1,2,3
5,5,5
1,10,100'''
        return StringIO(data)

    def test_csv_source_basic(self):
        source = CSVSource(self._get_csv())
        expected_data = [{'a':'1', 'b':'2', 'c':'3'},
                         {'a':'5', 'b':'5', 'c':'5'},
                         {'a':'1', 'b':'10', 'c':'100'}]
        self.assertEqual(list(source), expected_data)

    def test_csv_source_fieldnames(self):
        source = CSVSource(self._get_csv(), ['x','y','z'])
        expected_data = [{'x':'a', 'y':'b', 'z':'c'},
                         {'x':'1', 'y':'2', 'z':'3'},
                         {'x':'5', 'y':'5', 'z':'5'},
                         {'x':'1', 'y':'10', 'z':'100'}]
        self.assertEqual(list(source), expected_data)

    def test_csv_source_skiprows(self):
        source = CSVSource(self._get_csv(), skiprows=1)
        expected_data = [{'a':'5', 'b':'5', 'c':'5'},
                         {'a':'1', 'b':'10', 'c':'100'}]
        self.assertEqual(list(source), expected_data)

    def test_fixed_width_source(self):
        data = StringIO('JamesNovember 3 1986\nTim  September151999')
        fields = (('name',5), ('month',9), ('day',2), ('year',4))
        source = FixedWidthFileSource(data, fields)
        expected_data = [{'name':'James', 'month':'November', 'day':'3',
                          'year':'1986'},
                         {'name':'Tim', 'month':'September', 'day':'15',
                          'year':'1999'}]
        self.assertEqual(list(source), expected_data)

    def test_json_source(self):

        content = StringIO("""[{"a": 1, "b": "2", "c": 3}]""")

        js = JSONSource(content)
        self.assertEqual(list(js), [{'a': 1, 'b': '2', 'c': 3}])

    def test_html_table_source(self):

        content = StringIO("""
<html>
    <table id="thetable">
        <tr>
            <th>a</th>
            <th>b</th>
            <th>c</th>
        </tr>
        <tr>
            <td>1</td>
            <td>2</td>
            <td>3</td>
        </tr>
    </table>
</html>
""")

        try:

            import lxml

            hts = HtmlTableSource(content, 'thetable')
            self.assertEqual(list(hts), [{'a': '1', 'b': '2', 'c': '3'}])

        except ImportError:
            # Python 2.6 doesn't have skipTest. We'll just suffer without it.
            if hasattr(self, 'skipTest'):
                self.skipTest("lxml is not installed")

if __name__ == '__main__':
    unittest.main()
@@ -1,52 +0,0 @@
import unittest
from saucebrush.stats import Sum, Average, Median, MinMax, StandardDeviation, Histogram

class StatsTestCase(unittest.TestCase):

    def _simple_data(self):
        return [{'a':1, 'b':2, 'c':3},
                {'a':5, 'b':5, 'c':5},
                {'a':1, 'b':10, 'c':100}]

    def test_sum(self):
        fltr = Sum('b')
        list(fltr.attach(self._simple_data()))
        self.assertEqual(fltr.value(), 17)

    def test_average(self):
        fltr = Average('c')
        list(fltr.attach(self._simple_data()))
        self.assertEqual(fltr.value(), 36.0)

    def test_median(self):
        # odd number of values
        fltr = Median('a')
        list(fltr.attach(self._simple_data()))
        self.assertEqual(fltr.value(), 1)

        # even number of values
        fltr = Median('a')
        list(fltr.attach(self._simple_data()[:2]))
        self.assertEqual(fltr.value(), 3)

    def test_minmax(self):
        fltr = MinMax('b')
        list(fltr.attach(self._simple_data()))
        self.assertEqual(fltr.value(), (2, 10))

    def test_standard_deviation(self):
        fltr = StandardDeviation('c')
        list(fltr.attach(self._simple_data()))
        self.assertEqual(fltr.average(), 36.0)
        self.assertEqual(fltr.median(), 5)
        self.assertEqual(fltr.value(), (55.4346462061408, 3073.0))
        self.assertEqual(fltr.value(True), (45.2621990922521, 2048.6666666666665))

    def test_histogram(self):
        fltr = Histogram('a')
        fltr.label_length = 1
        list(fltr.attach(self._simple_data()))
        self.assertEqual(str(fltr), "\n1 **\n5 *\n")

if __name__ == '__main__':
    unittest.main()
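The constants asserted in test_standard_deviation above follow directly from the 'c' column [3, 5, 100]. Assuming value() returns a (stddev, variance) pair using the sample (n-1) denominator and that the boolean flag in value(True) switches to the population (n) denominator, this sketch reproduces them:

```python
# Reproduce the magic numbers from test_standard_deviation.
values = [3, 5, 100]                          # the 'c' column of _simple_data()
mean = sum(values) / len(values)              # 36.0, matching fltr.average()
sq_devs = [(v - mean) ** 2 for v in values]   # [1089.0, 961.0, 4096.0]
sample_var = sum(sq_devs) / (len(values) - 1)      # 3073.0
population_var = sum(sq_devs) / len(values)        # 2048.666...
print(sample_var ** 0.5, sample_var)               # 55.4346462061408 3073.0
print(population_var ** 0.5, population_var)       # 45.2621990922521 2048.6666666666665
```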
setup.py (deleted file, -7)
@@ -1,7 +0,0 @@
#!/usr/bin/env python
from setuptools import setup

setup(name="saucebrush",
      version='0.5.0-dev',
      packages=['saucebrush'],
)
@@ -2,7 +2,7 @@
Saucebrush is a data loading & manipulation framework written in python.
"""

-from . import filters, emitters, sources, utils
+from . import filters, emitters, sources, utils  # noqa


class SaucebrushError(Exception):
@@ -13,39 +13,39 @@ class OvercookedError(Exception):
    """
    Exception for trying to operate on a Recipe that has been finished.
    """

    pass


-class Recipe(object):
-
+class Recipe:
    def __init__(self, *filter_args, **kwargs):
        self.finished = False

        self.filters = []
        for filter in filter_args:
-            if hasattr(filter, 'filters'):
+            if hasattr(filter, "filters"):
                self.filters.extend(filter.filters)
            else:
                self.filters.append(filter)

-        self.error_stream = kwargs.get('error_stream')
+        self.error_stream = kwargs.get("error_stream")
        if self.error_stream and not isinstance(self.error_stream, Recipe):
            if isinstance(self.error_stream, filters.Filter):
                self.error_stream = Recipe(self.error_stream)
-            elif hasattr(self.error_stream, '__iter__'):
+            elif hasattr(self.error_stream, "__iter__"):
                self.error_stream = Recipe(*self.error_stream)
            else:
-                raise SaucebrushError('error_stream must be either a filter'
-                                      ' or an iterable of filters')
+                raise SaucebrushError(
+                    "error_stream must be either a filter" " or an iterable of filters"
+                )

    def reject_record(self, record, exception):
        if self.error_stream:
-            self.error_stream.run([{'record': record,
-                                    'exception': repr(exception)}])
+            self.error_stream.run([{"record": record, "exception": repr(exception)}])

    def run(self, source):
        if self.finished:
-            raise OvercookedError('run() called on finished recipe')
+            raise OvercookedError("run() called on finished recipe")

        # connect datapath
        data = source
@@ -58,7 +58,7 @@ class Recipe(object):

    def done(self):
        if self.finished:
-            raise OvercookedError('done() called on finished recipe')
+            raise OvercookedError("done() called on finished recipe")

        self.finished = True

@@ -70,12 +70,11 @@ class Recipe(object):
            try:
                filter_.done()
            except AttributeError:
-                pass # don't care if there isn't a done method
+                pass  # don't care if there isn't a done method


def run_recipe(source, *filter_args, **kwargs):
-    """ Process data, taking it from a source and applying any number of filters
-    """
+    """Process data, taking it from a source and applying any number of filters"""

    r = Recipe(*filter_args, **kwargs)
    r.run(source)
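For readers skimming the diff, here is how the Recipe machinery above is exercised. The sketch is adapted from the deleted recipe tests earlier in this commit, so the names are real, but the snippet itself is illustrative rather than part of the change:

```python
from saucebrush import Recipe, run_recipe
from saucebrush.filters import Filter

class Saver(Filter):
    # toy filter that records everything passing through it
    def __init__(self):
        self.saved = []

    def process_record(self, record):
        self.saved.append(record)
        return record

class Raiser(Filter):
    # toy filter that fails on every record
    def process_record(self, record):
        raise Exception("bad record")

saver = Saver()
run_recipe([1, 2], saver)  # one-shot helper that wraps Recipe(...).run(source)
assert saver.saved == [1, 2]

# error_stream may be a Recipe, a Filter, or an iterable of Filters;
# failed records arrive as {"record": ..., "exception": repr(exc)}.
errors = Saver()
recipe = Recipe(Raiser(), error_stream=errors)
recipe.run([{"a": 1}])
recipe.done()  # a finished recipe raises OvercookedError if reused
assert errors.saved[0]["record"] == {"a": 1}
```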
saucebrush/emitters.py
@@ -2,49 +2,53 @@
 Saucebrush Emitters are filters that instead of modifying the record, output
 it in some manner.
 """
-from __future__ import unicode_literals
 from saucebrush.filters import Filter


 class Emitter(Filter):
-    """ ABC for emitters
+    """ABC for emitters

-        All derived emitters must provide an emit_record(self, record) that
-        takes a single record (python dictionary).
+    All derived emitters must provide an emit_record(self, record) that
+    takes a single record (python dictionary).

-        Emitters can optionally define a done() method that is called after
-        all records are processed (allowing database flushes, or printing of
-        aggregate data).
+    Emitters can optionally define a done() method that is called after
+    all records are processed (allowing database flushes, or printing of
+    aggregate data).
     """

     def process_record(self, record):
         self.emit_record(record)
         return record

     def emit_record(self, record):
-        """ Abstract method to be overridden.
+        """Abstract method to be overridden.

-            Called with a single record, should "emit" the record unmodified.
+        Called with a single record, should "emit" the record unmodified.
         """
-        raise NotImplementedError('emit_record not defined in ' +
-                                  self.__class__.__name__)
+        raise NotImplementedError(
+            "emit_record not defined in " + self.__class__.__name__
+        )

     def done(self):
-        """ No-op Method to be overridden.
+        """No-op Method to be overridden.

-            Called when all processing is complete
+        Called when all processing is complete
         """
         pass


 class DebugEmitter(Emitter):
-    """ Emitter that prints raw records to a file, useful for debugging.
+    """Emitter that prints raw records to a file, useful for debugging.

-        DebugEmitter() by default prints to stdout.
-        DebugEmitter(open('test', 'w')) would print to a file named test
+    DebugEmitter() by default prints to stdout.
+    DebugEmitter(open('test', 'w')) would print to a file named test
     """

     def __init__(self, outfile=None):
-        super(DebugEmitter, self).__init__()
+        super().__init__()
         if not outfile:
             import sys
+
             self._outfile = sys.stdout
         else:
             self._outfile = outfile
@@ -54,20 +58,21 @@ class DebugEmitter(Emitter):


 class CountEmitter(Emitter):
-    """ Emitter that writes the record count to a file-like object.
+    """Emitter that writes the record count to a file-like object.

-        CountEmitter() by default writes to stdout.
-        CountEmitter(outfile=open('text', 'w')) would print to a file name test.
-        CountEmitter(every=1000000) would write the count every 1,000,000 records.
-        CountEmitter(every=100, of=2000) would write "<count> of 2000" every 100 records.
+    CountEmitter() by default writes to stdout.
+    CountEmitter(outfile=open('text', 'w')) would print to a file named test.
+    CountEmitter(every=1000000) would write the count every 1,000,000 records.
+    CountEmitter(every=100, of=2000) would write "<count> of 2000" every 100 records.
     """

     def __init__(self, every=1000, of=None, outfile=None, format=None):

-        super(CountEmitter, self).__init__()
+        super().__init__()

         if not outfile:
             import sys
+
             self._outfile = sys.stdout
         else:
             self._outfile = outfile
@@ -84,7 +89,7 @@ class CountEmitter(Emitter):
         self.count = 0

     def format(self):
-        return self._format % {'count': self.count, 'of': self._of}
+        return self._format % {"count": self.count, "of": self._of}

     def emit_record(self, record):
         self.count += 1
@@ -96,15 +101,16 @@ class CountEmitter(Emitter):


 class CSVEmitter(Emitter):
-    """ Emitter that writes records to a CSV file.
+    """Emitter that writes records to a CSV file.

-        CSVEmitter(open('output.csv','w'), ('id', 'name', 'phone')) writes all
-        records to a csvfile with the columns in the order specified.
+    CSVEmitter(open('output.csv','w'), ('id', 'name', 'phone')) writes all
+    records to a csvfile with the columns in the order specified.
     """

     def __init__(self, csvfile, fieldnames):
-        super(CSVEmitter, self).__init__()
+        super().__init__()
         import csv
+
         self._dictwriter = csv.DictWriter(csvfile, fieldnames)
         # write header row
         header_row = dict(zip(fieldnames, fieldnames))
@@ -115,36 +121,43 @@ class CSVEmitter(Emitter):


 class SqliteEmitter(Emitter):
-    """ Emitter that writes records to a SQLite database.
+    """Emitter that writes records to a SQLite database.

-        SqliteEmitter('addressbook.db', 'friend') writes all records to the
-        friends table in the SQLite database named addressbook.db
+    SqliteEmitter('addressbook.db', 'friend') writes all records to the
+    friends table in the SQLite database named addressbook.db

-        (To have the emitter create the table, the fieldnames should be passed
-        as a third parameter to SqliteEmitter.)
+    (To have the emitter create the table, the fieldnames should be passed
+    as a third parameter to SqliteEmitter.)
     """

     def __init__(self, dbname, table_name, fieldnames=None, replace=False, quiet=False):
-        super(SqliteEmitter, self).__init__()
+        super().__init__()
         import sqlite3
+
         self._conn = sqlite3.connect(dbname)
         self._cursor = self._conn.cursor()
         self._table_name = table_name
         self._replace = replace
         self._quiet = quiet
         if fieldnames:
-            create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name,
-                ', '.join([' '.join((field, 'TEXT')) for field in fieldnames]))
+            create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (
+                table_name,
+                ", ".join([" ".join((field, "TEXT")) for field in fieldnames]),
+            )
             self._cursor.execute(create)

     def emit_record(self, record):
         import sqlite3
+
         # input should be escaped with ? if data isn't trusted
-        qmarks = ','.join(('?',) * len(record))
-        insert = 'INSERT OR REPLACE' if self._replace else 'INSERT'
-        insert = '%s INTO %s (%s) VALUES (%s)' % (insert, self._table_name,
-                                                  ','.join(record.keys()),
-                                                  qmarks)
+        qmarks = ",".join(("?",) * len(record))
+        insert = "INSERT OR REPLACE" if self._replace else "INSERT"
+        insert = "%s INTO %s (%s) VALUES (%s)" % (
+            insert,
+            self._table_name,
+            ",".join(record.keys()),
+            qmarks,
+        )
         try:
             self._cursor.execute(insert, list(record.values()))
         except sqlite3.IntegrityError as ie:
@@ -158,26 +171,29 @@ class SqliteEmitter(Emitter):


 class SqlDumpEmitter(Emitter):
-    """ Emitter that writes SQL INSERT statements.
+    """Emitter that writes SQL INSERT statements.

-        The output generated by the SqlDumpEmitter is intended to be used to
-        populate a mySQL database.
+    The output generated by the SqlDumpEmitter is intended to be used to
+    populate a mySQL database.

-        SqlDumpEmitter(open('addresses.sql', 'w'), 'friend', ('name', 'phone'))
-        writes statements to addresses.sql to insert the data
-        into the friends table.
+    SqlDumpEmitter(open('addresses.sql', 'w'), 'friend', ('name', 'phone'))
+    writes statements to addresses.sql to insert the data
+    into the friends table.
     """

     def __init__(self, outfile, table_name, fieldnames):
-        super(SqlDumpEmitter, self).__init__()
+        super().__init__()
         self._fieldnames = fieldnames
         if not outfile:
             import sys
+
             self._outfile = sys.stderr
         else:
             self._outfile = outfile
         self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (
-            table_name, '`,`'.join(fieldnames))
+            table_name,
+            "`,`".join(fieldnames),
+        )

     def quote(self, item):

@@ -190,29 +206,31 @@ class SqlDumpEmitter(Emitter):
         types = (str,)

         if isinstance(item, types):
-            item = item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0')
+            item = item.replace("\\", "\\\\").replace("'", "\\'").replace(chr(0), "0")
             return "'%s'" % item

         return "%s" % item

     def emit_record(self, record):
         quoted_data = [self.quote(record[field]) for field in self._fieldnames]
-        self._outfile.write(self._insert_str % ','.join(quoted_data))
+        self._outfile.write(self._insert_str % ",".join(quoted_data))


 class DjangoModelEmitter(Emitter):
-    """ Emitter that populates a table corresponding to a django model.
+    """Emitter that populates a table corresponding to a django model.

-        Takes a django settings file, app label and model name and uses django
-        to insert the records into the appropriate table.
+    Takes a django settings file, app label and model name and uses django
+    to insert the records into the appropriate table.

-        DjangoModelEmitter('settings.py', 'addressbook', 'friend') writes
-        records to addressbook.models.friend model using database settings
-        from settings.py.
+    DjangoModelEmitter('settings.py', 'addressbook', 'friend') writes
+    records to addressbook.models.friend model using database settings
+    from settings.py.
     """

     def __init__(self, dj_settings, app_label, model_name):
-        super(DjangoModelEmitter, self).__init__()
+        super().__init__()
         from saucebrush.utils import get_django_model
+
         self._dbmodel = get_django_model(dj_settings, app_label, model_name)
         if not self._dbmodel:
             raise Exception("No such model: %s %s" % (app_label, model_name))
@@ -222,19 +240,30 @@ class DjangoModelEmitter(Emitter):


 class MongoDBEmitter(Emitter):
-    """ Emitter that creates a document in a MongoDB datastore
+    """Emitter that creates a document in a MongoDB datastore

-        The names of the database and collection in which the records will
-        be inserted are required parameters. The host and port are optional,
-        defaulting to 'localhost' and 27017, repectively.
+    The names of the database and collection in which the records will
+    be inserted are required parameters. The host and port are optional,
+    defaulting to 'localhost' and 27017, respectively.
     """

-    def __init__(self, database, collection, host='localhost', port=27017, drop_collection=False, conn=None):
-        super(MongoDBEmitter, self).__init__()
+    def __init__(
+        self,
+        database,
+        collection,
+        host="localhost",
+        port=27017,
+        drop_collection=False,
+        conn=None,
+    ):
+        super().__init__()

         from pymongo.database import Database
+
         if not isinstance(database, Database):
             if not conn:
                 from pymongo.connection import Connection
+
                 conn = Connection(host, port)
             db = conn[database]
         else:
@@ -249,16 +278,17 @@ class MongoDBEmitter(Emitter):


 class LoggingEmitter(Emitter):
-    """ Emitter that logs to a Python logging.Logger instance.
+    """Emitter that logs to a Python logging.Logger instance.

-        The msg_template will be passed the record being emitted as
-        a format parameter. The resulting message will get logged
-        at the provided level.
+    The msg_template will be passed the record being emitted as
+    a format parameter. The resulting message will get logged
+    at the provided level.
     """

     import logging

     def __init__(self, logger, msg_template, level=logging.DEBUG):
-        super(LoggingEmitter, self).__init__()
+        super().__init__()
         self.logger = logger
         self.msg_template = msg_template
         self.level = level
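A sketch of the emitter lifecycle these docstrings describe: process_record() hands each record to emit_record() and passes it through unchanged, and done() gives emitters a chance to flush. (Filenames and records are illustrative; CountEmitter's exact reporting cadence is assumed from its docstring.)

from saucebrush.emitters import CountEmitter, CSVEmitter

csv_out = CSVEmitter(open("output.csv", "w"), ("id", "name"))  # header row written on init
counter = CountEmitter(every=2)  # per its docstring, writes the count every 2 records

for record in ({"id": 1, "name": "spam"}, {"id": 2, "name": "eggs"}):
    csv_out.process_record(record)  # calls emit_record(), returns the record unchanged
    counter.process_record(record)

csv_out.done()  # inherited no-op here; database-backed emitters flush/commit
counter.done()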
saucebrush/filters.py
@@ -12,26 +12,28 @@ import re
 import time

 ######################
-## Abstract Filters ##
+# Abstract Filters #
 ######################

-class Filter(object):
-    """ ABC for filters that operate on records.
-
-        All derived filters must provide a process_record(self, record) that
-        takes a single record (python dictionary) and returns a result.
+
+class Filter:
+    """ABC for filters that operate on records.
+
+    All derived filters must provide a process_record(self, record) that
+    takes a single record (python dictionary) and returns a result.
     """

     def process_record(self, record):
-        """ Abstract method to be overridden.
+        """Abstract method to be overridden.

-            Called with a single record, should return modified record.
+        Called with a single record, should return modified record.
         """
-        raise NotImplementedError('process_record not defined in ' +
-                                  self.__class__.__name__)
+        raise NotImplementedError(
+            "process_record not defined in " + self.__class__.__name__
+        )

     def reject_record(self, record, exception):
-        recipe = getattr(self, '_recipe')
+        recipe = getattr(self, "_recipe")
         if recipe:
             recipe.reject_record(record, exception)

@@ -47,11 +49,11 @@ class Filter(object):


 class YieldFilter(Filter):
-    """ ABC for defining filters where process_record yields.
+    """ABC for defining filters where process_record yields.

-        If process_record cannot return exactly one result for every record
-        it is passed, it should yield back as many records as needed and the
-        filter must derive from YieldFilter.
+    If process_record cannot return exactly one result for every record
+    it is passed, it should yield back as many records as needed and the
+    filter must derive from YieldFilter.
     """

     def attach(self, source, recipe=None):
@@ -65,19 +67,19 @@ class YieldFilter(Filter):


 class FieldFilter(Filter):
-    """ ABC for filters that do a single operation on individual fields.
+    """ABC for filters that do a single operation on individual fields.

-        All derived filters must provide a process_field(self, item) that
-        returns a modified item. process_field is called on one or more keys
-        passed into __init__.
+    All derived filters must provide a process_field(self, item) that
+    returns a modified item. process_field is called on one or more keys
+    passed into __init__.
     """

     def __init__(self, keys):
-        super(FieldFilter, self).__init__()
+        super().__init__()
         self._target_keys = utils.str_or_list(keys)

     def process_record(self, record):
-        """ Calls process_field on all keys passed to __init__. """
+        """Calls process_field on all keys passed to __init__."""

         for key in self._target_keys:
             try:
@@ -89,29 +91,31 @@ class FieldFilter(Filter):
         return record

     def process_field(self, item):
-        """ Given a value, return the value that it should be replaced with. """
+        """Given a value, return the value that it should be replaced with."""

-        raise NotImplementedError('process_field not defined in ' +
-                                  self.__class__.__name__)
+        raise NotImplementedError(
+            "process_field not defined in " + self.__class__.__name__
+        )

     def __unicode__(self):
-        return '%s( %s )' % (self.__class__.__name__, str(self._target_keys))
+        return "%s( %s )" % (self.__class__.__name__, str(self._target_keys))


 class ConditionalFilter(YieldFilter):
-    """ ABC for filters that only pass through records meeting a condition.
+    """ABC for filters that only pass through records meeting a condition.

-        All derived filters must provide a test_record(self, record) that
-        returns True or False -- True indicating that the record should be
-        passed through, and False preventing pass through.
+    All derived filters must provide a test_record(self, record) that
+    returns True or False -- True indicating that the record should be
+    passed through, and False preventing pass through.

-        If validator is True then raises a ValidationError instead of
-        silently dropping records that fail test_record.
+    If validator is True then raises a ValidationError instead of
+    silently dropping records that fail test_record.
     """

     validator = False

     def process_record(self, record):
-        """ Yields all records for which self.test_record is true """
+        """Yields all records for which self.test_record is true"""

         if self.test_record(record):
             yield record
@@ -119,41 +123,45 @@ class ConditionalFilter(YieldFilter):
             raise ValidationError(record)

     def test_record(self, record):
-        """ Given a record, return True iff it should be passed on """
-        raise NotImplementedError('test_record not defined in ' +
-                                  self.__class__.__name__)
+        """Given a record, return True iff it should be passed on"""
+        raise NotImplementedError(
+            "test_record not defined in " + self.__class__.__name__
+        )


 class ValidationError(Exception):
     def __init__(self, record):
-        super(ValidationError, self).__init__(repr(record))
+        super().__init__(repr(record))
         self.record = record


 def _dotted_get(d, path):
     """
-        utility function for SubrecordFilter
+    utility function for SubrecordFilter

-        dives into a complex nested dictionary with paths like a.b.c
+    dives into a complex nested dictionary with paths like a.b.c
     """
     if path:
-        key_pieces = path.split('.', 1)
+        key_pieces = path.split(".", 1)
         piece = d[key_pieces[0]]
         if isinstance(piece, (tuple, list)):
-            return [_dotted_get(i, '.'.join(key_pieces[1:])) for i in piece]
+            return [_dotted_get(i, ".".join(key_pieces[1:])) for i in piece]
         elif isinstance(piece, (dict)):
-            return _dotted_get(piece, '.'.join(key_pieces[1:]))
+            return _dotted_get(piece, ".".join(key_pieces[1:]))
     else:
         return d

-class SubrecordFilter(Filter):
-    """ Filter that calls another filter on subrecord(s) of a record
-
-        Takes a dotted path (eg. a.b.c) and instantiated filter and runs that
-        filter on all subrecords found at the path.
+
+class SubrecordFilter(Filter):
+    """Filter that calls another filter on subrecord(s) of a record
+
+    Takes a dotted path (eg. a.b.c) and instantiated filter and runs that
+    filter on all subrecords found at the path.
     """

     def __init__(self, field_path, filter_):
-        if '.' in field_path:
-            self.field_path, self.key = field_path.rsplit('.', 1)
+        if "." in field_path:
+            self.field_path, self.key = field_path.rsplit(".", 1)
         else:
             self.field_path = None
             self.key = field_path
@@ -178,8 +186,9 @@ class SubrecordFilter(Filter):
         self.process_subrecord(subrecord_parent)
         return record

+
 class ConditionalPathFilter(Filter):
-    """ Filter that uses a predicate to split input among two filter paths. """
+    """Filter that uses a predicate to split input among two filter paths."""

     def __init__(self, predicate_func, true_filter, false_filter):
         self.predicate_func = predicate_func
@@ -192,38 +201,43 @@ class ConditionalPathFilter(Filter):
         else:
             return self.false_filter.process_record(record)


 #####################
-## Generic Filters ##
+# Generic Filters #
 #####################


 class FieldModifier(FieldFilter):
-    """ Filter that calls a given function on a given set of fields.
+    """Filter that calls a given function on a given set of fields.

-        FieldModifier(('spam','eggs'), abs) to call the abs method on the spam
-        and eggs fields in each record filtered.
+    FieldModifier(('spam','eggs'), abs) to call the abs method on the spam
+    and eggs fields in each record filtered.
     """

     def __init__(self, keys, func):
-        super(FieldModifier, self).__init__(keys)
+        super().__init__(keys)
         self._filter_func = func

     def process_field(self, item):
         return self._filter_func(item)

-    def __unicode__(self):
-        return '%s( %s, %s )' % (self.__class__.__name__,
-                                 str(self._target_keys), str(self._filter_func))
+    def __str__(self):
+        return "%s( %s, %s )" % (
+            self.__class__.__name__,
+            str(self._target_keys),
+            str(self._filter_func),
+        )


 class FieldKeeper(Filter):
-    """ Filter that removes all but the given set of fields.
+    """Filter that removes all but the given set of fields.

-        FieldKeeper(('spam', 'eggs')) removes all bu tthe spam and eggs
-        fields from every record filtered.
+    FieldKeeper(('spam', 'eggs')) removes all but the spam and eggs
+    fields from every record filtered.
     """

     def __init__(self, keys):
-        super(FieldKeeper, self).__init__()
+        super().__init__()
         self._target_keys = utils.str_or_list(keys)

     def process_record(self, record):
@@ -234,14 +248,14 @@ class FieldKeeper(Filter):


 class FieldRemover(Filter):
-    """ Filter that removes a given set of fields.
+    """Filter that removes a given set of fields.

-        FieldRemover(('spam', 'eggs')) removes the spam and eggs fields from
-        every record filtered.
+    FieldRemover(('spam', 'eggs')) removes the spam and eggs fields from
+    every record filtered.
     """

     def __init__(self, keys):
-        super(FieldRemover, self).__init__()
+        super().__init__()
         self._target_keys = utils.str_or_list(keys)

     def process_record(self, record):
@@ -249,21 +263,21 @@ class FieldRemover(Filter):
         record.pop(key, None)
         return record

-    def __unicode__(self):
-        return '%s( %s )' % (self.__class__.__name__, str(self._target_keys))
+    def __str__(self):
+        return "%s( %s )" % (self.__class__.__name__, str(self._target_keys))


 class FieldMerger(Filter):
-    """ Filter that merges a given set of fields using a supplied merge_func.
+    """Filter that merges a given set of fields using a supplied merge_func.

-        Takes a mapping (dictionary of new_column:(from_col1,from_col2))
+    Takes a mapping (dictionary of new_column:(from_col1,from_col2))

-        FieldMerger({"bacon": ("spam", "eggs")}, operator.add) creates a new
-        column bacon that is the result of spam+eggs
+    FieldMerger({"bacon": ("spam", "eggs")}, operator.add) creates a new
+    column bacon that is the result of spam+eggs
     """

     def __init__(self, mapping, merge_func, keep_fields=False):
-        super(FieldMerger, self).__init__()
+        super().__init__()
         self._field_mapping = mapping
         self._merge_func = merge_func
         self._keep_fields = keep_fields
@@ -277,30 +291,32 @@ class FieldMerger(Filter):
         record[to_col] = self._merge_func(*values)
         return record

-    def __unicode__(self):
-        return '%s( %s, %s )' % (self.__class__.__name__,
-                                 str(self._field_mapping),
-                                 str(self._merge_func))
+    def __str__(self):
+        return "%s( %s, %s )" % (
+            self.__class__.__name__,
+            str(self._field_mapping),
+            str(self._merge_func),
+        )


 class FieldAdder(Filter):
-    """ Filter that adds a new field.
+    """Filter that adds a new field.

-        Takes a name for the new field and a value, field_value can be an
-        iterable, a function, or a static value.
+    Takes a name for the new field and a value, field_value can be an
+    iterable, a function, or a static value.

-        from itertools import count
-        FieldAdder('id', count)
+    from itertools import count
+    FieldAdder('id', count)

-        would yield a new column named id that uses the itertools count iterable
-        to create sequentially numbered ids.
+    would yield a new column named id that uses the itertools count iterable
+    to create sequentially numbered ids.
     """

     def __init__(self, field_name, field_value, replace=True):
-        super(FieldAdder, self).__init__()
+        super().__init__()
         self._field_name = field_name
         self._field_value = field_value
-        if hasattr(self._field_value, '__iter__'):
+        if hasattr(self._field_value, "__iter__"):
             value_iter = iter(self._field_value)
             if hasattr(value_iter, "next"):
                 self._field_value = value_iter.next
@@ -317,17 +333,22 @@ class FieldAdder(Filter):
         return record

     def __unicode__(self):
-        return '%s( %s, %s )' % (self.__class__.__name__, self._field_name,
-                                 str(self._field_value))
+        return "%s( %s, %s )" % (
+            self.__class__.__name__,
+            self._field_name,
+            str(self._field_value),
+        )


 class FieldCopier(Filter):
-    """ Filter that copies one field to another.
+    """Filter that copies one field to another.

-        Takes a dictionary mapping destination keys to source keys.
+    Takes a dictionary mapping destination keys to source keys.

     """

     def __init__(self, copy_mapping):
-        super(FieldCopier, self).__init__()
+        super().__init__()
         self._copy_mapping = copy_mapping

     def process_record(self, record):
@@ -336,13 +357,15 @@ class FieldCopier(Filter):
         record[dest] = record[source]
         return record

-class FieldRenamer(Filter):
-    """ Filter that renames one field to another.
-
-        Takes a dictionary mapping destination keys to source keys.
+
+class FieldRenamer(Filter):
+    """Filter that renames one field to another.
+
+    Takes a dictionary mapping destination keys to source keys.
     """

     def __init__(self, rename_mapping):
-        super(FieldRenamer, self).__init__()
+        super().__init__()
         self._rename_mapping = rename_mapping

     def process_record(self, record):
@@ -351,15 +374,16 @@ class FieldRenamer(Filter):
         record[dest] = record.pop(source)
         return record

-class FieldNameModifier(Filter):
-    """ Filter that calls a given function on a given set of fields.
-
-        FieldNameModifier(('spam','eggs'), abs) to call the abs method on the spam
-        and eggs field names in each record filtered.
+
+class FieldNameModifier(Filter):
+    """Filter that calls a given function on a given set of fields.
+
+    FieldNameModifier(('spam','eggs'), abs) to call the abs method on the spam
+    and eggs field names in each record filtered.
     """

     def __init__(self, func):
-        super(FieldNameModifier, self).__init__()
+        super().__init__()
         self._filter_func = func

     def process_record(self, record):
@@ -368,19 +392,20 @@ class FieldNameModifier(Filter):
         record[dest] = record.pop(source)
         return record


 class Splitter(Filter):
-    """ Filter that splits nested data into different paths.
+    """Filter that splits nested data into different paths.

-        Takes a dictionary of keys and a series of filters to run against the
-        associated dictionaries.
+    Takes a dictionary of keys and a series of filters to run against the
+    associated dictionaries.

-        {'person': {'firstname': 'James', 'lastname': 'Turk'},
-         'phones': [{'phone': '222-222-2222'}, {'phone': '335-333-3321'}]
-        }
+    {'person': {'firstname': 'James', 'lastname': 'Turk'},
+     'phones': [{'phone': '222-222-2222'}, {'phone': '335-333-3321'}]
+    }
     """

     def __init__(self, split_mapping):
-        super(Splitter, self).__init__()
+        super().__init__()
         self._split_mapping = split_mapping

     def process_record(self, record):
@@ -409,21 +434,22 @@ class Splitter(Filter):


 class Flattener(FieldFilter):
-    """ Collapse a set of similar dictionaries into a list.
+    """Collapse a set of similar dictionaries into a list.

-        Takes a dictionary of keys and flattens the key names:
+    Takes a dictionary of keys and flattens the key names:

-        addresses = [{'addresses': [{'address': {'state':'NC', 'street':'146 shirley drive'}},
-                     {'address': {'state':'NY', 'street':'3000 Winton Rd'}}]}]
-        flattener = Flattener(['addresses'])
+    addresses = [{'addresses': [{'address': {'state':'NC', 'street':'146 shirley drive'}},
+                 {'address': {'state':'NY', 'street':'3000 Winton Rd'}}]}]
+    flattener = Flattener(['addresses'])

-        would yield:
+    would yield:

-        {'addresses': [{'state': 'NC', 'street': '146 shirley drive'},
-                       {'state': 'NY', 'street': '3000 Winton Rd'}]}
+    {'addresses': [{'state': 'NC', 'street': '146 shirley drive'},
+                   {'state': 'NY', 'street': '3000 Winton Rd'}]}
     """

     def __init__(self, keys):
-        super(Flattener, self).__init__(keys)
+        super().__init__(keys)

     def process_field(self, item):
         result = []
@@ -436,8 +462,8 @@ class Flattener(FieldFilter):


 class DictFlattener(Filter):
-    def __init__(self, keys, separator='_'):
-        super(DictFlattener, self).__init__()
+    def __init__(self, keys, separator="_"):
+        super().__init__()
         self._keys = utils.str_or_list(keys)
         self._separator = separator

@@ -446,11 +472,10 @@ class DictFlattener(Filter):


 class Unique(ConditionalFilter):
-    """ Filter that ensures that all records passing through are unique.
-    """
+    """Filter that ensures that all records passing through are unique."""

     def __init__(self):
-        super(Unique, self).__init__()
+        super().__init__()
         self._seen = set()

     def test_record(self, record):
@@ -461,19 +486,20 @@ class Unique(ConditionalFilter):
         else:
             return False


 class UniqueValidator(Unique):
     validator = True


 class UniqueID(ConditionalFilter):
-    """ Filter that ensures that all records through have a unique ID.
+    """Filter that ensures that all records through have a unique ID.

-        Takes the name of an ID field, or multiple field names in the case
-        of a composite ID.
+    Takes the name of an ID field, or multiple field names in the case
+    of a composite ID.
     """

-    def __init__(self, field='id', *args):
-        super(UniqueID, self).__init__()
+    def __init__(self, field="id", *args):
+        super().__init__()
         self._seen = set()
         self._id_fields = [field]
         self._id_fields.extend(args)
@@ -486,58 +512,30 @@ class UniqueID(ConditionalFilter):
         else:
             return False


 class UniqueIDValidator(UniqueID):
     validator = True


-class UnicodeFilter(Filter):
-    """ Convert all str elements in the record to Unicode.
-    """
-
-    def __init__(self, encoding='utf-8', errors='ignore'):
-        super(UnicodeFilter, self).__init__()
-        self._encoding = encoding
-        self._errors = errors
-
-    def process_record(self, record):
-        for key, value in record.items():
-            if isinstance(value, str):
-                record[key] = unicode(value, self._encoding, self._errors)
-            elif isinstance(value, unicode):
-                record[key] = value.decode(self._encoding, self._errors)
-        return record
-
-class StringFilter(Filter):
-
-    def __init__(self, encoding='utf-8', errors='ignore'):
-        super(StringFilter, self).__init__()
-        self._encoding = encoding
-        self._errors = errors
-
-    def process_record(self, record):
-        for key, value in record.items():
-            if isinstance(value, unicode):
-                record[key] = value.encode(self._encoding, self._errors)
-        return record
-

 ###########################
-## Commonly Used Filters ##
+# Commonly Used Filters #
 ###########################


 class PhoneNumberCleaner(FieldFilter):
-    """ Filter that cleans phone numbers to match a given format.
+    """Filter that cleans phone numbers to match a given format.

-        Takes a list of target keys and an optional phone # format that has
-        10 %s placeholders.
+    Takes a list of target keys and an optional phone # format that has
+    10 %s placeholders.

-        PhoneNumberCleaner( ('phone','fax'), number_format='%s%s%s-%s%s%s-%s%s%s%s')
-        would format the phone & fax columns to 555-123-4567 format.
+    PhoneNumberCleaner( ('phone','fax'), number_format='%s%s%s-%s%s%s-%s%s%s%s')
+    would format the phone & fax columns to 555-123-4567 format.
     """

-    def __init__(self, keys, number_format='%s%s%s.%s%s%s.%s%s%s%s'):
-        super(PhoneNumberCleaner, self).__init__(keys)
+    def __init__(self, keys, number_format="%s%s%s.%s%s%s.%s%s%s%s"):
+        super().__init__(keys)
         self._number_format = number_format
-        self._num_re = re.compile('\d')
+        self._num_re = re.compile(r"\d")

     def process_field(self, item):
         nums = self._num_re.findall(item)
@@ -545,46 +543,54 @@ class PhoneNumberCleaner(FieldFilter):
         item = self._number_format % tuple(nums)
         return item

-class DateCleaner(FieldFilter):
-    """ Filter that cleans dates to match a given format.
-
-        Takes a list of target keys and to and from formats in strftime format.
+
+class DateCleaner(FieldFilter):
+    """Filter that cleans dates to match a given format.
+
+    Takes a list of target keys and to and from formats in strftime format.
     """

     def __init__(self, keys, from_format, to_format):
-        super(DateCleaner, self).__init__(keys)
+        super().__init__(keys)
         self._from_format = from_format
         self._to_format = to_format

     def process_field(self, item):
-        return time.strftime(self._to_format,
-                             time.strptime(item, self._from_format))
+        return time.strftime(self._to_format, time.strptime(item, self._from_format))


 class NameCleaner(Filter):
-    """ Filter that splits names into a first, last, and middle name field.
+    """Filter that splits names into a first, last, and middle name field.

-        Takes a list of target keys.
+    Takes a list of target keys.

-        NameCleaner( ('name', ), nomatch_name='raw_name')
-        would attempt to split 'name' into firstname, middlename, lastname,
-        and suffix columns, and if it did not fit would place it in raw_name
+    NameCleaner( ('name', ), nomatch_name='raw_name')
+    would attempt to split 'name' into firstname, middlename, lastname,
+    and suffix columns, and if it did not fit would place it in raw_name
     """

     # first middle? last suffix?
-    FIRST_LAST = re.compile('''^\s*(?:(?P<firstname>\w+)(?:\.?)
+    FIRST_LAST = re.compile(
+        r"""^\s*(?:(?P<firstname>\w+)(?:\.?)
         \s+(?:(?P<middlename>\w+)\.?\s+)?
         (?P<lastname>[A-Za-z'-]+))
         (?:\s+(?P<suffix>JR\.?|II|III|IV))?
-        \s*$''', re.VERBOSE | re.IGNORECASE)
+        \s*$""",
+        re.VERBOSE | re.IGNORECASE,
+    )

     # last, first middle? suffix?
-    LAST_FIRST = re.compile('''^\s*(?:(?P<lastname>[A-Za-z'-]+),
+    LAST_FIRST = re.compile(
+        r"""^\s*(?:(?P<lastname>[A-Za-z'-]+),
         \s+(?P<firstname>\w+)(?:\.?)
         (?:\s+(?P<middlename>\w+)\.?)?)
         (?:\s+(?P<suffix>JR\.?|II|III|IV))?
-        \s*$''', re.VERBOSE | re.IGNORECASE)
+        \s*$""",
+        re.VERBOSE | re.IGNORECASE,
+    )

-    def __init__(self, keys, prefix='', formats=None, nomatch_name=None):
-        super(NameCleaner, self).__init__()
+    def __init__(self, keys, prefix="", formats=None, nomatch_name=None):
+        super().__init__()
         self._keys = utils.str_or_list(keys)
         self._name_prefix = prefix
         self._nomatch_name = nomatch_name
@@ -605,7 +611,7 @@ class NameCleaner(Filter):
         # if there is a match, remove original name and add pieces
         if match:
             record.pop(key)
-            for k,v in match.groupdict().items():
+            for k, v in match.groupdict().items():
                 record[self._name_prefix + k] = v
             break
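To make the filter contract concrete, a small sketch using FieldMerger exactly as its docstring above describes (values are illustrative; assumes the merged source fields are dropped unless keep_fields=True, which is what that flag implies):

import operator
from saucebrush.filters import FieldMerger

# Create a new "bacon" column from spam + eggs, per the docstring's example.
merger = FieldMerger({"bacon": ("spam", "eggs")}, operator.add)
record = merger.process_record({"spam": 1, "eggs": 2})
# record["bacon"] == 3; pass keep_fields=True to keep spam/eggs as well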
saucebrush/sources.py
@@ -4,27 +4,28 @@
 All sources must implement the iterable interface and return python
 dictionaries.
 """
-from __future__ import unicode_literals
 import string

 from saucebrush import utils

-class CSVSource(object):
-    """ Saucebrush source for reading from CSV files.
-
-        Takes an open csvfile, an optional set of fieldnames and optional number
-        of rows to skip.
-
-        CSVSource(open('test.csv')) will read a csvfile, using the first row as
-        the field names.
-
-        CSVSource(open('test.csv'), ('name', 'phone', 'address'), 1) will read
-        in a CSV file and treat the three columns as name, phone, and address,
-        ignoring the first row (presumed to be column names).
+
+class CSVSource:
+    """Saucebrush source for reading from CSV files.
+
+    Takes an open csvfile, an optional set of fieldnames and optional number
+    of rows to skip.
+
+    CSVSource(open('test.csv')) will read a csvfile, using the first row as
+    the field names.
+
+    CSVSource(open('test.csv'), ('name', 'phone', 'address'), 1) will read
+    in a CSV file and treat the three columns as name, phone, and address,
+    ignoring the first row (presumed to be column names).
     """

     def __init__(self, csvfile, fieldnames=None, skiprows=0, **kwargs):
         import csv
+
         self._dictreader = csv.DictReader(csvfile, fieldnames, **kwargs)
         for _ in range(skiprows):
             next(self._dictreader)
@@ -33,17 +34,17 @@ class CSVSource(object):
         return self._dictreader


-class FixedWidthFileSource(object):
-    """ Saucebrush source for reading from fixed width field files.
-
-        FixedWidthFileSource expects an open fixed width file and a tuple
-        of fields with their lengths. There is also an optional fillchars
-        command that is the filler characters to strip from the end of each
-        field. (defaults to whitespace)
-
-        FixedWidthFileSource(open('testfile'), (('name',30), ('phone',12)))
-        will read in a fixed width file where the first 30 characters of each
-        line are part of a name and the characters 31-42 are a phone number.
+class FixedWidthFileSource:
+    """Saucebrush source for reading from fixed width field files.
+
+    FixedWidthFileSource expects an open fixed width file and a tuple
+    of fields with their lengths. There is also an optional fillchars
+    command that is the filler characters to strip from the end of each
+    field. (defaults to whitespace)
+
+    FixedWidthFileSource(open('testfile'), (('name',30), ('phone',12)))
+    will read in a fixed width file where the first 30 characters of each
+    line are part of a name and the characters 31-42 are a phone number.
     """

     def __init__(self, fwfile, fields, fillchars=string.whitespace):
@@ -64,97 +65,98 @@ class FixedWidthFileSource(object):
         line = next(self._fwfile)
         record = {}
         for name, range_ in self._fields_dict.items():
-            record[name] = line[range_[0]:range_[1]].rstrip(self._fillchars)
+            record[name] = line[range_[0] : range_[1]].rstrip(self._fillchars)
         return record

-    def next(self):
-        """ Keep Python 2 next() method that defers to __next__().
-        """
-        return self.__next__()

-class HtmlTableSource(object):
-    """ Saucebrush source for reading data from an HTML table.
-
-        HtmlTableSource expects an open html file, the id of the table or a
-        number indicating which table on the page to use, an optional fieldnames
-        tuple, and an optional number of rows to skip.
-
-        HtmlTableSource(open('test.html'), 0) opens the first HTML table and
-        uses the first row as the names of the columns.
-
-        HtmlTableSource(open('test.html'), 'people', ('name','phone'), 1) opens
-        the HTML table with an id of 'people' and names the two columns
-        name and phone, skipping the first row where alternate names are
-        stored.
+class HtmlTableSource:
+    """Saucebrush source for reading data from an HTML table.
+
+    HtmlTableSource expects an open html file, the id of the table or a
+    number indicating which table on the page to use, an optional fieldnames
+    tuple, and an optional number of rows to skip.
+
+    HtmlTableSource(open('test.html'), 0) opens the first HTML table and
+    uses the first row as the names of the columns.
+
+    HtmlTableSource(open('test.html'), 'people', ('name','phone'), 1) opens
+    the HTML table with an id of 'people' and names the two columns
+    name and phone, skipping the first row where alternate names are
+    stored.
     """

     def __init__(self, htmlfile, id_or_num, fieldnames=None, skiprows=0):

         # extract the table
         from lxml.html import parse
+
         doc = parse(htmlfile).getroot()
         if isinstance(id_or_num, int):
-            table = doc.cssselect('table')[id_or_num]
+            table = doc.cssselect("table")[id_or_num]
         else:
-            table = doc.cssselect('table#%s' % id_or_num)
+            table = doc.cssselect("table#%s" % id_or_num)

-        table = table[0] # get the first table
+        table = table[0]  # get the first table

         # skip the necessary number of rows
-        self._rows = table.cssselect('tr')[skiprows:]
+        self._rows = table.cssselect("tr")[skiprows:]

         # determine the fieldnames
         if not fieldnames:
-            self._fieldnames = [td.text_content()
-                                for td in self._rows[0].cssselect('td, th')]
+            self._fieldnames = [
+                td.text_content() for td in self._rows[0].cssselect("td, th")
+            ]
             skiprows += 1
         else:
             self._fieldnames = fieldnames

         # skip the necessary number of rows
-        self._rows = table.cssselect('tr')[skiprows:]
+        self._rows = table.cssselect("tr")[skiprows:]

     def process_tr(self):
         for row in self._rows:
-            strings = [td.text_content() for td in row.cssselect('td')]
+            strings = [td.text_content() for td in row.cssselect("td")]
             yield dict(zip(self._fieldnames, strings))

     def __iter__(self):
         return self.process_tr()


-class DjangoModelSource(object):
-    """ Saucebrush source for reading data from django models.
-
-        DjangoModelSource expects a django settings file, app label, and model
-        name. The resulting records contain all columns in the table for the
-        specified model.
-
-        DjangoModelSource('settings.py', 'phonebook', 'friend') would read all
-        friends from the friend model in the phonebook app described in
-        settings.py.
+class DjangoModelSource:
+    """Saucebrush source for reading data from django models.
+
+    DjangoModelSource expects a django settings file, app label, and model
+    name. The resulting records contain all columns in the table for the
+    specified model.
+
+    DjangoModelSource('settings.py', 'phonebook', 'friend') would read all
+    friends from the friend model in the phonebook app described in
+    settings.py.
     """

     def __init__(self, dj_settings, app_label, model_name):
         dbmodel = utils.get_django_model(dj_settings, app_label, model_name)

         # only get values defined in model (no extra fields from custom manager)
-        self._data = dbmodel.objects.values(*[f.name
-                                              for f in dbmodel._meta.fields])
+        self._data = dbmodel.objects.values(*[f.name for f in dbmodel._meta.fields])

     def __iter__(self):
         return iter(self._data)


-class MongoDBSource(object):
-    """ Source for reading from a MongoDB database.
-
-        The record dict is populated with records matching the spec
-        from the specified database and collection.
+class MongoDBSource:
+    """Source for reading from a MongoDB database.
+
+    The record dict is populated with records matching the spec
+    from the specified database and collection.
     """

-    def __init__(self, database, collection, spec=None, host='localhost', port=27017, conn=None):
+    def __init__(
+        self, database, collection, spec=None, host="localhost", port=27017, conn=None
+    ):
         if not conn:
             from pymongo.connection import Connection
+
             conn = Connection(host, port)
         self.collection = conn[database][collection]
         self.spec = spec
@@ -166,19 +168,21 @@ class MongoDBSource(object):
         for doc in self.collection.find(self.spec):
             yield dict(doc)


 # dict_factory for sqlite source
 def dict_factory(cursor, row):
-    d = { }
+    d = {}
     for idx, col in enumerate(cursor.description):
         d[col[0]] = row[idx]
     return d

-class SqliteSource(object):
-    """ Source that reads from a sqlite database.
-
-        The record dict is populated with the results from the
-        query argument. If given, args will be passed to the query
-        when executed.
+
+class SqliteSource:
+    """Source that reads from a sqlite database.
+
+    The record dict is populated with the results from the
+    query argument. If given, args will be passed to the query
+    when executed.
     """

     def __init__(self, dbpath, query, args=None, conn_params=None):
@@ -213,11 +217,11 @@ class SqliteSource(object):
         self._conn.close()


-class FileSource(object):
-    """ Base class for sources which read from one or more files.
+class FileSource:
+    """Base class for sources which read from one or more files.

-        Takes as input a file-like, a file path, a list of file-likes,
-        or a list of file paths.
+    Takes as input a file-like, a file path, a list of file-likes,
+    or a list of file paths.
     """

     def __init__(self, input):
@@ -226,34 +230,36 @@ class FileSource(object):
     def __iter__(self):
         # This method would be a lot cleaner with the proposed
         # 'yield from' expression (PEP 380)
-        if hasattr(self._input, '__read__') or hasattr(self._input, 'read'):
+        if hasattr(self._input, "__read__") or hasattr(self._input, "read"):
             for record in self._process_file(self._input):
                 yield record
         elif isinstance(self._input, str):
             with open(self._input) as f:
                 for record in self._process_file(f):
                     yield record
-        elif hasattr(self._input, '__iter__'):
+        elif hasattr(self._input, "__iter__"):
             for el in self._input:
                 if isinstance(el, str):
                     with open(el) as f:
                         for record in self._process_file(f):
                             yield record
-                elif hasattr(el, '__read__') or hasattr(el, 'read'):
+                elif hasattr(el, "__read__") or hasattr(el, "read"):
                     for record in self._process_file(el):
                         yield record

     def _process_file(self, file):
-        raise NotImplementedError('Descendants of FileSource should implement'
-                                  ' a custom _process_file method.')
+        raise NotImplementedError(
+            "Descendants of FileSource should implement"
+            " a custom _process_file method."
+        )


 class JSONSource(FileSource):
-    """ Source for reading from JSON files.
+    """Source for reading from JSON files.

-        When processing JSON files, if the top-level object is a list, will
-        yield each member separately. Otherwise, yields the top-level
-        object.
+    When processing JSON files, if the top-level object is a list, will
+    yield each member separately. Otherwise, yields the top-level
+    object.
     """

     def _process_file(self, f):
@@ -271,36 +277,37 @@ class JSONSource(FileSource):
         else:
             yield obj

-class XMLSource(FileSource):
-    """ Source for reading from XML files. Use with the same kind of caution
-        that you use to approach anything written in XML.
-
-        When processing XML files, if the top-level object is a list, will
-        yield each member separately, unless the dotted path to a list is
-        included. you can also do this with a SubrecordFilter, but XML is
-        almost never going to be useful at the top level.
+
+class XMLSource(FileSource):
+    """Source for reading from XML files. Use with the same kind of caution
+    that you use to approach anything written in XML.
+
+    When processing XML files, if the top-level object is a list, will
+    yield each member separately, unless the dotted path to a list is
+    included. You can also do this with a SubrecordFilter, but XML is
+    almost never going to be useful at the top level.
     """

-    def __init__(self, input, node_path=None, attr_prefix='ATTR_',
-                 postprocessor=None):
-        super(XMLSource, self).__init__(input)
-        self.node_list = node_path.split('.')
+    def __init__(self, input, node_path=None, attr_prefix="ATTR_", postprocessor=None):
+        super().__init__(input)
+        self.node_list = node_path.split(".")
         self.attr_prefix = attr_prefix
         self.postprocessor = postprocessor

-    def _process_file(self, f, attr_prefix='ATTR_'):
+    def _process_file(self, f, attr_prefix="ATTR_"):
         """xmltodict can either return attributes of nodes as prefixed fields
-           (prefixes to avoid key collisions), or ignore them altogether.
+        (prefixes to avoid key collisions), or ignore them altogether.

-           set attr prefix to whatever you want. Setting it to False ignores
-           attributes.
+        set attr prefix to whatever you want. Setting it to False ignores
+        attributes.
         """

         import xmltodict

         if self.postprocessor:
-            obj = xmltodict.parse(f, attr_prefix=self.attr_prefix,
-                                  postprocessor=self.postprocessor)
+            obj = xmltodict.parse(
+                f, attr_prefix=self.attr_prefix, postprocessor=self.postprocessor
+            )
         else:
             obj = xmltodict.parse(f, attr_prefix=self.attr_prefix)

@@ -308,7 +315,7 @@ class XMLSource(FileSource):

         if self.node_list:
             for node in self.node_list:
-                 obj = obj[node]
+                obj = obj[node]

         # If the top-level XML object in the file is a list
         # then yield each element separately; otherwise, yield
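All of the sources above share one contract: iterating a source yields plain dicts, one per record. A short sketch against CSVSource (test.csv and its columns are hypothetical):

from saucebrush.sources import CSVSource

with open("test.csv") as f:
    # explicit fieldnames plus skiprows=1 to skip the header row
    for record in CSVSource(f, fieldnames=("name", "phone"), skiprows=1):
        print(record["name"], record["phone"])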
@ -1,22 +1,22 @@
|
||||
from saucebrush.filters import Filter
|
||||
from saucebrush.utils import FallbackCounter
|
||||
import collections
|
||||
import itertools
|
||||
import math
|
||||
|
||||
def _average(values):
|
||||
""" Calculate the average of a list of values.
|
||||
|
||||
:param values: an iterable of ints or floats to average
|
||||
def _average(values):
|
||||
"""Calculate the average of a list of values.
|
||||
|
||||
:param values: an iterable of ints or floats to average
|
||||
"""
|
||||
value_count = len(values)
|
||||
if len(values) > 0:
|
||||
return sum(values) / float(value_count)
|
||||
|
||||
def _median(values):
|
||||
""" Calculate the median of a list of values.
|
||||
|
||||
:param values: an iterable of ints or floats to calculate
|
||||
def _median(values):
|
||||
"""Calculate the median of a list of values.
|
||||
|
||||
:param values: an iterable of ints or floats to calculate
|
||||
"""
|
||||
|
||||
count = len(values)
|
||||
@ -35,14 +35,15 @@ def _median(values):
|
||||
else:
|
||||
# even number of items, return average of middle two items
|
||||
mid = int(count / 2)
|
||||
return sum(values[mid - 1:mid + 1]) / 2.0
|
||||
return sum(values[mid - 1 : mid + 1]) / 2.0
|
||||
|
||||
|
||||
def _stddev(values, population=False):
|
||||
""" Calculate the standard deviation and variance of a list of values.
|
||||
"""Calculate the standard deviation and variance of a list of values.
|
||||
|
||||
:param values: an iterable of ints or floats to calculate
|
||||
:param population: True if values represents entire population,
|
||||
False if it is a sample of the population
|
||||
:param values: an iterable of ints or floats to calculate
|
||||
:param population: True if values represents entire population,
|
||||
False if it is a sample of the population
|
||||
"""
|
||||
|
||||
avg = _average(values)
|
||||
@ -54,11 +55,11 @@ def _stddev(values, population=False):
|
||||
# the average of the squared differences
|
||||
variance = sum(diffsq) / float(count)
|
||||
|
||||
return (math.sqrt(variance), variance) # stddev is sqrt of variance
|
||||
return (math.sqrt(variance), variance) # stddev is sqrt of variance
|
||||
|
||||
|
||||
class StatsFilter(Filter):
|
||||
""" Base for all stats filters.
|
||||
"""
|
||||
"""Base for all stats filters."""
|
||||
|
||||
def __init__(self, field, test=None):
|
||||
self._field = field
|
||||
@ -70,20 +71,21 @@ class StatsFilter(Filter):
|
||||
return record
|
||||
|
||||
def process_field(self, record):
|
||||
raise NotImplementedError('process_field not defined in ' +
|
||||
self.__class__.__name__)
|
||||
raise NotImplementedError(
|
||||
"process_field not defined in " + self.__class__.__name__
|
||||
)
|
||||
|
||||
def value(self):
|
||||
raise NotImplementedError('value not defined in ' +
|
||||
self.__class__.__name__)
|
||||
raise NotImplementedError("value not defined in " + self.__class__.__name__)
|
||||
|
||||
|
||||
class Sum(StatsFilter):
|
||||
""" Calculate the sum of the values in a field. Field must contain either
|
||||
int or float values.
|
||||
"""Calculate the sum of the values in a field. Field must contain either
|
||||
int or float values.
|
||||
"""
|
||||
|
||||
def __init__(self, field, initial=0, **kwargs):
|
||||
super(Sum, self).__init__(field, **kwargs)
|
||||
super().__init__(field, **kwargs)
|
||||
self._value = initial
|
||||
|
||||
def process_field(self, item):
|
||||
@ -92,13 +94,14 @@ class Sum(StatsFilter):
|
||||
def value(self):
|
||||
return self._value
|
||||
|
||||
|
||||
class Average(StatsFilter):
|
||||
""" Calculate the average (mean) of the values in a field. Field must
|
||||
contain either int or float values.
|
||||
"""Calculate the average (mean) of the values in a field. Field must
|
||||
contain either int or float values.
|
||||
"""
|
||||
|
||||
def __init__(self, field, initial=0, **kwargs):
|
||||
super(Average, self).__init__(field, **kwargs)
|
||||
super().__init__(field, **kwargs)
|
||||
self._value = initial
|
||||
self._count = 0
|
||||
|
||||
@ -110,15 +113,16 @@ class Average(StatsFilter):
|
||||
def value(self):
|
||||
         return self._value / float(self._count)


 class Median(StatsFilter):
-    """ Calculate the median of the values in a field. Field must contain
-        either int or float values.
+    """Calculate the median of the values in a field. Field must contain
+    either int or float values.

     **This filter keeps a list of field values in memory.**
     """

     def __init__(self, field, **kwargs):
-        super(Median, self).__init__(field, **kwargs)
+        super().__init__(field, **kwargs)
         self._values = []

     def process_field(self, item):
@ -128,13 +132,14 @@ class Median(StatsFilter):
     def value(self):
         return _median(self._values)


 class MinMax(StatsFilter):
-    """ Find the minimum and maximum values in a field. Field must contain
-        either int or float values.
+    """Find the minimum and maximum values in a field. Field must contain
+    either int or float values.
     """

     def __init__(self, field, **kwargs):
-        super(MinMax, self).__init__(field, **kwargs)
+        super().__init__(field, **kwargs)
         self._max = None
         self._min = None

@ -148,18 +153,19 @@ class MinMax(StatsFilter):
     def value(self):
         return (self._min, self._max)


 class StandardDeviation(StatsFilter):
-    """ Calculate the standard deviation of the values in a field. Calling
-        value() will return a standard deviation for the sample. Pass
-        population=True to value() for the standard deviation of the
-        population. Convenience methods are provided for average() and
-        median(). Field must contain either int or float values.
+    """Calculate the standard deviation of the values in a field. Calling
+    value() will return a standard deviation for the sample. Pass
+    population=True to value() for the standard deviation of the
+    population. Convenience methods are provided for average() and
+    median(). Field must contain either int or float values.

     **This filter keeps a list of field values in memory.**
     """

     def __init__(self, field, **kwargs):
-        super(StandardDeviation, self).__init__(field, **kwargs)
+        super().__init__(field, **kwargs)
         self._values = []

     def process_field(self, item):
@ -173,31 +179,29 @@ class StandardDeviation(StatsFilter):
         return _median(self._values)

     def value(self, population=False):
-        """ Return a tuple of (standard_deviation, variance).
+        """Return a tuple of (standard_deviation, variance).

-            :param population: True if values represents entire population,
-                               False if values is a sample. Default: False
+        :param population: True if values represents entire population,
+            False if values is a sample. Default: False
         """
         return _stddev(self._values, population)


 class Histogram(StatsFilter):
-    """ Generate a basic histogram of the specified field. The value() method
-        returns a dict of value to occurance count mappings. The __str__ method
-        generates a basic and limited histogram useful for printing to the
-        command line. The label_length attribute determines the padding and
-        cut-off of the basic histogram labels.
+    """Generate a basic histogram of the specified field. The value() method
+    returns a dict of value to occurance count mappings. The __str__ method
+    generates a basic and limited histogram useful for printing to the
+    command line. The label_length attribute determines the padding and
+    cut-off of the basic histogram labels.

     **This filters maintains a dict of unique field values in memory.**
     """

     label_length = 6

     def __init__(self, field, **kwargs):
-        super(Histogram, self).__init__(field, **kwargs)
-        if hasattr(collections, 'Counter'):
-            self._counter = collections.Counter()
-        else:
-            self._counter = FallbackCounter()
+        super().__init__(field, **kwargs)
+        self._counter = collections.Counter()

     def process_field(self, item):
         self._counter[self.prep_field(item)] += 1
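Taken together, these stats filters share one pattern: construct with a field name, attach() to an iterable of records, drain the resulting generator, then read value(). A minimal sketch of that pattern, assuming the import path shown in tests/test_stats.py further down (the records and the field name "n" here are illustrative):

from saucebrush.stats import Median, MinMax, Histogram

records = [{"n": 1}, {"n": 5}, {"n": 10}]

med = Median("n")
list(med.attach(records))    # attach() returns a lazy generator; drain it
print(med.value())           # -> 5

mm = MinMax("n")
list(mm.attach(records))
print(mm.value())            # -> (1, 10)

hist = Histogram("n")
list(hist.attach(records))
print(hist.value())          # counter mapping each value to its count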
@ -1,45 +1,46 @@
 import collections
 import os

-try:
-    from urllib.request import urlopen  # attemp py3 first
-except ImportError:
-    from urllib2 import urlopen  # fallback to py2
+from urllib.request import urlopen

 """
 General utilities used within saucebrush that may be useful elsewhere.
 """


 def get_django_model(dj_settings, app_label, model_name):
     """
-        Get a django model given a settings file, app label, and model name.
+    Get a django model given a settings file, app label, and model name.
     """

     from django.conf import settings

     if not settings.configured:
-        settings.configure(DATABASE_ENGINE=dj_settings.DATABASE_ENGINE,
-                           DATABASE_NAME=dj_settings.DATABASE_NAME,
-                           DATABASE_USER=dj_settings.DATABASE_USER,
-                           DATABASE_PASSWORD=dj_settings.DATABASE_PASSWORD,
-                           DATABASE_HOST=dj_settings.DATABASE_HOST,
-                           INSTALLED_APPS=dj_settings.INSTALLED_APPS)
+        settings.configure(
+            DATABASE_ENGINE=dj_settings.DATABASE_ENGINE,
+            DATABASE_NAME=dj_settings.DATABASE_NAME,
+            DATABASE_USER=dj_settings.DATABASE_USER,
+            DATABASE_PASSWORD=dj_settings.DATABASE_PASSWORD,
+            DATABASE_HOST=dj_settings.DATABASE_HOST,
+            INSTALLED_APPS=dj_settings.INSTALLED_APPS,
+        )
     from django.db.models import get_model

     return get_model(app_label, model_name)


-def flatten(item, prefix='', separator='_', keys=None):
+def flatten(item, prefix="", separator="_", keys=None):
     """
     Flatten nested dictionary into one with its keys concatenated together.

     >>> flatten({'a':1, 'b':{'c':2}, 'd':[{'e':{'r':7}}, {'e':5}],
                  'f':{'g':{'h':6}}})
     {'a': 1, 'b_c': 2, 'd': [{'e_r': 7}, {'e': 5}], 'f_g_h': 6}
     """

     # update dictionaries recursively

     if isinstance(item, dict):
         # don't prepend a leading _
-        if prefix != '':
+        if prefix != "":
             prefix += separator
         retval = {}
         for key, value in item.items():
@ -48,45 +49,30 @@ def flatten(item, prefix='', separator='_', keys=None):
             else:
                 retval[prefix + key] = value
         return retval
-    #elif isinstance(item, (tuple, list)):
+    # elif isinstance(item, (tuple, list)):
     #     return {prefix: [flatten(i, prefix, separator, keys) for i in item]}
     else:
         return {prefix: item}


 def str_or_list(obj):
     if isinstance(obj, str):
         return [obj]
     else:
         return obj


 #
 # utility classes
 #

-class FallbackCounter(collections.defaultdict):
-    """ Python 2.6 does not have collections.Counter.
-        This is class that does the basics of what we need from Counter.
-    """
-
-    def __init__(self, *args, **kwargs):
-        super(FallbackCounter, self).__init__(int)
-
-    def most_common(n=None):
-
-        l = sorted(self.items(),
-                   cmp=lambda x,y: cmp(x[1], y[1]))
-
-        if n is not None:
-            l = l[:n]
-
-        return l
-
-class Files(object):
-    """ Iterate over multiple files as a single file. Pass the paths of the
-        files as arguments to the class constructor:
+class Files:
+    """Iterate over multiple files as a single file. Pass the paths of the
+    files as arguments to the class constructor:

-        for line in Files('/path/to/file/a', '/path/to/file/b'):
-            pass
+    for line in Files('/path/to/file/a', '/path/to/file/b'):
+        pass
     """

     def __init__(self, *args):
@ -111,10 +97,11 @@ class Files(object):
             yield line
         f.close()


-class RemoteFile(object):
-    """ Stream data from a remote file.
+class RemoteFile:
+    """Stream data from a remote file.

     :param url: URL to remote file
     """

     def __init__(self, url):
@ -126,21 +113,24 @@ class RemoteFile(object):
             yield line.rstrip()
         resp.close()


-class ZippedFiles(object):
-    """ unpack a zipped collection of files on init.
+class ZippedFiles:
+    """unpack a zipped collection of files on init.

     Takes a string with file location or zipfile.ZipFile object

-        Best to wrap this in a Files() object, if the goal is to have a
-        linereader, as this only returns filelike objects.
+    Best to wrap this in a Files() object, if the goal is to have a
+    linereader, as this only returns filelike objects.

-        if using a ZipFile object, make sure to set mode to 'a' or 'w' in order
-        to use the add() function.
+    if using a ZipFile object, make sure to set mode to 'a' or 'w' in order
+    to use the add() function.
     """

     def __init__(self, zippedfile):
         import zipfile

         if type(zippedfile) == str:
-            self._zipfile = zipfile.ZipFile(zippedfile,'a')
+            self._zipfile = zipfile.ZipFile(zippedfile, "a")
         else:
             self._zipfile = zippedfile
         self.paths = self._zipfile.namelist()
@ -152,10 +142,10 @@ class ZippedFiles(object):
     def add(self, path, dirname=None, arcname=None):
         path_base = os.path.basename(path)
         if dirname:
-            arcname = os.path.join(dirname,path_base)
+            arcname = os.path.join(dirname, path_base)
         if not arcname:
             arcname = path_base
-        self._zipfile.write(path,arcname)
+        self._zipfile.write(path, arcname)
         self.paths.append(path)

     def filereader(self):
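As a quick illustration of the two utilities whose docstrings appear above, here is a hedged sketch; it assumes the module imports as saucebrush.utilities, and the file paths are placeholders:

from saucebrush.utilities import Files, flatten

# flatten() joins nested dict keys with the separator, per its doctest
assert flatten({"a": 1, "b": {"c": 2}}) == {"a": 1, "b_c": 2}

# Files yields the lines of several files as one continuous stream
for line in Files("/path/to/file/a", "/path/to/file/b"):
    pass  # process each line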
107
tests/test_emitters.py
Normal file
@ -0,0 +1,107 @@
from contextlib import closing
from io import StringIO
import os

from saucebrush.emitters import (
    DebugEmitter,
    CSVEmitter,
    CountEmitter,
    SqliteEmitter,
    SqlDumpEmitter,
)


def test_debug_emitter():
    with closing(StringIO()) as output:
        de = DebugEmitter(output)
        list(de.attach([1, 2, 3]))
        assert output.getvalue() == "1\n2\n3\n"


def test_count_emitter():

    # values for test
    values = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
        12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    ]

    with closing(StringIO()) as output:

        # test without the "of" parameter
        ce = CountEmitter(every=10, outfile=output, format="%(count)s records\n")
        list(ce.attach(values))
        assert output.getvalue() == "10 records\n20 records\n"
        ce.done()
        assert output.getvalue() == "10 records\n20 records\n22 records\n"

    with closing(StringIO()) as output:

        # test with the "of" parameter
        ce = CountEmitter(every=10, outfile=output, of=len(values))
        list(ce.attach(values))
        assert output.getvalue() == "10 of 22\n20 of 22\n"
        ce.done()
        assert output.getvalue() == "10 of 22\n20 of 22\n22 of 22\n"


def test_csv_emitter():
    io = StringIO()

    with closing(io) as output:
        ce = CSVEmitter(output, ("x", "y", "z"))
        list(ce.attach([{"x": 1, "y": 2, "z": 3}, {"x": 5, "y": 5, "z": 5}]))
        assert output.getvalue() == "x,y,z\r\n1,2,3\r\n5,5,5\r\n"


def test_sqlite_emitter():

    import sqlite3
    import tempfile

    with closing(tempfile.NamedTemporaryFile(suffix=".db")) as f:
        db_path = f.name

    sle = SqliteEmitter(db_path, "testtable", fieldnames=("a", "b", "c"))
    list(sle.attach([{"a": "1", "b": "2", "c": "3"}]))
    sle.done()

    with closing(sqlite3.connect(db_path)) as conn:
        cur = conn.cursor()
        cur.execute("""SELECT a, b, c FROM testtable""")
        results = cur.fetchall()

    os.unlink(db_path)

    assert results == [("1", "2", "3")]


def test_sql_dump_emitter():

    with closing(StringIO()) as bffr:

        sde = SqlDumpEmitter(bffr, "testtable", ("a", "b"))
        list(sde.attach([{"a": 1, "b": "2"}]))
        sde.done()

        assert bffr.getvalue() == "INSERT INTO `testtable` (`a`,`b`) VALUES (1,'2');\n"
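The emitters exercised above all follow the same attach()/done() protocol as filters, so they can sit at the end of a recipe. A hedged sketch, not taken from this commit, combining them with run_recipe (which appears in tests/test_recipes.py below); writing to sys.stdout is an assumption:

import sys
from saucebrush import run_recipe
from saucebrush.emitters import DebugEmitter

# stream three records through a debug emitter that prints each one
run_recipe([{"a": 1}, {"a": 2}, {"a": 3}], DebugEmitter(sys.stdout))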
355
tests/test_filters.py
Normal file
@ -0,0 +1,355 @@
import unittest
import types
from saucebrush.filters import (
    Filter,
    YieldFilter,
    FieldFilter,
    SubrecordFilter,
    ConditionalPathFilter,
    ConditionalFilter,
    FieldModifier,
    FieldKeeper,
    FieldRemover,
    FieldMerger,
    FieldAdder,
    FieldCopier,
    FieldRenamer,
    Unique,
)


class DummyRecipe:
    rejected_record = None
    rejected_msg = None

    def reject_record(self, record, msg):
        self.rejected_record = record
        self.rejected_msg = msg


class Doubler(Filter):
    def process_record(self, record):
        return record * 2


class OddRemover(Filter):
    def process_record(self, record):
        if record % 2 == 0:
            return record
        else:
            return None  # explicitly return None


class ListFlattener(YieldFilter):
    def process_record(self, record):
        for item in record:
            yield item


class FieldDoubler(FieldFilter):
    def process_field(self, item):
        return item * 2


class NonModifyingFieldDoubler(Filter):
    def __init__(self, key):
        self.key = key

    def process_record(self, record):
        record = dict(record)
        record[self.key] *= 2
        return record


class ConditionalOddRemover(ConditionalFilter):
    def test_record(self, record):
        # return True for even values
        return record % 2 == 0


class FilterTestCase(unittest.TestCase):
    def _simple_data(self):
        return [
            {"a": 1, "b": 2, "c": 3},
            {"a": 5, "b": 5, "c": 5},
            {"a": 1, "b": 10, "c": 100},
        ]

    def assert_filter_result(self, filter_obj, expected_data):
        result = filter_obj.attach(self._simple_data())
        self.assertEqual(list(result), expected_data)

    def test_reject_record(self):
        recipe = DummyRecipe()
        f = Doubler()
        result = f.attach([1, 2, 3], recipe=recipe)
        # next has to be called for attach to take effect
        next(result)
        f.reject_record("bad", "this one was bad")

        # ensure that the rejection propagated to the recipe
        self.assertEqual("bad", recipe.rejected_record)
        self.assertEqual("this one was bad", recipe.rejected_msg)

    def test_simple_filter(self):
        df = Doubler()
        result = df.attach([1, 2, 3])

        # ensure we got a generator that yields 2,4,6
        self.assertEqual(type(result), types.GeneratorType)
        self.assertEqual(list(result), [2, 4, 6])

    def test_simple_filter_return_none(self):
        cf = OddRemover()
        result = cf.attach(range(10))

        # ensure only even numbers remain
        self.assertEqual(list(result), [0, 2, 4, 6, 8])

    def test_simple_yield_filter(self):
        lf = ListFlattener()
        result = lf.attach([[1], [2, 3], [4, 5, 6]])

        # ensure we got a generator that yields 1,2,3,4,5,6
        self.assertEqual(type(result), types.GeneratorType)
        self.assertEqual(list(result), [1, 2, 3, 4, 5, 6])

    def test_simple_field_filter(self):
        ff = FieldDoubler(["a", "c"])

        # check against expected data
        expected_data = [
            {"a": 2, "b": 2, "c": 6},
            {"a": 10, "b": 5, "c": 10},
            {"a": 2, "b": 10, "c": 200},
        ]
        self.assert_filter_result(ff, expected_data)

    def test_conditional_filter(self):
        cf = ConditionalOddRemover()
        result = cf.attach(range(10))

        # ensure only even numbers remain
        self.assertEqual(list(result), [0, 2, 4, 6, 8])

    # Tests for Subrecord

    def test_subrecord_filter_list(self):
        data = [
            {"a": [{"b": 2}, {"b": 4}]},
            {"a": [{"b": 5}]},
            {"a": [{"b": 8}, {"b": 2}, {"b": 1}]},
        ]

        expected = [
            {"a": [{"b": 4}, {"b": 8}]},
            {"a": [{"b": 10}]},
            {"a": [{"b": 16}, {"b": 4}, {"b": 2}]},
        ]

        sf = SubrecordFilter("a", NonModifyingFieldDoubler("b"))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_subrecord_filter_deep(self):
        data = [
            {"a": {"d": [{"b": 2}, {"b": 4}]}},
            {"a": {"d": [{"b": 5}]}},
            {"a": {"d": [{"b": 8}, {"b": 2}, {"b": 1}]}},
        ]

        expected = [
            {"a": {"d": [{"b": 4}, {"b": 8}]}},
            {"a": {"d": [{"b": 10}]}},
            {"a": {"d": [{"b": 16}, {"b": 4}, {"b": 2}]}},
        ]

        sf = SubrecordFilter("a.d", NonModifyingFieldDoubler("b"))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_subrecord_filter_nonlist(self):
        data = [
            {"a": {"b": {"c": 1}}},
            {"a": {"b": {"c": 2}}},
            {"a": {"b": {"c": 3}}},
        ]

        expected = [
            {"a": {"b": {"c": 2}}},
            {"a": {"b": {"c": 4}}},
            {"a": {"b": {"c": 6}}},
        ]

        sf = SubrecordFilter("a.b", NonModifyingFieldDoubler("c"))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_subrecord_filter_list_in_path(self):
        data = [
            {"a": [{"b": {"c": 5}}, {"b": {"c": 6}}]},
            {"a": [{"b": {"c": 1}}, {"b": {"c": 2}}, {"b": {"c": 3}}]},
            {"a": [{"b": {"c": 2}}]},
        ]

        expected = [
            {"a": [{"b": {"c": 10}}, {"b": {"c": 12}}]},
            {"a": [{"b": {"c": 2}}, {"b": {"c": 4}}, {"b": {"c": 6}}]},
            {"a": [{"b": {"c": 4}}]},
        ]

        sf = SubrecordFilter("a.b", NonModifyingFieldDoubler("c"))
        result = sf.attach(data)

        self.assertEqual(list(result), expected)

    def test_conditional_path(self):

        predicate = lambda r: r["a"] == 1  # noqa

        # double b if a == 1, otherwise double c
        cpf = ConditionalPathFilter(predicate, FieldDoubler("b"), FieldDoubler("c"))
        expected_data = [
            {"a": 1, "b": 4, "c": 3},
            {"a": 5, "b": 5, "c": 10},
            {"a": 1, "b": 20, "c": 100},
        ]

        self.assert_filter_result(cpf, expected_data)

    # Tests for Generic Filters

    def test_field_modifier(self):
        # another version of FieldDoubler
        fm = FieldModifier(["a", "c"], lambda x: x * 2)

        # check against expected data
        expected_data = [
            {"a": 2, "b": 2, "c": 6},
            {"a": 10, "b": 5, "c": 10},
            {"a": 2, "b": 10, "c": 200},
        ]
        self.assert_filter_result(fm, expected_data)

    def test_field_keeper(self):
        fk = FieldKeeper(["c"])

        # check against expected results
        expected_data = [{"c": 3}, {"c": 5}, {"c": 100}]
        self.assert_filter_result(fk, expected_data)

    def test_field_remover(self):
        fr = FieldRemover(["a", "b"])

        # check against expected results
        expected_data = [{"c": 3}, {"c": 5}, {"c": 100}]
        self.assert_filter_result(fr, expected_data)

    def test_field_merger(self):
        fm = FieldMerger({"sum": ("a", "b", "c")}, lambda x, y, z: x + y + z)

        # check against expected results
        expected_data = [{"sum": 6}, {"sum": 15}, {"sum": 111}]
        self.assert_filter_result(fm, expected_data)

    def test_field_merger_keep_fields(self):
        fm = FieldMerger(
            {"sum": ("a", "b", "c")}, lambda x, y, z: x + y + z, keep_fields=True
        )

        # check against expected results
        expected_data = [
            {"a": 1, "b": 2, "c": 3, "sum": 6},
            {"a": 5, "b": 5, "c": 5, "sum": 15},
            {"a": 1, "b": 10, "c": 100, "sum": 111},
        ]
        self.assert_filter_result(fm, expected_data)

    def test_field_adder_scalar(self):
        fa = FieldAdder("x", 7)

        expected_data = [
            {"a": 1, "b": 2, "c": 3, "x": 7},
            {"a": 5, "b": 5, "c": 5, "x": 7},
            {"a": 1, "b": 10, "c": 100, "x": 7},
        ]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_callable(self):
        fa = FieldAdder("x", lambda: 7)

        expected_data = [
            {"a": 1, "b": 2, "c": 3, "x": 7},
            {"a": 5, "b": 5, "c": 5, "x": 7},
            {"a": 1, "b": 10, "c": 100, "x": 7},
        ]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_iterable(self):
        fa = FieldAdder("x", [1, 2, 3])

        expected_data = [
            {"a": 1, "b": 2, "c": 3, "x": 1},
            {"a": 5, "b": 5, "c": 5, "x": 2},
            {"a": 1, "b": 10, "c": 100, "x": 3},
        ]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_replace(self):
        fa = FieldAdder("b", lambda: 7)

        expected_data = [
            {"a": 1, "b": 7, "c": 3},
            {"a": 5, "b": 7, "c": 5},
            {"a": 1, "b": 7, "c": 100},
        ]
        self.assert_filter_result(fa, expected_data)

    def test_field_adder_no_replace(self):
        fa = FieldAdder("b", lambda: 7, replace=False)

        expected_data = [
            {"a": 1, "b": 2, "c": 3},
            {"a": 5, "b": 5, "c": 5},
            {"a": 1, "b": 10, "c": 100},
        ]
        self.assert_filter_result(fa, expected_data)

    def test_field_copier(self):
        fc = FieldCopier({"a2": "a", "b2": "b"})

        expected_data = [
            {"a": 1, "b": 2, "c": 3, "a2": 1, "b2": 2},
            {"a": 5, "b": 5, "c": 5, "a2": 5, "b2": 5},
            {"a": 1, "b": 10, "c": 100, "a2": 1, "b2": 10},
        ]
        self.assert_filter_result(fc, expected_data)

    def test_field_renamer(self):
        fr = FieldRenamer({"x": "a", "y": "b"})

        expected_data = [
            {"x": 1, "y": 2, "c": 3},
            {"x": 5, "y": 5, "c": 5},
            {"x": 1, "y": 10, "c": 100},
        ]
        self.assert_filter_result(fr, expected_data)

    # TODO: splitter & flattener tests?

    def test_unique_filter(self):
        u = Unique()
        in_data = [{"a": 77}, {"a": 33}, {"a": 77}]
        expected_data = [{"a": 77}, {"a": 33}]
        result = u.attach(in_data)

        self.assertEqual(list(result), expected_data)

    # TODO: unicode & string filter tests


if __name__ == "__main__":
    unittest.main()
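The helper classes at the top of this file double as the recipe for writing your own filters: subclass Filter and override process_record (return the record, a modified copy, or None to drop it), or use FieldFilter / YieldFilter for per-field and one-to-many work. A small sketch in the same style; CentsToDollars and the "amount" field are hypothetical, not part of this commit:

from saucebrush.filters import Filter

class CentsToDollars(Filter):
    """Convert an integer 'amount' in cents to dollars."""

    def process_record(self, record):
        record = dict(record)          # copy first, like NonModifyingFieldDoubler
        record["amount"] = record["amount"] / 100
        return record                  # returning None would drop the record

records = [{"amount": 150}, {"amount": 75}]
print(list(CentsToDollars().attach(records)))
# -> [{'amount': 1.5}, {'amount': 0.75}]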
49
tests/test_recipes.py
Normal file
@ -0,0 +1,49 @@
import pytest
from saucebrush import Recipe, run_recipe, SaucebrushError, OvercookedError
from saucebrush.filters import Filter


class Raiser(Filter):
    def process_record(self, record):
        raise Exception("bad record")


class Saver(Filter):
    def __init__(self):
        self.saved = []

    def process_record(self, record):
        self.saved.append(record)
        return record


def test_error_stream():
    saver = Saver()
    recipe = Recipe(Raiser(), error_stream=saver)
    recipe.run([{"a": 1}, {"b": 2}])
    recipe.done()

    assert saver.saved[0]["record"] == {"a": 1}
    assert saver.saved[1]["record"] == {"b": 2}

    # Must pass either a Recipe, a Filter or an iterable of Filters
    # as the error_stream argument
    assert pytest.raises(SaucebrushError, Recipe, error_stream=5)


def test_run_recipe():
    saver = Saver()
    run_recipe([1, 2], saver)

    assert saver.saved == [1, 2]


def test_done():
    saver = Saver()
    recipe = Recipe(saver)
    recipe.run([1])
    recipe.done()

    assert pytest.raises(OvercookedError, recipe.run, [2])
    assert pytest.raises(OvercookedError, recipe.done)
    assert saver.saved == [1]
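test_error_stream captures the contract worth remembering: a record whose filter raises is not lost but is wrapped in a dict under the "record" key and routed to the error_stream filter. A hedged sketch of using that for partial failures; FailOnB and Collector are illustrative stand-ins, not part of this commit:

from saucebrush import Recipe
from saucebrush.filters import Filter

class FailOnB(Filter):
    def process_record(self, record):
        if "b" in record:
            raise Exception("no b allowed")
        return record

class Collector(Filter):
    def __init__(self):
        self.saved = []

    def process_record(self, record):
        self.saved.append(record)
        return record

errors = Collector()
recipe = Recipe(FailOnB(), error_stream=errors)
recipe.run([{"a": 1}, {"b": 2}])
recipe.done()
# only the failing record is diverted:
# errors.saved[0]["record"] == {"b": 2}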
90
tests/test_sources.py
Normal file
@ -0,0 +1,90 @@
from io import StringIO

from saucebrush.sources import (
    CSVSource,
    FixedWidthFileSource,
    HtmlTableSource,
    JSONSource,
)


def _get_csv():
    data = """a,b,c
1,2,3
5,5,5
1,10,100"""
    return StringIO(data)


def test_csv_source_basic():
    source = CSVSource(_get_csv())
    expected_data = [
        {"a": "1", "b": "2", "c": "3"},
        {"a": "5", "b": "5", "c": "5"},
        {"a": "1", "b": "10", "c": "100"},
    ]
    assert list(source) == expected_data


def test_csv_source_fieldnames():
    source = CSVSource(_get_csv(), ["x", "y", "z"])
    expected_data = [
        {"x": "a", "y": "b", "z": "c"},
        {"x": "1", "y": "2", "z": "3"},
        {"x": "5", "y": "5", "z": "5"},
        {"x": "1", "y": "10", "z": "100"},
    ]
    assert list(source) == expected_data


def test_csv_source_skiprows():
    source = CSVSource(_get_csv(), skiprows=1)
    expected_data = [
        {"a": "5", "b": "5", "c": "5"},
        {"a": "1", "b": "10", "c": "100"},
    ]
    assert list(source) == expected_data


def test_fixed_width_source():
    data = StringIO("JamesNovember 3 1986\nTim  September151999")
    fields = (("name", 5), ("month", 9), ("day", 2), ("year", 4))
    source = FixedWidthFileSource(data, fields)
    expected_data = [
        {"name": "James", "month": "November", "day": "3", "year": "1986"},
        {"name": "Tim", "month": "September", "day": "15", "year": "1999"},
    ]
    assert list(source) == expected_data


def test_json_source():

    content = StringIO("""[{"a": 1, "b": "2", "c": 3}]""")

    js = JSONSource(content)
    assert list(js) == [{"a": 1, "b": "2", "c": 3}]


def test_html_table_source():

    content = StringIO(
        """
        <html>
            <table id="thetable">
                <tr>
                    <th>a</th>
                    <th>b</th>
                    <th>c</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                    <td>3</td>
                </tr>
            </table>
        </html>
        """
    )

    hts = HtmlTableSource(content, "thetable")
    assert list(hts) == [{"a": "1", "b": "2", "c": "3"}]
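The fixed-width test only parses because each row is exactly 5 + 9 + 2 + 4 = 20 characters; in particular, "Tim" must be padded with two spaces to fill the 5-character name field, which is easy to lose when the string is copied around. A hedged re-derivation of what FixedWidthFileSource presumably does for one row:

row = "Tim  September151999"
fields = (("name", 5), ("month", 9), ("day", 2), ("year", 4))
pos, out = 0, {}
for name, width in fields:
    out[name] = row[pos:pos + width].strip()  # slice each fixed-width column
    pos += width
assert out == {"name": "Tim", "month": "September", "day": "15", "year": "1999"}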
55
tests/test_stats.py
Normal file
@ -0,0 +1,55 @@
from saucebrush.stats import Sum, Average, Median, MinMax, StandardDeviation, Histogram


def _simple_data():
    return [
        {"a": 1, "b": 2, "c": 3},
        {"a": 5, "b": 5, "c": 5},
        {"a": 1, "b": 10, "c": 100},
    ]


def test_sum():
    fltr = Sum("b")
    list(fltr.attach(_simple_data()))
    assert fltr.value() == 17


def test_average():
    fltr = Average("c")
    list(fltr.attach(_simple_data()))
    assert fltr.value() == 36.0


def test_median():
    # odd number of values
    fltr = Median("a")
    list(fltr.attach(_simple_data()))
    assert fltr.value() == 1

    # even number of values
    fltr = Median("a")
    list(fltr.attach(_simple_data()[:2]))
    assert fltr.value() == 3


def test_minmax():
    fltr = MinMax("b")
    list(fltr.attach(_simple_data()))
    assert fltr.value() == (2, 10)


def test_standard_deviation():
    fltr = StandardDeviation("c")
    list(fltr.attach(_simple_data()))
    assert fltr.average() == 36.0
    assert fltr.median() == 5
    assert fltr.value() == (55.4346462061408, 3073.0)
    assert fltr.value(True) == (45.2621990922521, 2048.6666666666665)


def test_histogram():
    fltr = Histogram("a")
    fltr.label_length = 1
    list(fltr.attach(_simple_data()))
    assert str(fltr) == "\n1 **\n5 *\n"
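For reference, every expected value above follows from _simple_data() by hand: Sum("b") is 2 + 5 + 10 = 17; Average("c") is (3 + 5 + 100) / 3 = 36.0; the a values sorted are [1, 1, 5], so the odd-count median is 1, while the first two records give (1 + 5) / 2 = 3; MinMax("b") is (2, 10). The standard-deviation figures check out the same way, as this small verification sketch shows:

values_c = [3, 5, 100]
mean = sum(values_c) / 3                       # 36.0
ss = sum((v - mean) ** 2 for v in values_c)    # 1089 + 961 + 4096 = 6146
print(ss / 2)  # 3073.0 -> sample variance, matches value()[1]
print(ss / 3)  # 2048.666... -> population variance, matches value(True)[1]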