From c7544204d83b4101aeb43adff88dc76d46ad367a Mon Sep 17 00:00:00 2001 From: Jeremy Carbaugh Date: Mon, 12 Mar 2012 15:57:57 -0700 Subject: [PATCH] add test for HtmlTableSource --- saucebrush/sources.py | 9 +++++++-- saucebrush/tests/sources.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/saucebrush/sources.py b/saucebrush/sources.py index c0d925b..a3d272b 100644 --- a/saucebrush/sources.py +++ b/saucebrush/sources.py @@ -4,8 +4,9 @@ All sources must implement the iterable interface and return python dictionaries. """ - +from __future__ import unicode_literals import string + from saucebrush import utils class CSVSource(object): @@ -95,7 +96,7 @@ class HtmlTableSource(object): soup = BeautifulSoup(htmlfile.read()) if isinstance(id_or_num, int): table = soup.findAll('table')[id_or_num] - elif isinstance(id_or_num, str): + else: table = soup.find('table', id=id_or_num) # skip the necessary number of rows @@ -105,9 +106,13 @@ class HtmlTableSource(object): if not fieldnames: self._fieldnames = [td.string for td in self._rows[0].findAll(('td','th'))] + skiprows += 1 else: self._fieldnames = fieldnames + # skip the necessary number of rows + self._rows = table.findAll('tr')[skiprows:] + def process_tr(self): for row in self._rows: strings = [utils.string_dig(td) for td in row.findAll('td')] diff --git a/saucebrush/tests/sources.py b/saucebrush/tests/sources.py index 82c7086..f3a07dc 100644 --- a/saucebrush/tests/sources.py +++ b/saucebrush/tests/sources.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from io import BytesIO, StringIO import unittest -from saucebrush.sources import CSVSource, FixedWidthFileSource +from saucebrush.sources import CSVSource, FixedWidthFileSource, HtmlTableSource class SourceTestCase(unittest.TestCase): @@ -44,5 +44,34 @@ class SourceTestCase(unittest.TestCase): 'year':'1999'}] self.assertEqual(list(source), expected_data) + def test_html_table_source(self): + + content = 
StringIO(""" + <html> + <table id="thetable"> + <tr> + <th>a</th> + <th>b</th> + <th>c</th> + </tr> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + </table> + </html> + """) + + try: + + from BeautifulSoup import BeautifulSoup + + hts = HtmlTableSource(content, 'thetable') + self.assertEqual(list(hts), [{'a': '1', 'b': '2', 'c': '3'}]) + + except ImportError: + self.skipTest("BeautifulSoup is not installed") + if __name__ == '__main__': unittest.main()