add test for HtmlTableSource

This commit is contained in:
Jeremy Carbaugh 2012-03-12 15:57:57 -07:00
parent 4225b96ae1
commit c7544204d8
2 changed files with 37 additions and 3 deletions

View File

@ -4,8 +4,9 @@
All sources must implement the iterable interface and return Python
dictionaries.
"""
from __future__ import unicode_literals
import string
from saucebrush import utils
class CSVSource(object):
@ -95,7 +96,7 @@ class HtmlTableSource(object):
soup = BeautifulSoup(htmlfile.read())
if isinstance(id_or_num, int):
table = soup.findAll('table')[id_or_num]
elif isinstance(id_or_num, str):
else:
table = soup.find('table', id=id_or_num)
# skip the necessary number of rows
@ -105,9 +106,13 @@ class HtmlTableSource(object):
if not fieldnames:
self._fieldnames = [td.string
for td in self._rows[0].findAll(('td','th'))]
skiprows += 1
else:
self._fieldnames = fieldnames
# skip the necessary number of rows
self._rows = table.findAll('tr')[skiprows:]
def process_tr(self):
for row in self._rows:
strings = [utils.string_dig(td) for td in row.findAll('td')]

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
from io import BytesIO, StringIO
import unittest
from saucebrush.sources import CSVSource, FixedWidthFileSource
from saucebrush.sources import CSVSource, FixedWidthFileSource, HtmlTableSource
class SourceTestCase(unittest.TestCase):
@ -44,5 +44,34 @@ class SourceTestCase(unittest.TestCase):
'year':'1999'}]
self.assertEqual(list(source), expected_data)
def test_html_table_source(self):
    """HtmlTableSource should yield one dict per data row of the table.

    The fixture is a single table with ``id="thetable"`` holding a header
    row (a, b, c) and one data row (1, 2, 3); iterating the source is
    expected to produce ``[{'a': '1', 'b': '2', 'c': '3'}]``.

    The test is skipped when BeautifulSoup cannot be imported.
    """
    # Probe for the optional dependency on its own.  Keeping the source
    # construction and the assertion outside this ``try`` means an
    # ImportError raised for any other reason is reported as a real error
    # instead of being mistaken for a missing BeautifulSoup and skipped.
    try:
        from BeautifulSoup import BeautifulSoup  # noqa: F401 -- availability check only
    except ImportError:
        self.skipTest("BeautifulSoup is not installed")

    content = StringIO("""
<html>
<table id="thetable">
<tr>
<th>a</th>
<th>b</th>
<th>c</th>
</tr>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
</table>
</html>
""")

    # A non-integer second argument selects the table by its ``id``.
    hts = HtmlTableSource(content, 'thetable')
    self.assertEqual(list(hts), [{'a': '1', 'b': '2', 'c': '3'}])
# Run this module's test cases when the file is executed directly as a script.
if __name__ == '__main__':
unittest.main()