add test for HtmlTableSource
This commit is contained in:
parent
4225b96ae1
commit
c7544204d8
@ -4,8 +4,9 @@
|
||||
All sources must implement the iterable interface and return python
|
||||
dictionaries.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import string
|
||||
|
||||
from saucebrush import utils
|
||||
|
||||
class CSVSource(object):
|
||||
@ -95,7 +96,7 @@ class HtmlTableSource(object):
|
||||
soup = BeautifulSoup(htmlfile.read())
|
||||
if isinstance(id_or_num, int):
|
||||
table = soup.findAll('table')[id_or_num]
|
||||
elif isinstance(id_or_num, str):
|
||||
else:
|
||||
table = soup.find('table', id=id_or_num)
|
||||
|
||||
# skip the necessary number of rows
|
||||
@ -105,9 +106,13 @@ class HtmlTableSource(object):
|
||||
if not fieldnames:
|
||||
self._fieldnames = [td.string
|
||||
for td in self._rows[0].findAll(('td','th'))]
|
||||
skiprows += 1
|
||||
else:
|
||||
self._fieldnames = fieldnames
|
||||
|
||||
# skip the necessary number of rows
|
||||
self._rows = table.findAll('tr')[skiprows:]
|
||||
|
||||
def process_tr(self):
|
||||
for row in self._rows:
|
||||
strings = [utils.string_dig(td) for td in row.findAll('td')]
|
||||
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
||||
from io import BytesIO, StringIO
|
||||
import unittest
|
||||
|
||||
from saucebrush.sources import CSVSource, FixedWidthFileSource
|
||||
from saucebrush.sources import CSVSource, FixedWidthFileSource, HtmlTableSource
|
||||
|
||||
class SourceTestCase(unittest.TestCase):
|
||||
|
||||
@ -44,5 +44,34 @@ class SourceTestCase(unittest.TestCase):
|
||||
'year':'1999'}]
|
||||
self.assertEqual(list(source), expected_data)
|
||||
|
||||
def test_html_table_source(self):
    """Check that HtmlTableSource turns an HTML table into dictionaries.

    The table is selected by its ``id`` attribute and no fieldnames are
    passed, so the cells of the first row (a, b, c) are expected to be
    used as the keys for the single data row that follows.
    """
    # Probe for the optional dependency on its own: keeping the ``try``
    # this narrow means an ImportError raised for any other reason while
    # the source is built or iterated shows up as an error, not a skip.
    try:
        from BeautifulSoup import BeautifulSoup  # noqa: F401 (probe only)
    except ImportError:
        self.skipTest("BeautifulSoup is not installed")

    content = StringIO("""
        <html>
            <table id="thetable">
                <tr>
                    <th>a</th>
                    <th>b</th>
                    <th>c</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                    <td>3</td>
                </tr>
            </table>
        </html>
    """)

    hts = HtmlTableSource(content, 'thetable')
    self.assertEqual(list(hts), [{'a': '1', 'b': '2', 'c': '3'}])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user