add test for HtmlTableSource
This commit is contained in:
parent
4225b96ae1
commit
c7544204d8
@ -4,8 +4,9 @@
|
|||||||
All sources must implement the iterable interface and return python
|
All sources must implement the iterable interface and return python
|
||||||
dictionaries.
|
dictionaries.
|
||||||
"""
|
"""
|
||||||
|
from __future__ import unicode_literals
|
||||||
import string
|
import string
|
||||||
|
|
||||||
from saucebrush import utils
|
from saucebrush import utils
|
||||||
|
|
||||||
class CSVSource(object):
|
class CSVSource(object):
|
||||||
@ -95,7 +96,7 @@ class HtmlTableSource(object):
|
|||||||
soup = BeautifulSoup(htmlfile.read())
|
soup = BeautifulSoup(htmlfile.read())
|
||||||
if isinstance(id_or_num, int):
|
if isinstance(id_or_num, int):
|
||||||
table = soup.findAll('table')[id_or_num]
|
table = soup.findAll('table')[id_or_num]
|
||||||
elif isinstance(id_or_num, str):
|
else:
|
||||||
table = soup.find('table', id=id_or_num)
|
table = soup.find('table', id=id_or_num)
|
||||||
|
|
||||||
# skip the necessary number of rows
|
# skip the necessary number of rows
|
||||||
@ -105,9 +106,13 @@ class HtmlTableSource(object):
|
|||||||
if not fieldnames:
|
if not fieldnames:
|
||||||
self._fieldnames = [td.string
|
self._fieldnames = [td.string
|
||||||
for td in self._rows[0].findAll(('td','th'))]
|
for td in self._rows[0].findAll(('td','th'))]
|
||||||
|
skiprows += 1
|
||||||
else:
|
else:
|
||||||
self._fieldnames = fieldnames
|
self._fieldnames = fieldnames
|
||||||
|
|
||||||
|
# skip the necessary number of rows
|
||||||
|
self._rows = table.findAll('tr')[skiprows:]
|
||||||
|
|
||||||
def process_tr(self):
|
def process_tr(self):
|
||||||
for row in self._rows:
|
for row in self._rows:
|
||||||
strings = [utils.string_dig(td) for td in row.findAll('td')]
|
strings = [utils.string_dig(td) for td in row.findAll('td')]
|
||||||
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
|||||||
from io import BytesIO, StringIO
|
from io import BytesIO, StringIO
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from saucebrush.sources import CSVSource, FixedWidthFileSource
|
from saucebrush.sources import CSVSource, FixedWidthFileSource, HtmlTableSource
|
||||||
|
|
||||||
class SourceTestCase(unittest.TestCase):
|
class SourceTestCase(unittest.TestCase):
|
||||||
|
|
||||||
@ -44,5 +44,34 @@ class SourceTestCase(unittest.TestCase):
|
|||||||
'year':'1999'}]
|
'year':'1999'}]
|
||||||
self.assertEqual(list(source), expected_data)
|
self.assertEqual(list(source), expected_data)
|
||||||
|
|
||||||
|
def test_html_table_source(self):
    """Check that HtmlTableSource yields one dict per data row of a table.

    The table is looked up by its ``id`` attribute and no field names are
    passed in, so the header row (``a``/``b``/``c``) supplies the keys and
    the single data row supplies the values.

    The test is skipped when the optional BeautifulSoup package cannot be
    imported.
    """
    # Guard ONLY the optional import.  Keeping the construction and the
    # assertion outside the ``try`` means an ImportError raised for any
    # other reason while exercising HtmlTableSource surfaces as a real
    # error instead of being misreported as "not installed" and skipped.
    try:
        from BeautifulSoup import BeautifulSoup  # noqa: F401 (availability probe)
    except ImportError:
        self.skipTest("BeautifulSoup is not installed")

    content = StringIO("""
        <html>
            <table id="thetable">
                <tr>
                    <th>a</th>
                    <th>b</th>
                    <th>c</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                    <td>3</td>
                </tr>
            </table>
        </html>
    """)

    hts = HtmlTableSource(content, 'thetable')
    self.assertEqual(list(hts), [{'a': '1', 'b': '2', 'c': '3'}])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Loading…
Reference in New Issue
Block a user