Fixed #27906 -- Fixed test tools counting of HTML matches for subsets of elements.

Previously, examples such as '<a/><b/>' would not be counted as a match in '<a/><b/><c/>'.
This commit is contained in:
Jacob Walls 2020-09-20 10:14:54 -04:00 committed by Carlton Gibson
parent 01974d7f75
commit e26a7a8ef4
2 changed files with 38 additions and 1 deletions

View File

@ -86,6 +86,7 @@ class Element:
if self.children == element.children: if self.children == element.children:
return 1 return 1
i = 0 i = 0
elem_child_idx = 0
for child in self.children: for child in self.children:
# child is text content and element is also text content, then # child is text content and element is also text content, then
# make a simple "text" in "text" # make a simple "text" in "text"
@ -96,9 +97,26 @@ class Element:
elif element in child: elif element in child:
return 1 return 1
else: else:
# Look for element wholly within this child.
i += child._count(element, count=count) i += child._count(element, count=count)
if not count and i: if not count and i:
return i return i
# Also look for a sequence of element's children among self's
# children. self.children == element.children is tested above,
# but will fail if self has additional children. Ex: '<a/><b/>'
# is contained in '<a/><b/><c/>'.
if isinstance(element, RootElement) and element.children:
elem_child = element.children[elem_child_idx]
# Start or continue match, advance index.
if elem_child == child:
elem_child_idx += 1
# Match found, reset index.
if elem_child_idx == len(element.children):
i += 1
elem_child_idx = 0
# No match, reset index.
else:
elem_child_idx = 0
return i return i
def __contains__(self, element): def __contains__(self, element):

View File

@ -768,11 +768,30 @@ class HTMLEqualTests(SimpleTestCase):
dom2 = parse_html('<p>foo<p>bar</p></p>') dom2 = parse_html('<p>foo<p>bar</p></p>')
self.assertEqual(dom2.count(dom1), 0) self.assertEqual(dom2.count(dom1), 0)
# html with a root element contains the same html with no root element # HTML with a root element contains the same HTML with no root element.
dom1 = parse_html('<p>foo</p><p>bar</p>') dom1 = parse_html('<p>foo</p><p>bar</p>')
dom2 = parse_html('<div><p>foo</p><p>bar</p></div>') dom2 = parse_html('<div><p>foo</p><p>bar</p></div>')
self.assertEqual(dom2.count(dom1), 1) self.assertEqual(dom2.count(dom1), 1)
# Target of search is a sequence of child elements and appears more
# than once.
dom2 = parse_html('<div><p>foo</p><p>bar</p><p>foo</p><p>bar</p></div>')
self.assertEqual(dom2.count(dom1), 2)
# Searched HTML has additional children.
dom1 = parse_html('<a/><b/>')
dom2 = parse_html('<a/><b/><c/>')
self.assertEqual(dom2.count(dom1), 1)
# No match found in children.
dom1 = parse_html('<b/><a/>')
self.assertEqual(dom2.count(dom1), 0)
# Target of search found among children and grandchildren.
dom1 = parse_html('<b/><b/>')
dom2 = parse_html('<a><b/><b/></a><b/><b/>')
self.assertEqual(dom2.count(dom1), 2)
def test_parsing_errors(self): def test_parsing_errors(self):
with self.assertRaises(AssertionError): with self.assertRaises(AssertionError):
self.assertHTMLEqual('<p>', '') self.assertHTMLEqual('<p>', '')