Using `html.parser`:
class MyHTMLParser(HTMLParser):
    """HTML parser that prints every text node with its absolute offset.

    The offset is the 0-based character index of the start of the text node
    within everything passed to ``feed()`` since construction or the last
    ``reset()``.
    """

    def reset(self) -> None:
        """Reset the parser state and the recorded line-start offsets."""
        super().reset()
        # Absolute offset at which each line begins; line 1 begins at 0.
        self._line_starts = [0]
        # Total number of characters fed so far, across all feed() calls.
        self._fed_length = 0

    def feed(self, data: str) -> None:
        """Record where each line of ``data`` starts, then parse it."""
        # Offsets must be recorded before parsing, because super().feed()
        # invokes handle_data() synchronously.
        base = self._fed_length
        # Only "\n" counts as a line break, matching how HTMLParser advances
        # its own line number. str.splitlines() would also break on "\r",
        # "\x0c", "\u2028", etc. and so disagree with getpos().
        newline_at = data.find("\n")
        while newline_at != -1:
            self._line_starts.append(base + newline_at + 1)
            newline_at = data.find("\n", newline_at + 1)
        self._fed_length = base + len(data)
        super().feed(data)

    def handle_data(self, data: str) -> None:
        """Print ``data`` followed by the absolute index where it starts."""
        # getpos() yields a 1-based line number and a 0-based column.
        line, col = self.getpos()
        index = self._line_starts[line - 1] + col
        print(data, 'at', index)
# Parse the document; every text node is reported through handle_data().
parser = MyHTMLParser()
parser.feed(html_string)
# close() forces processing of anything the parser is still buffering, e.g.
# trailing text ending in a possibly incomplete character reference such as
# "AT&T". Without it that text would never be reported.
parser.close()