I used a function to allow certain HTML tags to be kept, as above, and then wondered how to make sure that all HTML tags are properly closed when the word count runs out. I got some code working, but it's just a prototype: it builds a DOM tree of the HTML and walks it until it runs out of word count, then deletes all following siblings. Does this make sense, or is it too flaky?
/**
 * Truncate an HTML fragment to a word budget while keeping the markup well formed.
 *
 * The fragment is parsed into a DOM tree, trimDOMNodes() removes the nodes that
 * follow once the word budget is used up, and what remains is serialised again,
 * so every tag that was opened is also closed.
 *
 * The input is treated as a UTF-8 encoded fragment (body content), not as a
 * complete HTML document.
 *
 * @param string $input     HTML fragment to shorten.
 * @param int    $num_words Word budget; zero or a negative value disables trimming.
 * @param string $more      Suffix appended to the result, whether or not anything was removed.
 *
 * @return string The serialised fragment followed by $more.
 */
function truncate_by_element(
    string $input,
    int $num_words,
    string $more
): string {
    $dom = new DOMDocument();

    // Collect parser warnings internally instead of silencing them with "@";
    // the previous setting is restored so callers are not affected.
    $previous_setting = libxml_use_internal_errors(true);
    try {
        // The explicit skeleton serves three purposes:
        //  - the <meta> makes libxml decode the input as UTF-8 rather than its
        //    ISO-8859-1 default;
        //  - the source is never empty, which loadHTML() rejects with a ValueError;
        //  - a real <body> holds the fragment. LIBXML_HTML_NOIMPLIED is deliberately
        //    not used: without a single root, libxml nests every later top-level
        //    element inside the first one.
        $dom->loadHTML(
            '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
                . $input,
            LIBXML_HTML_NODEFDTD
        );
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);
    }

    $body = $dom->getElementsByTagName('body')->item(0);
    if ($body === null) {
        // Parsing produced nothing to render.
        return $more;
    }

    if ($num_words > 0) {
        trimDOMNodes($body, $num_words);
    }

    // Serialise only the children of <body>, so the skeleton added above
    // never leaks into the result.
    $output = '';
    foreach ($body->childNodes as $child) {
        $output .= $dom->saveHTML($child);
    }

    return $output . $more;
}
/**
 * Trim the subtree below $domNode, in document order, to a word budget.
 *
 * Text nodes are counted with word_count(). A text node that would exceed the
 * remaining budget is cut after the last word that still fits, and every node
 * that follows once the budget is exhausted is removed. Elements are never cut
 * in half, so the tree stays well formed.
 *
 * @param DOMNode $domNode        Node whose descendants are trimmed in place.
 * @param int     $num_words_left Words still allowed when this call starts.
 *
 * @return int Words still allowed after this subtree; zero or less means the budget is spent.
 */
function trimDOMNodes(
    DOMNode $domNode,
    int $num_words_left,
): int {
    // Iterate over a snapshot: childNodes is a live list, so removing nodes
    // while walking it directly would skip siblings.
    foreach (iterator_to_array($domNode->childNodes, false) as $node) {
        if ($num_words_left <= 0) {
            // Budget spent: everything from here on is dropped.
            $domNode->removeChild($node);
            continue;
        }

        if ($node->nodeType === XML_TEXT_NODE) {
            $text = (string) $node->nodeValue;
            $words_in_node = word_count($text);

            if ($words_in_node <= $num_words_left) {
                $num_words_left -= $words_in_node;
                continue;
            }

            // The node crosses the budget: keep whitespace-separated pieces
            // until the budget is used up. Each piece is counted with
            // word_count() so the cut agrees with however words are counted.
            $pieces = preg_split('/(\s+)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
            if ($pieces !== false) {
                $kept = '';
                foreach ($pieces as $piece) {
                    if ($num_words_left <= 0) {
                        break;
                    }
                    $kept .= $piece;
                    $num_words_left -= word_count($piece);
                }
                $node->nodeValue = $kept;
            }

            // Whatever happened above, nothing after this node may be kept.
            $num_words_left = 0;
            continue;
        }

        if ($node->hasChildNodes()) {
            $num_words_left = trimDOMNodes($node, $num_words_left);
        }
    }

    return $num_words_left;
}
/**
 * Count the words in a piece of plain text.
 *
 * A word is any run of non-whitespace characters, so numbers and words that
 * contain non-ASCII letters each count once. (str_word_count() is byte- and
 * locale-based: it skips numbers and splits words at multi-byte letters.)
 *
 * @param string $text Text to count; expected to be UTF-8.
 *
 * @return int Number of whitespace-separated words, 0 for empty or blank text.
 */
function word_count(string $text): int
{
    $count = preg_match_all('/\S+/u', $text);

    if ($count === false) {
        // The "u" modifier rejects invalid UTF-8; fall back to byte-wise
        // matching so such input is still counted instead of failing.
        $count = preg_match_all('/\S+/', $text);
    }

    return (int) $count;
}