Use html.Parse rather than html.ParseFragment (#16223) (#16225)

* Use html.Parse rather than html.ParseFragment
  html.ParseFragment has caused several issues, so parse the input as a full document with html.Parse instead.

* Skip the document node returned by html.Parse

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
6543 2021-06-22 03:46:39 +02:00 committed by GitHub
parent e898590c81
commit 8ac48584ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -334,40 +334,37 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
_, _ = res.WriteString("</body></html>")
// parse the HTML
nodes, err := html.ParseFragment(res, nil)
node, err := html.Parse(res)
if err != nil {
return nil, &postProcessError{"invalid HTML", err}
}
for _, node := range nodes {
ctx.visitNode(node, true)
if node.Type == html.DocumentNode {
node = node.FirstChild
}
newNodes := make([]*html.Node, 0, len(nodes))
ctx.visitNode(node, true)
nodes := make([]*html.Node, 0, 5)
for _, node := range nodes {
if node.Data == "html" {
node = node.FirstChild
for node != nil && node.Data != "body" {
node = node.NextSibling
}
}
if node == nil {
continue
}
if node != nil {
if node.Data == "body" {
child := node.FirstChild
for child != nil {
newNodes = append(newNodes, child)
nodes = append(nodes, child)
child = child.NextSibling
}
} else {
newNodes = append(newNodes, node)
nodes = append(nodes, node)
}
}
nodes = newNodes
// Create buffer in which the data will be placed again. We know that the
// length will be at least that of res; to spare a few alloc+copy, we
// reuse res, resetting its length to 0.