* Use html.Parse rather than html.ParseFragment

  There have been a few issues with html.ParseFragment - just use html.Parse instead.

* Skip document node

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
parent
e898590c81
commit
8ac48584ec
1 changed file with 14 additions and 17 deletions
|
@@ -334,40 +334,37 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
|
||||||
_, _ = res.WriteString("</body></html>")
|
_, _ = res.WriteString("</body></html>")
|
||||||
|
|
||||||
// parse the HTML
|
// parse the HTML
|
||||||
nodes, err := html.ParseFragment(res, nil)
|
node, err := html.Parse(res)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, &postProcessError{"invalid HTML", err}
|
return nil, &postProcessError{"invalid HTML", err}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, node := range nodes {
|
if node.Type == html.DocumentNode {
|
||||||
ctx.visitNode(node, true)
|
node = node.FirstChild
|
||||||
}
|
}
|
||||||
|
|
||||||
newNodes := make([]*html.Node, 0, len(nodes))
|
ctx.visitNode(node, true)
|
||||||
|
|
||||||
for _, node := range nodes {
|
nodes := make([]*html.Node, 0, 5)
|
||||||
if node.Data == "html" {
|
|
||||||
node = node.FirstChild
|
if node.Data == "html" {
|
||||||
for node != nil && node.Data != "body" {
|
node = node.FirstChild
|
||||||
node = node.NextSibling
|
for node != nil && node.Data != "body" {
|
||||||
}
|
node = node.NextSibling
|
||||||
}
|
|
||||||
if node == nil {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if node != nil {
|
||||||
if node.Data == "body" {
|
if node.Data == "body" {
|
||||||
child := node.FirstChild
|
child := node.FirstChild
|
||||||
for child != nil {
|
for child != nil {
|
||||||
newNodes = append(newNodes, child)
|
nodes = append(nodes, child)
|
||||||
child = child.NextSibling
|
child = child.NextSibling
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
newNodes = append(newNodes, node)
|
nodes = append(nodes, node)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nodes = newNodes
|
|
||||||
|
|
||||||
// Create buffer in which the data will be placed again. We know that the
|
// Create buffer in which the data will be placed again. We know that the
|
||||||
// length will be at least that of res; to spare a few alloc+copy, we
|
// length will be at least that of res; to spare a few alloc+copy, we
|
||||||
// reuse res, resetting its length to 0.
|
// reuse res, resetting its length to 0.
|
||||||
|
|
Reference in a new issue