60 lines
1.3 KiB
Go
60 lines
1.3 KiB
Go
package middleware
|
|
|
|
import (
|
|
"bytes"
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/antchfx/htmlquery"
|
|
"golang.org/x/text/encoding/simplifiedchinese"
|
|
"golang.org/x/text/transform"
|
|
"softdown.com/shusou/geziyor/client"
|
|
"softdown.com/shusou/geziyor/internal"
|
|
)
|
|
|
|
// Encoding names compared against a request's Encoding field to decide how a
// response body is decoded before HTML parsing.
const (
	// EncodingGBK marks a request whose response body must be decoded from a
	// simplified-Chinese encoding before it is parsed.
	EncodingGBK = "gb2312"
	// EncodingUTF8 marks a request whose response body is already UTF-8 and
	// is parsed without transcoding. It previously duplicated the value of
	// EncodingGBK, which made the two encodings indistinguishable.
	EncodingUTF8 = "utf-8"
)
|
|
|
|
// ParseHTML is a response middleware that parses HTML response bodies.
// The zero value is ready to use and has parsing enabled.
type ParseHTML struct {
	// ParseHTMLDisabled turns the middleware into a no-op when set to true.
	ParseHTMLDisabled bool
}
|
|
|
|
func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
|
if !p.ParseHTMLDisabled && r.IsHTML() {
|
|
if r.Request.Encoding == EncodingGBK {
|
|
reader := transform.NewReader(bytes.NewReader(r.Body), simplifiedchinese.GB18030.NewDecoder())
|
|
doc, err := goquery.NewDocumentFromReader(reader)
|
|
if err != nil {
|
|
internal.Logger.Println(err.Error())
|
|
return
|
|
}
|
|
r.HTMLDoc = doc
|
|
|
|
// xpath
|
|
node, err := htmlquery.Parse(reader)
|
|
if err != nil {
|
|
internal.Logger.Println(err.Error())
|
|
return
|
|
}
|
|
r.HTMLNode = node
|
|
|
|
} else {
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
|
if err != nil {
|
|
internal.Logger.Println(err.Error())
|
|
return
|
|
}
|
|
r.HTMLDoc = doc
|
|
|
|
// xpath
|
|
node, err := htmlquery.Parse(bytes.NewReader(r.Body))
|
|
if err != nil {
|
|
internal.Logger.Println(err.Error())
|
|
return
|
|
}
|
|
r.HTMLNode = node
|
|
}
|
|
}
|
|
}
|