备份
This commit is contained in:
@ -3,10 +3,18 @@ package middleware
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/antchfx/htmlquery"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/transform"
|
||||
"softdown.com/shusou/geziyor/client"
|
||||
"softdown.com/shusou/geziyor/internal"
|
||||
)
|
||||
|
||||
// Character-encoding labels. ProcessResponse compares a request's Encoding
// field against EncodingGBK to decide whether the body must be decoded from
// GB18030 before it is parsed.
const (
	// EncodingGBK marks a response body as GB2312/GBK/GB18030 encoded.
	EncodingGBK = "gb2312"
	// EncodingUTF8 marks a response body as already UTF-8. It must differ
	// from EncodingGBK, otherwise UTF-8 pages would be run through the
	// GB18030 decoder.
	EncodingUTF8 = "utf-8"
)
|
||||
|
||||
// ParseHTML parses response if response is HTML
|
||||
type ParseHTML struct {
|
||||
ParseHTMLDisabled bool
|
||||
@ -14,11 +22,38 @@ type ParseHTML struct {
|
||||
|
||||
func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
||||
if !p.ParseHTMLDisabled && r.IsHTML() {
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
if err != nil {
|
||||
internal.Logger.Println(err.Error())
|
||||
return
|
||||
if r.Request.Encoding == EncodingGBK {
|
||||
reader := transform.NewReader(bytes.NewReader(r.Body), simplifiedchinese.GB18030.NewDecoder())
|
||||
doc, err := goquery.NewDocumentFromReader(reader)
|
||||
if err != nil {
|
||||
internal.Logger.Println(err.Error())
|
||||
return
|
||||
}
|
||||
r.HTMLDoc = doc
|
||||
|
||||
// xpath
|
||||
node, err := htmlquery.Parse(reader)
|
||||
if err != nil {
|
||||
internal.Logger.Println(err.Error())
|
||||
return
|
||||
}
|
||||
r.HTMLNode = node
|
||||
|
||||
} else {
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
if err != nil {
|
||||
internal.Logger.Println(err.Error())
|
||||
return
|
||||
}
|
||||
r.HTMLDoc = doc
|
||||
|
||||
// xpath
|
||||
node, err := htmlquery.Parse(bytes.NewReader(r.Body))
|
||||
if err != nil {
|
||||
internal.Logger.Println(err.Error())
|
||||
return
|
||||
}
|
||||
r.HTMLNode = node
|
||||
}
|
||||
r.HTMLDoc = doc
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user