Revert "Try parsing HTML even if content-type is empty."
This reverts commit f384fc2c
This commit is contained in:
parent
f384fc2c13
commit
936d157785
@ -43,7 +43,7 @@ func defaultHeadersMiddleware(g *Geziyor, r *Request) {
|
|||||||
|
|
||||||
// parseHTMLMiddleware parses response if response is HTML
|
// parseHTMLMiddleware parses response if response is HTML
|
||||||
func parseHTMLMiddleware(g *Geziyor, r *Response) {
|
func parseHTMLMiddleware(g *Geziyor, r *Response) {
|
||||||
if !g.Opt.ParseHTMLDisabled {
|
if !g.Opt.ParseHTMLDisabled && r.isHTML() {
|
||||||
r.DocHTML, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
r.DocHTML, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
14
response.go
14
response.go
@ -4,6 +4,7 @@ import (
|
|||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Response type wraps http.Response
|
// Response type wraps http.Response
|
||||||
@ -26,3 +27,16 @@ func (r *Response) JoinURL(relativeURL string) string {
|
|||||||
joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL)
|
joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL)
|
||||||
return joinedURL.String()
|
return joinedURL.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *Response) isHTML() bool {
|
||||||
|
if r.Response == nil {
|
||||||
|
return len(r.Body) != 0
|
||||||
|
}
|
||||||
|
contentType := r.Header.Get("Content-Type")
|
||||||
|
for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} {
|
||||||
|
if strings.Contains(contentType, htmlContentType) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user