Revert "Try parsing HTML even if content-type is empty."
This reverts commit f384fc2c
This commit is contained in:
parent
f384fc2c13
commit
936d157785
@ -43,7 +43,7 @@ func defaultHeadersMiddleware(g *Geziyor, r *Request) {
|
||||
|
||||
// parseHTMLMiddleware parses the response body as HTML and stores the
// resulting goquery document in r.DocHTML.
//
// Nothing is parsed when g.Opt.ParseHTMLDisabled is set. The condition that
// also calls r.isHTML() additionally skips responses that isHTML does not
// report as HTML.
//
// The error returned by goquery.NewDocumentFromReader is discarded.
|
||||
func parseHTMLMiddleware(g *Geziyor, r *Response) {
|
||||
if !g.Opt.ParseHTMLDisabled {
|
||||
if !g.Opt.ParseHTMLDisabled && r.isHTML() {
|
||||
r.DocHTML, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
}
|
||||
}
|
||||
|
14
response.go
14
response.go
@ -4,6 +4,7 @@ import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Response type wraps http.Response
|
||||
@ -26,3 +27,16 @@ func (r *Response) JoinURL(relativeURL string) string {
|
||||
joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL)
|
||||
return joinedURL.String()
|
||||
}
|
||||
|
||||
func (r *Response) isHTML() bool {
|
||||
if r.Response == nil {
|
||||
return len(r.Body) != 0
|
||||
}
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} {
|
||||
if strings.Contains(contentType, htmlContentType) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user