Revert "Try parsing HTML even if content-type is empty."

This reverts commit f384fc2c
This commit is contained in:
Musab Gültekin 2019-06-18 13:03:00 +03:00
parent f384fc2c13
commit 936d157785
2 changed files with 15 additions and 1 deletions

View File

@ -43,7 +43,7 @@ func defaultHeadersMiddleware(g *Geziyor, r *Request) {
// parseHTMLMiddleware parses response if response is HTML // parseHTMLMiddleware parses response if response is HTML
func parseHTMLMiddleware(g *Geziyor, r *Response) { func parseHTMLMiddleware(g *Geziyor, r *Response) {
if !g.Opt.ParseHTMLDisabled { if !g.Opt.ParseHTMLDisabled && r.isHTML() {
r.DocHTML, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body)) r.DocHTML, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
} }
} }

View File

@ -4,6 +4,7 @@ import (
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"net/http" "net/http"
"net/url" "net/url"
"strings"
) )
// Response type wraps http.Response // Response type wraps http.Response
@ -26,3 +27,16 @@ func (r *Response) JoinURL(relativeURL string) string {
joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL) joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL)
return joinedURL.String() return joinedURL.String()
} }
// isHTML reports whether the response looks like an HTML document.
//
// When there is no underlying http.Response (r.Response is nil) there are no
// headers to inspect, so any non-empty Body is treated as HTML. Otherwise the
// Content-Type header must mention one of the known HTML/XHTML media types.
func (r *Response) isHTML() bool {
	if r.Response == nil {
		return len(r.Body) != 0
	}

	// Media types are case-insensitive (RFC 9110, section 8.3.1), so normalise
	// the header value before matching; a server may send e.g. "Text/HTML".
	contentType := strings.ToLower(r.Header.Get("Content-Type"))
	for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} {
		// Substring match so parameters such as "; charset=utf-8" are tolerated.
		if strings.Contains(contentType, htmlContentType) {
			return true
		}
	}
	return false
}