Request callbacks added.
Recover from all panics and continue scraping. Only parse HTML if response is HTML.
This commit is contained in:
15
response.go
15
response.go
@ -4,14 +4,15 @@ import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Response type wraps http.Response
|
||||
// Contains parsed response data and Geziyor functions.
|
||||
type Response struct {
|
||||
*http.Response
|
||||
Body []byte
|
||||
Doc *goquery.Document
|
||||
Body []byte
|
||||
DocHTML *goquery.Document
|
||||
|
||||
Geziyor *Geziyor
|
||||
Exports chan interface{}
|
||||
@ -27,3 +28,13 @@ func (r *Response) JoinURL(relativeURL string) string {
|
||||
joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL)
|
||||
return joinedURL.String()
|
||||
}
|
||||
|
||||
func (r *Response) isHTML() bool {
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} {
|
||||
if strings.Contains(contentType, htmlContentType) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
Reference in New Issue
Block a user