Added error logging for HTML parser.
This commit is contained in:
parent
cbca22fefb
commit
7d2fe57bab
@ -19,7 +19,7 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
|
||||
See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for all custom settings.
|
||||
|
||||
## Status
|
||||
The project is in the **beta** phase, so we highly recommend using Geziyor with Go modules.
|
||||
We highly recommend using Geziyor with Go modules.
|
||||
|
||||
## Usage
|
||||
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"log"
|
||||
)
|
||||
|
||||
// ParseHTML parses the response body into an HTML document when the response is HTML.
|
||||
@ -13,6 +14,11 @@ type ParseHTML struct {
|
||||
|
||||
func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
||||
if !p.ParseHTMLDisabled && r.IsHTML() {
|
||||
r.HTMLDoc, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
if err != nil {
|
||||
log.Println(err.Error())
|
||||
return
|
||||
}
|
||||
r.HTMLDoc = doc
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user