Added error logging for HTML parser.
This commit is contained in:
parent
cbca22fefb
commit
7d2fe57bab
@ -19,7 +19,7 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
|
|||||||
See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for all custom settings.
|
See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for all custom settings.
|
||||||
|
|
||||||
## Status
|
## Status
|
||||||
The project is in the **beta** phase. Thus, we highly recommend using Geziyor with Go modules.
|
We highly recommend using Geziyor with Go modules.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
"github.com/geziyor/geziyor/client"
|
"github.com/geziyor/geziyor/client"
|
||||||
|
"log"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseHTML parses response if response is HTML
|
// ParseHTML parses response if response is HTML
|
||||||
@ -13,6 +14,11 @@ type ParseHTML struct {
|
|||||||
|
|
||||||
func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
||||||
if !p.ParseHTMLDisabled && r.IsHTML() {
|
if !p.ParseHTMLDisabled && r.IsHTML() {
|
||||||
r.HTMLDoc, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
r.HTMLDoc = doc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user