Added error logging for HTML parser.

This commit is contained in:
Musab Gültekin 2019-12-11 13:55:21 +03:00
parent cbca22fefb
commit 7d2fe57bab
2 changed files with 8 additions and 2 deletions

View File

@ -19,7 +19,7 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for all custom settings. See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for all custom settings.
## Status ## Status
The project is in **beta** phase. Thus, we highly recommend you to use Geziyor with go modules. We highly recommend you to use Geziyor with go modules.
## Usage ## Usage

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/geziyor/geziyor/client" "github.com/geziyor/geziyor/client"
"log"
) )
// ParseHTML parses response if response is HTML // ParseHTML parses response if response is HTML
@ -13,6 +14,11 @@ type ParseHTML struct {
func (p *ParseHTML) ProcessResponse(r *client.Response) { func (p *ParseHTML) ProcessResponse(r *client.Response) {
if !p.ParseHTMLDisabled && r.IsHTML() { if !p.ParseHTMLDisabled && r.IsHTML() {
r.HTMLDoc, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body)) doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
if err != nil {
log.Println(err.Error())
return
}
r.HTMLDoc = doc
} }
} }