From 7d2fe57bab4f5160c8f100ab548987195a8c0d44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Musab=20G=C3=BCltekin?=
Date: Wed, 11 Dec 2019 13:55:21 +0300
Subject: [PATCH] Added error logging for HTML parser.

---
 README.md                | 2 +-
 middleware/parse_html.go | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2e57662..3180eb7 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
 See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for all custom settings.
 
 ## Status
-The project is in **beta** phase. Thus, we highly recommend you to use Geziyor with go modules.
+We highly recommend you to use Geziyor with go modules.
 
 ## Usage
 
diff --git a/middleware/parse_html.go b/middleware/parse_html.go
index 4d74184..7ec52c2 100644
--- a/middleware/parse_html.go
+++ b/middleware/parse_html.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"github.com/PuerkitoBio/goquery"
 	"github.com/geziyor/geziyor/client"
+	"log"
 )
 
 // ParseHTML parses response if response is HTML
@@ -13,6 +14,11 @@ type ParseHTML struct {
 
 func (p *ParseHTML) ProcessResponse(r *client.Response) {
 	if !p.ParseHTMLDisabled && r.IsHTML() {
-		r.HTMLDoc, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
+		doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
+		if err != nil {
+			log.Println(err.Error())
+			return
+		}
+		r.HTMLDoc = doc
 	}
 }