Middlewares refactored into multiple files in the middleware package.

Extractors removed because they add complexity to the scraper, both for learning it and for developing it.
This commit is contained in:
Musab Gültekin
2019-07-04 21:04:29 +03:00
parent 9adff75509
commit 2cab68d2ce
19 changed files with 202 additions and 304 deletions

18
middleware/parse_html.go Normal file
View File

@ -0,0 +1,18 @@
package middleware
import (
	"bytes"
	"log"

	"github.com/PuerkitoBio/goquery"
	"github.com/geziyor/geziyor/client"
)
// ParseHTML is a response middleware that turns the body of an HTML
// response into a goquery document. The zero value has parsing enabled.
type ParseHTML struct {
	// ParseHTMLDisabled makes ProcessResponse a no-op when set to true.
	ParseHTMLDisabled bool
}
// ProcessResponse parses the body of an HTML response into a goquery
// document and stores it in r.HTMLDoc.
//
// It does nothing when ParseHTMLDisabled is set or when r.IsHTML() reports
// false. If parsing fails, the error is logged and r.HTMLDoc is left
// untouched.
func (p *ParseHTML) ProcessResponse(r *client.Response) {
	if p.ParseHTMLDisabled || !r.IsHTML() {
		return
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
	if err != nil {
		// This method has no error return, so report the failure here
		// rather than silently discarding it.
		log.Printf("middleware: parsing HTML response: %v", err)
		return
	}
	r.HTMLDoc = doc
}