geziyor/middleware/parse_html.go
Musab Gültekin 2cab68d2ce Middlewares refactored to multiple files in middleware package.
Extractors removed as they introduce complexity to scraper. Both in learning and developing.
2019-07-04 21:04:29 +03:00

19 lines
388 B
Go

package middleware
import (
	"bytes"
	"log"

	"github.com/PuerkitoBio/goquery"
	"github.com/geziyor/geziyor/client"
)
// ParseHTML is a response middleware that parses HTML responses.
// Its ProcessResponse method builds a goquery document from the response
// body and stores it in the response's HTMLDoc field.
type ParseHTML struct {
// ParseHTMLDisabled switches the middleware off: when true,
// ProcessResponse leaves the response untouched.
ParseHTMLDisabled bool
}
// ProcessResponse parses the body of r into a goquery document and stores it
// in r.HTMLDoc. It does nothing when parsing is disabled through
// ParseHTMLDisabled or when r.IsHTML() reports false.
//
// The method has no error return, so a parse failure is logged instead of
// being silently dropped; in that case r.HTMLDoc is left unchanged.
func (p *ParseHTML) ProcessResponse(r *client.Response) {
	if p.ParseHTMLDisabled || !r.IsHTML() {
		return
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
	if err != nil {
		// Surface the failure so that a missing HTMLDoc can be diagnosed.
		log.Printf("middleware: parsing HTML response: %v", err)
		return
	}
	r.HTMLDoc = doc
}