Middlewares refactored into multiple files in the middleware package.
Extractors removed, as they add complexity to the scraper, both for learning and for development.
This commit is contained in:
18
middleware/parse_html.go
Normal file
18
middleware/parse_html.go
Normal file
@ -0,0 +1,18 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/geziyor/geziyor/client"
|
||||
)
|
||||
|
||||
// ParseHTML is a response middleware that builds a parsed HTML document for
// responses that report themselves as HTML.
type ParseHTML struct {
	// ParseHTMLDisabled turns the middleware off: when it is true,
	// ProcessResponse leaves the response untouched.
	ParseHTMLDisabled bool
}
|
||||
|
||||
func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
||||
if !p.ParseHTMLDisabled && r.IsHTML() {
|
||||
r.HTMLDoc, _ = goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user