Delays and logs refactored as middlewares.
This commit is contained in:
@ -5,7 +5,10 @@ import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"log"
|
||||
"math/rand"
|
||||
"os"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RequestMiddleware is called before a request is made.
|
||||
@ -15,6 +18,11 @@ type RequestMiddleware func(g *Geziyor, r *Request)
|
||||
// ResponseMiddleware is called after the response to a request has been received.
|
||||
type ResponseMiddleware func(g *Geziyor, r *Response)
|
||||
|
||||
// init sets process-wide defaults when the package is initialized: it seeds
// the shared math/rand source from the current time and directs the standard
// logger's output to stdout.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
	log.SetOutput(os.Stdout)
}
|
||||
|
||||
// recoverMiddleware recovers the scraper from a crash.
|
||||
// It logs the error and the stack trace.
|
||||
func recoverMiddleware() {
|
||||
@ -51,6 +59,22 @@ func defaultHeadersMiddleware(g *Geziyor, r *Request) {
|
||||
r.Header = internal.SetDefaultHeader(r.Header, "User-Agent", g.Opt.UserAgent)
|
||||
}
|
||||
|
||||
// delayMiddleware delays requests
|
||||
func delayMiddleware(g *Geziyor, r *Request) {
|
||||
if g.Opt.RequestDelayRandomize {
|
||||
min := float64(g.Opt.RequestDelay) * 0.5
|
||||
max := float64(g.Opt.RequestDelay) * 1.5
|
||||
time.Sleep(time.Duration(rand.Intn(int(max-min)) + int(min)))
|
||||
} else {
|
||||
time.Sleep(g.Opt.RequestDelay)
|
||||
}
|
||||
}
|
||||
|
||||
// logMiddleware logs requests
|
||||
func logMiddleware(g *Geziyor, r *Request) {
|
||||
log.Println("Fetching: ", r.URL.String())
|
||||
}
|
||||
|
||||
// parseHTMLMiddleware parses response if response is HTML
|
||||
func parseHTMLMiddleware(g *Geziyor, r *Response) {
|
||||
if !g.Opt.ParseHTMLDisabled && r.isHTML() {
|
||||
|
Reference in New Issue
Block a user