diff --git a/geziyor.go b/geziyor.go index fa02d68..4275b9c 100644 --- a/geziyor.go +++ b/geziyor.go @@ -16,7 +16,6 @@ import ( "net/http/cookiejar" "net/url" "os" - "runtime/debug" "sync" "time" ) @@ -170,11 +169,7 @@ func (g *Geziyor) Do(req *Request, callback func(g *Geziyor, r *Response)) { // Do sends an HTTP request func (g *Geziyor) do(req *Request, callback func(g *Geziyor, r *Response)) { defer g.wg.Done() - defer func() { - if r := recover(); r != nil { - log.Println(r, string(debug.Stack())) - } - }() + defer recoverMiddleware() for _, middlewareFunc := range g.requestMiddlewares { middlewareFunc(g, req) diff --git a/middleware.go b/middleware.go index 432ed4b..3dcbf59 100644 --- a/middleware.go +++ b/middleware.go @@ -4,6 +4,8 @@ import ( "bytes" "github.com/PuerkitoBio/goquery" "github.com/geziyor/geziyor/internal" + "log" + "runtime/debug" ) // RequestMiddleware called before requests made. @@ -13,6 +15,14 @@ type RequestMiddleware func(g *Geziyor, r *Request) // ResponseMiddleware called after request response receive type ResponseMiddleware func(g *Geziyor, r *Response) +// recoverMiddleware recovers from a panic so that it does not crash scraping. +// It logs the recovered value and the stack trace; it calls recover itself, so it must be the deferred function (defer recoverMiddleware()). +func recoverMiddleware() { + if r := recover(); r != nil { + log.Println(r, string(debug.Stack())) + } +} + // allowedDomainsMiddleware checks for request host if it exists in AllowedDomains func allowedDomainsMiddleware(g *Geziyor, r *Request) { if len(g.Opt.AllowedDomains) != 0 && !internal.Contains(g.Opt.AllowedDomains, r.Host) {