Panic recovery refactored into a middleware-style function

This commit is contained in:
Musab Gültekin
2019-06-19 22:45:40 +03:00
parent c28b228a12
commit 514fe2e8d2
2 changed files with 11 additions and 6 deletions

View File

@ -16,7 +16,6 @@ import (
"net/http/cookiejar" "net/http/cookiejar"
"net/url" "net/url"
"os" "os"
"runtime/debug"
"sync" "sync"
"time" "time"
) )
@ -170,11 +169,7 @@ func (g *Geziyor) Do(req *Request, callback func(g *Geziyor, r *Response)) {
// Do sends an HTTP request // Do sends an HTTP request
func (g *Geziyor) do(req *Request, callback func(g *Geziyor, r *Response)) { func (g *Geziyor) do(req *Request, callback func(g *Geziyor, r *Response)) {
defer g.wg.Done() defer g.wg.Done()
defer func() { defer recoverMiddleware()
if r := recover(); r != nil {
log.Println(r, string(debug.Stack()))
}
}()
for _, middlewareFunc := range g.requestMiddlewares { for _, middlewareFunc := range g.requestMiddlewares {
middlewareFunc(g, req) middlewareFunc(g, req)

View File

@ -4,6 +4,8 @@ import (
"bytes" "bytes"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/geziyor/geziyor/internal" "github.com/geziyor/geziyor/internal"
"log"
"runtime/debug"
) )
// RequestMiddleware called before requests made. // RequestMiddleware called before requests made.
@ -13,6 +15,14 @@ type RequestMiddleware func(g *Geziyor, r *Request)
// ResponseMiddleware called after request response receive // ResponseMiddleware called after request response receive
type ResponseMiddleware func(g *Geziyor, r *Response) type ResponseMiddleware func(g *Geziyor, r *Response)
// recoverMiddleware keeps a panic from crashing the scraper.
// It is meant to be invoked directly through defer: recover only has an
// effect when it is called by the deferred function itself.
// When a panic is caught, the panic value and the stack trace are logged;
// when there is no panic in flight, it does nothing.
func recoverMiddleware() {
	panicValue := recover()
	if panicValue == nil {
		// Nothing to recover from.
		return
	}

	stackTrace := string(debug.Stack())
	log.Println(panicValue, stackTrace)
}
// allowedDomainsMiddleware checks for request host if it exists in AllowedDomains // allowedDomainsMiddleware checks for request host if it exists in AllowedDomains
func allowedDomainsMiddleware(g *Geziyor, r *Request) { func allowedDomainsMiddleware(g *Geziyor, r *Request) {
if len(g.Opt.AllowedDomains) != 0 && !internal.Contains(g.Opt.AllowedDomains, r.Host) { if len(g.Opt.AllowedDomains) != 0 && !internal.Contains(g.Opt.AllowedDomains, r.Host) {