Added a custom logger. It is not configurable yet.
This commit is contained in:
		| @@ -3,7 +3,6 @@ package middleware | ||||
| import ( | ||||
| 	"github.com/geziyor/geziyor/client" | ||||
| 	"github.com/geziyor/geziyor/internal" | ||||
| 	"log" | ||||
| 	"sync" | ||||
| ) | ||||
|  | ||||
| @@ -16,7 +15,7 @@ type AllowedDomains struct { | ||||
| func (a *AllowedDomains) ProcessRequest(r *client.Request) { | ||||
| 	if len(a.AllowedDomains) != 0 && !internal.Contains(a.AllowedDomains, r.Host) { | ||||
| 		if _, logged := a.logOnlyOnce.LoadOrStore(r.Host, struct{}{}); !logged { | ||||
| 			log.Printf("Domain not allowed: %s\n", r.Host) | ||||
| 			internal.Logger.Printf("Domain not allowed: %s\n", r.Host) | ||||
| 		} | ||||
| 		r.Cancel() | ||||
| 		return | ||||
|   | ||||
| @@ -2,7 +2,7 @@ package middleware | ||||
|  | ||||
| import ( | ||||
| 	"github.com/geziyor/geziyor/client" | ||||
| 	"log" | ||||
| 	"github.com/geziyor/geziyor/internal" | ||||
| 	"sync" | ||||
| ) | ||||
|  | ||||
| @@ -18,7 +18,7 @@ func (a *DuplicateRequests) ProcessRequest(r *client.Request) { | ||||
| 		requestURL := r.Request.URL.String() | ||||
| 		if _, visited := a.visitedURLs.LoadOrStore(requestURL, struct{}{}); visited { | ||||
| 			if _, logged := a.logOnlyOnce.LoadOrStore(requestURL, struct{}{}); !logged { | ||||
| 				log.Printf("URL already visited %s\n", requestURL) | ||||
| 				internal.Logger.Printf("URL already visited %s\n", requestURL) | ||||
| 			} | ||||
| 			r.Cancel() | ||||
| 		} | ||||
|   | ||||
| @@ -2,7 +2,7 @@ package middleware | ||||
|  | ||||
| import ( | ||||
| 	"github.com/geziyor/geziyor/client" | ||||
| 	"log" | ||||
| 	"github.com/geziyor/geziyor/internal" | ||||
| ) | ||||
|  | ||||
| // LogStats logs responses | ||||
| @@ -13,6 +13,6 @@ type LogStats struct { | ||||
| func (p *LogStats) ProcessResponse(r *client.Response) { | ||||
| 	// LogDisabled check is not necessary, but done here for performance reasons | ||||
| 	if !p.LogDisabled { | ||||
| 		log.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String()) | ||||
| 		internal.Logger.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String()) | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -4,7 +4,7 @@ import ( | ||||
| 	"bytes" | ||||
| 	"github.com/PuerkitoBio/goquery" | ||||
| 	"github.com/geziyor/geziyor/client" | ||||
| 	"log" | ||||
| 	"github.com/geziyor/geziyor/internal" | ||||
| ) | ||||
|  | ||||
| // ParseHTML parses response if response is HTML | ||||
| @@ -16,7 +16,7 @@ func (p *ParseHTML) ProcessResponse(r *client.Response) { | ||||
| 	if !p.ParseHTMLDisabled && r.IsHTML() { | ||||
| 		doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body)) | ||||
| 		if err != nil { | ||||
| 			log.Println(err.Error()) | ||||
| 			internal.Logger.Println(err.Error()) | ||||
| 			return | ||||
| 		} | ||||
| 		r.HTMLDoc = doc | ||||
|   | ||||
| @@ -2,9 +2,9 @@ package middleware | ||||
|  | ||||
| import ( | ||||
| 	"github.com/geziyor/geziyor/client" | ||||
| 	"github.com/geziyor/geziyor/internal" | ||||
| 	"github.com/geziyor/geziyor/metrics" | ||||
| 	"github.com/temoto/robotstxt" | ||||
| 	"log" | ||||
| 	"strconv" | ||||
| 	"sync" | ||||
| ) | ||||
| @@ -62,7 +62,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) { | ||||
|  | ||||
| 	if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) { | ||||
| 		m.metrics.RobotsTxtForbiddenCounter.With("method", r.Method).Add(1) | ||||
| 		log.Println("Forbidden by robots.txt:", r.URL.String()) | ||||
| 		internal.Logger.Println("Forbidden by robots.txt:", r.URL.String()) | ||||
| 		r.Cancel() | ||||
| 	} | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user