Added custom logger. Right now, not configurable.

This commit is contained in:
Musab Gültekin
2021-04-13 23:36:42 +03:00
parent 129402d754
commit e3d79e2574
10 changed files with 39 additions and 32 deletions

View File

@@ -3,7 +3,6 @@ package middleware
import (
"github.com/geziyor/geziyor/client"
"github.com/geziyor/geziyor/internal"
-"log"
"sync"
)
@@ -16,7 +15,7 @@ type AllowedDomains struct {
func (a *AllowedDomains) ProcessRequest(r *client.Request) {
if len(a.AllowedDomains) != 0 && !internal.Contains(a.AllowedDomains, r.Host) {
if _, logged := a.logOnlyOnce.LoadOrStore(r.Host, struct{}{}); !logged {
-log.Printf("Domain not allowed: %s\n", r.Host)
+internal.Logger.Printf("Domain not allowed: %s\n", r.Host)
}
r.Cancel()
return

View File

@@ -2,7 +2,7 @@ package middleware
import (
"github.com/geziyor/geziyor/client"
-"log"
+"github.com/geziyor/geziyor/internal"
"sync"
)
@@ -18,7 +18,7 @@ func (a *DuplicateRequests) ProcessRequest(r *client.Request) {
requestURL := r.Request.URL.String()
if _, visited := a.visitedURLs.LoadOrStore(requestURL, struct{}{}); visited {
if _, logged := a.logOnlyOnce.LoadOrStore(requestURL, struct{}{}); !logged {
-log.Printf("URL already visited %s\n", requestURL)
+internal.Logger.Printf("URL already visited %s\n", requestURL)
}
r.Cancel()
}

View File

@@ -2,7 +2,7 @@ package middleware
import (
"github.com/geziyor/geziyor/client"
-"log"
+"github.com/geziyor/geziyor/internal"
)
// LogStats logs responses
@@ -13,6 +13,6 @@ type LogStats struct {
func (p *LogStats) ProcessResponse(r *client.Response) {
// LogDisabled check is not necessary, but done here for performance reasons
if !p.LogDisabled {
-log.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String())
+internal.Logger.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String())
}
}

View File

@@ -4,7 +4,7 @@ import (
"bytes"
"github.com/PuerkitoBio/goquery"
"github.com/geziyor/geziyor/client"
-"log"
+"github.com/geziyor/geziyor/internal"
)
// ParseHTML parses response if response is HTML
@@ -16,7 +16,7 @@ func (p *ParseHTML) ProcessResponse(r *client.Response) {
if !p.ParseHTMLDisabled && r.IsHTML() {
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
if err != nil {
-log.Println(err.Error())
+internal.Logger.Println(err.Error())
return
}
r.HTMLDoc = doc

View File

@@ -2,9 +2,9 @@ package middleware
import (
"github.com/geziyor/geziyor/client"
+"github.com/geziyor/geziyor/internal"
"github.com/geziyor/geziyor/metrics"
"github.com/temoto/robotstxt"
-"log"
"strconv"
"sync"
)
@@ -62,7 +62,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) {
m.metrics.RobotsTxtForbiddenCounter.With("method", r.Method).Add(1)
-log.Println("Forbidden by robots.txt:", r.URL.String())
+internal.Logger.Println("Forbidden by robots.txt:", r.URL.String())
r.Cancel()
}
}