diff --git a/client/client.go b/client/client.go index 194607d..b4b1e4d 100644 --- a/client/client.go +++ b/client/client.go @@ -5,12 +5,12 @@ import ( "github.com/chromedp/cdproto/dom" "github.com/chromedp/cdproto/network" "github.com/chromedp/chromedp" + "github.com/geziyor/geziyor/internal" "github.com/pkg/errors" "golang.org/x/net/html/charset" "golang.org/x/text/transform" "io" "io/ioutil" - "log" "net" "net/http" "net/url" @@ -98,7 +98,7 @@ func (c *Client) DoRequest(req *Request) (resp *Response, err error) { if err != nil { if req.retryCounter < c.opt.RetryTimes { req.retryCounter++ - log.Println("Retrying:", req.URL.String()) + internal.Logger.Println("Retrying:", req.URL.String()) return c.DoRequest(req) } return resp, errors.Wrap(err, "Response error") @@ -109,7 +109,7 @@ func (c *Client) DoRequest(req *Request) (resp *Response, err error) { if resp.StatusCode == statusCode { if req.retryCounter < c.opt.RetryTimes { req.retryCounter++ - log.Println("Retrying:", req.URL.String(), resp.StatusCode) + internal.Logger.Println("Retrying:", req.URL.String(), resp.StatusCode) return c.DoRequest(req) } } diff --git a/export/csv.go b/export/csv.go index 00efd05..4d91bfd 100644 --- a/export/csv.go +++ b/export/csv.go @@ -4,7 +4,6 @@ import ( "encoding/csv" "fmt" "github.com/geziyor/geziyor/internal" - "log" "os" "reflect" "sort" @@ -23,7 +22,7 @@ func (e *CSV) Export(exports chan interface{}) { // Create or append file file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.csv"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { - log.Printf("Output file creation error: %v\n", err) + internal.Logger.Printf("Output file creation error: %v\n", err) return } defer file.Close() @@ -50,7 +49,7 @@ func (e *CSV) Export(exports chan interface{}) { sort.Strings(values) } if err := writer.Write(values); err != nil { - log.Printf("CSV writing error on exporter: %v\n", err) + internal.Logger.Printf("CSV writing error on exporter: %v\n", err) } } writer.Flush() diff --git a/export/json.go b/export/json.go index d0affdd..ffeadda 100644 --- a/export/json.go +++ b/export/json.go @@ -4,7 +4,6 @@ import ( "bytes" "encoding/json" "github.com/geziyor/geziyor/internal" - "log" "os" ) @@ -22,7 +21,7 @@ func (e *JSONLine) Export(exports chan interface{}) { // Create or append file file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { - log.Printf("Output file creation error: %v\n", err) + internal.Logger.Printf("Output file creation error: %v\n", err) return } defer file.Close() @@ -34,7 +33,7 @@ func (e *JSONLine) Export(exports chan interface{}) { // Export data as responses came for res := range exports { if err := encoder.Encode(res); err != nil { - log.Printf("JSON encoding error on exporter: %v\n", err) + internal.Logger.Printf("JSON encoding error on exporter: %v\n", err) } } } @@ -51,7 +50,7 @@ func (e *JSON) Export(exports chan interface{}) { // Create or append file file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { - log.Printf("Output file creation error: %v\n", err) + internal.Logger.Printf("Output file creation error: %v\n", err) return } defer file.Close() @@ -62,7 +61,7 @@ func (e *JSON) Export(exports chan interface{}) { for res := range exports { data, err := jsonMarshalLine(res, e.EscapeHTML) if err != nil { - log.Printf("JSON encoding error on exporter: %v\n", err) + internal.Logger.Printf("JSON encoding error on exporter: %v\n", err) continue } file.Write(data) diff --git a/geziyor.go b/geziyor.go index eca6236..6625219 100644 --- a/geziyor.go +++ b/geziyor.go @@ -3,10 +3,10 @@ package geziyor import ( "github.com/geziyor/geziyor/cache" "github.com/geziyor/geziyor/client" + "github.com/geziyor/geziyor/internal" "github.com/geziyor/geziyor/metrics" "github.com/geziyor/geziyor/middleware" "io/ioutil" - "log" "net/http/cookiejar" "os" "os/signal" @@ -118,9 +118,9 @@ func NewGeziyor(opt *Options) *Geziyor { // Logging if opt.LogDisabled { - log.SetOutput(ioutil.Discard) + internal.Logger.SetOutput(ioutil.Discard) } else { - log.SetOutput(os.Stdout) + internal.Logger.SetOutput(os.Stdout) } return geziyor @@ -128,7 +128,7 @@ func NewGeziyor(opt *Options) *Geziyor { // Start starts scraping func (g *Geziyor) Start() { - log.Println("Scraping Started") + internal.Logger.Println("Scraping Started") // Metrics if g.Opt.MetricsType == metrics.Prometheus || g.Opt.MetricsType == metrics.ExpVar { @@ -171,7 +171,7 @@ func (g *Geziyor) Start() { for { select { case <-shutdownChan: - log.Println("Received SIGINT, shutting down gracefully. Send again to force") + internal.Logger.Println("Received SIGINT, shutting down gracefully. Send again to force") g.shutdown = true signal.Stop(shutdownChan) case <-shutdownDoneChan: @@ -184,14 +184,14 @@ func (g *Geziyor) Start() { close(g.Exports) g.wgExporters.Wait() shutdownDoneChan <- struct{}{} - log.Println("Scraping Finished") + internal.Logger.Println("Scraping Finished") } // Get issues a GET to the specified URL. func (g *Geziyor) Get(url string, callback func(g *Geziyor, r *client.Response)) { req, err := client.NewRequest("GET", url, nil) if err != nil { - log.Printf("Request creating error %v\n", err) + internal.Logger.Printf("Request creating error %v\n", err) return } g.Do(req, callback) @@ -203,7 +203,7 @@ func (g *Geziyor) Get(url string, callback func(g *Geziyor, r *client.Response)) func (g *Geziyor) GetRendered(url string, callback func(g *Geziyor, r *client.Response)) { req, err := client.NewRequest("GET", url, nil) if err != nil { - log.Printf("Request creating error %v\n", err) + internal.Logger.Printf("Request creating error %v\n", err) return } req.Rendered = true @@ -214,7 +214,7 @@ func (g *Geziyor) GetRendered(url string, callback func(g *Geziyor, r *client.Re func (g *Geziyor) Head(url string, callback func(g *Geziyor, r *client.Response)) { req, err := client.NewRequest("HEAD", url, nil) if err != nil { - log.Printf("Request creating error %v\n", err) + internal.Logger.Printf("Request creating error %v\n", err) return } g.Do(req, callback) @@ -254,7 +254,7 @@ func (g *Geziyor) do(req *client.Request, callback func(g *Geziyor, r *client.Re if g.Opt.ErrorFunc != nil { g.Opt.ErrorFunc(g, req, err) } else { - log.Println(err) + internal.Logger.Println(err) } return } @@ -304,7 +304,7 @@ func (g *Geziyor) releaseSem(req *client.Request) { // Logs error and stack trace func (g *Geziyor) recoverMe() { if r := recover(); r != nil { - log.Println(r, string(debug.Stack())) + internal.Logger.Println(r, string(debug.Stack())) g.metrics.PanicCounter.Add(1) } } diff --git a/internal/logger.go b/internal/logger.go new file mode 100644 index 0000000..af63fe6 --- /dev/null +++ b/internal/logger.go @@ -0,0 +1,10 @@ +package internal + +import ( + "log" + "os" +) + +var ( + Logger = log.New(os.Stdout, "", 0) +) diff --git a/middleware/allowed_domains.go b/middleware/allowed_domains.go index f61dfc1..051c7d9 100644 --- a/middleware/allowed_domains.go +++ b/middleware/allowed_domains.go @@ -3,7 +3,6 @@ package middleware import ( "github.com/geziyor/geziyor/client" "github.com/geziyor/geziyor/internal" - "log" "sync" ) @@ -16,7 +15,7 @@ type AllowedDomains struct { func (a *AllowedDomains) ProcessRequest(r *client.Request) { if len(a.AllowedDomains) != 0 && !internal.Contains(a.AllowedDomains, r.Host) { if _, logged := a.logOnlyOnce.LoadOrStore(r.Host, struct{}{}); !logged { - log.Printf("Domain not allowed: %s\n", r.Host) + internal.Logger.Printf("Domain not allowed: %s\n", r.Host) } r.Cancel() return diff --git a/middleware/duplicate_requests.go b/middleware/duplicate_requests.go index 727562b..b2fc419 100644 --- a/middleware/duplicate_requests.go +++ b/middleware/duplicate_requests.go @@ -2,7 +2,7 @@ package middleware import ( "github.com/geziyor/geziyor/client" - "log" + "github.com/geziyor/geziyor/internal" "sync" ) @@ -18,7 +18,7 @@ func (a *DuplicateRequests) ProcessRequest(r *client.Request) { requestURL := r.Request.URL.String() if _, visited := a.visitedURLs.LoadOrStore(requestURL, struct{}{}); visited { if _, logged := a.logOnlyOnce.LoadOrStore(requestURL, struct{}{}); !logged { - log.Printf("URL already visited %s\n", requestURL) + internal.Logger.Printf("URL already visited %s\n", requestURL) } r.Cancel() } diff --git a/middleware/log_stats.go b/middleware/log_stats.go index aa1ac3e..3a0c749 100644 --- a/middleware/log_stats.go +++ b/middleware/log_stats.go @@ -2,7 +2,7 @@ package middleware import ( "github.com/geziyor/geziyor/client" - "log" + "github.com/geziyor/geziyor/internal" ) // LogStats logs responses @@ -13,6 +13,6 @@ type LogStats struct { func (p *LogStats) ProcessResponse(r *client.Response) { // LogDisabled check is not necessary, but done here for performance reasons if !p.LogDisabled { - log.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String()) + internal.Logger.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String()) } } diff --git a/middleware/parse_html.go b/middleware/parse_html.go index 7ec52c2..7175c9c 100644 --- a/middleware/parse_html.go +++ b/middleware/parse_html.go @@ -4,7 +4,7 @@ import ( "bytes" "github.com/PuerkitoBio/goquery" "github.com/geziyor/geziyor/client" - "log" + "github.com/geziyor/geziyor/internal" ) // ParseHTML parses response if response is HTML @@ -16,7 +16,7 @@ func (p *ParseHTML) ProcessResponse(r *client.Response) { if !p.ParseHTMLDisabled && r.IsHTML() { doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body)) if err != nil { - log.Println(err.Error()) + internal.Logger.Println(err.Error()) return } r.HTMLDoc = doc diff --git a/middleware/robotstxt.go b/middleware/robotstxt.go index 8dc549f..a16084c 100644 --- a/middleware/robotstxt.go +++ b/middleware/robotstxt.go @@ -2,9 +2,9 @@ package middleware import ( "github.com/geziyor/geziyor/client" + "github.com/geziyor/geziyor/internal" "github.com/geziyor/geziyor/metrics" "github.com/temoto/robotstxt" - "log" "strconv" "sync" ) @@ -62,7 +62,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) { if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) { m.metrics.RobotsTxtForbiddenCounter.With("method", r.Method).Add(1) - log.Println("Forbidden by robots.txt:", r.URL.String()) + internal.Logger.Println("Forbidden by robots.txt:", r.URL.String()) r.Cancel() } }