Added a custom logger. It is not configurable yet.
This commit is contained in:
parent
129402d754
commit
e3d79e2574
@ -5,12 +5,12 @@ import (
|
||||
"github.com/chromedp/cdproto/dom"
|
||||
"github.com/chromedp/cdproto/network"
|
||||
"github.com/chromedp/chromedp"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/net/html/charset"
|
||||
"golang.org/x/text/transform"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@ -98,7 +98,7 @@ func (c *Client) DoRequest(req *Request) (resp *Response, err error) {
|
||||
if err != nil {
|
||||
if req.retryCounter < c.opt.RetryTimes {
|
||||
req.retryCounter++
|
||||
log.Println("Retrying:", req.URL.String())
|
||||
internal.Logger.Println("Retrying:", req.URL.String())
|
||||
return c.DoRequest(req)
|
||||
}
|
||||
return resp, errors.Wrap(err, "Response error")
|
||||
@ -109,7 +109,7 @@ func (c *Client) DoRequest(req *Request) (resp *Response, err error) {
|
||||
if resp.StatusCode == statusCode {
|
||||
if req.retryCounter < c.opt.RetryTimes {
|
||||
req.retryCounter++
|
||||
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
|
||||
internal.Logger.Println("Retrying:", req.URL.String(), resp.StatusCode)
|
||||
return c.DoRequest(req)
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"log"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
@ -23,7 +22,7 @@ func (e *CSV) Export(exports chan interface{}) {
|
||||
// Create or append file
|
||||
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.csv"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
log.Printf("Output file creation error: %v\n", err)
|
||||
internal.Logger.Printf("Output file creation error: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
@ -50,7 +49,7 @@ func (e *CSV) Export(exports chan interface{}) {
|
||||
sort.Strings(values)
|
||||
}
|
||||
if err := writer.Write(values); err != nil {
|
||||
log.Printf("CSV writing error on exporter: %v\n", err)
|
||||
internal.Logger.Printf("CSV writing error on exporter: %v\n", err)
|
||||
}
|
||||
}
|
||||
writer.Flush()
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
@ -22,7 +21,7 @@ func (e *JSONLine) Export(exports chan interface{}) {
|
||||
// Create or append file
|
||||
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
log.Printf("Output file creation error: %v\n", err)
|
||||
internal.Logger.Printf("Output file creation error: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
@ -34,7 +33,7 @@ func (e *JSONLine) Export(exports chan interface{}) {
|
||||
// Export data as responses came
|
||||
for res := range exports {
|
||||
if err := encoder.Encode(res); err != nil {
|
||||
log.Printf("JSON encoding error on exporter: %v\n", err)
|
||||
internal.Logger.Printf("JSON encoding error on exporter: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -51,7 +50,7 @@ func (e *JSON) Export(exports chan interface{}) {
|
||||
// Create or append file
|
||||
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
log.Printf("Output file creation error: %v\n", err)
|
||||
internal.Logger.Printf("Output file creation error: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
@ -62,7 +61,7 @@ func (e *JSON) Export(exports chan interface{}) {
|
||||
for res := range exports {
|
||||
data, err := jsonMarshalLine(res, e.EscapeHTML)
|
||||
if err != nil {
|
||||
log.Printf("JSON encoding error on exporter: %v\n", err)
|
||||
internal.Logger.Printf("JSON encoding error on exporter: %v\n", err)
|
||||
continue
|
||||
}
|
||||
file.Write(data)
|
||||
|
22
geziyor.go
22
geziyor.go
@ -3,10 +3,10 @@ package geziyor
|
||||
import (
|
||||
"github.com/geziyor/geziyor/cache"
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"github.com/geziyor/geziyor/middleware"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http/cookiejar"
|
||||
"os"
|
||||
"os/signal"
|
||||
@ -118,9 +118,9 @@ func NewGeziyor(opt *Options) *Geziyor {
|
||||
|
||||
// Logging
|
||||
if opt.LogDisabled {
|
||||
log.SetOutput(ioutil.Discard)
|
||||
internal.Logger.SetOutput(ioutil.Discard)
|
||||
} else {
|
||||
log.SetOutput(os.Stdout)
|
||||
internal.Logger.SetOutput(os.Stdout)
|
||||
}
|
||||
|
||||
return geziyor
|
||||
@ -128,7 +128,7 @@ func NewGeziyor(opt *Options) *Geziyor {
|
||||
|
||||
// Start starts scraping
|
||||
func (g *Geziyor) Start() {
|
||||
log.Println("Scraping Started")
|
||||
internal.Logger.Println("Scraping Started")
|
||||
|
||||
// Metrics
|
||||
if g.Opt.MetricsType == metrics.Prometheus || g.Opt.MetricsType == metrics.ExpVar {
|
||||
@ -171,7 +171,7 @@ func (g *Geziyor) Start() {
|
||||
for {
|
||||
select {
|
||||
case <-shutdownChan:
|
||||
log.Println("Received SIGINT, shutting down gracefully. Send again to force")
|
||||
internal.Logger.Println("Received SIGINT, shutting down gracefully. Send again to force")
|
||||
g.shutdown = true
|
||||
signal.Stop(shutdownChan)
|
||||
case <-shutdownDoneChan:
|
||||
@ -184,14 +184,14 @@ func (g *Geziyor) Start() {
|
||||
close(g.Exports)
|
||||
g.wgExporters.Wait()
|
||||
shutdownDoneChan <- struct{}{}
|
||||
log.Println("Scraping Finished")
|
||||
internal.Logger.Println("Scraping Finished")
|
||||
}
|
||||
|
||||
// Get issues a GET to the specified URL.
|
||||
func (g *Geziyor) Get(url string, callback func(g *Geziyor, r *client.Response)) {
|
||||
req, err := client.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
log.Printf("Request creating error %v\n", err)
|
||||
internal.Logger.Printf("Request creating error %v\n", err)
|
||||
return
|
||||
}
|
||||
g.Do(req, callback)
|
||||
@ -203,7 +203,7 @@ func (g *Geziyor) Get(url string, callback func(g *Geziyor, r *client.Response))
|
||||
func (g *Geziyor) GetRendered(url string, callback func(g *Geziyor, r *client.Response)) {
|
||||
req, err := client.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
log.Printf("Request creating error %v\n", err)
|
||||
internal.Logger.Printf("Request creating error %v\n", err)
|
||||
return
|
||||
}
|
||||
req.Rendered = true
|
||||
@ -214,7 +214,7 @@ func (g *Geziyor) GetRendered(url string, callback func(g *Geziyor, r *client.Re
|
||||
func (g *Geziyor) Head(url string, callback func(g *Geziyor, r *client.Response)) {
|
||||
req, err := client.NewRequest("HEAD", url, nil)
|
||||
if err != nil {
|
||||
log.Printf("Request creating error %v\n", err)
|
||||
internal.Logger.Printf("Request creating error %v\n", err)
|
||||
return
|
||||
}
|
||||
g.Do(req, callback)
|
||||
@ -254,7 +254,7 @@ func (g *Geziyor) do(req *client.Request, callback func(g *Geziyor, r *client.Re
|
||||
if g.Opt.ErrorFunc != nil {
|
||||
g.Opt.ErrorFunc(g, req, err)
|
||||
} else {
|
||||
log.Println(err)
|
||||
internal.Logger.Println(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -304,7 +304,7 @@ func (g *Geziyor) releaseSem(req *client.Request) {
|
||||
// Logs error and stack trace
|
||||
func (g *Geziyor) recoverMe() {
|
||||
if r := recover(); r != nil {
|
||||
log.Println(r, string(debug.Stack()))
|
||||
internal.Logger.Println(r, string(debug.Stack()))
|
||||
g.metrics.PanicCounter.Add(1)
|
||||
}
|
||||
}
|
||||
|
10
internal/logger.go
Normal file
10
internal/logger.go
Normal file
@ -0,0 +1,10 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
var (
|
||||
// Logger is the package-wide logger shared by the library's packages.
// It is created writing to os.Stdout with an empty prefix and no flags
// (no timestamp or file information is prepended to messages).
Logger = log.New(os.Stdout, "", 0)
|
||||
)
|
@ -3,7 +3,6 @@ package middleware
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"log"
|
||||
"sync"
|
||||
)
|
||||
|
||||
@ -16,7 +15,7 @@ type AllowedDomains struct {
|
||||
func (a *AllowedDomains) ProcessRequest(r *client.Request) {
|
||||
if len(a.AllowedDomains) != 0 && !internal.Contains(a.AllowedDomains, r.Host) {
|
||||
if _, logged := a.logOnlyOnce.LoadOrStore(r.Host, struct{}{}); !logged {
|
||||
log.Printf("Domain not allowed: %s\n", r.Host)
|
||||
internal.Logger.Printf("Domain not allowed: %s\n", r.Host)
|
||||
}
|
||||
r.Cancel()
|
||||
return
|
||||
|
@ -2,7 +2,7 @@ package middleware
|
||||
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"log"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"sync"
|
||||
)
|
||||
|
||||
@ -18,7 +18,7 @@ func (a *DuplicateRequests) ProcessRequest(r *client.Request) {
|
||||
requestURL := r.Request.URL.String()
|
||||
if _, visited := a.visitedURLs.LoadOrStore(requestURL, struct{}{}); visited {
|
||||
if _, logged := a.logOnlyOnce.LoadOrStore(requestURL, struct{}{}); !logged {
|
||||
log.Printf("URL already visited %s\n", requestURL)
|
||||
internal.Logger.Printf("URL already visited %s\n", requestURL)
|
||||
}
|
||||
r.Cancel()
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ package middleware
|
||||
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"log"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
)
|
||||
|
||||
// LogStats logs responses
|
||||
@ -13,6 +13,6 @@ type LogStats struct {
|
||||
func (p *LogStats) ProcessResponse(r *client.Response) {
|
||||
// LogDisabled check is not necessary, but done here for performance reasons
|
||||
if !p.LogDisabled {
|
||||
log.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String())
|
||||
internal.Logger.Printf("Crawled: (%d) <%s %s>", r.StatusCode, r.Request.Method, r.Request.URL.String())
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ import (
|
||||
"bytes"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"log"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
)
|
||||
|
||||
// ParseHTML parses response if response is HTML
|
||||
@ -16,7 +16,7 @@ func (p *ParseHTML) ProcessResponse(r *client.Response) {
|
||||
if !p.ParseHTMLDisabled && r.IsHTML() {
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
|
||||
if err != nil {
|
||||
log.Println(err.Error())
|
||||
internal.Logger.Println(err.Error())
|
||||
return
|
||||
}
|
||||
r.HTMLDoc = doc
|
||||
|
@ -2,9 +2,9 @@ package middleware
|
||||
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"github.com/temoto/robotstxt"
|
||||
"log"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
@ -62,7 +62,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
||||
|
||||
if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) {
|
||||
m.metrics.RobotsTxtForbiddenCounter.With("method", r.Method).Add(1)
|
||||
log.Println("Forbidden by robots.txt:", r.URL.String())
|
||||
internal.Logger.Println("Forbidden by robots.txt:", r.URL.String())
|
||||
r.Cancel()
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user