diff --git a/README.md b/README.md index af4517a..e70b3ec 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use - JS Rendering - Caching (Memory/Disk) - Automatic Data Exporting (JSON, CSV, or custom) +- Metrics (Prometheus, Expvar, or custom) - Limit Concurrency (Global/Per Domain) - Request Delays (Constant/Randomized) -- Metrics (Prometheus) - Cookies and Middlewares - Automatic response decoding to UTF-8 diff --git a/geziyor.go b/geziyor.go index 27faa7f..bd348cd 100644 --- a/geziyor.go +++ b/geziyor.go @@ -7,6 +7,7 @@ import ( "github.com/chromedp/chromedp" "github.com/fpfeng/httpcache" "github.com/geziyor/geziyor/internal" + "github.com/geziyor/geziyor/metrics" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/net/html/charset" @@ -30,7 +31,7 @@ type Geziyor struct { Client *internal.Client Exports chan interface{} - metrics *Metrics + metrics *metrics.Metrics requestMiddlewares []RequestMiddleware responseMiddlewares []ResponseMiddleware wg sync.WaitGroup @@ -61,7 +62,7 @@ func NewGeziyor(opt *Options) *Geziyor { parseHTMLMiddleware, metricsResponseMiddleware, }, - metrics: newMetrics(), + metrics: metrics.NewMetrics(opt.MetricsType), } if opt.UserAgent == "" { @@ -102,11 +103,14 @@ func NewGeziyor(opt *Options) *Geziyor { func (g *Geziyor) Start() { log.Println("Scraping Started") - // Start metrics - go func() { - http.Handle("/metrics", promhttp.Handler()) - http.ListenAndServe(":2112", nil) - }() + // Metrics + metricsServer := &http.Server{Addr: ":2112"} + if g.Opt.MetricsType == metrics.Prometheus { + go func() { + http.Handle("/metrics", promhttp.Handler()) + metricsServer.ListenAndServe() + }() + } // Start Exporters if len(g.Opt.Exporters) != 0 { @@ -131,6 +135,7 @@ func (g *Geziyor) Start() { g.wg.Wait() close(g.Exports) + metricsServer.Close() log.Println("Scraping Finished") } diff --git a/geziyor_test.go 
b/geziyor_test.go index 0c5ff29..1722c16 100644 --- a/geziyor_test.go +++ b/geziyor_test.go @@ -7,12 +7,14 @@ import ( "github.com/fpfeng/httpcache" "github.com/geziyor/geziyor" "github.com/geziyor/geziyor/exporter" + "github.com/geziyor/geziyor/metrics" "math/rand" "testing" "time" ) func TestSimple(t *testing.T) { + defer leaktest.Check(t)() geziyor.NewGeziyor(&geziyor.Options{ StartURLs: []string{"http://api.ipify.org"}, ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) { @@ -155,5 +157,6 @@ func TestBasicAuth(t *testing.T) { req.SetBasicAuth("username", "password") g.Do(req, nil) }, + MetricsType: metrics.ExpVar, }).Start() } diff --git a/go.mod b/go.mod index da6cf75..b2e85b9 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.12 require ( github.com/PuerkitoBio/goquery v1.5.0 + github.com/VividCortex/gohistogram v1.0.0 // indirect github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54 github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05 github.com/fortytw2/leaktest v1.3.0 diff --git a/go.sum b/go.sum index fc00d0c..7621e13 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE= +github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= diff --git a/metrics.go b/metrics.go deleted file mode 100644 index dc41701..0000000 --- a/metrics.go +++ /dev/null @@ -1,30 +0,0 @@ -package geziyor - -import ( - 
"github.com/go-kit/kit/metrics" - "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" -) - -// Metrics type stores metrics -type Metrics struct { - requestCount metrics.Counter - responseCount metrics.Counter -} - -func newMetrics() *Metrics { - m := Metrics{ - requestCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: "geziyor", - Name: "request_count", - Help: "Request count", - }, []string{"method"}), - responseCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: "geziyor", - Name: "response_count", - Help: "Response count", - }, []string{"method"}), - } - - return &m -} diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 0000000..ddf62be --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,60 @@ +package metrics + +import ( + "github.com/go-kit/kit/metrics" + "github.com/go-kit/kit/metrics/discard" + "github.com/go-kit/kit/metrics/expvar" + "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +// Type represents metrics Types +type Type int + +const ( + // Discard discards any metrics. 
+ Discard Type = iota + + // Prometheus starts server at :2112 and exports metrics data to /metrics + Prometheus + + // ExpVar uses built-in expvar package + ExpVar +) + +// Metrics holds the request and response counters. +type Metrics struct { + RequestCounter metrics.Counter + ResponseCounter metrics.Counter +} + +// NewMetrics creates the request and response counters for the given Type. Discard and +// unrecognized values yield no-op counters, so the returned *Metrics is never nil. +func NewMetrics(metricsType Type) *Metrics { + switch metricsType { + case ExpVar: + return &Metrics{ + RequestCounter: expvar.NewCounter("request_count"), + ResponseCounter: expvar.NewCounter("response_count"), + } + case Prometheus: + return &Metrics{ + RequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: "geziyor", + Name: "request_count", + Help: "Request count", + }, []string{"method"}), + ResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: "geziyor", + Name: "response_count", + Help: "Response count", + }, []string{"method"}), + } + default: + // Discard (the zero Type) and unknown values: no-op counters, not nil, because the metrics middlewares dereference the result. + return &Metrics{ + RequestCounter: discard.NewCounter(), + ResponseCounter: discard.NewCounter(), + } + } +} diff --git a/middleware.go b/middleware.go index a8b1a22..1d617a7 100644 --- a/middleware.go +++ b/middleware.go @@ -77,7 +77,7 @@ func logMiddleware(g *Geziyor, r *Request) { // metricsRequestMiddleware sets stats func metricsRequestMiddleware(g *Geziyor, r *Request) { - g.metrics.requestCount.With("method", r.Method).Add(1) + g.metrics.RequestCounter.With("method", r.Method).Add(1) } // parseHTMLMiddleware parses response if response is HTML @@ -89,5 +89,5 @@ func parseHTMLMiddleware(g *Geziyor, r *Response) { // metricsResponseMiddleware sets stats func metricsResponseMiddleware(g *Geziyor, r *Response) { - g.metrics.responseCount.With("method", r.Request.Method).Add(1) + g.metrics.ResponseCounter.With("method", r.Request.Method).Add(1) } diff --git a/options.go b/options.go index 8546c1d..99b1659 100644 --- a/options.go +++ b/options.go @@ -2,6 +2,7 @@ package geziyor 
import ( "github.com/fpfeng/httpcache" + "github.com/geziyor/geziyor/metrics" "time" ) @@ -67,4 +68,6 @@ type Options struct { // If set true, cookies won't send. CookiesDisabled bool + + MetricsType metrics.Type }