Expvar metrics support added. Metrics refactored into its own package.
This commit is contained in:
		| @@ -9,9 +9,9 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use | |||||||
| - JS Rendering | - JS Rendering | ||||||
| - Caching (Memory/Disk) | - Caching (Memory/Disk) | ||||||
| - Automatic Data Exporting (JSON, CSV, or custom) | - Automatic Data Exporting (JSON, CSV, or custom) | ||||||
|  | - Metrics (Prometheus, Expvar, or custom) | ||||||
| - Limit Concurrency (Global/Per Domain) | - Limit Concurrency (Global/Per Domain) | ||||||
| - Request Delays (Constant/Randomized) | - Request Delays (Constant/Randomized) | ||||||
| - Metrics (Prometheus) |  | ||||||
| - Cookies and Middlewares | - Cookies and Middlewares | ||||||
| - Automatic response decoding to UTF-8 | - Automatic response decoding to UTF-8 | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										13
									
								
								geziyor.go
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								geziyor.go
									
									
									
									
									
								
							| @@ -7,6 +7,7 @@ import ( | |||||||
| 	"github.com/chromedp/chromedp" | 	"github.com/chromedp/chromedp" | ||||||
| 	"github.com/fpfeng/httpcache" | 	"github.com/fpfeng/httpcache" | ||||||
| 	"github.com/geziyor/geziyor/internal" | 	"github.com/geziyor/geziyor/internal" | ||||||
|  | 	"github.com/geziyor/geziyor/metrics" | ||||||
| 	"github.com/pkg/errors" | 	"github.com/pkg/errors" | ||||||
| 	"github.com/prometheus/client_golang/prometheus/promhttp" | 	"github.com/prometheus/client_golang/prometheus/promhttp" | ||||||
| 	"golang.org/x/net/html/charset" | 	"golang.org/x/net/html/charset" | ||||||
| @@ -30,7 +31,7 @@ type Geziyor struct { | |||||||
| 	Client  *internal.Client | 	Client  *internal.Client | ||||||
| 	Exports chan interface{} | 	Exports chan interface{} | ||||||
|  |  | ||||||
| 	metrics             *Metrics | 	metrics             *metrics.Metrics | ||||||
| 	requestMiddlewares  []RequestMiddleware | 	requestMiddlewares  []RequestMiddleware | ||||||
| 	responseMiddlewares []ResponseMiddleware | 	responseMiddlewares []ResponseMiddleware | ||||||
| 	wg                  sync.WaitGroup | 	wg                  sync.WaitGroup | ||||||
| @@ -61,7 +62,7 @@ func NewGeziyor(opt *Options) *Geziyor { | |||||||
| 			parseHTMLMiddleware, | 			parseHTMLMiddleware, | ||||||
| 			metricsResponseMiddleware, | 			metricsResponseMiddleware, | ||||||
| 		}, | 		}, | ||||||
| 		metrics: newMetrics(), | 		metrics: metrics.NewMetrics(opt.MetricsType), | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if opt.UserAgent == "" { | 	if opt.UserAgent == "" { | ||||||
| @@ -102,11 +103,14 @@ func NewGeziyor(opt *Options) *Geziyor { | |||||||
| func (g *Geziyor) Start() { | func (g *Geziyor) Start() { | ||||||
| 	log.Println("Scraping Started") | 	log.Println("Scraping Started") | ||||||
|  |  | ||||||
| 	// Start metrics | 	// Metrics | ||||||
|  | 	metricsServer := &http.Server{Addr: ":2112"} | ||||||
|  | 	if g.Opt.MetricsType == metrics.Prometheus { | ||||||
| 		go func() { | 		go func() { | ||||||
| 			http.Handle("/metrics", promhttp.Handler()) | 			http.Handle("/metrics", promhttp.Handler()) | ||||||
| 		http.ListenAndServe(":2112", nil) | 			metricsServer.ListenAndServe() | ||||||
| 		}() | 		}() | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// Start Exporters | 	// Start Exporters | ||||||
| 	if len(g.Opt.Exporters) != 0 { | 	if len(g.Opt.Exporters) != 0 { | ||||||
| @@ -131,6 +135,7 @@ func (g *Geziyor) Start() { | |||||||
|  |  | ||||||
| 	g.wg.Wait() | 	g.wg.Wait() | ||||||
| 	close(g.Exports) | 	close(g.Exports) | ||||||
|  | 	metricsServer.Close() | ||||||
| 	log.Println("Scraping Finished") | 	log.Println("Scraping Finished") | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,12 +7,14 @@ import ( | |||||||
| 	"github.com/fpfeng/httpcache" | 	"github.com/fpfeng/httpcache" | ||||||
| 	"github.com/geziyor/geziyor" | 	"github.com/geziyor/geziyor" | ||||||
| 	"github.com/geziyor/geziyor/exporter" | 	"github.com/geziyor/geziyor/exporter" | ||||||
|  | 	"github.com/geziyor/geziyor/metrics" | ||||||
| 	"math/rand" | 	"math/rand" | ||||||
| 	"testing" | 	"testing" | ||||||
| 	"time" | 	"time" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| func TestSimple(t *testing.T) { | func TestSimple(t *testing.T) { | ||||||
|  | 	defer leaktest.Check(t)() | ||||||
| 	geziyor.NewGeziyor(&geziyor.Options{ | 	geziyor.NewGeziyor(&geziyor.Options{ | ||||||
| 		StartURLs: []string{"http://api.ipify.org"}, | 		StartURLs: []string{"http://api.ipify.org"}, | ||||||
| 		ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) { | 		ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) { | ||||||
| @@ -155,5 +157,6 @@ func TestBasicAuth(t *testing.T) { | |||||||
| 			req.SetBasicAuth("username", "password") | 			req.SetBasicAuth("username", "password") | ||||||
| 			g.Do(req, nil) | 			g.Do(req, nil) | ||||||
| 		}, | 		}, | ||||||
|  | 		MetricsType: metrics.ExpVar, | ||||||
| 	}).Start() | 	}).Start() | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								go.mod
									
									
									
									
									
								
							| @@ -4,6 +4,7 @@ go 1.12 | |||||||
|  |  | ||||||
| require ( | require ( | ||||||
| 	github.com/PuerkitoBio/goquery v1.5.0 | 	github.com/PuerkitoBio/goquery v1.5.0 | ||||||
|  | 	github.com/VividCortex/gohistogram v1.0.0 // indirect | ||||||
| 	github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54 | 	github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54 | ||||||
| 	github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05 | 	github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05 | ||||||
| 	github.com/fortytw2/leaktest v1.3.0 | 	github.com/fortytw2/leaktest v1.3.0 | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								go.sum
									
									
									
									
									
								
							| @@ -1,5 +1,7 @@ | |||||||
| github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= | github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= | ||||||
| github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= | github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= | ||||||
|  | github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE= | ||||||
|  | github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= | ||||||
| github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= | ||||||
| github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= | ||||||
| github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= | github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= | ||||||
|   | |||||||
							
								
								
									
										30
									
								
								metrics.go
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								metrics.go
									
									
									
									
									
								
							| @@ -1,30 +0,0 @@ | |||||||
| package geziyor |  | ||||||
|  |  | ||||||
| import ( |  | ||||||
| 	"github.com/go-kit/kit/metrics" |  | ||||||
| 	"github.com/go-kit/kit/metrics/prometheus" |  | ||||||
| 	stdprometheus "github.com/prometheus/client_golang/prometheus" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| // Metrics type stores metrics |  | ||||||
| type Metrics struct { |  | ||||||
| 	requestCount  metrics.Counter |  | ||||||
| 	responseCount metrics.Counter |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func newMetrics() *Metrics { |  | ||||||
| 	m := Metrics{ |  | ||||||
| 		requestCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ |  | ||||||
| 			Namespace: "geziyor", |  | ||||||
| 			Name:      "request_count", |  | ||||||
| 			Help:      "Request count", |  | ||||||
| 		}, []string{"method"}), |  | ||||||
| 		responseCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ |  | ||||||
| 			Namespace: "geziyor", |  | ||||||
| 			Name:      "response_count", |  | ||||||
| 			Help:      "Response count", |  | ||||||
| 		}, []string{"method"}), |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return &m |  | ||||||
| } |  | ||||||
							
								
								
									
										60
									
								
								metrics/metrics.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								metrics/metrics.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | |||||||
|  | package metrics | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"github.com/go-kit/kit/metrics" | ||||||
|  | 	"github.com/go-kit/kit/metrics/discard" | ||||||
|  | 	"github.com/go-kit/kit/metrics/expvar" | ||||||
|  | 	"github.com/go-kit/kit/metrics/prometheus" | ||||||
|  | 	stdprometheus "github.com/prometheus/client_golang/prometheus" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // Type represents the kind of metrics backend to use. | ||||||
|  | type Type int | ||||||
|  |  | ||||||
|  | const ( | ||||||
|  | 	// Discard discards any metrics. | ||||||
|  | 	Discard Type = iota | ||||||
|  |  | ||||||
|  | 	// Prometheus starts server at :2112 and exports metrics data to /metrics | ||||||
|  | 	Prometheus | ||||||
|  |  | ||||||
|  | 	// ExpVar uses built-in expvar package | ||||||
|  | 	ExpVar | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // Metrics type stores metrics | ||||||
|  | type Metrics struct { | ||||||
|  | 	RequestCounter  metrics.Counter | ||||||
|  | 	ResponseCounter metrics.Counter | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // NewMetrics creates new metrics with given metrics.Type. It returns nil for an unrecognized Type. | ||||||
|  | func NewMetrics(metricsType Type) *Metrics { | ||||||
|  | 	switch metricsType { | ||||||
|  | 	case Discard: | ||||||
|  | 		return &Metrics{ | ||||||
|  | 			RequestCounter:  discard.NewCounter(), | ||||||
|  | 			ResponseCounter: discard.NewCounter(), | ||||||
|  | 		} | ||||||
|  | 	case ExpVar: | ||||||
|  | 		return &Metrics{ | ||||||
|  | 			RequestCounter:  expvar.NewCounter("request_count"), | ||||||
|  | 			ResponseCounter: expvar.NewCounter("response_count"), | ||||||
|  | 		} | ||||||
|  | 	case Prometheus: | ||||||
|  | 		return &Metrics{ | ||||||
|  | 			RequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ | ||||||
|  | 				Namespace: "geziyor", | ||||||
|  | 				Name:      "request_count", | ||||||
|  | 				Help:      "Request count", | ||||||
|  | 			}, []string{"method"}), | ||||||
|  | 			ResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ | ||||||
|  | 				Namespace: "geziyor", | ||||||
|  | 				Name:      "response_count", | ||||||
|  | 				Help:      "Response count", | ||||||
|  | 			}, []string{"method"}), | ||||||
|  | 		} | ||||||
|  | 	default: | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @@ -77,7 +77,7 @@ func logMiddleware(g *Geziyor, r *Request) { | |||||||
|  |  | ||||||
| // metricsRequestMiddleware sets stats | // metricsRequestMiddleware sets stats | ||||||
| func metricsRequestMiddleware(g *Geziyor, r *Request) { | func metricsRequestMiddleware(g *Geziyor, r *Request) { | ||||||
| 	g.metrics.requestCount.With("method", r.Method).Add(1) | 	g.metrics.RequestCounter.With("method", r.Method).Add(1) | ||||||
| } | } | ||||||
|  |  | ||||||
| // parseHTMLMiddleware parses response if response is HTML | // parseHTMLMiddleware parses response if response is HTML | ||||||
| @@ -89,5 +89,5 @@ func parseHTMLMiddleware(g *Geziyor, r *Response) { | |||||||
|  |  | ||||||
| // metricsResponseMiddleware sets stats | // metricsResponseMiddleware sets stats | ||||||
| func metricsResponseMiddleware(g *Geziyor, r *Response) { | func metricsResponseMiddleware(g *Geziyor, r *Response) { | ||||||
| 	g.metrics.responseCount.With("method", r.Request.Method).Add(1) | 	g.metrics.ResponseCounter.With("method", r.Request.Method).Add(1) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -2,6 +2,7 @@ package geziyor | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"github.com/fpfeng/httpcache" | 	"github.com/fpfeng/httpcache" | ||||||
|  | 	"github.com/geziyor/geziyor/metrics" | ||||||
| 	"time" | 	"time" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -67,4 +68,6 @@ type Options struct { | |||||||
|  |  | ||||||
| 	// If set to true, cookies won't be sent. | 	// If set to true, cookies won't be sent. | ||||||
| 	CookiesDisabled bool | 	CookiesDisabled bool | ||||||
|  |  | ||||||
|  | 	MetricsType metrics.Type | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user