Expvar metrics support added. Metrics refactored into their own package.

This commit is contained in:
Musab Gültekin 2019-06-21 21:37:25 +03:00
parent 88c4b1dd35
commit 7bc782400c
9 changed files with 84 additions and 40 deletions

View File

@ -9,9 +9,9 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
- JS Rendering
- Caching (Memory/Disk)
- Automatic Data Exporting (JSON, CSV, or custom)
- Metrics (Prometheus, Expvar, or custom)
- Limit Concurrency (Global/Per Domain)
- Request Delays (Constant/Randomized)
- Metrics (Prometheus)
- Cookies and Middlewares
- Automatic response decoding to UTF-8

View File

@ -7,6 +7,7 @@ import (
"github.com/chromedp/chromedp"
"github.com/fpfeng/httpcache"
"github.com/geziyor/geziyor/internal"
"github.com/geziyor/geziyor/metrics"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/net/html/charset"
@ -30,7 +31,7 @@ type Geziyor struct {
Client *internal.Client
Exports chan interface{}
metrics *Metrics
metrics *metrics.Metrics
requestMiddlewares []RequestMiddleware
responseMiddlewares []ResponseMiddleware
wg sync.WaitGroup
@ -61,7 +62,7 @@ func NewGeziyor(opt *Options) *Geziyor {
parseHTMLMiddleware,
metricsResponseMiddleware,
},
metrics: newMetrics(),
metrics: metrics.NewMetrics(opt.MetricsType),
}
if opt.UserAgent == "" {
@ -102,11 +103,14 @@ func NewGeziyor(opt *Options) *Geziyor {
func (g *Geziyor) Start() {
log.Println("Scraping Started")
// Start metrics
// Metrics
metricsServer := &http.Server{Addr: ":2112"}
if g.Opt.MetricsType == metrics.Prometheus {
go func() {
http.Handle("/metrics", promhttp.Handler())
http.ListenAndServe(":2112", nil)
metricsServer.ListenAndServe()
}()
}
// Start Exporters
if len(g.Opt.Exporters) != 0 {
@ -131,6 +135,7 @@ func (g *Geziyor) Start() {
g.wg.Wait()
close(g.Exports)
metricsServer.Close()
log.Println("Scraping Finished")
}

View File

@ -7,12 +7,14 @@ import (
"github.com/fpfeng/httpcache"
"github.com/geziyor/geziyor"
"github.com/geziyor/geziyor/exporter"
"github.com/geziyor/geziyor/metrics"
"math/rand"
"testing"
"time"
)
func TestSimple(t *testing.T) {
defer leaktest.Check(t)()
geziyor.NewGeziyor(&geziyor.Options{
StartURLs: []string{"http://api.ipify.org"},
ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) {
@ -155,5 +157,6 @@ func TestBasicAuth(t *testing.T) {
req.SetBasicAuth("username", "password")
g.Do(req, nil)
},
MetricsType: metrics.ExpVar,
}).Start()
}

1
go.mod
View File

@ -4,6 +4,7 @@ go 1.12
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/VividCortex/gohistogram v1.0.0 // indirect
github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05
github.com/fortytw2/leaktest v1.3.0

2
go.sum
View File

@ -1,5 +1,7 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=

View File

@ -1,30 +0,0 @@
package geziyor
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
// Metrics groups the go-kit counters that the request and response
// middlewares update.
type Metrics struct {
// requestCount is incremented by metricsRequestMiddleware with the
// "method" label set to the request method.
requestCount metrics.Counter
// responseCount is incremented by metricsResponseMiddleware with the
// "method" label set to the method of the originating request.
responseCount metrics.Counter
}
// newMetrics builds the Prometheus-backed request and response counters.
// Both counters are created in the "geziyor" namespace and carry a single
// "method" label.
func newMetrics() *Metrics {
	// The request counter is created first, matching the original
	// construction order.
	requests := prometheus.NewCounterFrom(
		stdprometheus.CounterOpts{
			Namespace: "geziyor",
			Name:      "request_count",
			Help:      "Request count",
		},
		[]string{"method"},
	)

	responses := prometheus.NewCounterFrom(
		stdprometheus.CounterOpts{
			Namespace: "geziyor",
			Name:      "response_count",
			Help:      "Response count",
		},
		[]string{"method"},
	)

	return &Metrics{
		requestCount:  requests,
		responseCount: responses,
	}
}

60
metrics/metrics.go Normal file
View File

@ -0,0 +1,60 @@
package metrics
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
"github.com/go-kit/kit/metrics/expvar"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
// Type selects which metrics backend NewMetrics uses for its counters.
type Type int
const (
// Discard discards any metrics. It is the first constant in the iota
// sequence (value 0), so a Type left at its zero value means Discard.
Discard Type = iota
// Prometheus records metrics in Prometheus counters. The metrics package
// itself does not serve them; Geziyor.Start exposes them at /metrics on
// an HTTP server listening on :2112.
Prometheus
// ExpVar records metrics through go-kit's expvar counters
// (presumably published via the standard library expvar package —
// confirm against the go-kit documentation).
ExpVar
)
// Metrics groups the go-kit counters updated while scraping. The concrete
// counter implementation depends on the Type passed to NewMetrics.
type Metrics struct {
// RequestCounter is incremented by 1 in metricsRequestMiddleware, with
// the "method" label set to the request method.
RequestCounter metrics.Counter
// ResponseCounter is incremented by 1 in metricsResponseMiddleware, with
// the "method" label set to the method of the originating request.
ResponseCounter metrics.Counter
}
// NewMetrics creates the request and response counters for the given
// metrics Type:
//
//   - Prometheus: counters "request_count" and "response_count" in the
//     "geziyor" namespace, each with a single "method" label.
//   - ExpVar: go-kit expvar counters named "request_count" and
//     "response_count".
//   - Discard, or any value that is not a declared Type constant: no-op
//     counters.
//
// The returned *Metrics is never nil. Previously an unrecognised Type
// yielded nil, which made the metrics middlewares panic with a nil pointer
// dereference on the first request; such values now fall back to Discard.
//
// NOTE(review): the ExpVar and Prometheus backends appear to register their
// counter names process-wide, so constructing either of them more than once
// in the same process presumably panics on duplicate registration — confirm
// against the go-kit and expvar documentation before creating several
// instances.
func NewMetrics(metricsType Type) *Metrics {
	switch metricsType {
	case ExpVar:
		return &Metrics{
			RequestCounter:  expvar.NewCounter("request_count"),
			ResponseCounter: expvar.NewCounter("response_count"),
		}
	case Prometheus:
		return &Metrics{
			RequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
				Namespace: "geziyor",
				Name:      "request_count",
				Help:      "Request count",
			}, []string{"method"}),
			ResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
				Namespace: "geziyor",
				Name:      "response_count",
				Help:      "Response count",
			}, []string{"method"}),
		}
	default:
		// Covers Discard as well as out-of-range values, so callers always
		// receive usable counters.
		return &Metrics{
			RequestCounter:  discard.NewCounter(),
			ResponseCounter: discard.NewCounter(),
		}
	}
}

View File

@ -77,7 +77,7 @@ func logMiddleware(g *Geziyor, r *Request) {
// metricsRequestMiddleware records one request in the metrics: it adds 1 to
// the request counter with the "method" label set to r.Method.
func metricsRequestMiddleware(g *Geziyor, r *Request) {
g.metrics.requestCount.With("method", r.Method).Add(1)
g.metrics.RequestCounter.With("method", r.Method).Add(1)
}
// parseHTMLMiddleware parses response if response is HTML
@ -89,5 +89,5 @@ func parseHTMLMiddleware(g *Geziyor, r *Response) {
// metricsResponseMiddleware records one response in the metrics: it adds 1
// to the response counter with the "method" label set to the method of the
// request that produced the response (r.Request.Method).
func metricsResponseMiddleware(g *Geziyor, r *Response) {
g.metrics.responseCount.With("method", r.Request.Method).Add(1)
g.metrics.ResponseCounter.With("method", r.Request.Method).Add(1)
}

View File

@ -2,6 +2,7 @@ package geziyor
import (
"github.com/fpfeng/httpcache"
"github.com/geziyor/geziyor/metrics"
"time"
)
@ -67,4 +68,6 @@ type Options struct {
// If set true, cookies won't send.
CookiesDisabled bool
MetricsType metrics.Type
}