Expvar metrics support added. Metrics refactored to its own package.
This commit is contained in:
parent
88c4b1dd35
commit
7bc782400c
@ -9,9 +9,9 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
|
||||
- JS Rendering
|
||||
- Caching (Memory/Disk)
|
||||
- Automatic Data Exporting (JSON, CSV, or custom)
|
||||
- Metrics (Prometheus, Expvar, or custom)
|
||||
- Limit Concurrency (Global/Per Domain)
|
||||
- Request Delays (Constant/Randomized)
|
||||
- Metrics (Prometheus)
|
||||
- Cookies and Middlewares
|
||||
- Automatic response decoding to UTF-8
|
||||
|
||||
|
13
geziyor.go
13
geziyor.go
@ -7,6 +7,7 @@ import (
|
||||
"github.com/chromedp/chromedp"
|
||||
"github.com/fpfeng/httpcache"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"golang.org/x/net/html/charset"
|
||||
@ -30,7 +31,7 @@ type Geziyor struct {
|
||||
Client *internal.Client
|
||||
Exports chan interface{}
|
||||
|
||||
metrics *Metrics
|
||||
metrics *metrics.Metrics
|
||||
requestMiddlewares []RequestMiddleware
|
||||
responseMiddlewares []ResponseMiddleware
|
||||
wg sync.WaitGroup
|
||||
@ -61,7 +62,7 @@ func NewGeziyor(opt *Options) *Geziyor {
|
||||
parseHTMLMiddleware,
|
||||
metricsResponseMiddleware,
|
||||
},
|
||||
metrics: newMetrics(),
|
||||
metrics: metrics.NewMetrics(opt.MetricsType),
|
||||
}
|
||||
|
||||
if opt.UserAgent == "" {
|
||||
@ -102,11 +103,14 @@ func NewGeziyor(opt *Options) *Geziyor {
|
||||
func (g *Geziyor) Start() {
|
||||
log.Println("Scraping Started")
|
||||
|
||||
// Start metrics
|
||||
// Metrics
|
||||
metricsServer := &http.Server{Addr: ":2112"}
|
||||
if g.Opt.MetricsType == metrics.Prometheus {
|
||||
go func() {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
http.ListenAndServe(":2112", nil)
|
||||
metricsServer.ListenAndServe()
|
||||
}()
|
||||
}
|
||||
|
||||
// Start Exporters
|
||||
if len(g.Opt.Exporters) != 0 {
|
||||
@ -131,6 +135,7 @@ func (g *Geziyor) Start() {
|
||||
|
||||
g.wg.Wait()
|
||||
close(g.Exports)
|
||||
metricsServer.Close()
|
||||
log.Println("Scraping Finished")
|
||||
}
|
||||
|
||||
|
@ -7,12 +7,14 @@ import (
|
||||
"github.com/fpfeng/httpcache"
|
||||
"github.com/geziyor/geziyor"
|
||||
"github.com/geziyor/geziyor/exporter"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestSimple(t *testing.T) {
|
||||
defer leaktest.Check(t)()
|
||||
geziyor.NewGeziyor(&geziyor.Options{
|
||||
StartURLs: []string{"http://api.ipify.org"},
|
||||
ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) {
|
||||
@ -155,5 +157,6 @@ func TestBasicAuth(t *testing.T) {
|
||||
req.SetBasicAuth("username", "password")
|
||||
g.Do(req, nil)
|
||||
},
|
||||
MetricsType: metrics.ExpVar,
|
||||
}).Start()
|
||||
}
|
||||
|
1
go.mod
1
go.mod
@ -4,6 +4,7 @@ go 1.12
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.5.0
|
||||
github.com/VividCortex/gohistogram v1.0.0 // indirect
|
||||
github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54
|
||||
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05
|
||||
github.com/fortytw2/leaktest v1.3.0
|
||||
|
2
go.sum
2
go.sum
@ -1,5 +1,7 @@
|
||||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||
github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE=
|
||||
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||
|
30
metrics.go
30
metrics.go
@ -1,30 +0,0 @@
|
||||
package geziyor
|
||||
|
||||
import (
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/go-kit/kit/metrics/prometheus"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Metrics type stores metrics
|
||||
type Metrics struct {
|
||||
requestCount metrics.Counter
|
||||
responseCount metrics.Counter
|
||||
}
|
||||
|
||||
func newMetrics() *Metrics {
|
||||
m := Metrics{
|
||||
requestCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "request_count",
|
||||
Help: "Request count",
|
||||
}, []string{"method"}),
|
||||
responseCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "response_count",
|
||||
Help: "Response count",
|
||||
}, []string{"method"}),
|
||||
}
|
||||
|
||||
return &m
|
||||
}
|
60
metrics/metrics.go
Normal file
60
metrics/metrics.go
Normal file
@ -0,0 +1,60 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/go-kit/kit/metrics/discard"
|
||||
"github.com/go-kit/kit/metrics/expvar"
|
||||
"github.com/go-kit/kit/metrics/prometheus"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Type selects which metrics backend NewMetrics uses for the counters.
|
||||
type Type int
|
||||
|
||||
const (
|
||||
// Discard discards any metrics. It is the first iota constant (value 0),
// so a Type that was never set means Discard.
|
||||
Discard Type = iota
|
||||
|
||||
// Prometheus records counters through the Prometheus client. When this type
// is selected, Geziyor.Start serves them on :2112 under /metrics.
|
||||
Prometheus
|
||||
|
||||
// ExpVar records counters through go-kit's expvar adapter, which builds on
// the standard library expvar package.
|
||||
ExpVar
|
||||
)
|
||||
|
||||
// Metrics holds the counters that the crawler updates while it runs.
// Values are created by NewMetrics.
|
||||
type Metrics struct {
|
||||
// RequestCounter is incremented by one for every request, with a "method" label.
RequestCounter metrics.Counter
|
||||
// ResponseCounter is incremented by one for every response, with a "method" label.
ResponseCounter metrics.Counter
|
||||
}
|
||||
|
||||
// NewMetrics creates new metrics with given metrics.Type
|
||||
func NewMetrics(metricsType Type) *Metrics {
|
||||
switch metricsType {
|
||||
case Discard:
|
||||
return &Metrics{
|
||||
RequestCounter: discard.NewCounter(),
|
||||
ResponseCounter: discard.NewCounter(),
|
||||
}
|
||||
case ExpVar:
|
||||
return &Metrics{
|
||||
RequestCounter: expvar.NewCounter("request_count"),
|
||||
ResponseCounter: expvar.NewCounter("response_count"),
|
||||
}
|
||||
case Prometheus:
|
||||
return &Metrics{
|
||||
RequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "request_count",
|
||||
Help: "Request count",
|
||||
}, []string{"method"}),
|
||||
ResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "response_count",
|
||||
Help: "Response count",
|
||||
}, []string{"method"}),
|
||||
}
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
@ -77,7 +77,7 @@ func logMiddleware(g *Geziyor, r *Request) {
|
||||
|
||||
// metricsRequestMiddleware increments the request counter by one, labelled
// with the request's HTTP method.
|
||||
func metricsRequestMiddleware(g *Geziyor, r *Request) {
|
||||
g.metrics.requestCount.With("method", r.Method).Add(1)
|
||||
g.metrics.RequestCounter.With("method", r.Method).Add(1)
|
||||
}
|
||||
|
||||
// parseHTMLMiddleware parses response if response is HTML
|
||||
@ -89,5 +89,5 @@ func parseHTMLMiddleware(g *Geziyor, r *Response) {
|
||||
|
||||
// metricsResponseMiddleware increments the response counter by one, labelled
// with the HTTP method of the request that produced the response.
|
||||
func metricsResponseMiddleware(g *Geziyor, r *Response) {
|
||||
g.metrics.responseCount.With("method", r.Request.Method).Add(1)
|
||||
g.metrics.ResponseCounter.With("method", r.Request.Method).Add(1)
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package geziyor
|
||||
|
||||
import (
|
||||
"github.com/fpfeng/httpcache"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -67,4 +68,6 @@ type Options struct {
|
||||
|
||||
// If set to true, cookies won't be sent.
|
||||
CookiesDisabled bool
|
||||
|
||||
MetricsType metrics.Type
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user