Expvar metrics support added. Metrics refactored to its own package.
This commit is contained in:
parent
88c4b1dd35
commit
7bc782400c
@ -9,9 +9,9 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
|
|||||||
- JS Rendering
|
- JS Rendering
|
||||||
- Caching (Memory/Disk)
|
- Caching (Memory/Disk)
|
||||||
- Automatic Data Exporting (JSON, CSV, or custom)
|
- Automatic Data Exporting (JSON, CSV, or custom)
|
||||||
|
- Metrics (Prometheus, Expvar, or custom)
|
||||||
- Limit Concurrency (Global/Per Domain)
|
- Limit Concurrency (Global/Per Domain)
|
||||||
- Request Delays (Constant/Randomized)
|
- Request Delays (Constant/Randomized)
|
||||||
- Metrics (Prometheus)
|
|
||||||
- Cookies and Middlewares
|
- Cookies and Middlewares
|
||||||
- Automatic response decoding to UTF-8
|
- Automatic response decoding to UTF-8
|
||||||
|
|
||||||
|
19
geziyor.go
19
geziyor.go
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/chromedp/chromedp"
|
"github.com/chromedp/chromedp"
|
||||||
"github.com/fpfeng/httpcache"
|
"github.com/fpfeng/httpcache"
|
||||||
"github.com/geziyor/geziyor/internal"
|
"github.com/geziyor/geziyor/internal"
|
||||||
|
"github.com/geziyor/geziyor/metrics"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
"golang.org/x/net/html/charset"
|
"golang.org/x/net/html/charset"
|
||||||
@ -30,7 +31,7 @@ type Geziyor struct {
|
|||||||
Client *internal.Client
|
Client *internal.Client
|
||||||
Exports chan interface{}
|
Exports chan interface{}
|
||||||
|
|
||||||
metrics *Metrics
|
metrics *metrics.Metrics
|
||||||
requestMiddlewares []RequestMiddleware
|
requestMiddlewares []RequestMiddleware
|
||||||
responseMiddlewares []ResponseMiddleware
|
responseMiddlewares []ResponseMiddleware
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
@ -61,7 +62,7 @@ func NewGeziyor(opt *Options) *Geziyor {
|
|||||||
parseHTMLMiddleware,
|
parseHTMLMiddleware,
|
||||||
metricsResponseMiddleware,
|
metricsResponseMiddleware,
|
||||||
},
|
},
|
||||||
metrics: newMetrics(),
|
metrics: metrics.NewMetrics(opt.MetricsType),
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.UserAgent == "" {
|
if opt.UserAgent == "" {
|
||||||
@ -102,11 +103,14 @@ func NewGeziyor(opt *Options) *Geziyor {
|
|||||||
func (g *Geziyor) Start() {
|
func (g *Geziyor) Start() {
|
||||||
log.Println("Scraping Started")
|
log.Println("Scraping Started")
|
||||||
|
|
||||||
// Start metrics
|
// Metrics
|
||||||
go func() {
|
metricsServer := &http.Server{Addr: ":2112"}
|
||||||
http.Handle("/metrics", promhttp.Handler())
|
if g.Opt.MetricsType == metrics.Prometheus {
|
||||||
http.ListenAndServe(":2112", nil)
|
go func() {
|
||||||
}()
|
http.Handle("/metrics", promhttp.Handler())
|
||||||
|
metricsServer.ListenAndServe()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
// Start Exporters
|
// Start Exporters
|
||||||
if len(g.Opt.Exporters) != 0 {
|
if len(g.Opt.Exporters) != 0 {
|
||||||
@ -131,6 +135,7 @@ func (g *Geziyor) Start() {
|
|||||||
|
|
||||||
g.wg.Wait()
|
g.wg.Wait()
|
||||||
close(g.Exports)
|
close(g.Exports)
|
||||||
|
metricsServer.Close()
|
||||||
log.Println("Scraping Finished")
|
log.Println("Scraping Finished")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,12 +7,14 @@ import (
|
|||||||
"github.com/fpfeng/httpcache"
|
"github.com/fpfeng/httpcache"
|
||||||
"github.com/geziyor/geziyor"
|
"github.com/geziyor/geziyor"
|
||||||
"github.com/geziyor/geziyor/exporter"
|
"github.com/geziyor/geziyor/exporter"
|
||||||
|
"github.com/geziyor/geziyor/metrics"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestSimple(t *testing.T) {
|
func TestSimple(t *testing.T) {
|
||||||
|
defer leaktest.Check(t)()
|
||||||
geziyor.NewGeziyor(&geziyor.Options{
|
geziyor.NewGeziyor(&geziyor.Options{
|
||||||
StartURLs: []string{"http://api.ipify.org"},
|
StartURLs: []string{"http://api.ipify.org"},
|
||||||
ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) {
|
ParseFunc: func(g *geziyor.Geziyor, r *geziyor.Response) {
|
||||||
@ -155,5 +157,6 @@ func TestBasicAuth(t *testing.T) {
|
|||||||
req.SetBasicAuth("username", "password")
|
req.SetBasicAuth("username", "password")
|
||||||
g.Do(req, nil)
|
g.Do(req, nil)
|
||||||
},
|
},
|
||||||
|
MetricsType: metrics.ExpVar,
|
||||||
}).Start()
|
}).Start()
|
||||||
}
|
}
|
||||||
|
1
go.mod
1
go.mod
@ -4,6 +4,7 @@ go 1.12
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.5.0
|
github.com/PuerkitoBio/goquery v1.5.0
|
||||||
|
github.com/VividCortex/gohistogram v1.0.0 // indirect
|
||||||
github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54
|
github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54
|
||||||
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05
|
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05
|
||||||
github.com/fortytw2/leaktest v1.3.0
|
github.com/fortytw2/leaktest v1.3.0
|
||||||
|
2
go.sum
2
go.sum
@ -1,5 +1,7 @@
|
|||||||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
||||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||||
|
github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE=
|
||||||
|
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
|
||||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||||
|
30
metrics.go
30
metrics.go
@ -1,30 +0,0 @@
|
|||||||
package geziyor
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/go-kit/kit/metrics"
|
|
||||||
"github.com/go-kit/kit/metrics/prometheus"
|
|
||||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Metrics type stores metrics
|
|
||||||
type Metrics struct {
|
|
||||||
requestCount metrics.Counter
|
|
||||||
responseCount metrics.Counter
|
|
||||||
}
|
|
||||||
|
|
||||||
func newMetrics() *Metrics {
|
|
||||||
m := Metrics{
|
|
||||||
requestCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
||||||
Namespace: "geziyor",
|
|
||||||
Name: "request_count",
|
|
||||||
Help: "Request count",
|
|
||||||
}, []string{"method"}),
|
|
||||||
responseCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
|
||||||
Namespace: "geziyor",
|
|
||||||
Name: "response_count",
|
|
||||||
Help: "Response count",
|
|
||||||
}, []string{"method"}),
|
|
||||||
}
|
|
||||||
|
|
||||||
return &m
|
|
||||||
}
|
|
60
metrics/metrics.go
Normal file
60
metrics/metrics.go
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/go-kit/kit/metrics"
|
||||||
|
"github.com/go-kit/kit/metrics/discard"
|
||||||
|
"github.com/go-kit/kit/metrics/expvar"
|
||||||
|
"github.com/go-kit/kit/metrics/prometheus"
|
||||||
|
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Type selects the backend that collects crawl metrics.
type Type int

// Supported metrics backends. Discard is the zero value of Type, so an unset
// Options.MetricsType disables metrics collection.
const (
	// Discard drops every metric; nothing is recorded or exported.
	Discard Type = 0

	// Prometheus records Prometheus metrics; a server on :2112 exposes them
	// at /metrics.
	Prometheus Type = 1

	// ExpVar records metrics through the built-in expvar package.
	ExpVar Type = 2
)
|
||||||
|
|
||||||
|
// Metrics type stores metrics
|
||||||
|
type Metrics struct {
|
||||||
|
RequestCounter metrics.Counter
|
||||||
|
ResponseCounter metrics.Counter
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewMetrics creates new metrics with given metrics.Type
|
||||||
|
func NewMetrics(metricsType Type) *Metrics {
|
||||||
|
switch metricsType {
|
||||||
|
case Discard:
|
||||||
|
return &Metrics{
|
||||||
|
RequestCounter: discard.NewCounter(),
|
||||||
|
ResponseCounter: discard.NewCounter(),
|
||||||
|
}
|
||||||
|
case ExpVar:
|
||||||
|
return &Metrics{
|
||||||
|
RequestCounter: expvar.NewCounter("request_count"),
|
||||||
|
ResponseCounter: expvar.NewCounter("response_count"),
|
||||||
|
}
|
||||||
|
case Prometheus:
|
||||||
|
return &Metrics{
|
||||||
|
RequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||||
|
Namespace: "geziyor",
|
||||||
|
Name: "request_count",
|
||||||
|
Help: "Request count",
|
||||||
|
}, []string{"method"}),
|
||||||
|
ResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||||
|
Namespace: "geziyor",
|
||||||
|
Name: "response_count",
|
||||||
|
Help: "Response count",
|
||||||
|
}, []string{"method"}),
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
@ -77,7 +77,7 @@ func logMiddleware(g *Geziyor, r *Request) {
|
|||||||
|
|
||||||
// metricsRequestMiddleware sets stats
|
// metricsRequestMiddleware sets stats
|
||||||
func metricsRequestMiddleware(g *Geziyor, r *Request) {
|
func metricsRequestMiddleware(g *Geziyor, r *Request) {
|
||||||
g.metrics.requestCount.With("method", r.Method).Add(1)
|
g.metrics.RequestCounter.With("method", r.Method).Add(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseHTMLMiddleware parses response if response is HTML
|
// parseHTMLMiddleware parses response if response is HTML
|
||||||
@ -89,5 +89,5 @@ func parseHTMLMiddleware(g *Geziyor, r *Response) {
|
|||||||
|
|
||||||
// metricsResponseMiddleware sets stats
|
// metricsResponseMiddleware sets stats
|
||||||
func metricsResponseMiddleware(g *Geziyor, r *Response) {
|
func metricsResponseMiddleware(g *Geziyor, r *Response) {
|
||||||
g.metrics.responseCount.With("method", r.Request.Method).Add(1)
|
g.metrics.ResponseCounter.With("method", r.Request.Method).Add(1)
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@ package geziyor
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/fpfeng/httpcache"
|
"github.com/fpfeng/httpcache"
|
||||||
|
"github.com/geziyor/geziyor/metrics"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -67,4 +68,6 @@ type Options struct {
|
|||||||
|
|
||||||
// If set to true, cookies won't be sent.
|
// If set to true, cookies won't be sent.
|
||||||
CookiesDisabled bool
|
CookiesDisabled bool
|
||||||
|
|
||||||
|
MetricsType metrics.Type
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user