Robotstxt metrics added.
This commit is contained in:
parent
d3c4389c46
commit
d19465c44a
@ -124,7 +124,7 @@ geziyor.NewGeziyor(&geziyor.Options{
|
||||
### Exporting Data
|
||||
|
||||
You can export data automatically using exporters: just send the data to the `Geziyor.Exports` channel.
|
||||
[Available exporters](https://godoc.org/github.com/geziyor/geziyor/exporter)
|
||||
[Available exporters](https://godoc.org/github.com/geziyor/geziyor/export)
|
||||
|
||||
```go
|
||||
geziyor.NewGeziyor(&geziyor.Options{
|
||||
|
@ -102,7 +102,7 @@ func NewGeziyor(opt *Options) *Geziyor {
|
||||
geziyor.reqMiddlewares = append(geziyor.reqMiddlewares, metricsMiddleware)
|
||||
geziyor.resMiddlewares = append(geziyor.resMiddlewares, metricsMiddleware)
|
||||
|
||||
robotsMiddleware := middleware.NewRobotsTxt(geziyor.Client, opt.RobotsTxtDisabled)
|
||||
robotsMiddleware := middleware.NewRobotsTxt(geziyor.Client, geziyor.metrics, opt.RobotsTxtDisabled)
|
||||
geziyor.reqMiddlewares = append(geziyor.reqMiddlewares, robotsMiddleware)
|
||||
|
||||
// Custom Middlewares
|
||||
|
@ -26,9 +26,12 @@ const (
|
||||
|
||||
// Metrics type stores metrics
|
||||
type Metrics struct {
|
||||
RequestCounter metrics.Counter
|
||||
ResponseCounter metrics.Counter
|
||||
PanicCounter metrics.Counter
|
||||
RequestCounter metrics.Counter
|
||||
ResponseCounter metrics.Counter
|
||||
PanicCounter metrics.Counter
|
||||
RobotsTxtRequestCounter metrics.Counter
|
||||
RobotsTxtResponseCounter metrics.Counter
|
||||
RobotsTxtForbiddenCounter metrics.Counter
|
||||
}
|
||||
|
||||
// NewMetrics creates new metrics with given metrics.Type
|
||||
@ -36,15 +39,21 @@ func NewMetrics(metricsType Type) *Metrics {
|
||||
switch metricsType {
|
||||
case Discard:
|
||||
return &Metrics{
|
||||
RequestCounter: discard.NewCounter(),
|
||||
ResponseCounter: discard.NewCounter(),
|
||||
PanicCounter: discard.NewCounter(),
|
||||
RequestCounter: discard.NewCounter(),
|
||||
ResponseCounter: discard.NewCounter(),
|
||||
PanicCounter: discard.NewCounter(),
|
||||
RobotsTxtRequestCounter: discard.NewCounter(),
|
||||
RobotsTxtResponseCounter: discard.NewCounter(),
|
||||
RobotsTxtForbiddenCounter: discard.NewCounter(),
|
||||
}
|
||||
case ExpVar:
|
||||
return &Metrics{
|
||||
RequestCounter: expvar.NewCounter("request_count"),
|
||||
ResponseCounter: expvar.NewCounter("response_count"),
|
||||
PanicCounter: expvar.NewCounter("panic_count"),
|
||||
RequestCounter: expvar.NewCounter("request_count"),
|
||||
ResponseCounter: expvar.NewCounter("response_count"),
|
||||
PanicCounter: expvar.NewCounter("panic_count"),
|
||||
RobotsTxtRequestCounter: expvar.NewCounter("robotstxt_request_count"),
|
||||
RobotsTxtResponseCounter: expvar.NewCounter("robotstxt_response_count"),
|
||||
RobotsTxtForbiddenCounter: expvar.NewCounter("robotstxt_forbidden_count"),
|
||||
}
|
||||
case Prometheus:
|
||||
return &Metrics{
|
||||
@ -57,12 +66,27 @@ func NewMetrics(metricsType Type) *Metrics {
|
||||
Namespace: "geziyor",
|
||||
Name: "response_count",
|
||||
Help: "Response count",
|
||||
}, []string{"method"}),
|
||||
}, []string{"status"}),
|
||||
PanicCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "panic_count",
|
||||
Help: "Panic count",
|
||||
}, []string{}),
|
||||
RobotsTxtRequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "robotstxt_request_count",
|
||||
Help: "Robotstxt request count",
|
||||
}, []string{}),
|
||||
RobotsTxtResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "robotstxt_response_count",
|
||||
Help: "Robotstxt response count",
|
||||
}, []string{"status"}),
|
||||
RobotsTxtForbiddenCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: "geziyor",
|
||||
Name: "robotstxt_forbidden_count",
|
||||
Help: "Robotstxt forbidden count",
|
||||
}, []string{"method"}),
|
||||
}
|
||||
default:
|
||||
return nil
|
||||
|
@ -3,6 +3,7 @@ package middleware
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Metrics sets stats for request and responses
|
||||
@ -15,5 +16,5 @@ func (a *Metrics) ProcessRequest(r *client.Request) {
|
||||
}
|
||||
|
||||
func (a *Metrics) ProcessResponse(r *client.Response) {
|
||||
a.Metrics.ResponseCounter.With("method", r.Request.Method).Add(1)
|
||||
a.Metrics.ResponseCounter.With("status", strconv.Itoa(r.StatusCode)).Add(1)
|
||||
}
|
||||
|
@ -2,21 +2,25 @@ package middleware
|
||||
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"github.com/geziyor/geziyor/metrics"
|
||||
"github.com/temoto/robotstxt"
|
||||
"log"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// RobotsTxt middleware filters out requests forbidden by the robots.txt exclusion standard.
|
||||
type RobotsTxt struct {
|
||||
metrics *metrics.Metrics
|
||||
robotsDisabled bool
|
||||
client *client.Client
|
||||
mut sync.RWMutex
|
||||
robotsMap map[string]*robotstxt.RobotsData
|
||||
}
|
||||
|
||||
func NewRobotsTxt(client *client.Client, robotsDisabled bool) RequestProcessor {
|
||||
func NewRobotsTxt(client *client.Client, metrics *metrics.Metrics, robotsDisabled bool) RequestProcessor {
|
||||
return &RobotsTxt{
|
||||
metrics: metrics,
|
||||
robotsDisabled: robotsDisabled,
|
||||
client: client,
|
||||
robotsMap: make(map[string]*robotstxt.RobotsData),
|
||||
@ -28,7 +32,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: Locking like this improves performance but causes duplicate requests to robots.txt,
|
||||
// TODO: Locking like this improves performance but sometimes it causes duplicate requests to robots.txt
|
||||
m.mut.RLock()
|
||||
robotsData, exists := m.robotsMap[r.Host]
|
||||
m.mut.RUnlock()
|
||||
@ -39,10 +43,12 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
||||
return // Don't Do anything
|
||||
}
|
||||
|
||||
m.metrics.RobotsTxtRequestCounter.Add(1)
|
||||
robotsResp, err := m.client.DoRequestClient(robotsReq)
|
||||
if err != nil {
|
||||
return // Don't Do anything
|
||||
}
|
||||
m.metrics.RobotsTxtResponseCounter.With("status", strconv.Itoa(robotsResp.StatusCode)).Add(1)
|
||||
|
||||
robotsData, err = robotstxt.FromStatusAndBytes(robotsResp.StatusCode, robotsResp.Body)
|
||||
if err != nil {
|
||||
@ -55,7 +61,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
||||
}
|
||||
|
||||
if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) {
|
||||
// TODO: Forbidden requests metrics
|
||||
m.metrics.RobotsTxtForbiddenCounter.With("method", r.Method).Add(1)
|
||||
log.Println("Forbidden by robots.txt:", r.URL.String())
|
||||
r.Cancel()
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user