Robots.txt metrics added.
This commit is contained in:
parent
d3c4389c46
commit
d19465c44a
@ -124,7 +124,7 @@ geziyor.NewGeziyor(&geziyor.Options{
|
|||||||
### Exporting Data
|
### Exporting Data
|
||||||
|
|
||||||
You can export data automatically using exporters. Just send data to ```Geziyor.Exports``` chan.
|
You can export data automatically using exporters. Just send data to ```Geziyor.Exports``` chan.
|
||||||
[Available exporters](https://godoc.org/github.com/geziyor/geziyor/exporter)
|
[Available exporters](https://godoc.org/github.com/geziyor/geziyor/export)
|
||||||
|
|
||||||
```go
|
```go
|
||||||
geziyor.NewGeziyor(&geziyor.Options{
|
geziyor.NewGeziyor(&geziyor.Options{
|
||||||
|
@ -102,7 +102,7 @@ func NewGeziyor(opt *Options) *Geziyor {
|
|||||||
geziyor.reqMiddlewares = append(geziyor.reqMiddlewares, metricsMiddleware)
|
geziyor.reqMiddlewares = append(geziyor.reqMiddlewares, metricsMiddleware)
|
||||||
geziyor.resMiddlewares = append(geziyor.resMiddlewares, metricsMiddleware)
|
geziyor.resMiddlewares = append(geziyor.resMiddlewares, metricsMiddleware)
|
||||||
|
|
||||||
robotsMiddleware := middleware.NewRobotsTxt(geziyor.Client, opt.RobotsTxtDisabled)
|
robotsMiddleware := middleware.NewRobotsTxt(geziyor.Client, geziyor.metrics, opt.RobotsTxtDisabled)
|
||||||
geziyor.reqMiddlewares = append(geziyor.reqMiddlewares, robotsMiddleware)
|
geziyor.reqMiddlewares = append(geziyor.reqMiddlewares, robotsMiddleware)
|
||||||
|
|
||||||
// Custom Middlewares
|
// Custom Middlewares
|
||||||
|
@ -29,6 +29,9 @@ type Metrics struct {
|
|||||||
RequestCounter metrics.Counter
|
RequestCounter metrics.Counter
|
||||||
ResponseCounter metrics.Counter
|
ResponseCounter metrics.Counter
|
||||||
PanicCounter metrics.Counter
|
PanicCounter metrics.Counter
|
||||||
|
RobotsTxtRequestCounter metrics.Counter
|
||||||
|
RobotsTxtResponseCounter metrics.Counter
|
||||||
|
RobotsTxtForbiddenCounter metrics.Counter
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMetrics creates new metrics with given metrics.Type
|
// NewMetrics creates new metrics with given metrics.Type
|
||||||
@ -39,12 +42,18 @@ func NewMetrics(metricsType Type) *Metrics {
|
|||||||
RequestCounter: discard.NewCounter(),
|
RequestCounter: discard.NewCounter(),
|
||||||
ResponseCounter: discard.NewCounter(),
|
ResponseCounter: discard.NewCounter(),
|
||||||
PanicCounter: discard.NewCounter(),
|
PanicCounter: discard.NewCounter(),
|
||||||
|
RobotsTxtRequestCounter: discard.NewCounter(),
|
||||||
|
RobotsTxtResponseCounter: discard.NewCounter(),
|
||||||
|
RobotsTxtForbiddenCounter: discard.NewCounter(),
|
||||||
}
|
}
|
||||||
case ExpVar:
|
case ExpVar:
|
||||||
return &Metrics{
|
return &Metrics{
|
||||||
RequestCounter: expvar.NewCounter("request_count"),
|
RequestCounter: expvar.NewCounter("request_count"),
|
||||||
ResponseCounter: expvar.NewCounter("response_count"),
|
ResponseCounter: expvar.NewCounter("response_count"),
|
||||||
PanicCounter: expvar.NewCounter("panic_count"),
|
PanicCounter: expvar.NewCounter("panic_count"),
|
||||||
|
RobotsTxtRequestCounter: expvar.NewCounter("robotstxt_request_count"),
|
||||||
|
RobotsTxtResponseCounter: expvar.NewCounter("robotstxt_response_count"),
|
||||||
|
RobotsTxtForbiddenCounter: expvar.NewCounter("robotstxt_forbidden_count"),
|
||||||
}
|
}
|
||||||
case Prometheus:
|
case Prometheus:
|
||||||
return &Metrics{
|
return &Metrics{
|
||||||
@ -57,12 +66,27 @@ func NewMetrics(metricsType Type) *Metrics {
|
|||||||
Namespace: "geziyor",
|
Namespace: "geziyor",
|
||||||
Name: "response_count",
|
Name: "response_count",
|
||||||
Help: "Response count",
|
Help: "Response count",
|
||||||
}, []string{"method"}),
|
}, []string{"status"}),
|
||||||
PanicCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
PanicCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||||
Namespace: "geziyor",
|
Namespace: "geziyor",
|
||||||
Name: "panic_count",
|
Name: "panic_count",
|
||||||
Help: "Panic count",
|
Help: "Panic count",
|
||||||
}, []string{}),
|
}, []string{}),
|
||||||
|
RobotsTxtRequestCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||||
|
Namespace: "geziyor",
|
||||||
|
Name: "robotstxt_request_count",
|
||||||
|
Help: "Robotstxt request count",
|
||||||
|
}, []string{}),
|
||||||
|
RobotsTxtResponseCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||||
|
Namespace: "geziyor",
|
||||||
|
Name: "robotstxt_response_count",
|
||||||
|
Help: "Robotstxt response count",
|
||||||
|
}, []string{"status"}),
|
||||||
|
RobotsTxtForbiddenCounter: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||||
|
Namespace: "geziyor",
|
||||||
|
Name: "robotstxt_forbidden_count",
|
||||||
|
Help: "Robotstxt forbidden count",
|
||||||
|
}, []string{"method"}),
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return nil
|
return nil
|
||||||
|
@ -3,6 +3,7 @@ package middleware
|
|||||||
import (
|
import (
|
||||||
"github.com/geziyor/geziyor/client"
|
"github.com/geziyor/geziyor/client"
|
||||||
"github.com/geziyor/geziyor/metrics"
|
"github.com/geziyor/geziyor/metrics"
|
||||||
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Metrics sets stats for request and responses
|
// Metrics sets stats for request and responses
|
||||||
@ -15,5 +16,5 @@ func (a *Metrics) ProcessRequest(r *client.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *Metrics) ProcessResponse(r *client.Response) {
|
func (a *Metrics) ProcessResponse(r *client.Response) {
|
||||||
a.Metrics.ResponseCounter.With("method", r.Request.Method).Add(1)
|
a.Metrics.ResponseCounter.With("status", strconv.Itoa(r.StatusCode)).Add(1)
|
||||||
}
|
}
|
||||||
|
@ -2,21 +2,25 @@ package middleware
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/geziyor/geziyor/client"
|
"github.com/geziyor/geziyor/client"
|
||||||
|
"github.com/geziyor/geziyor/metrics"
|
||||||
"github.com/temoto/robotstxt"
|
"github.com/temoto/robotstxt"
|
||||||
"log"
|
"log"
|
||||||
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
// RobotsTxt middleware filters out requests forbidden by the robots.txt exclusion standard.
|
// RobotsTxt middleware filters out requests forbidden by the robots.txt exclusion standard.
|
||||||
type RobotsTxt struct {
|
type RobotsTxt struct {
|
||||||
|
metrics *metrics.Metrics
|
||||||
robotsDisabled bool
|
robotsDisabled bool
|
||||||
client *client.Client
|
client *client.Client
|
||||||
mut sync.RWMutex
|
mut sync.RWMutex
|
||||||
robotsMap map[string]*robotstxt.RobotsData
|
robotsMap map[string]*robotstxt.RobotsData
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewRobotsTxt(client *client.Client, robotsDisabled bool) RequestProcessor {
|
func NewRobotsTxt(client *client.Client, metrics *metrics.Metrics, robotsDisabled bool) RequestProcessor {
|
||||||
return &RobotsTxt{
|
return &RobotsTxt{
|
||||||
|
metrics: metrics,
|
||||||
robotsDisabled: robotsDisabled,
|
robotsDisabled: robotsDisabled,
|
||||||
client: client,
|
client: client,
|
||||||
robotsMap: make(map[string]*robotstxt.RobotsData),
|
robotsMap: make(map[string]*robotstxt.RobotsData),
|
||||||
@ -28,7 +32,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Locking like this improves performance but causes duplicate requests to robots.txt,
|
// TODO: Locking like this improves performance but sometimes it causes duplicate requests to robots.txt
|
||||||
m.mut.RLock()
|
m.mut.RLock()
|
||||||
robotsData, exists := m.robotsMap[r.Host]
|
robotsData, exists := m.robotsMap[r.Host]
|
||||||
m.mut.RUnlock()
|
m.mut.RUnlock()
|
||||||
@ -39,10 +43,12 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
|||||||
return // Don't Do anything
|
return // Don't Do anything
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m.metrics.RobotsTxtRequestCounter.Add(1)
|
||||||
robotsResp, err := m.client.DoRequestClient(robotsReq)
|
robotsResp, err := m.client.DoRequestClient(robotsReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return // Don't Do anything
|
return // Don't Do anything
|
||||||
}
|
}
|
||||||
|
m.metrics.RobotsTxtResponseCounter.With("status", strconv.Itoa(robotsResp.StatusCode)).Add(1)
|
||||||
|
|
||||||
robotsData, err = robotstxt.FromStatusAndBytes(robotsResp.StatusCode, robotsResp.Body)
|
robotsData, err = robotstxt.FromStatusAndBytes(robotsResp.StatusCode, robotsResp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -55,7 +61,7 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) {
|
if !robotsData.TestAgent(r.URL.Path, r.UserAgent()) {
|
||||||
// TODO: Forbidden requests metrics
|
m.metrics.RobotsTxtForbiddenCounter.With("method", r.Method).Add(1)
|
||||||
log.Println("Forbidden by robots.txt:", r.URL.String())
|
log.Println("Forbidden by robots.txt:", r.URL.String())
|
||||||
r.Cancel()
|
r.Cancel()
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user