Middlewares refactored to multiple files in middleware package.
Extractors were removed because they add complexity to the scraper, both for learning and for development.
This commit is contained in:
21
middleware/duplicate_requests.go
Normal file
21
middleware/duplicate_requests.go
Normal file
@ -0,0 +1,21 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"github.com/geziyor/geziyor/client"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// DuplicateRequests checks for already visited URLs
|
||||
type DuplicateRequests struct {
|
||||
RevisitEnabled bool
|
||||
visitedURLs sync.Map
|
||||
}
|
||||
|
||||
func (a *DuplicateRequests) ProcessRequest(r *client.Request) {
|
||||
if !a.RevisitEnabled && r.Request.Method == "GET" {
|
||||
if _, visited := a.visitedURLs.LoadOrStore(r.Request.URL.String(), struct{}{}); visited {
|
||||
//log.Printf("URL already visited %s\n")
|
||||
r.Cancel()
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user