Allowed domains support added

This commit is contained in:
Musab Gültekin 2019-06-07 21:02:31 +03:00
parent 5271095c8e
commit ee7d498f22

View File

@@ -7,6 +7,7 @@ import (
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"net/url"
"os" "os"
"sync" "sync"
"time" "time"
@@ -19,8 +20,9 @@ type Gezer struct {
} }
// Opt is the option set handed to NewGezer.
// NOTE(review): this is a side-by-side diff rendering; on each line the left
// half is the previous definition and the right half is the new one.
type Opt struct { type Opt struct {
// AllowedDomains (new in this commit) is compared for exact equality against
// the host of each requested URL; when the list is non-empty and nothing
// matches, the request is skipped. An empty list places no restriction.
StartURLs []string AllowedDomains []string
// StartURLs: presumably the URLs crawling begins from; usage is not visible
// in this view.
ParseFunc func(response *Response) StartURLs []string
// ParseFunc is a callback that receives a *Response.
ParseFunc func(response *Response)
} }
func NewGezer(opt Opt) *Gezer { func NewGezer(opt Opt) *Gezer {
@@ -47,14 +49,34 @@ func (g *Gezer) Get(url string) {
go g.getRequest(url) go g.getRequest(url)
} }
func (g *Gezer) getRequest(url string) { func (g *Gezer) getRequest(rawURL string) {
defer g.wg.Done() defer g.wg.Done()
// Parse URL
parsedURL, err := url.Parse(rawURL)
if err != nil {
fmt.Fprintf(os.Stderr, "url parsing error: %v", err)
return
}
// Check for allowed domains
var allowed bool
for _, domain := range g.opt.AllowedDomains {
if domain == parsedURL.Host {
allowed = true
break
}
}
if !allowed && len(g.opt.AllowedDomains) != 0 {
fmt.Fprintf(os.Stderr, "domain not allowed: %s", parsedURL.Host)
return
}
// Log // Log
fmt.Println("Fetching: ", url) fmt.Println("Fetching: ", rawURL)
// Get request // Get request
resp, err := g.client.Get(url) resp, err := g.client.Get(rawURL)
if resp != nil { if resp != nil {
defer resp.Body.Close() defer resp.Body.Close()
} }
@@ -65,6 +87,7 @@ func (g *Gezer) getRequest(url string) {
// Read body // Read body
body, err := ioutil.ReadAll(resp.Body) body, err := ioutil.ReadAll(resp.Body)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "reading body error: %v", err)
return return
} }