diff --git a/README.md b/README.md
index a12cee1..1073095 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-# Gezer
-Scraper and crawler framework for Golang. Gezer uses go *channels* over *callbacks*
+# Geziyor
+Scraper and crawler framework for Golang. Geziyor uses go *channels* over *callbacks*
 
 ## Features
 - 1.000+ Requests/Sec
@@ -9,7 +9,7 @@ Scraper and crawler framework for Golang. Gezer uses go *channels* over *callbac
 
 ## Example
 ```go
-gezer := NewGezer(Opt{
+geziyor := NewGeziyor(Opt{
 	StartURLs: []string{"http://quotes.toscrape.com/"},
 	ParseFunc: func(r *Response) {
 		r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
@@ -25,14 +25,14 @@ gezer := NewGezer(Opt{
 
 		// Next Page
 		if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
-			go r.Gezer.Get(r.JoinURL(href))
+			go r.Geziyor.Get(r.JoinURL(href))
 		}
 	},
 })
-gezer.Start()
+geziyor.Start()
 ```
 
 
 ## Installation
 
-    go get github.com/gogezer/gezer
\ No newline at end of file
+    go get github.com/geziyor/geziyor
\ No newline at end of file
diff --git a/export.go b/export.go
index 9cd50d6..2b0befb 100644
--- a/export.go
+++ b/export.go
@@ -1,4 +1,4 @@
-package gezer
+package geziyor
 
 import (
 	"encoding/json"
diff --git a/gezer.go b/geziyor.go
similarity index 87%
rename from gezer.go
rename to geziyor.go
index 38e9206..3e72926 100644
--- a/gezer.go
+++ b/geziyor.go
@@ -1,4 +1,4 @@
-package gezer
+package geziyor
 
 import (
 	"bytes"
@@ -14,7 +14,7 @@ import (
 	"time"
 )
 
-type Gezer struct {
+type Geziyor struct {
 	client *http.Client
 	wg     sync.WaitGroup
 	opt    Opt
@@ -33,8 +33,8 @@ func init() {
 	log.SetOutput(os.Stdout)
 }
 
-func NewGezer(opt Opt) *Gezer {
-	gezer := &Gezer{
+func NewGeziyor(opt Opt) *Geziyor {
+	geziyor := &Geziyor{
 		client: &http.Client{
 			Timeout: time.Second * 10,
 		},
@@ -42,13 +42,13 @@ func NewGezer(opt Opt) *Gezer {
 	}
 
 	if opt.Cache != nil {
-		gezer.client.Transport = httpcache.NewTransport(opt.Cache)
+		geziyor.client.Transport = httpcache.NewTransport(opt.Cache)
 	}
 
-	return gezer
+	return geziyor
 }
 
-func (g *Gezer) Start() {
+func (g *Geziyor) Start() {
 	for _, startURL := range g.opt.StartURLs {
 		go g.Get(startURL)
 	}
@@ -57,7 +57,7 @@ func (g *Gezer) Start() {
 	g.wg.Wait()
 }
 
-func (g *Gezer) Get(rawURL string) {
+func (g *Geziyor) Get(rawURL string) {
 	g.wg.Add(1)
 	defer g.wg.Done()
 
@@ -92,7 +92,7 @@ func (g *Gezer) Get(rawURL string) {
 		Response: resp,
 		Body:     body,
 		Doc:      doc,
-		Gezer:    g,
+		Geziyor:  g,
 		Exports:  make(chan map[string]interface{}, 1),
 	}
 
@@ -104,7 +104,7 @@ func (g *Gezer) Get(rawURL string) {
 	time.Sleep(time.Millisecond)
 }
 
-func checkURL(rawURL string, g *Gezer) bool {
+func checkURL(rawURL string, g *Geziyor) bool {
 
 	// Parse URL
 	parsedURL, err := url.Parse(rawURL)
diff --git a/gezer_test.go b/geziyor_test.go
similarity index 72%
rename from gezer_test.go
rename to geziyor_test.go
index c860560..4087322 100644
--- a/gezer_test.go
+++ b/geziyor_test.go
@@ -1,4 +1,4 @@
-package gezer
+package geziyor
 
 import (
 	"fmt"
@@ -7,20 +7,20 @@ import (
 	"testing"
 )
 
-func TestGezer_StartURLs_Simple(t *testing.T) {
-	gezer := NewGezer(Opt{
+func TestGeziyor_StartURLs_Simple(t *testing.T) {
+	geziyor := NewGeziyor(Opt{
 		StartURLs: []string{"http://api.ipify.org"},
 		Cache:     httpcache.NewMemoryCache(),
 		ParseFunc: func(r *Response) {
 			fmt.Println(string(r.Body))
-			r.Gezer.Get("http://api.ipify.org")
+			r.Geziyor.Get("http://api.ipify.org")
 		},
 	})
-	gezer.Start()
+	geziyor.Start()
 }
 
-func TestGezer_StartURLs_HTML(t *testing.T) {
-	gezer := NewGezer(Opt{
+func TestGeziyor_StartURLs_HTML(t *testing.T) {
+	geziyor := NewGeziyor(Opt{
 		StartURLs: []string{"http://quotes.toscrape.com/"},
 		ParseFunc: func(r *Response) {
 			r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
@@ -36,25 +36,25 @@ func TestGezer_StartURLs_HTML(t *testing.T) {
 
 			// Next Page
 			if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
-				go r.Gezer.Get(r.JoinURL(href))
+				go r.Geziyor.Get(r.JoinURL(href))
 			}
 		},
 	})
-	gezer.Start()
+	geziyor.Start()
 }
 
-func TestGezer_Concurrent_Requests(t *testing.T) {
-	gezer := NewGezer(Opt{
+func TestGeziyor_Concurrent_Requests(t *testing.T) {
+	geziyor := NewGeziyor(Opt{
 		AllowedDomains: []string{"quotes.toscrape.com"},
 		StartURLs:      []string{"http://quotes.toscrape.com/"},
 		ParseFunc: func(r *Response) {
 			//r.Exports <- map[string]interface{}{"href": r.Request.URL.String()}
 			r.Doc.Find("a").Each(func(i int, s *goquery.Selection) {
 				if href, ok := s.Attr("href"); ok {
-					go r.Gezer.Get(r.JoinURL(href))
+					go r.Geziyor.Get(r.JoinURL(href))
 				}
 			})
 		},
 	})
-	gezer.Start()
+	geziyor.Start()
 }
diff --git a/go.mod b/go.mod
index d264ce5..c1c814d 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module github.com/gogezer/gezer
+module github.com/geziyor/geziyor
 
 go 1.12
 
diff --git a/response.go b/response.go
index ea5d9e5..7799191 100644
--- a/response.go
+++ b/response.go
@@ -1,4 +1,4 @@
-package gezer
+package geziyor
 
 import (
 	"github.com/PuerkitoBio/goquery"
@@ -11,7 +11,7 @@ type Response struct {
 	Body []byte
 	Doc  *goquery.Document
 
-	Gezer *Gezer
+	Geziyor *Geziyor
 
 	Exports chan map[string]interface{}
 }