Gezer renamed to Geziyor

This commit is contained in:
Musab Gültekin 2019-06-08 17:14:10 +03:00
parent c525e0d7d0
commit 54c7d3550f
6 changed files with 33 additions and 33 deletions

View File

@ -1,5 +1,5 @@
# Gezer
Scraper and crawler framework for Golang. Gezer uses go *channels* over *callbacks*
# Geziyor
Scraper and crawler framework for Golang. Geziyor uses go *channels* over *callbacks*
## Features
- 1.000+ Requests/Sec
@ -9,7 +9,7 @@ Scraper and crawler framework for Golang. Gezer uses go *channels* over *callbac
## Example
```go
gezer := NewGezer(Opt{
geziyor := NewGeziyor(Opt{
StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) {
r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
@ -25,14 +25,14 @@ gezer := NewGezer(Opt{
// Next Page
if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
go r.Gezer.Get(r.JoinURL(href))
go r.Geziyor.Get(r.JoinURL(href))
}
},
})
gezer.Start()
geziyor.Start()
```
## Installation
go get github.com/gogezer/gezer
go get github.com/geziyor/geziyor

View File

@ -1,4 +1,4 @@
package gezer
package geziyor
import (
"encoding/json"

View File

@ -1,4 +1,4 @@
package gezer
package geziyor
import (
"bytes"
@ -14,7 +14,7 @@ import (
"time"
)
type Gezer struct {
type Geziyor struct {
client *http.Client
wg sync.WaitGroup
opt Opt
@ -33,8 +33,8 @@ func init() {
log.SetOutput(os.Stdout)
}
func NewGezer(opt Opt) *Gezer {
gezer := &Gezer{
func NewGeziyor(opt Opt) *Geziyor {
geziyor := &Geziyor{
client: &http.Client{
Timeout: time.Second * 10,
},
@ -42,13 +42,13 @@ func NewGezer(opt Opt) *Gezer {
}
if opt.Cache != nil {
gezer.client.Transport = httpcache.NewTransport(opt.Cache)
geziyor.client.Transport = httpcache.NewTransport(opt.Cache)
}
return gezer
return geziyor
}
func (g *Gezer) Start() {
func (g *Geziyor) Start() {
for _, startURL := range g.opt.StartURLs {
go g.Get(startURL)
}
@ -57,7 +57,7 @@ func (g *Gezer) Start() {
g.wg.Wait()
}
func (g *Gezer) Get(rawURL string) {
func (g *Geziyor) Get(rawURL string) {
g.wg.Add(1)
defer g.wg.Done()
@ -92,7 +92,7 @@ func (g *Gezer) Get(rawURL string) {
Response: resp,
Body: body,
Doc: doc,
Gezer: g,
Geziyor: g,
Exports: make(chan map[string]interface{}, 1),
}
@ -104,7 +104,7 @@ func (g *Gezer) Get(rawURL string) {
time.Sleep(time.Millisecond)
}
func checkURL(rawURL string, g *Gezer) bool {
func checkURL(rawURL string, g *Geziyor) bool {
// Parse URL
parsedURL, err := url.Parse(rawURL)

View File

@ -1,4 +1,4 @@
package gezer
package geziyor
import (
"fmt"
@ -7,20 +7,20 @@ import (
"testing"
)
func TestGezer_StartURLs_Simple(t *testing.T) {
gezer := NewGezer(Opt{
func TestGeziyor_StartURLs_Simple(t *testing.T) {
geziyor := NewGeziyor(Opt{
StartURLs: []string{"http://api.ipify.org"},
Cache: httpcache.NewMemoryCache(),
ParseFunc: func(r *Response) {
fmt.Println(string(r.Body))
r.Gezer.Get("http://api.ipify.org")
r.Geziyor.Get("http://api.ipify.org")
},
})
gezer.Start()
geziyor.Start()
}
func TestGezer_StartURLs_HTML(t *testing.T) {
gezer := NewGezer(Opt{
func TestGeziyor_StartURLs_HTML(t *testing.T) {
geziyor := NewGeziyor(Opt{
StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) {
r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
@ -36,25 +36,25 @@ func TestGezer_StartURLs_HTML(t *testing.T) {
// Next Page
if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
go r.Gezer.Get(r.JoinURL(href))
go r.Geziyor.Get(r.JoinURL(href))
}
},
})
gezer.Start()
geziyor.Start()
}
func TestGezer_Concurrent_Requests(t *testing.T) {
gezer := NewGezer(Opt{
func TestGeziyor_Concurrent_Requests(t *testing.T) {
geziyor := NewGeziyor(Opt{
AllowedDomains: []string{"quotes.toscrape.com"},
StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) {
//r.Exports <- map[string]interface{}{"href": r.Request.URL.String()}
r.Doc.Find("a").Each(func(i int, s *goquery.Selection) {
if href, ok := s.Attr("href"); ok {
go r.Gezer.Get(r.JoinURL(href))
go r.Geziyor.Get(r.JoinURL(href))
}
})
},
})
gezer.Start()
geziyor.Start()
}

2
go.mod
View File

@ -1,4 +1,4 @@
module github.com/gogezer/gezer
module github.com/geziyor/geziyor
go 1.12

View File

@ -1,4 +1,4 @@
package gezer
package geziyor
import (
"github.com/PuerkitoBio/goquery"
@ -11,7 +11,7 @@ type Response struct {
Body []byte
Doc *goquery.Document
Gezer *Gezer
Geziyor *Geziyor
Exports chan map[string]interface{}
}