Gezer renamed to Geziyor

This commit is contained in:
Musab Gültekin 2019-06-08 17:14:10 +03:00
parent c525e0d7d0
commit 54c7d3550f
6 changed files with 33 additions and 33 deletions

View File

@ -1,5 +1,5 @@
# Gezer # Geziyor
Scraper and crawler framework for Golang. Gezer uses go *channels* over *callbacks* Scraper and crawler framework for Golang. Geziyor uses go *channels* over *callbacks*
## Features ## Features
- 1,000+ Requests/Sec - 1,000+ Requests/Sec
@ -9,7 +9,7 @@ Scraper and crawler framework for Golang. Gezer uses go *channels* over *callbac
## Example ## Example
```go ```go
gezer := NewGezer(Opt{ geziyor := NewGeziyor(Opt{
StartURLs: []string{"http://quotes.toscrape.com/"}, StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) { ParseFunc: func(r *Response) {
r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) { r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
@ -25,14 +25,14 @@ gezer := NewGezer(Opt{
// Next Page // Next Page
if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok { if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
go r.Gezer.Get(r.JoinURL(href)) go r.Geziyor.Get(r.JoinURL(href))
} }
}, },
}) })
gezer.Start() geziyor.Start()
``` ```
## Installation ## Installation
go get github.com/gogezer/gezer go get github.com/geziyor/geziyor

View File

@ -1,4 +1,4 @@
package gezer package geziyor
import ( import (
"encoding/json" "encoding/json"

View File

@ -1,4 +1,4 @@
package gezer package geziyor
import ( import (
"bytes" "bytes"
@ -14,7 +14,7 @@ import (
"time" "time"
) )
type Gezer struct { type Geziyor struct {
client *http.Client client *http.Client
wg sync.WaitGroup wg sync.WaitGroup
opt Opt opt Opt
@ -33,8 +33,8 @@ func init() {
log.SetOutput(os.Stdout) log.SetOutput(os.Stdout)
} }
func NewGezer(opt Opt) *Gezer { func NewGeziyor(opt Opt) *Geziyor {
gezer := &Gezer{ geziyor := &Geziyor{
client: &http.Client{ client: &http.Client{
Timeout: time.Second * 10, Timeout: time.Second * 10,
}, },
@ -42,13 +42,13 @@ func NewGezer(opt Opt) *Gezer {
} }
if opt.Cache != nil { if opt.Cache != nil {
gezer.client.Transport = httpcache.NewTransport(opt.Cache) geziyor.client.Transport = httpcache.NewTransport(opt.Cache)
} }
return gezer return geziyor
} }
func (g *Gezer) Start() { func (g *Geziyor) Start() {
for _, startURL := range g.opt.StartURLs { for _, startURL := range g.opt.StartURLs {
go g.Get(startURL) go g.Get(startURL)
} }
@ -57,7 +57,7 @@ func (g *Gezer) Start() {
g.wg.Wait() g.wg.Wait()
} }
func (g *Gezer) Get(rawURL string) { func (g *Geziyor) Get(rawURL string) {
g.wg.Add(1) g.wg.Add(1)
defer g.wg.Done() defer g.wg.Done()
@ -92,7 +92,7 @@ func (g *Gezer) Get(rawURL string) {
Response: resp, Response: resp,
Body: body, Body: body,
Doc: doc, Doc: doc,
Gezer: g, Geziyor: g,
Exports: make(chan map[string]interface{}, 1), Exports: make(chan map[string]interface{}, 1),
} }
@ -104,7 +104,7 @@ func (g *Gezer) Get(rawURL string) {
time.Sleep(time.Millisecond) time.Sleep(time.Millisecond)
} }
func checkURL(rawURL string, g *Gezer) bool { func checkURL(rawURL string, g *Geziyor) bool {
// Parse URL // Parse URL
parsedURL, err := url.Parse(rawURL) parsedURL, err := url.Parse(rawURL)

View File

@ -1,4 +1,4 @@
package gezer package geziyor
import ( import (
"fmt" "fmt"
@ -7,20 +7,20 @@ import (
"testing" "testing"
) )
func TestGezer_StartURLs_Simple(t *testing.T) { func TestGeziyor_StartURLs_Simple(t *testing.T) {
gezer := NewGezer(Opt{ geziyor := NewGeziyor(Opt{
StartURLs: []string{"http://api.ipify.org"}, StartURLs: []string{"http://api.ipify.org"},
Cache: httpcache.NewMemoryCache(), Cache: httpcache.NewMemoryCache(),
ParseFunc: func(r *Response) { ParseFunc: func(r *Response) {
fmt.Println(string(r.Body)) fmt.Println(string(r.Body))
r.Gezer.Get("http://api.ipify.org") r.Geziyor.Get("http://api.ipify.org")
}, },
}) })
gezer.Start() geziyor.Start()
} }
func TestGezer_StartURLs_HTML(t *testing.T) { func TestGeziyor_StartURLs_HTML(t *testing.T) {
gezer := NewGezer(Opt{ geziyor := NewGeziyor(Opt{
StartURLs: []string{"http://quotes.toscrape.com/"}, StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) { ParseFunc: func(r *Response) {
r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) { r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
@ -36,25 +36,25 @@ func TestGezer_StartURLs_HTML(t *testing.T) {
// Next Page // Next Page
if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok { if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
go r.Gezer.Get(r.JoinURL(href)) go r.Geziyor.Get(r.JoinURL(href))
} }
}, },
}) })
gezer.Start() geziyor.Start()
} }
func TestGezer_Concurrent_Requests(t *testing.T) { func TestGeziyor_Concurrent_Requests(t *testing.T) {
gezer := NewGezer(Opt{ geziyor := NewGeziyor(Opt{
AllowedDomains: []string{"quotes.toscrape.com"}, AllowedDomains: []string{"quotes.toscrape.com"},
StartURLs: []string{"http://quotes.toscrape.com/"}, StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) { ParseFunc: func(r *Response) {
//r.Exports <- map[string]interface{}{"href": r.Request.URL.String()} //r.Exports <- map[string]interface{}{"href": r.Request.URL.String()}
r.Doc.Find("a").Each(func(i int, s *goquery.Selection) { r.Doc.Find("a").Each(func(i int, s *goquery.Selection) {
if href, ok := s.Attr("href"); ok { if href, ok := s.Attr("href"); ok {
go r.Gezer.Get(r.JoinURL(href)) go r.Geziyor.Get(r.JoinURL(href))
} }
}) })
}, },
}) })
gezer.Start() geziyor.Start()
} }

2
go.mod
View File

@ -1,4 +1,4 @@
module github.com/gogezer/gezer module github.com/geziyor/geziyor
go 1.12 go 1.12

View File

@ -1,4 +1,4 @@
package gezer package geziyor
import ( import (
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
@ -11,7 +11,7 @@ type Response struct {
Body []byte Body []byte
Doc *goquery.Document Doc *goquery.Document
Gezer *Gezer Geziyor *Geziyor
Exports chan map[string]interface{} Exports chan map[string]interface{}
} }