From bbdc3bcacd8457db6499691656734cb37258bfa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Musab=20G=C3=BCltekin?= Date: Tue, 11 Jun 2019 18:59:37 +0300 Subject: [PATCH] Exporters made optional, as some scrapers only want to see data in console. --- export.go | 6 ------ exporter/json.go | 5 +++-- geziyor.go | 19 ++++++++++++++----- geziyor_test.go | 2 ++ 4 files changed, 19 insertions(+), 13 deletions(-) delete mode 100644 export.go diff --git a/export.go b/export.go deleted file mode 100644 index 294af74..0000000 --- a/export.go +++ /dev/null @@ -1,6 +0,0 @@ -package geziyor - -// Exporter interface is for extracting data to external resources -type Exporter interface { - Export(exports chan interface{}) -} diff --git a/exporter/json.go b/exporter/json.go index cafad54..610927c 100644 --- a/exporter/json.go +++ b/exporter/json.go @@ -3,6 +3,7 @@ package exporter import ( "encoding/json" "fmt" + "github.com/geziyor/geziyor" "os" "sync" ) @@ -17,7 +18,7 @@ type JSONExporter struct { } // Export exports response data as JSON streaming file -func (e JSONExporter) Export(exports chan interface{}) { +func (e JSONExporter) Export(response *geziyor.Response) { // Default Filename if e.Filename == "" { @@ -35,7 +36,7 @@ func (e JSONExporter) Export(exports chan interface{}) { }) // Export data as responses came - for res := range exports { + for res := range response.Exports { encoder := json.NewEncoder(e.file) encoder.SetEscapeHTML(e.EscapeHTML) encoder.Encode(res) diff --git a/geziyor.go b/geziyor.go index 0673aea..4f65bb1 100644 --- a/geziyor.go +++ b/geziyor.go @@ -4,7 +4,6 @@ import ( "bytes" "github.com/PuerkitoBio/goquery" "github.com/fpfeng/httpcache" - "github.com/geziyor/geziyor/exporter" "golang.org/x/net/html/charset" "io" "io/ioutil" @@ -18,6 +17,11 @@ import ( "time" ) +// Exporter interface is for extracting data to external resources +type Exporter interface { + Export(exports *Response) +} + // Geziyor is our main scraper type type Geziyor struct { client *http.Client @@ -68,9 +72,6 @@ func NewGeziyor(opt Options) *Geziyor { if opt.LogDisabled { log.SetOutput(ioutil.Discard) } - if len(opt.Exporters) == 0 { - geziyor.opt.Exporters = []Exporter{exporter.JSONExporter{}} - } if opt.MaxBodySize == 0 { geziyor.opt.MaxBodySize = 1024 * 1024 * 1024 // 1GB } @@ -189,7 +190,15 @@ func (g *Geziyor) Do(req *http.Request, callback func(resp *Response)) { // Export Functions for _, exp := range g.opt.Exporters { - go exp.Export(response.Exports) + go exp.Export(&response) + } + + // Drain exports chan if no exporters added + if len(g.opt.Exporters) == 0 { + go func() { + for range response.Exports { + } + }() } // Callbacks diff --git a/geziyor_test.go b/geziyor_test.go index d9dc628..499e129 100644 --- a/geziyor_test.go +++ b/geziyor_test.go @@ -5,6 +5,7 @@ import ( "github.com/PuerkitoBio/goquery" "github.com/fpfeng/httpcache" "github.com/geziyor/geziyor" + "github.com/geziyor/geziyor/exporter" "math/rand" "testing" "time" @@ -36,6 +37,7 @@ func TestQuotes(t *testing.T) { geziyor.NewGeziyor(geziyor.Options{ StartURLs: []string{"http://quotes.toscrape.com/"}, ParseFunc: quotesParse, + Exporters: []geziyor.Exporter{exporter.JSONExporter{}}, }).Start() }