Exporters made optional, as some scrapers only want to see data in the console.

This commit is contained in:
Musab Gültekin 2019-06-11 18:59:37 +03:00
parent b8305d5e1a
commit bbdc3bcacd
4 changed files with 19 additions and 13 deletions

View File

@ -1,6 +0,0 @@
package geziyor
// Exporter interface is for extracting data to external resources.
//
// Implementations receive the exported items over the exports channel. The
// scraper starts Export in a separate goroutine for each configured exporter.
type Exporter interface {
// Export consumes the items sent on the exports channel.
Export(exports chan interface{})
}

View File

@ -3,6 +3,7 @@ package exporter
import (
"encoding/json"
"fmt"
"github.com/geziyor/geziyor"
"os"
"sync"
)
@ -17,7 +18,7 @@ type JSONExporter struct {
}
// Export exports response data as JSON streaming file
func (e JSONExporter) Export(exports chan interface{}) {
func (e JSONExporter) Export(response *geziyor.Response) {
// Default Filename
if e.Filename == "" {
@ -35,7 +36,7 @@ func (e JSONExporter) Export(exports chan interface{}) {
})
// Export data as responses came
for res := range exports {
for res := range response.Exports {
encoder := json.NewEncoder(e.file)
encoder.SetEscapeHTML(e.EscapeHTML)
encoder.Encode(res)

View File

@ -4,7 +4,6 @@ import (
"bytes"
"github.com/PuerkitoBio/goquery"
"github.com/fpfeng/httpcache"
"github.com/geziyor/geziyor/exporter"
"golang.org/x/net/html/charset"
"io"
"io/ioutil"
@ -18,6 +17,11 @@ import (
"time"
)
// Exporter is implemented by types that extract scraped data to an external
// resource (the JSON exporter, for example, writes to a file).
//
// Export is handed the response whose Exports channel carries the items to
// export. The scraper starts Export in its own goroutine for every configured
// exporter; when no exporter is configured, the scraper drains that channel
// itself.
type Exporter interface {
	// Export receives the items sent on response.Exports and exports them.
	// The parameter is named response (not exports) because it is the whole
	// *Response, matching the name used by the JSON exporter implementation.
	Export(response *Response)
}
// Geziyor is our main scraper type
type Geziyor struct {
client *http.Client
@ -68,9 +72,6 @@ func NewGeziyor(opt Options) *Geziyor {
if opt.LogDisabled {
log.SetOutput(ioutil.Discard)
}
if len(opt.Exporters) == 0 {
geziyor.opt.Exporters = []Exporter{exporter.JSONExporter{}}
}
if opt.MaxBodySize == 0 {
geziyor.opt.MaxBodySize = 1024 * 1024 * 1024 // 1GB
}
@ -189,7 +190,15 @@ func (g *Geziyor) Do(req *http.Request, callback func(resp *Response)) {
// Export Functions
for _, exp := range g.opt.Exporters {
go exp.Export(response.Exports)
go exp.Export(&response)
}
// Drain exports chan if no exporters added
if len(g.opt.Exporters) == 0 {
go func() {
for range response.Exports {
}
}()
}
// Callbacks

View File

@ -5,6 +5,7 @@ import (
"github.com/PuerkitoBio/goquery"
"github.com/fpfeng/httpcache"
"github.com/geziyor/geziyor"
"github.com/geziyor/geziyor/exporter"
"math/rand"
"testing"
"time"
@ -36,6 +37,7 @@ func TestQuotes(t *testing.T) {
geziyor.NewGeziyor(geziyor.Options{
StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: quotesParse,
Exporters: []geziyor.Exporter{exporter.JSONExporter{}},
}).Start()
}