From 5271095c8e33a26131ef5c3dc58ca3f7977e16e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Musab=20G=C3=BCltekin?= Date: Fri, 7 Jun 2019 20:52:31 +0300 Subject: [PATCH] Export chan added. We can now export data by sending to this chan. --- gezer.go | 19 +++++++++++++++++++ gezer_test.go | 8 +++++++- response.go | 3 ++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/gezer.go b/gezer.go index 20c2f16..fe9d598 100644 --- a/gezer.go +++ b/gezer.go @@ -2,10 +2,12 @@ package gezer import ( "bytes" + "encoding/json" "fmt" "github.com/PuerkitoBio/goquery" "io/ioutil" "net/http" + "os" "sync" "time" ) @@ -75,8 +77,25 @@ func (g *Gezer) getRequest(url string) { Body: body, Doc: doc, Gezer: g, + Exports: make(chan map[string]interface{}, 1), } + // Export Function + go func() { + file, err := os.Create("out.json") + if err != nil { + fmt.Fprintf(os.Stderr, "output file creation error: %v", err) + return + } + + for res := range response.Exports { + fmt.Println(res) + _ = json.NewEncoder(file).Encode(res) + } + + }() + // ParseFunc response g.opt.ParseFunc(&response) + } diff --git a/gezer_test.go b/gezer_test.go index 1156996..2a06ef6 100644 --- a/gezer_test.go +++ b/gezer_test.go @@ -21,7 +21,13 @@ func TestGezer_StartURLs_HTML(t *testing.T) { StartURLs: []string{"http://quotes.toscrape.com/"}, ParseFunc: func(r *Response) { r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) { - fmt.Println(i, s.Find("span.text").Text(), s.Find("small.author").Text()) + r.Exports <- map[string]interface{}{ + "text": s.Find("span.text").Text(), + "author": s.Find("small.author").Text(), + "tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string { + return s.Text() + }), + } }) if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok { r.Gezer.Get(r.JoinURL(href)) diff --git a/response.go b/response.go index 1136fef..ea5d9e5 100644 --- a/response.go +++ b/response.go @@ -11,7 +11,8 @@ type Response struct { Body []byte Doc *goquery.Document - Gezer *Gezer + Gezer *Gezer + Exports chan map[string]interface{} } func (r *Response) JoinURL(relativeURL string) string {