Add an Exports channel to Response. Data can now be exported by sending it to this channel.

This commit is contained in:
Musab Gültekin 2019-06-07 20:52:31 +03:00
parent e58b08cbd6
commit 5271095c8e
3 changed files with 28 additions and 2 deletions

View File

@ -2,10 +2,12 @@ package gezer
import ( import (
"bytes" "bytes"
"encoding/json"
"fmt" "fmt"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"os"
"sync" "sync"
"time" "time"
) )
@ -75,8 +77,25 @@ func (g *Gezer) getRequest(url string) {
Body: body, Body: body,
Doc: doc, Doc: doc,
Gezer: g, Gezer: g,
Exports: make(chan map[string]interface{}, 1),
} }
// Export goroutine: drains response.Exports and writes each item to out.json as JSON
go func() {
file, err := os.Create("out.json")
if err != nil {
fmt.Fprintf(os.Stderr, "output file creation error: %v", err)
return
}
for res := range response.Exports {
fmt.Println(res)
_ = json.NewEncoder(file).Encode(res)
}
}()
// ParseFunc response // ParseFunc response
g.opt.ParseFunc(&response) g.opt.ParseFunc(&response)
} }

View File

@ -21,7 +21,13 @@ func TestGezer_StartURLs_HTML(t *testing.T) {
StartURLs: []string{"http://quotes.toscrape.com/"}, StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: func(r *Response) { ParseFunc: func(r *Response) {
r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) { r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
fmt.Println(i, s.Find("span.text").Text(), s.Find("small.author").Text()) r.Exports <- map[string]interface{}{
"text": s.Find("span.text").Text(),
"author": s.Find("small.author").Text(),
"tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string {
return s.Text()
}),
}
}) })
if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok { if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
r.Gezer.Get(r.JoinURL(href)) r.Gezer.Get(r.JoinURL(href))

View File

@ -11,7 +11,8 @@ type Response struct {
Body []byte Body []byte
Doc *goquery.Document Doc *goquery.Document
Gezer *Gezer Gezer *Gezer
Exports chan map[string]interface{}
} }
func (r *Response) JoinURL(relativeURL string) string { func (r *Response) JoinURL(relativeURL string) string {