From a311a0f998035e4f140cb42c63322ac00c6d6536 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Musab=20G=C3=BCltekin?=
Date: Tue, 11 Jun 2019 20:42:22 +0300
Subject: [PATCH] CSV exporter support added. Not finished for map type.

---
Review note: the exporter/csv.go hunk was revised (pointer receiver,
nil-writer guard, map fields ordered by key, write errors reported).
The blob id on its index line was not recomputed.

 .gitignore      |  2 +-
 exporter/csv.go | 93 +++++++++++++++++++++++++++++++++++++++++++++++++
 geziyor.go      |  2 +-
 geziyor_test.go |  2 ++
 4 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 exporter/csv.go

diff --git a/.gitignore b/.gitignore
index cb60bc9..38c9c13 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,4 @@
 .idea/
 
 # Output files
-out.json
+out.*
diff --git a/exporter/csv.go b/exporter/csv.go
new file mode 100644
index 0000000..b4d8f79
--- /dev/null
+++ b/exporter/csv.go
@@ -0,0 +1,93 @@
+package exporter
+
+import (
+	"encoding/csv"
+	"fmt"
+	"github.com/geziyor/geziyor"
+	"os"
+	"reflect"
+	"sort"
+	"sync"
+)
+
+// CSVExporter exports response data as CSV streaming file.
+// It holds a sync.Once and the open file, so it must be used through a
+// pointer (&CSVExporter{}) and never copied after first use.
+type CSVExporter struct {
+	Filename string
+
+	once   sync.Once
+	file   *os.File
+	writer *csv.Writer
+}
+
+// Export appends every value received from response.Exports to the CSV file
+// as one record, flushing after each record. The file named by Filename
+// ("out.csv" when empty) is opened once, on the first call. Errors are
+// reported on stderr because the Exporter interface returns nothing.
+func (e *CSVExporter) Export(response *geziyor.Response) {
+
+	// Create File
+	e.once.Do(func() {
+		// Default Filename
+		filename := e.Filename
+		if filename == "" {
+			filename = "out.csv"
+		}
+
+		newFile, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "output file creation error: %v\n", err)
+			return
+		}
+		e.file = newFile
+		e.writer = csv.NewWriter(e.file)
+	})
+
+	// Export data as responses came
+	for res := range response.Exports {
+		// Without an output file there is nothing to write to. Keep
+		// receiving instead of returning so a sender is not left blocked.
+		if e.writer == nil {
+			continue
+		}
+
+		if err := e.writer.Write(csvRecord(res)); err != nil {
+			fmt.Fprintf(os.Stderr, "csv export error: %v\n", err)
+			continue
+		}
+		e.writer.Flush()
+		if err := e.writer.Error(); err != nil {
+			fmt.Fprintf(os.Stderr, "csv export error: %v\n", err)
+		}
+	}
+}
+
+// csvRecord converts one exported value to the fields of a CSV record.
+// Maps and slices yield one field per element, formatted with fmt.Sprint.
+// Any other kind yields an empty record.
+func csvRecord(res interface{}) []string {
+	var values []string
+
+	val := reflect.ValueOf(res)
+	switch val.Kind() {
+	// TODO: Header row (the map keys) is not written yet.
+	case reflect.Map:
+		// Map iteration order is random. Order the fields by key so that
+		// every record has its columns in the same position.
+		keys := val.MapKeys()
+		sort.Slice(keys, func(i, j int) bool {
+			return fmt.Sprint(keys[i]) < fmt.Sprint(keys[j])
+		})
+		for _, key := range keys {
+			values = append(values, fmt.Sprint(val.MapIndex(key)))
+		}
+
+	case reflect.Slice:
+		for i := 0; i < val.Len(); i++ {
+			values = append(values, fmt.Sprint(val.Index(i)))
+		}
+	}
+
+	return values
+}
diff --git a/geziyor.go b/geziyor.go
index 4f65bb1..a65b719 100644
--- a/geziyor.go
+++ b/geziyor.go
@@ -19,7 +19,7 @@ import (
 
 // Exporter interface is for extracting data to external resources
 type Exporter interface {
-	Export(exports *Response)
+	Export(response *Response)
 }
 
 // Geziyor is our main scraper type
diff --git a/geziyor_test.go b/geziyor_test.go
index 499e129..1d34112 100644
--- a/geziyor_test.go
+++ b/geziyor_test.go
@@ -45,12 +45,14 @@ func quotesParse(r *geziyor.Response) {
 	r.DocHTML.Find("div.quote").Each(func(i int, s *goquery.Selection) {
 		// Export Data
 		r.Exports <- map[string]interface{}{
+			"number": i,
 			"text":   s.Find("span.text").Text(),
 			"author": s.Find("small.author").Text(),
 			"tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string {
 				return s.Text()
 			}),
 		}
+		//r.Exports <- []string{s.Find("span.text").Text(), s.Find("small.author").Text()}
 	})
 
 	// Next Page