CSV exporter support added. Map type support is not finished yet (column ordering is not stable).

This commit is contained in:
Musab Gültekin 2019-06-11 20:42:22 +03:00
parent bbdc3bcacd
commit a311a0f998
4 changed files with 65 additions and 2 deletions

2
.gitignore vendored
View File

@ -18,4 +18,4 @@
.idea/
# Output files
out.json
out.*

61
exporter/csv.go Normal file
View File

@ -0,0 +1,61 @@
package exporter
import (
	"encoding/csv"
	"fmt"
	"os"
	"reflect"
	"sort"
	"sync"

	"github.com/geziyor/geziyor"
)
// CSVExporter exports response data as CSV streaming file.
//
// NOTE(review): Export is declared on a value receiver, so every call works on
// a copy of this struct; state stored in the unexported fields does not carry
// over from one Export call to the next.
type CSVExporter struct {
// Filename is the path of the output file. Export falls back to "out.csv"
// when it is empty.
Filename string
// once wraps the file-opening step in Export.
once sync.Once
// file is the output file handle opened by Export.
file *os.File
// writer is the CSV writer Export creates on top of file.
writer *csv.Writer
}
// Export drains response.Exports and appends every exported value to the CSV
// file named by Filename ("out.csv" when Filename is empty), one record per
// export. Each record is flushed immediately so the file can be followed while
// the scrape is still running.
//
// Slices and arrays are written element by element. Maps are written as their
// values ordered by the string form of their keys, so every row produced from
// the same key set has the same column order. Exports of any other kind, and
// exports without elements, are skipped instead of producing blank lines.
//
// Errors are reported on stderr; Export itself never returns one because the
// Exporter interface has no error result.
//
// The receiver is a value, so the file opened here belongs to this call only
// and is closed before Export returns.
func (e CSVExporter) Export(response *geziyor.Response) {
	// Default Filename
	if e.Filename == "" {
		e.Filename = "out.csv"
	}

	// Create File
	e.once.Do(func() {
		newFile, err := os.OpenFile(e.Filename, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
		if err != nil {
			fmt.Fprintf(os.Stderr, "output file creation error: %v\n", err)
			return
		}
		e.file = newFile
		e.writer = csv.NewWriter(e.file)
	})

	// Without a writer nothing can be exported. Keep receiving so the senders
	// on response.Exports are not left waiting on a reader that went away
	// (like the loop below, this relies on the channel being closed).
	if e.writer == nil {
		for range response.Exports {
		}
		return
	}
	defer func() {
		if err := e.file.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "output file close error: %v\n", err)
		}
	}()

	// Export data as responses came
	for res := range response.Exports {
		values := csvRecord(res)
		if len(values) == 0 {
			continue
		}
		if err := e.writer.Write(values); err != nil {
			fmt.Fprintf(os.Stderr, "csv write error: %v\n", err)
			continue
		}
		e.writer.Flush()
		if err := e.writer.Error(); err != nil {
			fmt.Fprintf(os.Stderr, "csv flush error: %v\n", err)
		}
	}
}

// csvRecord converts a single exported value into the fields of one CSV
// record. It returns nil for kinds that have no row representation.
func csvRecord(res interface{}) []string {
	val := reflect.ValueOf(res)

	switch val.Kind() {
	case reflect.Map:
		// Map iteration order is random; sort the keys so columns stay stable.
		keys := val.MapKeys()
		sort.Slice(keys, func(i, j int) bool {
			return fmt.Sprint(keys[i]) < fmt.Sprint(keys[j])
		})
		values := make([]string, 0, len(keys))
		for _, key := range keys {
			values = append(values, fmt.Sprint(val.MapIndex(key)))
		}
		return values

	case reflect.Slice, reflect.Array:
		values := make([]string, 0, val.Len())
		for i := 0; i < val.Len(); i++ {
			values = append(values, fmt.Sprint(val.Index(i)))
		}
		return values
	}

	return nil
}

View File

@ -19,7 +19,7 @@ import (
// Exporter interface is for extracting data to external resources
type Exporter interface {
Export(exports *Response)
Export(response *Response)
}
// Geziyor is our main scraper type

View File

@ -45,12 +45,14 @@ func quotesParse(r *geziyor.Response) {
r.DocHTML.Find("div.quote").Each(func(i int, s *goquery.Selection) {
// Export Data
r.Exports <- map[string]interface{}{
"number": i,
"text": s.Find("span.text").Text(),
"author": s.Find("small.author").Text(),
"tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string {
return s.Text()
}),
}
//r.Exports <- []string{s.Find("span.text").Text(), s.Find("small.author").Text()}
})
// Next Page