CSV exporter support added. Not finished for map type.
parent bbdc3bcacd
commit a311a0f998
.gitignore (vendored, 2 changes)
@@ -18,4 +18,4 @@
.idea/

# Output files
-out.json
+out.*
exporter/csv.go (new file, 61 lines)
@@ -0,0 +1,61 @@
package exporter

import (
	"encoding/csv"
	"fmt"
	"github.com/geziyor/geziyor"
	"os"
	"reflect"
	"sync"
)

// CSVExporter exports response data to a CSV file as a stream of records.
type CSVExporter struct {
	Filename string

	once   sync.Once
	file   *os.File
	writer *csv.Writer
}

func (e *CSVExporter) Export(response *geziyor.Response) {

	// Default Filename
	if e.Filename == "" {
		e.Filename = "out.csv"
	}

	// Create the output file and CSV writer only once.
	e.once.Do(func() {
		newFile, err := os.OpenFile(e.Filename, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
		if err != nil {
			fmt.Fprintf(os.Stderr, "output file creation error: %v\n", err)
			return
		}
		e.file = newFile
		e.writer = csv.NewWriter(e.file)
	})

	// Export data as responses come in.
	for res := range response.Exports {
		var values []string

		val := reflect.ValueOf(res)
		switch val.Kind() {
		// TODO: Map type support is temporary. Ordering is wrong. Needs to be sorted by map keys (CSV headers).
		case reflect.Map:
			iter := val.MapRange()
			for iter.Next() {
				values = append(values, fmt.Sprint(iter.Value()))
			}

		case reflect.Slice:
			for i := 0; i < val.Len(); i++ {
				values = append(values, fmt.Sprint(val.Index(i)))
			}
		}

		e.writer.Write(values)
		e.writer.Flush()
	}
}
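The reflect.Map branch above appends values in Go's randomized map-iteration order, which is what the commit message flags as unfinished. Below is a minimal sketch, not part of this commit, of one way the TODO could go: sort the map keys so every record uses the same column order, and reuse the sorted keys as a header row. The mapToRecord helper name is hypothetical.

package exporter

import (
	"fmt"
	"reflect"
	"sort"
)

// mapToRecord flattens a map export into a CSV record with a deterministic
// column order by sorting the keys; the sorted keys double as a header row.
func mapToRecord(val reflect.Value) (header, record []string) {
	keys := val.MapKeys()
	sort.Slice(keys, func(i, j int) bool {
		return fmt.Sprint(keys[i]) < fmt.Sprint(keys[j])
	})
	for _, k := range keys {
		header = append(header, fmt.Sprint(k))
		record = append(record, fmt.Sprint(val.MapIndex(k)))
	}
	return header, record
}

The case reflect.Map branch could then call mapToRecord, write the header the first time through, and append record as the row values.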
@@ -19,7 +19,7 @@ import (

// Exporter interface is for extracting data to external resources
type Exporter interface {
-	Export(exports *Response)
+	Export(response *Response)
}

// Geziyor is our main scraper type
@@ -45,12 +45,14 @@ func quotesParse(r *geziyor.Response) {
	r.DocHTML.Find("div.quote").Each(func(i int, s *goquery.Selection) {
		// Export Data
		r.Exports <- map[string]interface{}{
			"number": i,
			"text":   s.Find("span.text").Text(),
			"author": s.Find("small.author").Text(),
			"tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string {
				return s.Text()
			}),
		}
		//r.Exports <- []string{s.Find("span.text").Text(), s.Find("small.author").Text()}
	})

	// Next Page
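Not part of this diff: a hypothetical sketch of how the new exporter might be wired into a crawl. The geziyor.NewGeziyor / geziyor.Options / Start calls and the StartURLs, ParseFunc and Exporters fields are assumptions based on the library's public API and are not confirmed by this commit; only r.DocHTML, r.Exports and the Exporter interface appear in the diff.

package main

import (
	"github.com/geziyor/geziyor"
	"github.com/geziyor/geziyor/exporter"
)

func main() {
	geziyor.NewGeziyor(geziyor.Options{
		StartURLs: []string{"http://quotes.toscrape.com/"},
		ParseFunc: func(r *geziyor.Response) {
			// Slices are the code path the commit message calls finished,
			// so send one []string record per page.
			r.Exports <- []string{r.DocHTML.Find("title").Text()}
		},
		// Assumed registration point for exporters; a pointer is used so the
		// exporter's sync.Once, file, and writer persist across calls.
		Exporters: []geziyor.Exporter{&exporter.CSVExporter{Filename: "out.csv"}},
	}).Start()
}

If the Options field names differ at this revision, only the registration line should need adjusting; the exporter itself just needs something to range over response.Exports.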