CSV exporter support added. Not finished for map type.
This commit is contained in:
parent
bbdc3bcacd
commit
a311a0f998
2
.gitignore
vendored
2
.gitignore
vendored
@ -18,4 +18,4 @@
|
|||||||
.idea/
|
.idea/
|
||||||
|
|
||||||
# Output files
|
# Output files
|
||||||
out.json
|
out.*
|
||||||
|
61
exporter/csv.go
Normal file
61
exporter/csv.go
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
package exporter
|
||||||
|
|
||||||
|
import (
	"encoding/csv"
	"fmt"
	"os"
	"reflect"
	"sort"
	"sync"

	"github.com/geziyor/geziyor"
)
|
||||||
|
|
||||||
|
// CSVExporter writes exported response data to a CSV file, one record per
// exported value.
type CSVExporter struct {
	// Filename is the path of the CSV output file.
	Filename string

	// Unexported output state: the output file handle, the CSV writer and a
	// sync.Once for one-time setup.
	file   *os.File
	writer *csv.Writer
	once   sync.Once
}
|
||||||
|
|
||||||
|
func (e CSVExporter) Export(response *geziyor.Response) {
|
||||||
|
|
||||||
|
// Default Filename
|
||||||
|
if e.Filename == "" {
|
||||||
|
e.Filename = "out.csv"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create File
|
||||||
|
e.once.Do(func() {
|
||||||
|
newFile, err := os.OpenFile(e.Filename, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "output file creation error: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
e.file = newFile
|
||||||
|
e.writer = csv.NewWriter(e.file)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Export data as responses came
|
||||||
|
for res := range response.Exports {
|
||||||
|
var values []string
|
||||||
|
|
||||||
|
val := reflect.ValueOf(res)
|
||||||
|
switch val.Kind() {
|
||||||
|
// TODO: Map type support is temporary. Ordering is wrong. Needs to be sorted by map keys (CSV headers).
|
||||||
|
case reflect.Map:
|
||||||
|
iter := val.MapRange()
|
||||||
|
for iter.Next() {
|
||||||
|
values = append(values, fmt.Sprint(iter.Value()))
|
||||||
|
}
|
||||||
|
|
||||||
|
case reflect.Slice:
|
||||||
|
for i := 0; i < val.Len(); i++ {
|
||||||
|
values = append(values, fmt.Sprint(val.Index(i)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
e.writer.Write(values)
|
||||||
|
e.writer.Flush()
|
||||||
|
}
|
||||||
|
}
|
@ -19,7 +19,7 @@ import (
|
|||||||
|
|
||||||
// Exporter interface is for extracting data to external resources
|
// Exporter interface is for extracting data to external resources
|
||||||
type Exporter interface {
|
type Exporter interface {
|
||||||
Export(exports *Response)
|
Export(response *Response)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Geziyor is our main scraper type
|
// Geziyor is our main scraper type
|
||||||
|
@ -45,12 +45,14 @@ func quotesParse(r *geziyor.Response) {
|
|||||||
r.DocHTML.Find("div.quote").Each(func(i int, s *goquery.Selection) {
|
r.DocHTML.Find("div.quote").Each(func(i int, s *goquery.Selection) {
|
||||||
// Export Data
|
// Export Data
|
||||||
r.Exports <- map[string]interface{}{
|
r.Exports <- map[string]interface{}{
|
||||||
|
"number": i,
|
||||||
"text": s.Find("span.text").Text(),
|
"text": s.Find("span.text").Text(),
|
||||||
"author": s.Find("small.author").Text(),
|
"author": s.Find("small.author").Text(),
|
||||||
"tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string {
|
"tags": s.Find("div.tags > a.tag").Map(func(_ int, s *goquery.Selection) string {
|
||||||
return s.Text()
|
return s.Text()
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
//r.Exports <- []string{s.Find("span.text").Text(), s.Find("small.author").Text()}
|
||||||
})
|
})
|
||||||
|
|
||||||
// Next Page
|
// Next Page
|
||||||
|
Loading…
x
Reference in New Issue
Block a user