JSON renamed to JSONLine. JSON List support added.
This commit is contained in:
16
README.md
16
README.md
@ -21,26 +21,16 @@ See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for
|
||||
## Status
|
||||
The project is in **development phase**, so we highly recommend using Geziyor with Go modules.
|
||||
|
||||
## Examples
|
||||
Simple usage
|
||||
## Usage
|
||||
|
||||
```go
|
||||
geziyor.NewGeziyor(&geziyor.Options{
|
||||
StartURLs: []string{"http://api.ipify.org"},
|
||||
ParseFunc: func(g *geziyor.Geziyor, r *client.Response) {
|
||||
fmt.Println(string(r.Body))
|
||||
},
|
||||
}).Start()
|
||||
```
|
||||
|
||||
Advanced usage
|
||||
This example extracts all quotes from *quotes.toscrape.com* and exports them to a JSON file.
|
||||
|
||||
```go
|
||||
func main() {
|
||||
geziyor.NewGeziyor(&geziyor.Options{
|
||||
StartURLs: []string{"http://quotes.toscrape.com/"},
|
||||
ParseFunc: quotesParse,
|
||||
Exporters: []export.Exporter{export.JSON{}},
|
||||
Exporters: []export.Exporter{&export.JSON{}},
|
||||
}).Start()
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
package export
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"github.com/geziyor/geziyor/internal"
|
||||
"log"
|
||||
@ -8,7 +9,7 @@ import (
|
||||
)
|
||||
|
||||
// JSONLine exports response data as a JSON streaming file
|
||||
type JSON struct {
|
||||
type JSONLine struct {
|
||||
FileName string
|
||||
EscapeHTML bool
|
||||
Prefix string
|
||||
@ -16,7 +17,7 @@ type JSON struct {
|
||||
}
|
||||
|
||||
// Export exports response data as JSON streaming file
|
||||
func (e *JSON) Export(exports chan interface{}) {
|
||||
func (e *JSONLine) Export(exports chan interface{}) {
|
||||
|
||||
// Create or append file
|
||||
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||
@ -37,3 +38,58 @@ func (e *JSON) Export(exports chan interface{}) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// JSON exports response data as a single JSON array.
type JSON struct {
	// FileName is the output file name; Export passes it to
	// internal.DefaultString together with the fallback "out.json".
	FileName string
	// EscapeHTML is forwarded to json.Encoder.SetEscapeHTML.
	EscapeHTML bool
	// Prefix is forwarded to json.Encoder.SetIndent as the line prefix.
	Prefix string
	// Indent is forwarded to json.Encoder.SetIndent as the indent string.
	Indent string
}
|
||||
|
||||
// Export exports response data as JSON
|
||||
func (e *JSON) Export(exports chan interface{}) {
|
||||
|
||||
// Create or append file
|
||||
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
log.Printf("Output file creation error: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
file.Write([]byte("[\n"))
|
||||
|
||||
// Export data as responses came
|
||||
for res := range exports {
|
||||
data, err := jsonMarshalLine(res, e.EscapeHTML, e.Prefix, e.Indent)
|
||||
if err != nil {
|
||||
log.Printf("JSON encoding error on exporter: %v\n", err)
|
||||
continue
|
||||
}
|
||||
file.Write(data)
|
||||
}
|
||||
|
||||
// Override on last comma
|
||||
stat, err := file.Stat()
|
||||
if err != nil {
|
||||
file.Write([]byte("]\n"))
|
||||
return
|
||||
}
|
||||
file.WriteAt([]byte("\n]\n"), stat.Size()-2)
|
||||
}
|
||||
|
||||
// jsonMarshalLine encodes t as one element of a JSON array body: a leading
// tab, the JSON encoding of t, and a trailing ",\n" in place of the newline
// that json.Encoder.Encode appends.
//
// escapeHTML is passed to Encoder.SetEscapeHTML; prefix and indent are passed
// to Encoder.SetIndent. If t cannot be encoded, it returns a nil slice and the
// encoder's error.
func jsonMarshalLine(t interface{}, escapeHTML bool, prefix string, indent string) ([]byte, error) {
	buffer := &bytes.Buffer{}
	buffer.WriteByte('\t') // Tab char

	encoder := json.NewEncoder(buffer)
	encoder.SetEscapeHTML(escapeHTML)
	encoder.SetIndent(prefix, indent)

	// Write actual data; bail out before touching the buffer on failure so
	// the caller never receives a half-built element.
	if err := encoder.Encode(t); err != nil {
		return nil, err
	}

	buffer.Truncate(buffer.Len() - 1) // Remove last newline char
	buffer.WriteString(",\n")         // Write comma and newline char

	return buffer.Bytes(), nil
}
|
||||
|
@ -8,8 +8,8 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestJSONExporter_Export(t *testing.T) {
|
||||
exporter := &JSON{
|
||||
func TestJSONLineExporter_Export(t *testing.T) {
|
||||
exporter := &JSONLine{
|
||||
FileName: "out.json",
|
||||
Indent: " ",
|
||||
}
|
||||
@ -19,9 +19,26 @@ func TestJSONExporter_Export(t *testing.T) {
|
||||
|
||||
exports <- map[string]string{"key": "value"}
|
||||
close(exports)
|
||||
time.Sleep(time.Millisecond)
|
||||
time.Sleep(time.Millisecond) // Wait for writing to disk
|
||||
|
||||
contents, err := ioutil.ReadFile(exporter.FileName)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents))
|
||||
}
|
||||
|
||||
func TestJSONExporter_Export(t *testing.T) {
|
||||
exporter := &JSON{
|
||||
FileName: "out.json",
|
||||
}
|
||||
_ = os.Remove(exporter.FileName)
|
||||
exports := make(chan interface{})
|
||||
go exporter.Export(exports)
|
||||
|
||||
exports <- map[string]string{"key": "value"}
|
||||
close(exports)
|
||||
time.Sleep(time.Millisecond) // Wait for writing to disk
|
||||
|
||||
contents, err := ioutil.ReadFile(exporter.FileName)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "[\n\t{\"key\":\"value\"}\n]\n", string(contents))
|
||||
}
|
||||
|
Reference in New Issue
Block a user