JSON renamed to JSONLine. JSON List support added.
This commit is contained in:
16
README.md
16
README.md
@ -21,26 +21,16 @@ See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for
|
|||||||
## Status
|
## Status
|
||||||
The project is in **development phase**. Thus, we highly recommend you to use Geziyor with go modules.
|
The project is in **development phase**. Thus, we highly recommend you to use Geziyor with go modules.
|
||||||
|
|
||||||
## Examples
|
## Usage
|
||||||
Simple usage
|
|
||||||
|
|
||||||
```go
|
This example extracts all quotes from *quotes.toscrape.com* and exports them to a JSON file.
|
||||||
geziyor.NewGeziyor(&geziyor.Options{
|
|
||||||
StartURLs: []string{"http://api.ipify.org"},
|
|
||||||
ParseFunc: func(g *geziyor.Geziyor, r *client.Response) {
|
|
||||||
fmt.Println(string(r.Body))
|
|
||||||
},
|
|
||||||
}).Start()
|
|
||||||
```
|
|
||||||
|
|
||||||
Advanced usage
|
|
||||||
|
|
||||||
```go
|
```go
|
||||||
func main() {
|
func main() {
|
||||||
geziyor.NewGeziyor(&geziyor.Options{
|
geziyor.NewGeziyor(&geziyor.Options{
|
||||||
StartURLs: []string{"http://quotes.toscrape.com/"},
|
StartURLs: []string{"http://quotes.toscrape.com/"},
|
||||||
ParseFunc: quotesParse,
|
ParseFunc: quotesParse,
|
||||||
Exporters: []export.Exporter{export.JSON{}},
|
Exporters: []export.Exporter{&export.JSON{}},
|
||||||
}).Start()
|
}).Start()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package export
|
package export
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"github.com/geziyor/geziyor/internal"
|
"github.com/geziyor/geziyor/internal"
|
||||||
"log"
|
"log"
|
||||||
@ -8,7 +9,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// JSON exports response data as JSON streaming file
|
// JSONLine exports response data as JSON streaming file
|
||||||
type JSON struct {
|
type JSONLine struct {
|
||||||
FileName string
|
FileName string
|
||||||
EscapeHTML bool
|
EscapeHTML bool
|
||||||
Prefix string
|
Prefix string
|
||||||
@ -16,7 +17,7 @@ type JSON struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Export exports response data as JSON streaming file
|
// Export exports response data as JSON streaming file
|
||||||
func (e *JSON) Export(exports chan interface{}) {
|
func (e *JSONLine) Export(exports chan interface{}) {
|
||||||
|
|
||||||
// Create or append file
|
// Create or append file
|
||||||
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||||
@ -37,3 +38,58 @@ func (e *JSON) Export(exports chan interface{}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JSON exports response data as JSON
|
||||||
|
type JSON struct {
|
||||||
|
FileName string
|
||||||
|
EscapeHTML bool
|
||||||
|
Prefix string
|
||||||
|
Indent string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Export exports response data as JSON
|
||||||
|
func (e *JSON) Export(exports chan interface{}) {
|
||||||
|
|
||||||
|
// Create or append file
|
||||||
|
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Output file creation error: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
file.Write([]byte("[\n"))
|
||||||
|
|
||||||
|
// Export data as responses came
|
||||||
|
for res := range exports {
|
||||||
|
data, err := jsonMarshalLine(res, e.EscapeHTML, e.Prefix, e.Indent)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("JSON encoding error on exporter: %v\n", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
file.Write(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override on last comma
|
||||||
|
stat, err := file.Stat()
|
||||||
|
if err != nil {
|
||||||
|
file.Write([]byte("]\n"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
file.WriteAt([]byte("\n]\n"), stat.Size()-2)
|
||||||
|
}
|
||||||
|
|
||||||
|
// jsonMarshalLine encodes t as one element of a JSON list: a leading tab
// character, the JSON encoding of t, then ",\n". It behaves like json.Marshal
// but supports escapeHTML and indenting (prefix and indent are passed to
// Encoder.SetIndent).
//
// If t cannot be encoded it returns nil and the encoder's error.
func jsonMarshalLine(t interface{}, escapeHTML bool, prefix string, indent string) ([]byte, error) {
	buffer := &bytes.Buffer{}
	encoder := json.NewEncoder(buffer)
	encoder.SetEscapeHTML(escapeHTML)
	encoder.SetIndent(prefix, indent)

	buffer.WriteByte('\t') // Tab char
	if err := encoder.Encode(t); err != nil {
		// Do not hand back a half-built line together with an error.
		return nil, err
	}
	buffer.Truncate(buffer.Len() - 1) // Remove the newline Encode always appends
	buffer.WriteString(",\n")         // Write comma and newline char

	return buffer.Bytes(), nil
}
|
||||||
|
@ -8,8 +8,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestJSONExporter_Export(t *testing.T) {
|
func TestJSONLineExporter_Export(t *testing.T) {
|
||||||
exporter := &JSON{
|
exporter := &JSONLine{
|
||||||
FileName: "out.json",
|
FileName: "out.json",
|
||||||
Indent: " ",
|
Indent: " ",
|
||||||
}
|
}
|
||||||
@ -19,9 +19,26 @@ func TestJSONExporter_Export(t *testing.T) {
|
|||||||
|
|
||||||
exports <- map[string]string{"key": "value"}
|
exports <- map[string]string{"key": "value"}
|
||||||
close(exports)
|
close(exports)
|
||||||
time.Sleep(time.Millisecond)
|
time.Sleep(time.Millisecond) // Wait for writing to disk
|
||||||
|
|
||||||
contents, err := ioutil.ReadFile(exporter.FileName)
|
contents, err := ioutil.ReadFile(exporter.FileName)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents))
|
assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestJSONExporter_Export(t *testing.T) {
|
||||||
|
exporter := &JSON{
|
||||||
|
FileName: "out.json",
|
||||||
|
}
|
||||||
|
_ = os.Remove(exporter.FileName)
|
||||||
|
exports := make(chan interface{})
|
||||||
|
go exporter.Export(exports)
|
||||||
|
|
||||||
|
exports <- map[string]string{"key": "value"}
|
||||||
|
close(exports)
|
||||||
|
time.Sleep(time.Millisecond) // Wait for writing to disk
|
||||||
|
|
||||||
|
contents, err := ioutil.ReadFile(exporter.FileName)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "[\n\t{\"key\":\"value\"}\n]\n", string(contents))
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user