JSON renamed to JSONLine. JSON List support added.

This commit is contained in:
Musab Gültekin
2019-07-14 03:30:59 +03:00
parent d19465c44a
commit dfabcb84fd
3 changed files with 81 additions and 18 deletions

View File

@ -21,26 +21,16 @@ See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for
## Status
The project is in the **development phase**. We therefore highly recommend using Geziyor with Go modules.
## Examples
Simple usage
## Usage
```go
geziyor.NewGeziyor(&geziyor.Options{
StartURLs: []string{"http://api.ipify.org"},
ParseFunc: func(g *geziyor.Geziyor, r *client.Response) {
fmt.Println(string(r.Body))
},
}).Start()
```
Advanced usage
This example extracts all quotes from *quotes.toscrape.com* and exports them to a JSON file.
```go
func main() {
geziyor.NewGeziyor(&geziyor.Options{
StartURLs: []string{"http://quotes.toscrape.com/"},
ParseFunc: quotesParse,
Exporters: []export.Exporter{export.JSON{}},
Exporters: []export.Exporter{&export.JSON{}},
}).Start()
}

View File

@ -1,6 +1,7 @@
package export
import (
"bytes"
"encoding/json"
"github.com/geziyor/geziyor/internal"
"log"
@ -8,7 +9,7 @@ import (
)
// JSON exports response data as JSON streaming file
type JSON struct {
type JSONLine struct {
FileName string
EscapeHTML bool
Prefix string
@ -16,7 +17,7 @@ type JSON struct {
}
// Export exports response data as JSON streaming file
func (e *JSON) Export(exports chan interface{}) {
func (e *JSONLine) Export(exports chan interface{}) {
// Create or append file
file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
@ -37,3 +38,58 @@ func (e *JSON) Export(exports chan interface{}) {
}
}
}
// JSON is an exporter that writes every exported value into one file as an
// element of a single JSON list.
type JSON struct {
	// FileName is the output path. It is passed through internal.DefaultString
	// together with "out.json" (presumably the fallback used when empty).
	FileName string
	// EscapeHTML is forwarded to the JSON encoder's SetEscapeHTML.
	EscapeHTML bool
	// Prefix and Indent are forwarded to the JSON encoder's SetIndent.
	Prefix, Indent string
}
// Export drains exports and writes every received value into the output file
// as one element of a single JSON list. It returns once exports is closed.
//
// The output file is truncated first: appending a second list to an existing
// one would not produce valid JSON. Values that cannot be encoded are logged
// and skipped. Write failures are logged, and the channel is still drained so
// that senders are never left blocked.
func (e *JSON) Export(exports chan interface{}) {
	// O_APPEND is deliberately not used: positioned writes are rejected (or
	// silently redirected to the end of the file) in append mode, which made
	// the former "overwrite the last comma" step unreliable.
	file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		log.Printf("Output file creation error: %v\n", err)
		return
	}
	defer file.Close()

	// write logs a failed write instead of silently dropping it.
	write := func(p []byte) {
		if _, err := file.Write(p); err != nil {
			log.Printf("JSON writing error on exporter: %v\n", err)
		}
	}

	write([]byte("[\n"))

	// Export data as responses come in. The separator is written before every
	// element except the first, so no trailing comma ever has to be removed
	// and an empty export still yields a valid "[\n]\n" document.
	written := 0
	for res := range exports {
		data, err := jsonMarshalLine(res, e.EscapeHTML, e.Prefix, e.Indent)
		if err != nil {
			log.Printf("JSON encoding error on exporter: %v\n", err)
			continue
		}
		if written > 0 {
			write([]byte(",\n"))
		}
		// jsonMarshalLine terminates each element with ",\n"; strip it here.
		write(bytes.TrimSuffix(data, []byte(",\n")))
		written++
	}

	if written > 0 {
		write([]byte("\n"))
	}
	write([]byte("]\n"))
}
// jsonMarshalLine encodes t as one element line of a JSON list: a leading tab,
// the JSON encoding of t, and a trailing comma plus newline.
//
// escapeHTML is passed to the encoder's SetEscapeHTML; prefix and indent are
// passed to its SetIndent. If t cannot be encoded, it returns a nil slice and
// the encoder's error.
func jsonMarshalLine(t interface{}, escapeHTML bool, prefix string, indent string) ([]byte, error) {
	buffer := &bytes.Buffer{}
	buffer.WriteByte('\t') // Tab char

	encoder := json.NewEncoder(buffer)
	encoder.SetEscapeHTML(escapeHTML)
	encoder.SetIndent(prefix, indent)
	if err := encoder.Encode(t); err != nil {
		// Never hand a partial element back to the caller.
		return nil, err
	}

	buffer.Truncate(buffer.Len() - 1) // Encode terminates its output with a newline; remove it
	buffer.WriteString(",\n")         // Write comma and newline char
	return buffer.Bytes(), nil
}

View File

@ -8,8 +8,8 @@ import (
"time"
)
func TestJSONExporter_Export(t *testing.T) {
exporter := &JSON{
func TestJSONLineExporter_Export(t *testing.T) {
exporter := &JSONLine{
FileName: "out.json",
Indent: " ",
}
@ -19,9 +19,26 @@ func TestJSONExporter_Export(t *testing.T) {
exports <- map[string]string{"key": "value"}
close(exports)
time.Sleep(time.Millisecond)
time.Sleep(time.Millisecond) // Wait for writing to disk
contents, err := ioutil.ReadFile(exporter.FileName)
assert.NoError(t, err)
assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents))
}
// TestJSONExporter_Export checks that a single exported value ends up in the
// output file wrapped in a JSON list.
func TestJSONExporter_Export(t *testing.T) {
	exporter := &JSON{
		FileName: "out.json",
	}
	_ = os.Remove(exporter.FileName) // Start from a clean slate; a missing file is fine

	exports := make(chan interface{})
	done := make(chan struct{})
	go func() {
		defer close(done)
		exporter.Export(exports)
	}()

	exports <- map[string]string{"key": "value"}
	close(exports)
	<-done // Export has returned, so the file is fully written and closed

	contents, err := ioutil.ReadFile(exporter.FileName)
	assert.NoError(t, err)
	assert.Equal(t, "[\n\t{\"key\":\"value\"}\n]\n", string(contents))
}