From dfabcb84fdf949f540301dad7b6bef2718466140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Musab=20G=C3=BCltekin?= Date: Sun, 14 Jul 2019 03:30:59 +0300 Subject: [PATCH] JSON renamed to JSONLine. JSON List support added. --- README.md | 16 +++--------- export/json.go | 60 +++++++++++++++++++++++++++++++++++++++++++-- export/json_test.go | 23 ++++++++++++++--- 3 files changed, 81 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 147addd..fcd5ff2 100644 --- a/README.md +++ b/README.md @@ -21,26 +21,16 @@ See scraper [Options](https://godoc.org/github.com/geziyor/geziyor#Options) for ## Status The project is in **development phase**. Thus, we highly recommend you to use Geziyor with go modules. -## Examples -Simple usage +## Usage -```go -geziyor.NewGeziyor(&geziyor.Options{ - StartURLs: []string{"http://api.ipify.org"}, - ParseFunc: func(g *geziyor.Geziyor, r *client.Response) { - fmt.Println(string(r.Body)) - }, -}).Start() -``` - -Advanced usage +This example extracts all quotes from *quotes.toscrape.com* and exports them to a JSON file. 
```go func main() { geziyor.NewGeziyor(&geziyor.Options{ StartURLs: []string{"http://quotes.toscrape.com/"}, ParseFunc: quotesParse, - Exporters: []export.Exporter{export.JSON{}}, + Exporters: []export.Exporter{&export.JSON{}}, }).Start() } diff --git a/export/json.go b/export/json.go index 55cab72..cbaf938 100644 --- a/export/json.go +++ b/export/json.go @@ -1,6 +1,7 @@ package export import ( + "bytes" "encoding/json" "github.com/geziyor/geziyor/internal" "log" @@ -8,7 +9,7 @@ import ( ) // JSON exports response data as JSON streaming file -type JSON struct { +type JSONLine struct { FileName string EscapeHTML bool Prefix string @@ -16,7 +17,7 @@ type JSON struct { } // Export exports response data as JSON streaming file -func (e *JSON) Export(exports chan interface{}) { +func (e *JSONLine) Export(exports chan interface{}) { // Create or append file file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) @@ -37,3 +38,58 @@ func (e *JSON) Export(exports chan interface{}) { } } } + +// JSON exports response data as a JSON list, where every exported item becomes one element of a single array +type JSON struct { + FileName string + EscapeHTML bool + Prefix string + Indent string +} + +// Export writes each value received from exports as one element of a JSON array and returns when the channel is closed or the output file cannot be opened +func (e *JSON) Export(exports chan interface{}) { + + // Create or append file. NOTE(review): O_APPEND starts a new array after any existing content and conflicts with the WriteAt call at the end of this function; confirm whether O_TRUNC was intended + file, err := os.OpenFile(internal.DefaultString(e.FileName, "out.json"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) + if err != nil { + log.Printf("Output file creation error: %v\n", err) + return + } + defer file.Close() + + file.Write([]byte("[\n")) + + // Write every item as it arrives; an item that fails to encode is logged and skipped + for res := range exports { + data, err := jsonMarshalLine(res, e.EscapeHTML, e.Prefix, e.Indent) + if err != nil { + log.Printf("JSON encoding error on exporter: %v\n", err) + continue + } + file.Write(data) + } + + // Replace the trailing comma and newline of the last item with the closing bracket. NOTE(review): os.File.WriteAt is documented to return an error for files opened with O_APPEND and that error is ignored here, so the bracket may never be written; with zero items the offset also lands on the opening bracket. Confirm + stat, err := file.Stat() + if err != nil { + file.Write([]byte("]\n")) + return + } + file.WriteAt([]byte("\n]\n"), stat.Size()-2) +} + +// jsonMarshalLine 
behaves like json.Marshal but supports escapeHTML and indenting +func jsonMarshalLine(t interface{}, escapeHTML bool, prefix string, indent string) ([]byte, error) { + buffer := &bytes.Buffer{} + encoder := json.NewEncoder(buffer) + encoder.SetEscapeHTML(escapeHTML) + encoder.SetIndent(prefix, indent) + + buffer.Write([]byte(" ")) // Tab char that indents the item inside the list + err := encoder.Encode(t) // Write actual data; Encode ends its output with a newline + buffer.Truncate(buffer.Len() - 1) // Remove last newline char. NOTE(review): this runs even when Encode failed and err is only returned afterwards, so callers must discard the bytes on error + buffer.Write([]byte(",\n")) // Write comma and newline char so the next item can follow directly + + return buffer.Bytes(), err +} diff --git a/export/json_test.go b/export/json_test.go index b743579..17110fd 100644 --- a/export/json_test.go +++ b/export/json_test.go @@ -8,8 +8,8 @@ import ( "time" ) -func TestJSONExporter_Export(t *testing.T) { - exporter := &JSON{ +func TestJSONLineExporter_Export(t *testing.T) { + exporter := &JSONLine{ FileName: "out.json", Indent: " ", } @@ -19,9 +19,26 @@ func TestJSONExporter_Export(t *testing.T) { exports <- map[string]string{"key": "value"} close(exports) - time.Sleep(time.Millisecond) + time.Sleep(time.Millisecond) // Wait for writing to disk contents, err := ioutil.ReadFile(exporter.FileName) assert.NoError(t, err) assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents)) } + +func TestJSONExporter_Export(t *testing.T) { + exporter := &JSON{ + FileName: "out.json", + } + _ = os.Remove(exporter.FileName) + exports := make(chan interface{}) + go exporter.Export(exports) + + exports <- map[string]string{"key": "value"} + close(exports) + time.Sleep(time.Millisecond) // Wait for writing to disk. NOTE(review): a fixed 1ms sleep does not wait for the Export goroutine to return; confirm this is not flaky + + contents, err := ioutil.ReadFile(exporter.FileName) + assert.NoError(t, err) + assert.Equal(t, "[\n\t{\"key\":\"value\"}\n]\n", string(contents)) +}