Added more tests and refactored exporter tests. Added code coverage badge.
This commit is contained in:
parent
4ab7cfd904
commit
b355a566cf
@ -3,6 +3,7 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
|
||||
|
||||
[GoDoc](https://godoc.org/github.com/geziyor/geziyor)
|
||||
[Go Report Card](http://goreportcard.com/report/geziyor/geziyor)
|
||||
[Code Coverage](https://codecov.io/github/geziyor/geziyor?branch=master)
|
||||
|
||||
## Features
|
||||
- 5.000+ Requests/Sec
|
||||
@ -163,7 +164,7 @@ geziyor.NewGeziyor(&geziyor.Options{
|
||||
|
||||
## Benchmark
|
||||
|
||||
**8452 requests per second** on *MacBook Pro 15" 2016*
|
||||
**8748 requests per second** on *MacBook Pro 15" 2016*
|
||||
|
||||
See [tests](https://github.com/geziyor/geziyor/blob/master/geziyor_test.go) for this benchmark function:
|
||||
|
||||
|
91
client/client_test.go
Normal file
91
client/client_test.go
Normal file
@ -0,0 +1,91 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSetDefaultHeader(t *testing.T) {
|
||||
type args struct {
|
||||
header http.Header
|
||||
key string
|
||||
value string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want http.Header
|
||||
}{
|
||||
{
|
||||
name: "Simple",
|
||||
args: args{http.Header{}, "key", "value"},
|
||||
want: http.Header{"Key": []string{"value"}},
|
||||
},
|
||||
{
|
||||
name: "Dont Override",
|
||||
args: args{http.Header{"Key": []string{"value"}}, "key", "new value"},
|
||||
want: http.Header{"Key": []string{"value"}},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := SetDefaultHeader(tt.args.header, tt.args.key, tt.args.value); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("SetDefaultHeader() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertHeaderToMap(t *testing.T) {
|
||||
type args struct {
|
||||
header http.Header
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want map[string]interface{}
|
||||
}{
|
||||
{
|
||||
name: "Simple",
|
||||
args: args{http.Header{"Key": []string{"value"}}},
|
||||
want: map[string]interface{}{"Key": "value"},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := ConvertHeaderToMap(tt.args.header); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("ConvertHeaderToMap() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertMapToHeader(t *testing.T) {
|
||||
type args struct {
|
||||
m map[string]interface{}
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want http.Header
|
||||
}{
|
||||
{
|
||||
name: "Simple",
|
||||
args: args{map[string]interface{}{"Key": "value"}},
|
||||
want: http.Header{"Key": []string{"value"}},
|
||||
},
|
||||
{
|
||||
name: "Non standard key",
|
||||
args: args{map[string]interface{}{"key": "value"}},
|
||||
want: http.Header{"Key": []string{"value"}},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := ConvertMapToHeader(tt.args.m); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("ConvertMapToHeader() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -28,7 +28,7 @@ func (r *Response) JoinURL(relativeURL string) string {
|
||||
return joinedURL.String()
|
||||
}
|
||||
|
||||
// IsHTML checks if response content is HTML by looking to content-type header
|
||||
// IsHTML checks if response content is HTML by looking at the Content-Type header
|
||||
func (r *Response) IsHTML() bool {
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} {
|
||||
|
@ -1,17 +1,29 @@
|
||||
package export
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestCSVExporter_Export(t *testing.T) {
|
||||
ch := make(chan interface{})
|
||||
defer close(ch)
|
||||
|
||||
exporter := &CSV{
|
||||
FileName: "out.csv",
|
||||
Comma: ';',
|
||||
}
|
||||
go exporter.Export(ch)
|
||||
_ = os.Remove(exporter.FileName)
|
||||
exports := make(chan interface{})
|
||||
go exporter.Export(exports)
|
||||
|
||||
ch <- []string{"1", "2"}
|
||||
ch <- map[string]string{"key1": "value1", "key2": "value2"}
|
||||
exports <- []string{"1", "2"}
|
||||
exports <- map[string]string{"key1": "value1", "key2": "value2"}
|
||||
close(exports)
|
||||
time.Sleep(time.Millisecond)
|
||||
|
||||
contents, err := ioutil.ReadFile(exporter.FileName)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "1;2\nvalue1;value2\n", string(contents))
|
||||
}
|
||||
|
@ -1,16 +1,27 @@
|
||||
package export
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestJSONExporter_Export(t *testing.T) {
|
||||
ch := make(chan interface{})
|
||||
defer close(ch)
|
||||
|
||||
exporter := &JSON{
|
||||
FileName: "out.json",
|
||||
Indent: " ",
|
||||
}
|
||||
go exporter.Export(ch)
|
||||
_ = os.Remove(exporter.FileName)
|
||||
exports := make(chan interface{})
|
||||
go exporter.Export(exports)
|
||||
|
||||
ch <- map[string]string{"key": "value"}
|
||||
exports <- map[string]string{"key": "value"}
|
||||
close(exports)
|
||||
time.Sleep(time.Millisecond)
|
||||
|
||||
contents, err := ioutil.ReadFile(exporter.FileName)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents))
|
||||
}
|
||||
|
@ -26,7 +26,6 @@ func (e *HTML) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
ret += h
|
||||
return true
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -92,7 +92,7 @@ func TestAllLinks(t *testing.T) {
|
||||
func TestStartRequestsFunc(t *testing.T) {
|
||||
geziyor.NewGeziyor(&geziyor.Options{
|
||||
StartRequestsFunc: func(g *geziyor.Geziyor) {
|
||||
g.Get("http://quotes.toscrape.com/", g.Opt.ParseFunc)
|
||||
g.Get("http://quotes.toscrape.com/", nil)
|
||||
},
|
||||
ParseFunc: func(g *geziyor.Geziyor, r *client.Response) {
|
||||
r.HTMLDoc.Find("a").Each(func(_ int, s *goquery.Selection) {
|
||||
@ -223,6 +223,15 @@ func TestRedirect(t *testing.T) {
|
||||
}).Start()
|
||||
}
|
||||
|
||||
func TestConcurrentRequests(t *testing.T) {
|
||||
defer leaktest.Check(t)()
|
||||
geziyor.NewGeziyor(&geziyor.Options{
|
||||
StartURLs: []string{"https://httpbin.org/delay/1", "https://httpbin.org/delay/2"},
|
||||
ConcurrentRequests: 1,
|
||||
ConcurrentRequestsPerDomain: 1,
|
||||
}).Start()
|
||||
}
|
||||
|
||||
// Make sure to increase open file descriptor limits before running
|
||||
func BenchmarkRequests(b *testing.B) {
|
||||
|
||||
|
1
go.mod
1
go.mod
@ -12,6 +12,7 @@ require (
|
||||
github.com/go-kit/kit v0.8.0
|
||||
github.com/pkg/errors v0.8.1
|
||||
github.com/prometheus/client_golang v1.0.0
|
||||
github.com/stretchr/testify v1.3.0
|
||||
golang.org/x/net v0.0.0-20190522155817-f3200d17e092
|
||||
golang.org/x/text v0.3.2 // indirect
|
||||
)
|
||||
|
3
go.sum
3
go.sum
@ -14,6 +14,7 @@ github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54/go.mod h1:5NWqr1R
|
||||
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05 h1:5iy45UjpWvkgTcd7GrGQSPr7sifrp9nNweI/eAsMjGE=
|
||||
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05/go.mod h1:MsTqWB2yT7cErDFnF1F3y0PN8i/a/qQj+0GXKLW/I3s=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
|
||||
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
|
||||
@ -49,6 +50,7 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v1.0.0 h1:vrDKnkGzuGvhNAL56c7DBz29ZL+KxnoR0x7enabFceM=
|
||||
@ -65,6 +67,7 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
|
Loading…
x
Reference in New Issue
Block a user