Added more tests and refactored exporter tests. Added code coverage badge.

This commit is contained in:
Musab Gültekin 2019-07-02 14:53:06 +03:00
parent 4ab7cfd904
commit b355a566cf
9 changed files with 144 additions and 17 deletions

View File

@ -3,6 +3,7 @@ Geziyor is a blazing fast web crawling and web scraping framework. It can be use
[![GoDoc](https://godoc.org/github.com/geziyor/geziyor?status.svg)](https://godoc.org/github.com/geziyor/geziyor) [![GoDoc](https://godoc.org/github.com/geziyor/geziyor?status.svg)](https://godoc.org/github.com/geziyor/geziyor)
[![report card](https://goreportcard.com/badge/github.com/geziyor/geziyor)](http://goreportcard.com/report/geziyor/geziyor) [![report card](https://goreportcard.com/badge/github.com/geziyor/geziyor)](http://goreportcard.com/report/geziyor/geziyor)
[![Code Coverage](https://img.shields.io/codecov/c/github/geziyor/geziyor/master.svg)](https://codecov.io/github/geziyor/geziyor?branch=master)
## Features ## Features
- 5.000+ Requests/Sec - 5.000+ Requests/Sec
@ -163,7 +164,7 @@ geziyor.NewGeziyor(&geziyor.Options{
## Benchmark ## Benchmark
**8452 requests per second** on *Macbook Pro 15" 2016* **8748 requests per second** on *Macbook Pro 15" 2016*
See [tests](https://github.com/geziyor/geziyor/blob/master/geziyor_test.go) for this benchmark function: See [tests](https://github.com/geziyor/geziyor/blob/master/geziyor_test.go) for this benchmark function:

91
client/client_test.go Normal file
View File

@ -0,0 +1,91 @@
package client
import (
"net/http"
"reflect"
"testing"
)
// TestSetDefaultHeader runs SetDefaultHeader against a table of cases and
// compares the returned header set with the expected one.
//
// Cases covered:
//   - "Simple": a lower-case key on an empty header is expected back under
//     its canonical "Key" form.
//   - "Dont Override": a key that is already present is expected to keep its
//     existing value rather than take the new one.
func TestSetDefaultHeader(t *testing.T) {
	cases := []struct {
		name   string
		header http.Header
		key    string
		value  string
		want   http.Header
	}{
		{
			name:   "Simple",
			header: http.Header{},
			key:    "key",
			value:  "value",
			want:   http.Header{"Key": {"value"}},
		},
		{
			name:   "Dont Override",
			header: http.Header{"Key": {"value"}},
			key:    "key",
			value:  "new value",
			want:   http.Header{"Key": {"value"}},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := SetDefaultHeader(tc.header, tc.key, tc.value)
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("SetDefaultHeader() = %v, want %v", got, tc.want)
		})
	}
}
// TestConvertHeaderToMap checks that ConvertHeaderToMap turns a header set
// into the expected generic map. The single "Simple" case expects a header
// with one value to come back as a plain string under the same key.
func TestConvertHeaderToMap(t *testing.T) {
	cases := []struct {
		name   string
		header http.Header
		want   map[string]interface{}
	}{
		{
			name:   "Simple",
			header: http.Header{"Key": {"value"}},
			want:   map[string]interface{}{"Key": "value"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := ConvertHeaderToMap(tc.header)
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("ConvertHeaderToMap() = %v, want %v", got, tc.want)
		})
	}
}
// TestConvertMapToHeader checks that ConvertMapToHeader builds the expected
// header set from a generic map.
//
// Cases covered:
//   - "Simple": an already-canonical key is expected to map straight across.
//   - "Non standard key": a lower-case key is expected to appear under its
//     canonical "Key" form in the resulting header.
func TestConvertMapToHeader(t *testing.T) {
	cases := []struct {
		name  string
		input map[string]interface{}
		want  http.Header
	}{
		{
			name:  "Simple",
			input: map[string]interface{}{"Key": "value"},
			want:  http.Header{"Key": {"value"}},
		},
		{
			name:  "Non standard key",
			input: map[string]interface{}{"key": "value"},
			want:  http.Header{"Key": {"value"}},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := ConvertMapToHeader(tc.input)
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("ConvertMapToHeader() = %v, want %v", got, tc.want)
		})
	}
}

View File

@ -28,7 +28,7 @@ func (r *Response) JoinURL(relativeURL string) string {
return joinedURL.String() return joinedURL.String()
} }
// IsHTML checks if response content is HTML by looking to content-type header // IsHTML checks if response content is HTML by looking content-type header
func (r *Response) IsHTML() bool { func (r *Response) IsHTML() bool {
contentType := r.Header.Get("Content-Type") contentType := r.Header.Get("Content-Type")
for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} { for _, htmlContentType := range []string{"text/html", "application/xhtml+xml", "application/vnd.wap.xhtml+xml"} {

View File

@ -1,17 +1,29 @@
package export package export
import "testing" import (
"io/ioutil"
"os"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestCSVExporter_Export(t *testing.T) { func TestCSVExporter_Export(t *testing.T) {
ch := make(chan interface{})
defer close(ch)
exporter := &CSV{ exporter := &CSV{
FileName: "out.csv", FileName: "out.csv",
Comma: ';', Comma: ';',
} }
go exporter.Export(ch) _ = os.Remove(exporter.FileName)
exports := make(chan interface{})
go exporter.Export(exports)
ch <- []string{"1", "2"} exports <- []string{"1", "2"}
ch <- map[string]string{"key1": "value1", "key2": "value2"} exports <- map[string]string{"key1": "value1", "key2": "value2"}
close(exports)
time.Sleep(time.Millisecond)
contents, err := ioutil.ReadFile(exporter.FileName)
assert.NoError(t, err)
assert.Equal(t, "1;2\nvalue1;value2\n", string(contents))
} }

View File

@ -1,16 +1,27 @@
package export package export
import "testing" import (
"github.com/stretchr/testify/assert"
"io/ioutil"
"os"
"testing"
"time"
)
func TestJSONExporter_Export(t *testing.T) { func TestJSONExporter_Export(t *testing.T) {
ch := make(chan interface{})
defer close(ch)
exporter := &JSON{ exporter := &JSON{
FileName: "out.json", FileName: "out.json",
Indent: " ", Indent: " ",
} }
go exporter.Export(ch) _ = os.Remove(exporter.FileName)
exports := make(chan interface{})
go exporter.Export(exports)
ch <- map[string]string{"key": "value"} exports <- map[string]string{"key": "value"}
close(exports)
time.Sleep(time.Millisecond)
contents, err := ioutil.ReadFile(exporter.FileName)
assert.NoError(t, err)
assert.Equal(t, "{\n \"key\": \"value\"\n}\n", string(contents))
} }

View File

@ -26,7 +26,6 @@ func (e *HTML) Extract(doc *goquery.Document) (interface{}, error) {
ret += h ret += h
return true return true
}) })
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -92,7 +92,7 @@ func TestAllLinks(t *testing.T) {
func TestStartRequestsFunc(t *testing.T) { func TestStartRequestsFunc(t *testing.T) {
geziyor.NewGeziyor(&geziyor.Options{ geziyor.NewGeziyor(&geziyor.Options{
StartRequestsFunc: func(g *geziyor.Geziyor) { StartRequestsFunc: func(g *geziyor.Geziyor) {
g.Get("http://quotes.toscrape.com/", g.Opt.ParseFunc) g.Get("http://quotes.toscrape.com/", nil)
}, },
ParseFunc: func(g *geziyor.Geziyor, r *client.Response) { ParseFunc: func(g *geziyor.Geziyor, r *client.Response) {
r.HTMLDoc.Find("a").Each(func(_ int, s *goquery.Selection) { r.HTMLDoc.Find("a").Each(func(_ int, s *goquery.Selection) {
@ -223,6 +223,15 @@ func TestRedirect(t *testing.T) {
}).Start() }).Start()
} }
// TestConcurrentRequests starts a crawl over two delayed httpbin endpoints
// with both ConcurrentRequests and ConcurrentRequestsPerDomain set to 1.
// It makes no assertions of its own beyond the deferred leaktest check.
// NOTE(review): presumably leaktest fails the test when goroutines outlive
// the crawl; confirm against the leaktest documentation.
func TestConcurrentRequests(t *testing.T) {
	// Register the leak check before any crawler is created.
	defer leaktest.Check(t)()

	opts := &geziyor.Options{
		StartURLs: []string{
			"https://httpbin.org/delay/1",
			"https://httpbin.org/delay/2",
		},
		ConcurrentRequests:          1,
		ConcurrentRequestsPerDomain: 1,
	}

	crawler := geziyor.NewGeziyor(opts)
	crawler.Start()
}
// Make sure to increase open file descriptor limits before running // Make sure to increase open file descriptor limits before running
func BenchmarkRequests(b *testing.B) { func BenchmarkRequests(b *testing.B) {

1
go.mod
View File

@ -12,6 +12,7 @@ require (
github.com/go-kit/kit v0.8.0 github.com/go-kit/kit v0.8.0
github.com/pkg/errors v0.8.1 github.com/pkg/errors v0.8.1
github.com/prometheus/client_golang v1.0.0 github.com/prometheus/client_golang v1.0.0
github.com/stretchr/testify v1.3.0
golang.org/x/net v0.0.0-20190522155817-f3200d17e092 golang.org/x/net v0.0.0-20190522155817-f3200d17e092
golang.org/x/text v0.3.2 // indirect golang.org/x/text v0.3.2 // indirect
) )

3
go.sum
View File

@ -14,6 +14,7 @@ github.com/chromedp/cdproto v0.0.0-20190609032908-dd39f0bf0a54/go.mod h1:5NWqr1R
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05 h1:5iy45UjpWvkgTcd7GrGQSPr7sifrp9nNweI/eAsMjGE= github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05 h1:5iy45UjpWvkgTcd7GrGQSPr7sifrp9nNweI/eAsMjGE=
github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05/go.mod h1:MsTqWB2yT7cErDFnF1F3y0PN8i/a/qQj+0GXKLW/I3s= github.com/chromedp/chromedp v0.3.1-0.20190617065505-d55cf9043e05/go.mod h1:MsTqWB2yT7cErDFnF1F3y0PN8i/a/qQj+0GXKLW/I3s=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
@ -49,6 +50,7 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0 h1:vrDKnkGzuGvhNAL56c7DBz29ZL+KxnoR0x7enabFceM= github.com/prometheus/client_golang v1.0.0 h1:vrDKnkGzuGvhNAL56c7DBz29ZL+KxnoR0x7enabFceM=
@ -65,6 +67,7 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=