Added retry support for Chrome requests. Fixed the robots.txt retry issue. Fixed the Meta issue.

This commit is contained in:
Musab Gültekin 2019-07-07 19:50:15 +03:00
parent 90d2be2210
commit d3c4389c46
4 changed files with 41 additions and 28 deletions

View File

@ -21,6 +21,7 @@ import (
// Sentinel errors returned by the client.
var (
// ErrNoCookieJar is the error type for missing cookie jar
ErrNoCookieJar = errors.New("cookie jar is not available")
// ErrWrongStatus is the error DoRequest returns from its status-code
// retry check once the request has no retries left.
ErrWrongStatus = errors.New("wrong response status code")
)
// Client is a small wrapper around *http.Client to provide new methods.
@ -73,11 +74,36 @@ func NewClient(maxBodySize int64, charsetDetectDisabled bool, retryTimes int, re
}
// DoRequest selects appropriate request handler, client or Chrome
func (c *Client) DoRequest(req *Request) (*Response, error) {
func (c *Client) DoRequest(req *Request) (resp *Response, err error) {
if req.Rendered {
return c.DoRequestChrome(req)
resp, err = c.DoRequestChrome(req)
}
return c.DoRequestClient(req)
resp, err = c.DoRequestClient(req)
// Retry on Error
if err != nil {
if req.retryCounter < c.retryTimes {
req.retryCounter++
log.Println("Retrying:", req.URL.String())
return c.DoRequest(req)
}
return resp, errors.Wrap(err, "Response error")
}
// Retry on http status codes
for _, statusCode := range c.retryHTTPCodes {
if req.retryCounter < c.retryTimes {
if resp.StatusCode == statusCode {
req.retryCounter++
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
return c.DoRequest(req)
}
} else {
return nil, ErrWrongStatus
}
}
return
}
// DoRequestClient is a simple wrapper to read response according to options.
@ -88,31 +114,14 @@ func (c *Client) DoRequestClient(req *Request) (*Response, error) {
defer resp.Body.Close()
}
if err != nil {
// Retry on Error
if req.retryCounter < c.retryTimes {
req.retryCounter++
log.Println("Retrying:", req.URL.String())
return c.DoRequestClient(req)
}
return nil, errors.Wrap(err, "Response error")
}
// Checks status code to retry
if req.retryCounter < c.retryTimes {
for _, statusCode := range c.retryHTTPCodes {
if resp.StatusCode == statusCode {
req.retryCounter++
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
return c.DoRequestClient(req)
}
}
return nil, err
}
// Limit response body reading
bodyReader := io.LimitReader(resp.Body, c.maxBodySize)
// Decode response
if resp.Request.Method != "HEAD" {
if resp.Request.Method != "HEAD" && resp.ContentLength > 0 {
if req.Encoding != "" {
if enc, _ := charset.Lookup(req.Encoding); enc != nil {
bodyReader = transform.NewReader(bodyReader, enc.NewDecoder())

View File

@ -101,7 +101,7 @@ func TestCharsetFromHeaders(t *testing.T) {
defer ts.Close()
req, _ := NewRequest("GET", ts.URL, nil)
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
if string(res.Body) != "Gültekin" {
t.Fatal(string(res.Body))
@ -116,7 +116,7 @@ func TestCharsetFromBody(t *testing.T) {
defer ts.Close()
req, _ := NewRequest("GET", ts.URL, nil)
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
if string(res.Body) != "Gültekin" {
t.Fatal(string(res.Body))
@ -132,7 +132,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) {
req, _ := NewRequest("GET", ts.URL, nil)
req.Encoding = "windows-1254"
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
if string(res.Body) != "Gültekin" {
t.Fatal(string(res.Body))
@ -141,7 +141,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) {
// TestRetry checks that a request whose endpoint answers with HTTP 500 ends
// with a nil response and a non-nil error once DoRequest stops retrying.
//
// NOTE(review): the target is the public httpbin.org service, so this test
// presumably needs network access — consider a local test server instead.
func TestRetry(t *testing.T) {
	req, err := NewRequest("GET", "https://httpbin.org/status/500", nil)
	if err != nil {
		t.Fatalf("building request: %v", err)
	}

	// Go through DoRequest, as the other client tests in this file now do.
	res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
	assert.Nil(t, res)
	assert.Error(t, err)
}

View File

@ -42,5 +42,10 @@ func NewRequest(method, url string, body io.Reader) (*Request, error) {
return nil, err
}
return &Request{Request: req}, nil
request := Request{
Request: req,
Meta: make(map[string]interface{}),
}
return &request, nil
}

View File

@ -34,7 +34,6 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
m.mut.RUnlock()
if !exists {
// TODO: Disable retry
robotsReq, err := client.NewRequest("GET", r.URL.Scheme+"://"+r.Host+"/robots.txt", nil)
if err != nil {
return // Don't Do anything