Added retry support for Chrome. Fixed robots.txt retry issue. Fixed Meta issue.
This commit is contained in:
parent
90d2be2210
commit
d3c4389c46
@ -21,6 +21,7 @@ import (
|
||||
var (
|
||||
// ErrNoCookieJar is the error type for missing cookie jar
|
||||
ErrNoCookieJar = errors.New("cookie jar is not available")
|
||||
ErrWrongStatus = errors.New("wrong response status code")
|
||||
)
|
||||
|
||||
// Client is a small wrapper around *http.Client to provide new methods.
|
||||
@ -73,11 +74,36 @@ func NewClient(maxBodySize int64, charsetDetectDisabled bool, retryTimes int, re
|
||||
}
|
||||
|
||||
// DoRequest selects appropriate request handler, client or Chrome
|
||||
func (c *Client) DoRequest(req *Request) (*Response, error) {
|
||||
func (c *Client) DoRequest(req *Request) (resp *Response, err error) {
|
||||
if req.Rendered {
|
||||
return c.DoRequestChrome(req)
|
||||
resp, err = c.DoRequestChrome(req)
|
||||
}
|
||||
return c.DoRequestClient(req)
|
||||
resp, err = c.DoRequestClient(req)
|
||||
|
||||
// Retry on Error
|
||||
if err != nil {
|
||||
if req.retryCounter < c.retryTimes {
|
||||
req.retryCounter++
|
||||
log.Println("Retrying:", req.URL.String())
|
||||
return c.DoRequest(req)
|
||||
}
|
||||
return resp, errors.Wrap(err, "Response error")
|
||||
}
|
||||
|
||||
// Retry on http status codes
|
||||
for _, statusCode := range c.retryHTTPCodes {
|
||||
if req.retryCounter < c.retryTimes {
|
||||
if resp.StatusCode == statusCode {
|
||||
req.retryCounter++
|
||||
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
|
||||
return c.DoRequest(req)
|
||||
}
|
||||
} else {
|
||||
return nil, ErrWrongStatus
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// DoRequestClient is a simple wrapper to read response according to options.
|
||||
@ -88,31 +114,14 @@ func (c *Client) DoRequestClient(req *Request) (*Response, error) {
|
||||
defer resp.Body.Close()
|
||||
}
|
||||
if err != nil {
|
||||
// Retry on Error
|
||||
if req.retryCounter < c.retryTimes {
|
||||
req.retryCounter++
|
||||
log.Println("Retrying:", req.URL.String())
|
||||
return c.DoRequestClient(req)
|
||||
}
|
||||
return nil, errors.Wrap(err, "Response error")
|
||||
}
|
||||
|
||||
// Checks status code to retry
|
||||
if req.retryCounter < c.retryTimes {
|
||||
for _, statusCode := range c.retryHTTPCodes {
|
||||
if resp.StatusCode == statusCode {
|
||||
req.retryCounter++
|
||||
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
|
||||
return c.DoRequestClient(req)
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Limit response body reading
|
||||
bodyReader := io.LimitReader(resp.Body, c.maxBodySize)
|
||||
|
||||
// Decode response
|
||||
if resp.Request.Method != "HEAD" {
|
||||
if resp.Request.Method != "HEAD" && resp.ContentLength > 0 {
|
||||
if req.Encoding != "" {
|
||||
if enc, _ := charset.Lookup(req.Encoding); enc != nil {
|
||||
bodyReader = transform.NewReader(bodyReader, enc.NewDecoder())
|
||||
|
@ -101,7 +101,7 @@ func TestCharsetFromHeaders(t *testing.T) {
|
||||
defer ts.Close()
|
||||
|
||||
req, _ := NewRequest("GET", ts.URL, nil)
|
||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||
|
||||
if string(res.Body) != "Gültekin" {
|
||||
t.Fatal(string(res.Body))
|
||||
@ -116,7 +116,7 @@ func TestCharsetFromBody(t *testing.T) {
|
||||
defer ts.Close()
|
||||
|
||||
req, _ := NewRequest("GET", ts.URL, nil)
|
||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||
|
||||
if string(res.Body) != "Gültekin" {
|
||||
t.Fatal(string(res.Body))
|
||||
@ -132,7 +132,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) {
|
||||
|
||||
req, _ := NewRequest("GET", ts.URL, nil)
|
||||
req.Encoding = "windows-1254"
|
||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||
|
||||
if string(res.Body) != "Gültekin" {
|
||||
t.Fatal(string(res.Body))
|
||||
@ -141,7 +141,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) {
|
||||
|
||||
func TestRetry(t *testing.T) {
|
||||
req, _ := NewRequest("GET", "https://httpbin.org/status/500", nil)
|
||||
res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
||||
res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||
assert.Nil(t, res)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
@ -42,5 +42,10 @@ func NewRequest(method, url string, body io.Reader) (*Request, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Request{Request: req}, nil
|
||||
request := Request{
|
||||
Request: req,
|
||||
Meta: make(map[string]interface{}),
|
||||
}
|
||||
|
||||
return &request, nil
|
||||
}
|
||||
|
@ -34,7 +34,6 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
||||
m.mut.RUnlock()
|
||||
|
||||
if !exists {
|
||||
// TODO: Disable retry
|
||||
robotsReq, err := client.NewRequest("GET", r.URL.Scheme+"://"+r.Host+"/robots.txt", nil)
|
||||
if err != nil {
|
||||
return // Don't Do anything
|
||||
|
Loading…
x
Reference in New Issue
Block a user