Added retry support for Chrome. Fixed robots.txt retry issue. Fixed Meta issue.
This commit is contained in:
parent
90d2be2210
commit
d3c4389c46
@ -21,6 +21,7 @@ import (
|
|||||||
var (
|
var (
|
||||||
// ErrNoCookieJar is the error type for missing cookie jar
|
// ErrNoCookieJar is the error type for missing cookie jar
|
||||||
ErrNoCookieJar = errors.New("cookie jar is not available")
|
ErrNoCookieJar = errors.New("cookie jar is not available")
|
||||||
|
ErrWrongStatus = errors.New("wrong response status code")
|
||||||
)
|
)
|
||||||
|
|
||||||
// Client is a small wrapper around *http.Client to provide new methods.
|
// Client is a small wrapper around *http.Client to provide new methods.
|
||||||
@ -73,11 +74,36 @@ func NewClient(maxBodySize int64, charsetDetectDisabled bool, retryTimes int, re
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DoRequest selects appropriate request handler, client or Chrome
|
// DoRequest selects appropriate request handler, client or Chrome
|
||||||
func (c *Client) DoRequest(req *Request) (*Response, error) {
|
func (c *Client) DoRequest(req *Request) (resp *Response, err error) {
|
||||||
if req.Rendered {
|
if req.Rendered {
|
||||||
return c.DoRequestChrome(req)
|
resp, err = c.DoRequestChrome(req)
|
||||||
}
|
}
|
||||||
return c.DoRequestClient(req)
|
resp, err = c.DoRequestClient(req)
|
||||||
|
|
||||||
|
// Retry on Error
|
||||||
|
if err != nil {
|
||||||
|
if req.retryCounter < c.retryTimes {
|
||||||
|
req.retryCounter++
|
||||||
|
log.Println("Retrying:", req.URL.String())
|
||||||
|
return c.DoRequest(req)
|
||||||
|
}
|
||||||
|
return resp, errors.Wrap(err, "Response error")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retry on http status codes
|
||||||
|
for _, statusCode := range c.retryHTTPCodes {
|
||||||
|
if req.retryCounter < c.retryTimes {
|
||||||
|
if resp.StatusCode == statusCode {
|
||||||
|
req.retryCounter++
|
||||||
|
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
|
||||||
|
return c.DoRequest(req)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return nil, ErrWrongStatus
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// DoRequestClient is a simple wrapper to read response according to options.
|
// DoRequestClient is a simple wrapper to read response according to options.
|
||||||
@ -88,31 +114,14 @@ func (c *Client) DoRequestClient(req *Request) (*Response, error) {
|
|||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Retry on Error
|
return nil, err
|
||||||
if req.retryCounter < c.retryTimes {
|
|
||||||
req.retryCounter++
|
|
||||||
log.Println("Retrying:", req.URL.String())
|
|
||||||
return c.DoRequestClient(req)
|
|
||||||
}
|
|
||||||
return nil, errors.Wrap(err, "Response error")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Checks status code to retry
|
|
||||||
if req.retryCounter < c.retryTimes {
|
|
||||||
for _, statusCode := range c.retryHTTPCodes {
|
|
||||||
if resp.StatusCode == statusCode {
|
|
||||||
req.retryCounter++
|
|
||||||
log.Println("Retrying:", req.URL.String(), resp.StatusCode)
|
|
||||||
return c.DoRequestClient(req)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Limit response body reading
|
// Limit response body reading
|
||||||
bodyReader := io.LimitReader(resp.Body, c.maxBodySize)
|
bodyReader := io.LimitReader(resp.Body, c.maxBodySize)
|
||||||
|
|
||||||
// Decode response
|
// Decode response
|
||||||
if resp.Request.Method != "HEAD" {
|
if resp.Request.Method != "HEAD" && resp.ContentLength > 0 {
|
||||||
if req.Encoding != "" {
|
if req.Encoding != "" {
|
||||||
if enc, _ := charset.Lookup(req.Encoding); enc != nil {
|
if enc, _ := charset.Lookup(req.Encoding); enc != nil {
|
||||||
bodyReader = transform.NewReader(bodyReader, enc.NewDecoder())
|
bodyReader = transform.NewReader(bodyReader, enc.NewDecoder())
|
||||||
|
@ -101,7 +101,7 @@ func TestCharsetFromHeaders(t *testing.T) {
|
|||||||
defer ts.Close()
|
defer ts.Close()
|
||||||
|
|
||||||
req, _ := NewRequest("GET", ts.URL, nil)
|
req, _ := NewRequest("GET", ts.URL, nil)
|
||||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||||
|
|
||||||
if string(res.Body) != "Gültekin" {
|
if string(res.Body) != "Gültekin" {
|
||||||
t.Fatal(string(res.Body))
|
t.Fatal(string(res.Body))
|
||||||
@ -116,7 +116,7 @@ func TestCharsetFromBody(t *testing.T) {
|
|||||||
defer ts.Close()
|
defer ts.Close()
|
||||||
|
|
||||||
req, _ := NewRequest("GET", ts.URL, nil)
|
req, _ := NewRequest("GET", ts.URL, nil)
|
||||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||||
|
|
||||||
if string(res.Body) != "Gültekin" {
|
if string(res.Body) != "Gültekin" {
|
||||||
t.Fatal(string(res.Body))
|
t.Fatal(string(res.Body))
|
||||||
@ -132,7 +132,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) {
|
|||||||
|
|
||||||
req, _ := NewRequest("GET", ts.URL, nil)
|
req, _ := NewRequest("GET", ts.URL, nil)
|
||||||
req.Encoding = "windows-1254"
|
req.Encoding = "windows-1254"
|
||||||
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||||
|
|
||||||
if string(res.Body) != "Gültekin" {
|
if string(res.Body) != "Gültekin" {
|
||||||
t.Fatal(string(res.Body))
|
t.Fatal(string(res.Body))
|
||||||
@ -141,7 +141,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) {
|
|||||||
|
|
||||||
func TestRetry(t *testing.T) {
|
func TestRetry(t *testing.T) {
|
||||||
req, _ := NewRequest("GET", "https://httpbin.org/status/500", nil)
|
req, _ := NewRequest("GET", "https://httpbin.org/status/500", nil)
|
||||||
res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req)
|
res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req)
|
||||||
assert.Nil(t, res)
|
assert.Nil(t, res)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
}
|
}
|
||||||
|
@ -42,5 +42,10 @@ func NewRequest(method, url string, body io.Reader) (*Request, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Request{Request: req}, nil
|
request := Request{
|
||||||
|
Request: req,
|
||||||
|
Meta: make(map[string]interface{}),
|
||||||
|
}
|
||||||
|
|
||||||
|
return &request, nil
|
||||||
}
|
}
|
||||||
|
@ -34,7 +34,6 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) {
|
|||||||
m.mut.RUnlock()
|
m.mut.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
// TODO: Disable retry
|
|
||||||
robotsReq, err := client.NewRequest("GET", r.URL.Scheme+"://"+r.Host+"/robots.txt", nil)
|
robotsReq, err := client.NewRequest("GET", r.URL.Scheme+"://"+r.Host+"/robots.txt", nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return // Don't Do anything
|
return // Don't Do anything
|
||||||
|
Loading…
x
Reference in New Issue
Block a user