diff --git a/client/client.go b/client/client.go index cfcc1e1..541d27a 100644 --- a/client/client.go +++ b/client/client.go @@ -21,6 +21,7 @@ import ( var ( // ErrNoCookieJar is the error type for missing cookie jar ErrNoCookieJar = errors.New("cookie jar is not available") + ErrWrongStatus = errors.New("wrong response status code") ) // Client is a small wrapper around *http.Client to provide new methods. @@ -73,11 +74,36 @@ func NewClient(maxBodySize int64, charsetDetectDisabled bool, retryTimes int, re } // DoRequest selects appropriate request handler, client or Chrome -func (c *Client) DoRequest(req *Request) (*Response, error) { +func (c *Client) DoRequest(req *Request) (resp *Response, err error) { if req.Rendered { - return c.DoRequestChrome(req) + resp, err = c.DoRequestChrome(req) } - return c.DoRequestClient(req) + resp, err = c.DoRequestClient(req) + + // Retry on Error + if err != nil { + if req.retryCounter < c.retryTimes { + req.retryCounter++ + log.Println("Retrying:", req.URL.String()) + return c.DoRequest(req) + } + return resp, errors.Wrap(err, "Response error") + } + + // Retry on http status codes + for _, statusCode := range c.retryHTTPCodes { + if req.retryCounter < c.retryTimes { + if resp.StatusCode == statusCode { + req.retryCounter++ + log.Println("Retrying:", req.URL.String(), resp.StatusCode) + return c.DoRequest(req) + } + } else { + return nil, ErrWrongStatus + } + } + + return } // DoRequestClient is a simple wrapper to read response according to options. @@ -88,31 +114,14 @@ func (c *Client) DoRequestClient(req *Request) (*Response, error) { defer resp.Body.Close() } if err != nil { - // Retry on Error - if req.retryCounter < c.retryTimes { - req.retryCounter++ - log.Println("Retrying:", req.URL.String()) - return c.DoRequestClient(req) - } - return nil, errors.Wrap(err, "Response error") - } - - // Checks status code to retry - if req.retryCounter < c.retryTimes { - for _, statusCode := range c.retryHTTPCodes { - if resp.StatusCode == statusCode { - req.retryCounter++ - log.Println("Retrying:", req.URL.String(), resp.StatusCode) - return c.DoRequestClient(req) - } - } + return nil, err } // Limit response body reading bodyReader := io.LimitReader(resp.Body, c.maxBodySize) // Decode response - if resp.Request.Method != "HEAD" { + if resp.Request.Method != "HEAD" && resp.ContentLength > 0 { if req.Encoding != "" { if enc, _ := charset.Lookup(req.Encoding); enc != nil { bodyReader = transform.NewReader(bodyReader, enc.NewDecoder()) diff --git a/client/client_test.go b/client/client_test.go index bd98829..db26895 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -101,7 +101,7 @@ func TestCharsetFromHeaders(t *testing.T) { defer ts.Close() req, _ := NewRequest("GET", ts.URL, nil) - res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req) + res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req) if string(res.Body) != "Gültekin" { t.Fatal(string(res.Body)) @@ -116,7 +116,7 @@ func TestCharsetFromBody(t *testing.T) { defer ts.Close() req, _ := NewRequest("GET", ts.URL, nil) - res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req) + res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req) if string(res.Body) != "Gültekin" { t.Fatal(string(res.Body)) @@ -132,7 +132,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) { req, _ := NewRequest("GET", ts.URL, nil) req.Encoding = "windows-1254" - res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req) + res, _ := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req) if string(res.Body) != "Gültekin" { t.Fatal(string(res.Body)) @@ -141,7 +141,7 @@ func TestCharsetProvidedWithRequest(t *testing.T) { func TestRetry(t *testing.T) { req, _ := NewRequest("GET", "https://httpbin.org/status/500", nil) - res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequestClient(req) + res, err := NewClient(DefaultMaxBody, false, DefaultRetryTimes, DefaultRetryHTTPCodes).DoRequest(req) assert.Nil(t, res) assert.Error(t, err) } diff --git a/client/request.go b/client/request.go index 9b00322..8dffef2 100644 --- a/client/request.go +++ b/client/request.go @@ -42,5 +42,10 @@ func NewRequest(method, url string, body io.Reader) (*Request, error) { return nil, err } - return &Request{Request: req}, nil + request := Request{ + Request: req, + Meta: make(map[string]interface{}), + } + + return &request, nil } diff --git a/middleware/robotstxt.go b/middleware/robotstxt.go index 0c12800..3c8339b 100644 --- a/middleware/robotstxt.go +++ b/middleware/robotstxt.go @@ -34,7 +34,6 @@ func (m *RobotsTxt) ProcessRequest(r *client.Request) { m.mut.RUnlock() if !exists { - // TODO: Disable retry robotsReq, err := client.NewRequest("GET", r.URL.Scheme+"://"+r.Host+"/robots.txt", nil) if err != nil { return // Don't Do anything