Proxy support

This commit is contained in:
Musab Gültekin
2021-09-24 16:15:20 +03:00
parent 242b025c9a
commit 97ecb7f118
8 changed files with 115 additions and 1 deletions

View File

@ -38,6 +38,7 @@ type Options struct {
RetryHTTPCodes []int
RemoteAllocatorURL string
AllocatorOptions []chromedp.ExecAllocatorOption
ProxyFunc func(*http.Request) (*url.URL, error)
}
// Default values for client
@ -53,9 +54,15 @@ var (
// NewClient creates http.Client with modified values for typical web scraper
func NewClient(opt *Options) *Client {
// Default proxy function is http.ProxyFromEnvironment
var proxyFunction = http.ProxyFromEnvironment
if opt.ProxyFunc != nil {
proxyFunction = opt.ProxyFunc
}
httpClient := &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyFromEnvironment,
Proxy: proxyFunction,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,

47
client/proxy.go Normal file
View File

@ -0,0 +1,47 @@
package client
import (
"context"
"github.com/geziyor/geziyor/internal"
"net/http"
"net/url"
"sync/atomic"
)
// ProxyURLKey is the request-context key under which GetProxy records the
// string form of the proxy URL it selected for that request.
//
// NOTE(review): the key is an untyped integer constant, so it can collide with
// any other code that stores a value under the int key 0 in the same context.
const ProxyURLKey = 0

// roundRobinProxy cycles through a fixed list of proxy URLs.
type roundRobinProxy struct {
	proxyURLs []*url.URL // candidates, visited in slice order
	index     uint32     // number of selections made so far; advanced atomically
}

// GetProxy returns the next proxy URL in the rotation and replaces *pr with a
// copy of the request whose context carries that URL's string form under
// ProxyURLKey. The returned error is always nil.
func (r *roundRobinProxy) GetProxy(pr *http.Request) (*url.URL, error) {
	// AddUint32 yields the post-increment counter, so step back by one to get
	// the zero-based position reserved for this call.
	turn := atomic.AddUint32(&r.index, 1) - 1
	poolSize := uint32(len(r.proxyURLs))
	selected := r.proxyURLs[turn%poolSize]

	// Overwrite the caller's request in place so the selected proxy address
	// can be read back from its context afterwards.
	tagged := pr.WithContext(context.WithValue(pr.Context(), ProxyURLKey, selected.String()))
	*pr = *tagged
	return selected, nil
}
// RoundRobinProxy creates a proxy switcher function which rotates
// ProxyURLs on every request.
// The proxy type is determined by the URL scheme. "http", "https"
// and "socks5" are supported. If the scheme is empty,
// "http" is assumed.
//
// With no arguments it returns http.ProxyFromEnvironment. If any of the
// given URLs cannot be parsed, the parse error is logged and a nil function
// is returned.
func RoundRobinProxy(proxyURLs ...string) func(*http.Request) (*url.URL, error) {
	if len(proxyURLs) == 0 {
		return http.ProxyFromEnvironment
	}

	parsedProxyURLs := make([]*url.URL, 0, len(proxyURLs))
	for _, rawURL := range proxyURLs {
		parsedURL, err := parseProxyURL(rawURL)
		if err != nil {
			internal.Logger.Printf("proxy url parse: %v", err)
			return nil
		}
		parsedProxyURLs = append(parsedProxyURLs, parsedURL)
	}
	return (&roundRobinProxy{proxyURLs: parsedProxyURLs}).GetProxy
}

// parseProxyURL parses a single proxy address. Addresses written without a
// scheme (for example "127.0.0.1:8080" or "proxy.example.com") are retried
// with an "http://" prefix, which is what makes "http" the default scheme.
func parseProxyURL(rawURL string) (*url.URL, error) {
	parsedURL, err := url.Parse(rawURL)
	if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" {
		// A bare host[:port] either fails to parse or yields a URL without a
		// usable scheme/host. If the prefixed form is also invalid, fall
		// through and report the result for the address as written.
		if withScheme, prefixedErr := url.Parse("http://" + rawURL); prefixedErr == nil {
			return withScheme, nil
		}
	}
	if err != nil {
		return nil, err
	}
	return parsedURL, nil
}