Fixed incorrect Chrome response on some sites.

This commit is contained in:
Musab Gültekin 2019-07-01 12:32:15 +03:00
parent fb5b4e3406
commit 80f3500a69
3 changed files with 8 additions and 7 deletions

View File

@ -12,6 +12,7 @@ import (
"net"
"net/http"
"net/url"
"strings"
"time"
)
@ -95,7 +96,6 @@ func (c *Client) DoRequestClient(req *Request, maxBodySize int64, charsetDetectD
// DoRequestChrome opens up a new chrome instance and makes request
func (c *Client) DoRequestChrome(req *Request) (*Response, error) {
var body string
var reqID network.RequestID
var res *network.Response
ctx, cancel := chromedp.NewContext(context.Background())
@ -105,16 +105,16 @@ func (c *Client) DoRequestChrome(req *Request) (*Response, error) {
network.Enable(),
network.SetExtraHTTPHeaders(network.Headers(ConvertHeaderToMap(req.Header))),
chromedp.ActionFunc(func(ctx context.Context) error {
var reqID network.RequestID
chromedp.ListenTarget(ctx, func(ev interface{}) {
switch ev.(type) {
case *network.EventRequestWillBeSent:
reqEvent := ev.(*network.EventRequestWillBeSent)
if _, exists := reqEvent.Request.Headers["Referer"]; !exists {
if strings.HasPrefix(reqEvent.Request.URL, "http") {
reqID = reqEvent.RequestID
}
//if reqEvent := ev.(*network.EventRequestWillBeSent); reqEvent.Request.URL == req.URL.String() {
// reqID = reqEvent.RequestID
//}
}
case *network.EventResponseReceived:
if resEvent := ev.(*network.EventResponseReceived); resEvent.RequestID == reqID {
res = resEvent.Response
@ -144,6 +144,7 @@ func (c *Client) DoRequestChrome(req *Request) (*Response, error) {
Response: &http.Response{
Request: req.Request,
StatusCode: int(res.Status),
Proto: res.Protocol,
Header: ConvertMapToHeader(res.Headers),
},
Body: []byte(body),

View File

@ -153,7 +153,7 @@ func (g *Geziyor) Get(url string, callback func(g *Geziyor, r *client.Response))
}
// GetRendered issues GET request using headless browser
// Opens up a new Chrome instance, makes request, waits for 1 second to render HTML DOM and closed.
// Opens up a new Chrome instance, makes the request, waits for the HTML DOM to render, and then closes the instance.
// Rendered requests only supported for GET requests.
func (g *Geziyor) GetRendered(url string, callback func(g *Geziyor, r *client.Response)) {
req, err := client.NewRequest("GET", url, nil)

View File

@ -110,7 +110,7 @@ func TestGetRendered(t *testing.T) {
},
ParseFunc: func(g *geziyor.Geziyor, r *client.Response) {
fmt.Println(string(r.Body))
fmt.Println(r.Header)
fmt.Println(r.Request.URL.String(), r.Header)
},
//URLRevisitEnabled: true,
}).Start()