Automatically determine the response encoding and decode the body.

This commit is contained in:
Musab Gültekin 2019-06-09 10:46:32 +03:00
parent f2fba85266
commit a9aaf86df3
3 changed files with 19 additions and 4 deletions

View File

@@ -2,9 +2,9 @@ package geziyor
import (
"bytes"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/fpfeng/httpcache"
"golang.org/x/net/html/charset"
"io/ioutil"
"log"
"net/http"
@@ -89,6 +89,9 @@ func (g *Geziyor) Do(req *http.Request) {
// Log
log.Println("Fetching: ", req.URL.String())
// Modify Request
req.Header.Set("Accept-Charset", "utf-8")
// Do request
resp, err := g.client.Do(req)
if resp != nil {
@@ -99,10 +102,17 @@ func (g *Geziyor) Do(req *http.Request) {
return
}
// Read body
body, err := ioutil.ReadAll(resp.Body)
// Start reading body and determine encoding
reader, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
if err != nil {
fmt.Fprintf(os.Stderr, "reading body error: %v\n", err)
log.Printf("Determine encoding error: %v\n", err)
return
}
// Continue reading body
body, err := ioutil.ReadAll(reader)
if err != nil {
log.Printf("Reading Body error: %v\n", err)
return
}

2
go.mod
View File

@@ -5,4 +5,6 @@ go 1.12
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/fpfeng/httpcache v0.0.0-20181220155740-6b8f16a92be3
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a
golang.org/x/text v0.3.2 // indirect
)

3
go.sum
View File

@@ -7,3 +7,6 @@ github.com/fpfeng/httpcache v0.0.0-20181220155740-6b8f16a92be3/go.mod h1:QThlC5q
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=