Relative URL handling added to Response type.

This commit is contained in:
Musab Gültekin 2019-06-07 15:44:14 +03:00
parent 944bd3bada
commit e58b08cbd6
4 changed files with 39 additions and 7 deletions

View File

@ -2,6 +2,7 @@ package gezer
import (
"bytes"
"fmt"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"net/http"
@ -20,12 +21,6 @@ type Opt struct {
ParseFunc func(response *Response)
}
// Response bundles the raw *http.Response of a fetched page with the extra
// values getRequest attaches to it before handing it to Opt.ParseFunc.
type Response struct {
// Embedded so the standard response fields and methods are promoted.
*http.Response
// Body holds the response body as bytes, as supplied by getRequest.
Body []byte
// Doc is the goquery document supplied by getRequest.
// NOTE(review): presumably built from Body; its construction is not visible here — confirm.
Doc *goquery.Document
}
func NewGezer(opt Opt) *Gezer {
return &Gezer{
client: &http.Client{
@ -45,9 +40,17 @@ func (g *Gezer) Start() {
g.wg.Wait()
}
// Get schedules an asynchronous fetch of url and returns immediately.
//
// The wait-group counter is incremented before the goroutine is spawned so
// that a concurrent g.wg.Wait() cannot miss the new request; getRequest is
// responsible for the matching Done call.
func (g *Gezer) Get(url string) {
	g.wg.Add(1)
	go func() {
		g.getRequest(url)
	}()
}
func (g *Gezer) getRequest(url string) {
defer g.wg.Done()
// Log
fmt.Println("Fetching: ", url)
// Get request
resp, err := g.client.Get(url)
if resp != nil {
@ -71,6 +74,7 @@ func (g *Gezer) getRequest(url string) {
Response: resp,
Body: body,
Doc: doc,
Gezer: g,
}
// ParseFunc response

View File

@ -23,6 +23,9 @@ func TestGezer_StartURLs_HTML(t *testing.T) {
r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) {
fmt.Println(i, s.Find("span.text").Text(), s.Find("small.author").Text())
})
if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok {
r.Gezer.Get(r.JoinURL(href))
}
},
})
gezer.Start()

25
response.go Normal file
View File

@ -0,0 +1,25 @@
package gezer
import (
"github.com/PuerkitoBio/goquery"
"net/http"
"net/url"
)
// Response bundles the raw *http.Response of a fetched page with the extra
// values attached to it by the crawler before it is passed to the parse
// callback.
type Response struct {
// Embedded so the standard response fields and methods are promoted.
*http.Response
// Body holds the response body as bytes.
Body []byte
// Doc is the goquery document associated with this response.
// NOTE(review): presumably built from Body; its construction is not visible here — confirm.
Doc *goquery.Document
// Gezer is the crawler that produced this response; callbacks can use it
// to enqueue follow-up requests (e.g. r.Gezer.Get(...)).
Gezer *Gezer
}
// JoinURL resolves relativeURL against the URL of the request that produced
// this response and returns the resulting absolute URL as a string.
//
// An empty string is returned when relativeURL cannot be parsed, or when there
// is no request URL to resolve against (nil receiver, nil embedded
// *http.Response, nil Request, or nil Request.URL).
func (r *Response) JoinURL(relativeURL string) string {
	// Guard the whole pointer chain: a Response built by hand (for example in
	// a test) may lack the originating request, and dereferencing it would
	// panic instead of reporting "no URL" like the parse-failure path does.
	if r == nil || r.Response == nil || r.Response.Request == nil || r.Response.Request.URL == nil {
		return ""
	}

	ref, err := url.Parse(relativeURL)
	if err != nil {
		return ""
	}

	return r.Response.Request.URL.ResolveReference(ref).String()
}