diff --git a/.gitignore b/.gitignore index 1695833..ec6c7f9 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,4 @@ # vendor/ # IDE directories -.idea/ \ No newline at end of file +.idea/ diff --git a/gezer.go b/gezer.go index 4496ce1..20c2f16 100644 --- a/gezer.go +++ b/gezer.go @@ -2,6 +2,7 @@ package gezer import ( "bytes" + "fmt" "github.com/PuerkitoBio/goquery" "io/ioutil" "net/http" @@ -20,12 +21,6 @@ type Opt struct { ParseFunc func(response *Response) } -type Response struct { - *http.Response - Body []byte - Doc *goquery.Document -} - func NewGezer(opt Opt) *Gezer { return &Gezer{ client: &http.Client{ @@ -45,9 +40,17 @@ func (g *Gezer) Start() { g.wg.Wait() } +func (g *Gezer) Get(url string) { + g.wg.Add(1) + go g.getRequest(url) +} + func (g *Gezer) getRequest(url string) { defer g.wg.Done() + // Log + fmt.Println("Fetching: ", url) + // Get request resp, err := g.client.Get(url) if resp != nil { @@ -71,6 +74,7 @@ func (g *Gezer) getRequest(url string) { Response: resp, Body: body, Doc: doc, + Gezer: g, } // ParseFunc response diff --git a/gezer_test.go b/gezer_test.go index 3ab5b61..1156996 100644 --- a/gezer_test.go +++ b/gezer_test.go @@ -23,6 +23,9 @@ func TestGezer_StartURLs_HTML(t *testing.T) { r.Doc.Find("div.quote").Each(func(i int, s *goquery.Selection) { fmt.Println(i, s.Find("span.text").Text(), s.Find("small.author").Text()) }) + if href, ok := r.Doc.Find("li.next > a").Attr("href"); ok { + r.Gezer.Get(r.JoinURL(href)) + } }, }) gezer.Start() diff --git a/response.go b/response.go new file mode 100644 index 0000000..1136fef --- /dev/null +++ b/response.go @@ -0,0 +1,25 @@ +package gezer + +import ( + "github.com/PuerkitoBio/goquery" + "net/http" + "net/url" +) + +type Response struct { + *http.Response + Body []byte + Doc *goquery.Document + + Gezer *Gezer +} + +func (r *Response) JoinURL(relativeURL string) string { + parsedRelativeURL, err := url.Parse(relativeURL) + if err != nil { + return "" + } + + joinedURL := r.Response.Request.URL.ResolveReference(parsedRelativeURL) + return joinedURL.String() +}