package main import ( "github.com/PuerkitoBio/goquery" "softdown.com/shusou/geziyor" "softdown.com/shusou/geziyor/client" "softdown.com/shusou/geziyor/export" "softdown.com/shusou/geziyor/filter" "softdown.com/shusou/geziyor/middleware" ) func main() { geziyor.NewGeziyor(&geziyor.Options{ StartURLs: []string{"https://dytt.dytt8.net/index.htm"}, Encoding: middleware.EncodingGBK, ParseFunc: menuParse, Exporters: []export.Exporter{&export.MeiliSearch{}}, }).Start() } func menuParse(g *geziyor.Geziyor, r *client.Response) { r.HTMLDoc.Find("div#menu ul li").Each(func(i int, s *goquery.Selection) { if url, exists := s.Find("a").Attr("href"); exists == true { g.Get(r.JoinURL2(url), pageParse) } }) } func pageParse(g *geziyor.Geziyor, r *client.Response) { r.HTMLDoc.Find("div.co_content8 ul b").Each(func(i int, s *goquery.Selection) { if url, exists := s.Find("a").Eq(1).Attr("href"); exists == true { g.Get(r.JoinURL2(url), detailParse) } }) // 分页解析 r.HTMLDoc.Find("div.x a").Each(func(i int, selection *goquery.Selection) { if href, exists := selection.Attr("href"); exists { g.Get(r.JoinURL2(href), pageParse) } }) //htmlquery.Find(r.HTMLNode, "") } func detailParse(g *geziyor.Geziyor, r *client.Response) { s := r.HTMLDoc.Find("body") url := r.Request.URL.String() title := s.Find("div.title_all h1").Text() html, _ := s.Find("div#Zoom").Html() text := s.Find("div#Zoom").Text() image, _ := s.Find("div#Zoom").Find("img").Attr("src") g.Exports <- map[string]interface{}{ "image": image, "text": text, "html": filter.FilterScriptTags(html), "url": url, "title": title, } }