61 lines
1.6 KiB
Go
61 lines
1.6 KiB
Go
package main
|
|
|
|
import (
|
|
"github.com/PuerkitoBio/goquery"
|
|
"softdown.com/shusou/geziyor"
|
|
"softdown.com/shusou/geziyor/client"
|
|
"softdown.com/shusou/geziyor/export"
|
|
"softdown.com/shusou/geziyor/filter"
|
|
"softdown.com/shusou/geziyor/middleware"
|
|
)
|
|
|
|
func main() {
|
|
geziyor.NewGeziyor(&geziyor.Options{
|
|
StartURLs: []string{"https://dytt.dytt8.net/index.htm"},
|
|
Encoding: middleware.EncodingGBK,
|
|
ParseFunc: menuParse,
|
|
Exporters: []export.Exporter{&export.MeiliSearch{}},
|
|
}).Start()
|
|
}
|
|
|
|
func menuParse(g *geziyor.Geziyor, r *client.Response) {
|
|
r.HTMLDoc.Find("div#menu ul li").Each(func(i int, s *goquery.Selection) {
|
|
if url, exists := s.Find("a").Attr("href"); exists == true {
|
|
g.Get(r.JoinURL2(url), pageParse)
|
|
}
|
|
})
|
|
}
|
|
|
|
func pageParse(g *geziyor.Geziyor, r *client.Response) {
|
|
r.HTMLDoc.Find("div.co_content8 ul b").Each(func(i int, s *goquery.Selection) {
|
|
if url, exists := s.Find("a").Eq(1).Attr("href"); exists == true {
|
|
g.Get(r.JoinURL2(url), detailParse)
|
|
}
|
|
})
|
|
// 分页解析
|
|
r.HTMLDoc.Find("div.x a").Each(func(i int, selection *goquery.Selection) {
|
|
if href, exists := selection.Attr("href"); exists {
|
|
g.Get(r.JoinURL2(href), pageParse)
|
|
}
|
|
})
|
|
|
|
//htmlquery.Find(r.HTMLNode, "")
|
|
}
|
|
|
|
func detailParse(g *geziyor.Geziyor, r *client.Response) {
|
|
s := r.HTMLDoc.Find("body")
|
|
url := r.Request.URL.String()
|
|
title := s.Find("div.title_all h1").Text()
|
|
html, _ := s.Find("div#Zoom").Html()
|
|
text := s.Find("div#Zoom").Text()
|
|
image, _ := s.Find("div#Zoom").Find("img").Attr("src")
|
|
|
|
g.Exports <- map[string]interface{}{
|
|
"image": image,
|
|
"text": text,
|
|
"html": filter.FilterScriptTags(html),
|
|
"url": url,
|
|
"title": title,
|
|
}
|
|
}
|