Extractors implemented. Exporter names simplified. README updated for extracting data. Removed Go 1.11 support.

This commit is contained in:
Musab Gültekin
2019-06-28 13:00:30 +03:00
parent 679fd8ab7a
commit b000581c3d
13 changed files with 138 additions and 27 deletions

View File

@ -2,6 +2,7 @@ package geziyor
import (
"context"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/chromedp"
@ -20,7 +21,14 @@ import (
"sync"
)
// Exporter interface is for extracting data to external resources
// Extractor is the interface for extracting data from an HTML document.
type Extractor interface {
// Extract takes the goquery document and returns the extracted value
// as an untyped interface{}.
Extract(doc *goquery.Document) interface{}
}
// Exporter is the interface for exporting data to external resources.
// Geziyor calls every exporter's Export function before any scraping starts.
// Export functions should wait for new data from the exports chan.
type Exporter interface {
// Export is handed the exports channel to receive data from.
Export(exports chan interface{})
}
@ -61,6 +69,7 @@ func NewGeziyor(opt *Options) *Geziyor {
responseMiddlewares: []ResponseMiddleware{
parseHTMLMiddleware,
metricsResponseMiddleware,
extractorsMiddleware,
},
metrics: metrics.NewMetrics(opt.MetricsType),
}