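Extractors refactored to support pass by value: Extract methods now use value receivers and take a *goquery.Selection instead of a *goquery.Document. Documentation added for request and response.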
This commit is contained in:
@ -15,8 +15,8 @@ type Attr struct {
|
||||
}
|
||||
|
||||
// Extract returns the value of the HTML attribute for the provided selector, or ErrAttrNotExists if the attribute is not present.
|
||||
func (e *Attr) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
attr, exists := doc.Find(e.Selector).Attr(e.Attr)
|
||||
func (e Attr) Extract(sel *goquery.Selection) (interface{}, error) {
|
||||
attr, exists := sel.Find(e.Selector).Attr(e.Attr)
|
||||
if !exists {
|
||||
return nil, ErrAttrNotExists
|
||||
}
|
||||
|
@ -4,5 +4,5 @@ import "github.com/PuerkitoBio/goquery"
|
||||
|
||||
// Extractor is the interface for extracting data from an HTML document.
|
||||
type Extractor interface {
|
||||
Extract(doc *goquery.Document) (interface{}, error)
|
||||
Extract(sel *goquery.Selection) (interface{}, error)
|
||||
}
|
||||
|
@ -13,11 +13,11 @@ type HTML struct {
|
||||
}
|
||||
|
||||
// Extract extracts and returns the HTML from inside each element of the given selection.
|
||||
func (e *HTML) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
func (e HTML) Extract(sel *goquery.Selection) (interface{}, error) {
|
||||
var ret, h string
|
||||
var err error
|
||||
|
||||
doc.Find(e.Selector).EachWithBreak(func(i int, s *goquery.Selection) bool {
|
||||
sel.Find(e.Selector).EachWithBreak(func(i int, s *goquery.Selection) bool {
|
||||
h, err = s.Html()
|
||||
if err != nil {
|
||||
return false
|
||||
@ -40,9 +40,9 @@ type OuterHTML struct {
|
||||
}
|
||||
|
||||
// Extract extracts and returns the HTML of each element of the given selection.
|
||||
func (e *OuterHTML) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
func (e OuterHTML) Extract(sel *goquery.Selection) (interface{}, error) {
|
||||
output := bytes.NewBufferString("")
|
||||
for _, node := range doc.Find(e.Selector).Nodes {
|
||||
for _, node := range sel.Find(e.Selector).Nodes {
|
||||
if err := html.Render(output, node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -1,14 +1,22 @@
|
||||
package extract
|
||||
|
||||
import "github.com/PuerkitoBio/goquery"
|
||||
import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Text returns the combined text contents of the provided selector.
|
||||
type Text struct {
|
||||
Name string
|
||||
Selector string
|
||||
Name string
|
||||
Selector string
|
||||
TrimSpace bool
|
||||
}
|
||||
|
||||
// Extract returns the combined text contents of the provided selector, in a map keyed by Name.
|
||||
func (e *Text) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
return map[string]string{e.Name: doc.Find(e.Selector).Text()}, nil
|
||||
func (e Text) Extract(sel *goquery.Selection) (interface{}, error) {
|
||||
text := sel.Find(e.Selector).Text()
|
||||
if e.TrimSpace {
|
||||
text = strings.TrimSpace(text)
|
||||
}
|
||||
return map[string]string{e.Name: text}, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user