Attribute extractor added. HTML extractor added. Outer HTML extractor added.
exporter package renamed to export, extractor package renamed to extract for simplicity.
This commit is contained in:
24
extract/attr.go
Normal file
24
extract/attr.go
Normal file
@ -0,0 +1,24 @@
|
||||
package extract
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// ErrAttrNotExists is the error Attr.Extract returns when the requested
// attribute is reported as missing from the selection.
var ErrAttrNotExists = errors.New("attribute not exist")
|
||||
|
||||
// Attr describes an extraction of a single HTML attribute value from the
// elements matched by a selector.
type Attr struct {
	// Name is the key under which Extract stores the attribute value in the
	// map it returns.
	Name string
	// Selector is the selector string handed to the document's Find method.
	Selector string
	// Attr is the name of the HTML attribute to read from the selection.
	Attr string
}
|
||||
|
||||
// Extract returns HTML attribute value of provided selector
|
||||
func (e *Attr) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
attr, exists := doc.Find(e.Selector).Attr(e.Attr)
|
||||
if !exists {
|
||||
return nil, ErrAttrNotExists
|
||||
}
|
||||
return map[string]string{e.Name: attr}, nil
|
||||
}
|
53
extract/html.go
Normal file
53
extract/html.go
Normal file
@ -0,0 +1,53 @@
|
||||
package extract
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// HTML describes an extraction of the HTML found inside each element matched
// by a selector.
type HTML struct {
	// Name is the key under which Extract stores the collected HTML in the
	// map it returns.
	Name string
	// Selector is the selector string handed to the document's Find method.
	Selector string
}
|
||||
|
||||
// Extract extracts and returns the HTML from inside each element of the given selection.
|
||||
func (e *HTML) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
var ret, h string
|
||||
var err error
|
||||
|
||||
doc.Find(e.Selector).EachWithBreak(func(i int, s *goquery.Selection) bool {
|
||||
h, err = s.Html()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
ret += h
|
||||
return true
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return map[string]string{e.Name: ret}, nil
|
||||
}
|
||||
|
||||
// OuterHTML describes an extraction of the rendered HTML of each element
// matched by a selector, the element itself included.
type OuterHTML struct {
	// Name is the key under which Extract stores the rendered HTML in the
	// map it returns.
	Name string
	// Selector is the selector string handed to the document's Find method.
	Selector string
}
|
||||
|
||||
// Extract extracts and returns the HTML of each element of the given selection.
|
||||
func (e *OuterHTML) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
output := bytes.NewBufferString("")
|
||||
for _, node := range doc.Find(e.Selector).Nodes {
|
||||
if err := html.Render(output, node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return map[string]string{e.Name: output.String()}, nil
|
||||
}
|
14
extract/text.go
Normal file
14
extract/text.go
Normal file
@ -0,0 +1,14 @@
|
||||
package extract
|
||||
|
||||
import "github.com/PuerkitoBio/goquery"
|
||||
|
||||
// Text describes an extraction of the combined text contents of the elements
// matched by a selector.
type Text struct {
	// Name is the key under which Extract stores the text in the map it
	// returns.
	Name string
	// Selector is the selector string handed to the document's Find method.
	Selector string
}
|
||||
|
||||
// Extract returns the combined text contents of provided selector.
|
||||
func (e *Text) Extract(doc *goquery.Document) (interface{}, error) {
|
||||
return map[string]string{e.Name: doc.Find(e.Selector).Text()}, nil
|
||||
}
|
Reference in New Issue
Block a user