Attribute extractor added. HTML extractor added. Outer HTML Extractor added.

exporter package renamed to export, extractor package renamed to extract for simplicity.
This commit is contained in:
Musab Gültekin
2019-06-30 22:20:17 +03:00
parent 7c383b175f
commit 0eda056065
12 changed files with 115 additions and 31 deletions

24
extract/attr.go Normal file
View File

@ -0,0 +1,24 @@
package extract
import (
"errors"
"github.com/PuerkitoBio/goquery"
)
// ErrAttrNotExists is returned by Attr.Extract when goquery reports that the
// requested attribute does not exist on the selection.
var ErrAttrNotExists = errors.New("attribute not exist")
// Attr is an extractor that reads one HTML attribute from the selection
// matched by a selector.
type Attr struct {
// Name is the map key under which Extract returns the attribute value.
Name string
// Selector is the selector string handed to the document's Find method.
Selector string
// Attr is the name of the HTML attribute to read.
Attr string
}
// Extract looks up the e.Attr attribute on the selection matched by
// e.Selector and returns it as a single-entry map[string]string keyed by
// e.Name. It returns ErrAttrNotExists (and a nil result) when goquery reports
// that the attribute does not exist.
func (e *Attr) Extract(doc *goquery.Document) (interface{}, error) {
	selection := doc.Find(e.Selector)
	if value, ok := selection.Attr(e.Attr); ok {
		return map[string]string{e.Name: value}, nil
	}
	return nil, ErrAttrNotExists
}

53
extract/html.go Normal file
View File

@ -0,0 +1,53 @@
package extract
import (
"bytes"
"github.com/PuerkitoBio/goquery"
"golang.org/x/net/html"
)
// HTML extracts and returns the HTML from inside each element of the given selection.
type HTML struct {
// Name is the map key under which Extract returns the collected HTML.
Name string
// Selector is the selector string handed to the document's Find method.
Selector string
}
// Extract concatenates the inner HTML of every element matched by e.Selector,
// in the order goquery visits them, and returns the result as a single-entry
// map[string]string keyed by e.Name.
//
// If obtaining the HTML of any element fails, iteration stops immediately and
// that error is returned together with a nil result.
func (e *HTML) Extract(doc *goquery.Document) (interface{}, error) {
	var (
		buf bytes.Buffer // accumulates fragments without re-copying on every append
		err error
	)
	doc.Find(e.Selector).EachWithBreak(func(_ int, s *goquery.Selection) bool {
		var inner string
		// Assign to the outer err so the failure is visible after the loop.
		if inner, err = s.Html(); err != nil {
			return false
		}
		buf.WriteString(inner)
		return true
	})
	if err != nil {
		return nil, err
	}
	return map[string]string{e.Name: buf.String()}, nil
}
// OuterHTML extracts and returns the HTML of each element of the given selection.
type OuterHTML struct {
// Name is the map key under which Extract returns the rendered HTML.
Name string
// Selector is the selector string handed to the document's Find method.
Selector string
}
// Extract renders every node matched by e.Selector with html.Render, appends
// the output of each node to one buffer, and returns the combined markup as a
// single-entry map[string]string keyed by e.Name. The first render error
// aborts the loop and is returned with a nil result.
func (e *OuterHTML) Extract(doc *goquery.Document) (interface{}, error) {
	var rendered bytes.Buffer
	matched := doc.Find(e.Selector)
	for i := range matched.Nodes {
		if err := html.Render(&rendered, matched.Nodes[i]); err != nil {
			return nil, err
		}
	}
	return map[string]string{e.Name: rendered.String()}, nil
}

14
extract/text.go Normal file
View File

@ -0,0 +1,14 @@
package extract
import "github.com/PuerkitoBio/goquery"
// Text returns the combined text contents of provided selector.
type Text struct {
// Name is the map key under which Extract returns the text.
Name string
// Selector is the selector string handed to the document's Find method.
Selector string
}
// Extract calls Text on the selection matched by e.Selector and returns the
// resulting string as a single-entry map[string]string keyed by e.Name.
// The returned error is always nil.
func (e *Text) Extract(doc *goquery.Document) (interface{}, error) {
	content := doc.Find(e.Selector).Text()
	result := map[string]string{e.Name: content}
	return result, nil
}