[Go] goquery 教學

程式語言:Go
Package:goquery
官方文件
官方 GitHub

功能:jquery-like 分析 html
jQuery Selectors
必須先自行轉成 UTF-8 編碼,才能丟進 goquery 處理
package main

import (
    "bytes"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"

    "github.com/PuerkitoBio/goquery"
    "golang.org/x/text/encoding/traditionalchinese"
    "golang.org/x/text/transform"
)

// main fetches a page, transcodes its body from Big5 to UTF-8, parses it with
// goquery, and prints a map of link text to href for every <a> element.
func main() {
    // Request the HTML page.
    res, err := http.Get("https://www.google.com.tw/")
    if err != nil {
        log.Fatal(err)
    }
    defer res.Body.Close()

    if res.StatusCode != http.StatusOK {
        log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
    }

    // Read the whole body so it can be transcoded before parsing.
    raw, err := ioutil.ReadAll(res.Body)
    if err != nil {
        log.Fatal(err)
    }

    // goquery only accepts UTF-8 input, so convert first.
    // NOTE(review): this assumes the response is Big5-encoded — confirm for
    // the target site.
    utf8Body, err := DecodeBig5(raw)
    if err != nil {
        log.Fatal(err)
    }

    // Load the HTML document.
    dom, err := goquery.NewDocumentFromReader(bytes.NewReader(utf8Body))
    if err != nil {
        log.Fatal(err)
    }

    data := make(map[string]string)
    // Collect every link. The link text is the map key, so links that share
    // the same text overwrite each other.
    dom.Find("a").Each(func(i int, s *goquery.Selection) {
        title := s.Text()
        // A missing href yields the empty string, which is stored as-is.
        url, _ := s.Attr("href")
        data[title] = url
    })

    fmt.Printf("%+v", data)
}

//convert BIG5 to UTF-8
func DecodeBig5(s []byte) ([]byte, error) {
    I := bytes.NewReader(s)
    O := transform.NewReader(I, traditionalchinese.Big5.NewDecoder())
    b, err := ioutil.ReadAll(O)
    if err != nil {
        return nil, err
    }
    return b, nil
}

//convert UTF-8 to BIG5
func EncodeBig5(s []byte) ([]byte, error) {
    I := bytes.NewReader(s)
    O := transform.NewReader(I, traditionalchinese.Big5.NewEncoder())
    b, err := ioutil.ReadAll(O)
    if err != nil {
        return nil, err
    }
    return b, nil
}
package main

import (
    "bufio"
    "fmt"
    "io"
    "log"
    "net/http"

    "github.com/PuerkitoBio/goquery"
    "github.com/pkg/errors"
    "golang.org/x/net/html/charset"
    "golang.org/x/text/encoding"
    "golang.org/x/text/transform"
)

// main fetches a page, detects its character encoding from the leading bytes,
// decodes it to UTF-8, parses it with goquery, and prints a map of link text
// to href for every <a> element.
func main() {
    // Request the HTML page.
    resp, err := http.Get("http://www.zhenai.com/zhenghun")
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        log.Fatalf("status code error: %d %s", resp.StatusCode, resp.Status)
    }

    // Buffer the body so the bytes inspected during detection are peeked, not
    // consumed. bufio.NewReader returns its argument unchanged when it is
    // already a *bufio.Reader of at least the default size, so
    // DetermineEncodingFromReader peeks into this same buffer and the decoder
    // below still sees the document from its first byte.
    body := bufio.NewReader(resp.Body)

    // Detect the encoding automatically and convert to UTF-8. Detection is a
    // heuristic and can occasionally guess wrong.
    e, _, _, err := DetermineEncodingFromReader(body)
    if err != nil {
        log.Fatal(err)
    }
    utf8Reader := transform.NewReader(body, e.NewDecoder())

    // Load the HTML document.
    dom, err := goquery.NewDocumentFromReader(utf8Reader)
    if err != nil {
        log.Fatal(err)
    }

    data := make(map[string]string)
    // Collect every link. The link text is the map key, so links that share
    // the same text overwrite each other.
    dom.Find("a").Each(func(i int, s *goquery.Selection) {
        title := s.Text()
        // A missing href yields the empty string, which is stored as-is.
        url, _ := s.Attr("href")
        data[title] = url
    })

    fmt.Printf("%+v", data)
}

// DetermineEncodingFromReader detects the character encoding of r from up to
// its first 1024 bytes.
//
// It returns the results of charset.DetermineEncoding (called with an empty
// content type): the encoding, its name, and whether the detection is certain.
// Input shorter than 1024 bytes is not an error; whatever is available is
// used for detection.
//
// Detection peeks through bufio.NewReader(r). When r is already a
// *bufio.Reader with at least the default buffer size, bufio reuses it and no
// bytes are consumed, so the caller can keep reading r from the start. For any
// other reader, the bytes buffered here are consumed from r and are not
// available to the caller afterwards.
func DetermineEncodingFromReader(r io.Reader) (encoding.Encoding, string, bool, error) {
    b, err := bufio.NewReader(r).Peek(1024)
    // Peek reports io.EOF when fewer than 1024 bytes are available but still
    // returns the bytes it did read, so only other errors are failures.
    if err != nil && err != io.EOF {
        err = errors.Wrap(err, "bufio.NewReader")
        return nil, "", false, err
    }

    e, name, certain := charset.DetermineEncoding(b, "")
    return e, name, certain, nil
}

讀取網頁

package main

import (
    "fmt"
    "log"
    "strings"

    "github.com/PuerkitoBio/goquery"
)

// main parses an HTML string with goquery and prints the document serialized
// back to HTML.
func main() {
    // Placeholder input; replace with the HTML text to parse.
    html := ""

    // Load the HTML document.
    dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
    if err != nil {
        log.Fatal(err)
    }

    // Serialize the parsed document back to HTML.
    str, err := dom.Html()
    if err != nil {
        log.Fatal(err)
    }

    fmt.Printf("%s", str)
}

使用範例

html 如下
html := `
<html>
<head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
    <p class="story">Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
    and they lived at the bottom of a well.
</p>
<p class="story">...</p>`

// Load the HTML document
dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
    log.Fatal(err)
}

var sel *goquery.Selection

訪問方法

幾乎等同 jQuery,可參考 [jQuery] 基本架構
可將 dom 視為 $,可理解到 goquery & jQuery 差異極小
sel = dom.Find("a")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

sel = dom.Find("a.sister#link1")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]

搜尋方法

sel = dom.Find("p").Children()
prettyPrint(sel.Nodes)
// "b" []
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

sel = dom.Find("p").ChildrenFiltered("#link1")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
sel = dom.Find("p.title").Siblings()
prettyPrint(sel.Nodes)
// "p" [{ class story}]
// "p" [{ class story}]

sel = dom.Find("a#link2").Siblings()
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

sel = dom.Find("a#link2").SiblingsFiltered("#link1")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// 除 children 外,包含 text nodes
sel = dom.Find("body").Contents()
prettyPrint(sel.Nodes)
// "\n    " []
// "p" [{ class title}]
// "\n        " []
// "p" [{ class story}]
// "\n    " []
// "p" [{ class story}]
// 含自己往上找,找到最接近的 parents
sel = dom.Find("a").Closest("a")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

sel = dom.Find("a").Closest("p")
prettyPrint(sel.Nodes)
// "p" [{ class story}]
// 找到其 parent,只往上查找一層,會自動移除重複的
sel = dom.Find("a").Parent()
prettyPrint(sel.Nodes)
// "p" [{ class story}]

sel = dom.Find("a").ParentFiltered("body")
prettyPrint(sel.Nodes)
// None
// 找到其 parents,往上查找不停止,找出所有符合條件的,會自動移除重複的
sel = dom.Find("a").Parents()
prettyPrint(sel.Nodes)
// "p" [{ class story}]
// "body" []
// "html" []

sel = dom.Find("a").ParentsFiltered("body")
prettyPrint(sel.Nodes)
// "body" []
// 選擇第一個
sel = dom.Find("a").Eq(0)
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// 可為 string 也可以是 function
sel = dom.Find("p").Filter(".story")
prettyPrint(sel.Nodes)
// "p" [{ class story}]
// "p" [{ class story}]

sel = dom.Find("p").FilterFunction(func(i int, s *goquery.Selection) bool { return s.Text() == "..." })
prettyPrint(sel.Nodes)
// "p" [{ class story}]
sel = dom.Find("a").Not("#link1")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]
sel = dom.Find("a#link2").Next()
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

sel = dom.Find("a#link2").NextFiltered("a")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]
sel = dom.Find("a#link2").Prev()
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]

sel = dom.Find("a#link2").PrevFiltered("a")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
sel = dom.Find("a#link1").NextAll()
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

sel = dom.Find("a#link1").NextAllFiltered("#link3")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]
sel = dom.Find("a#link3").PrevAll()
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]

sel = dom.Find("a#link3").PrevAllFiltered("#link1")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
 
// 回傳上一個 filter 結果
sel = dom.Find("a").Parent().End() // 等同 sel = dom.Find("a")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// "a" [{ href http://example.com/lacie} { class sister} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]

Attributes

// 會以第一個 a 為主
sel = dom.Find("a")
fmt.Println(sel.Attr("id"))
// link1 true
fmt.Println(sel.AttrOr("noid", "nil"))
// nil

sel = dom.Find("a")
fmt.Println(sel.Attr("class"))
// sister true
fmt.Println(sel.AttrOr("noclass", "nil"))
// nil
// 只要任何 elements 有就是 True
sel = dom.Find("p")
fmt.Println(sel.HasClass("story"))
// true
// 回傳所有元件的 text
sel = dom.Find("title")
fmt.Println(sel.Text())
// The Dormouse's story

Properties

// 兩者意義一樣
sel = dom.Find("a")
fmt.Println(sel.Length())
// 3
fmt.Println(sel.Size())
// 3

網頁內容

sel = dom.Find("head")
fmt.Println(sel.Html())
//<title>The Dormouse&#39;s story</title> <nil>
sel = dom.Find("head")
fmt.Println(goquery.OuterHtml(sel))
// <head><title>The Dormouse&#39;s story</title></head> <nil>

更改方法

// # 複製原本的內容並回傳,可用在不想被更改的元件上
var domCopy *goquery.Selection
domCopy = dom.Find(".title").Clone()
// # 會將原本的 class 替換掉
domCopy = dom.Clone()
sel = domCopy.Find("a").SetAttr("class", "classA")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class classA} { id link1}]
// "a" [{ href http://example.com/lacie} { class classA} { id link2}]
// "a" [{ href http://example.com/tillie} { class classA} { id link3}]
domCopy = dom.Clone()
sel = domCopy.Find("a#link2").SetAttr("id", "link4")
prettyPrint(domCopy.Find("a").Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1}]
// "a" [{ href http://example.com/lacie} { class sister} { id link4}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3}]
// # 保留原本的 class
domCopy = dom.Clone()
sel = domCopy.Find("a").AddClass("classB")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister  classB} { id link1}]
// "a" [{ href http://example.com/lacie} { class sister  classB} { id link2}]
// "a" [{ href http://example.com/tillie} { class sister  classB} { id link3}]
domCopy = dom.Clone()
sel = domCopy.Find("p").RemoveClass("story")
prettyPrint(sel.Nodes)
// "p" [{ class title}]
// "p" []
// "p" []
domCopy = dom.Clone()
sel = domCopy.Find("a").ToggleClass("brother").ToggleClass("sister")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class brother} { id link1}]
// "a" [{ href http://example.com/lacie} { class brother} { id link2}]
// "a" [{ href http://example.com/tillie} { class brother} { id link3}]
domCopy = dom.Clone()
sel = domCopy.Find("a").SetAttr("val", "123")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { class sister} { id link1} { val 123}]
// "a" [{ href http://example.com/lacie} { class sister} { id link2} { val 123}]
// "a" [{ href http://example.com/tillie} { class sister} { id link3} { val 123}]
domCopy = dom.Clone()
sel = domCopy.Find("a").RemoveAttr("class")
prettyPrint(sel.Nodes)
// "a" [{ href http://example.com/elsie} { id link1}]
// "a" [{ href http://example.com/lacie} { id link2}]
// "a" [{ href http://example.com/tillie} { id link3}]
domCopy = dom.Clone()
sel = domCopy.Find(".title").SetText("123")
fmt.Println(goquery.OuterHtml(sel))
// # 原來的 <b> 被移除了
// <p class="title">123</p> <nil>
domCopy = dom.Clone()
sel = domCopy.Find(".title").BeforeHtml("<div>before</div>")
fmt.Println(domCopy.Find("body").Html())
// <div>before</div><p class="title"><b>The Dormouse&#39;s story</b></p>
domCopy = dom.Clone()
sel = domCopy.Find(".title").AfterHtml("<div>after</div>")
fmt.Println(domCopy.Find("body").Html())
// <p class="title"><b>The Dormouse&#39;s story</b></p><div>after</div>
domCopy = dom.Clone()
sel = domCopy.Find(".title").PrependHtml("<div>prepend</div>")
fmt.Println(goquery.OuterHtml(sel))
// <p class="title"><div>prepend</div><b>The Dormouse&#39;s story</b></p>
domCopy = dom.Clone()
sel = domCopy.Find(".title").AppendHtml("<div>append</div>")
fmt.Println(goquery.OuterHtml(sel))
// <p class="title"><b>The Dormouse&#39;s story</b><div>append</div></p>
// 將 prepend 元件放到 find 所有元件裡面的前面,原本的 prepend 元件仍存在
domCopy = dom.Clone()
sel = domCopy.Find("a").Prepend("title")
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <p class="title"><b>The Dormouse&#39;s story</b></p>
//     <p class="story">Once upon a time there were three little sisters; and their names were
//     <a href="http://example.com/elsie" class="sister" id="link1"><title>The Dormouse&#39;s story</title>Elsie</a>,
//     <a href="http://example.com/lacie" class="sister" id="link2"><title>The Dormouse&#39;s story</title>Lacie</a> and
//     <a href="http://example.com/tillie" class="sister" id="link3"><title>The Dormouse&#39;s story</title>Tillie</a>;
//     and they lived at the bottom of a well.
// </p>
//將 append 元件放到 find 所有元件裡面的後面,原本的 append 元件仍存在
domCopy = dom.Clone()
sel = domCopy.Find("a").Append("title")
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <p class="title"><b>The Dormouse&#39;s story</b></p>
//     <p class="story">Once upon a time there were three little sisters; and their names were
//     <a href="http://example.com/elsie" class="sister" id="link1">Elsie<title>The Dormouse&#39;s story</title></a>,
//     <a href="http://example.com/lacie" class="sister" id="link2">Lacie<title>The Dormouse&#39;s story</title></a> and
//     <a href="http://example.com/tillie" class="sister" id="link3">Tillie<title>The Dormouse&#39;s story</title></a>;
//     and they lived at the bottom of a well.
// </p>
// 清空內容
domCopy = dom.Clone()
sel = domCopy.Find("p").Empty()
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <p class="title"></p>
//     <p class="story"></p>
// <p class="story"></p>
domCopy = dom.Clone()
sel = domCopy.Find("p").Remove()
fmt.Println(domCopy.Html())
fmt.Println("=============")
// <html><head><title>The Dormouse&#39;s story</title></head>
// <body>
//
// </body></html>
domCopy = dom.Clone()
sel = domCopy.Find("p").SetHtml("<b>test</b>")
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <p class="title"><b>test</b></p>
//     <p class="story"><b>test</b></p>
// <p class="story"><b>test</b></p>
domCopy = dom.Clone()
sel = domCopy.Find("a").ReplaceWithHtml("<p></p>")
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <p class="title"><b>The Dormouse&#39;s story</b></p>
//     <p class="story">Once upon a time there were three little sisters; and their names were
//     <p></p>,
//     <p></p> and
//     <p></p>;
//     and they lived at the bottom of a well.
// </p>
domCopy = dom.Clone()
sel = domCopy.Find("a").WrapHtml("<div></div>")
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a></div>,
// <div><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a></div> and
// <div><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>;
domCopy = dom.Clone()
sel = domCopy.Find("a").WrapAllHtml("<div></div>")
fmt.Println(domCopy.Find("body").Html())
fmt.Println("=============")
// <p class="title"><b>The Dormouse&#39;s story</b></p>
//     <p class="story">Once upon a time there were three little sisters; and their names were
//     <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>,
//      and
//     ;
//     and they lived at the bottom of a well.
// </p>

特殊方法

domCopy = dom.Clone()
strList := domCopy.Find("a").Map(func(i int, s *goquery.Selection) string {
 str := s.AttrOr("href", "None")
 return str
})
fmt.Println(strList)
// [http://example.com/elsie http://example.com/lacie http://example.com/tillie]
// 以當前元件做判斷
sel = domCopy.Find("p")
fmt.Println(sel.Is("b"))
// false
// for loop
sel := dom.Find(".selector")
for i := range sel.Nodes {
    single := sel.Eq(i)
    // use `single` as a selection of 1 node
}

完整原始碼

package main

import (
    "fmt"
    "log"
    "strings"

    "github.com/PuerkitoBio/goquery"
    "golang.org/x/net/html"
)

// main parses a small sample document and walks through goquery's selection,
// traversal, attribute, content, and manipulation methods one call at a time.
// The comment lines after each call show the output recorded by the original
// author for that call.
func main() {
    html := `
<html>
<head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
    <p class="story">Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
    and they lived at the bottom of a well.
</p>
<p class="story">...</p>`

    // Load the HTML document
    dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
    if err != nil {
        log.Fatal(err)
    }

    var sel *goquery.Selection

    // Select by tag name.
    sel = dom.Find("a")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    // Select by tag, class, and id combined.
    sel = dom.Find("a.sister#link1")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    sel = dom.Find("p").Children()
    prettyPrint(sel.Nodes)
    // "b" []
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("p").ChildrenFiltered("#link1")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    sel = dom.Find("p.title").Siblings()
    prettyPrint(sel.Nodes)
    // "p" [{ class story}]
    // "p" [{ class story}]

    sel = dom.Find("a#link2").Siblings()
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a#link2").SiblingsFiltered("#link1")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    // Unlike Children, Contents also includes text nodes.
    sel = dom.Find("body").Contents()
    prettyPrint(sel.Nodes)
    // "\n    " []
    // "p" [{ class title}]
    // "\n        " []
    // "p" [{ class story}]
    // "\n    " []
    // "p" [{ class story}]

    // Closest searches upward starting from the element itself and returns
    // the nearest match.
    sel = dom.Find("a").Closest("a")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a").Closest("p")
    prettyPrint(sel.Nodes)
    // "p" [{ class story}]

    // Parent goes up exactly one level; duplicates are removed automatically.
    sel = dom.Find("a").Parent()
    prettyPrint(sel.Nodes)
    // "p" [{ class story}]

    sel = dom.Find("a").ParentFiltered("body")
    prettyPrint(sel.Nodes)
    // None

    // Parents keeps walking upward and collects every matching ancestor;
    // duplicates are removed automatically.
    sel = dom.Find("a").Parents()
    prettyPrint(sel.Nodes)
    // "p" [{ class story}]
    // "body" []
    // "html" []

    sel = dom.Find("a").ParentsFiltered("body")
    prettyPrint(sel.Nodes)
    // "body" []

    // Select the first element.
    sel = dom.Find("a").Eq(0)
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    // The filter can be a selector string or a function.
    sel = dom.Find("p").Filter(".story")
    prettyPrint(sel.Nodes)
    // "p" [{ class story}]
    // "p" [{ class story}]

    sel = dom.Find("p").FilterFunction(func(i int, s *goquery.Selection) bool { return s.Text() == "..." })
    prettyPrint(sel.Nodes)
    // "p" [{ class story}]

    sel = dom.Find("a").Not("#link1")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a#link2").Next()
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a#link2").NextFiltered("a")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a#link2").Prev()
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    sel = dom.Find("a#link2").PrevFiltered("a")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    sel = dom.Find("a#link1").NextAll()
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a#link1").NextAllFiltered("#link3")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    sel = dom.Find("a#link3").PrevAll()
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    sel = dom.Find("a#link3").PrevAllFiltered("#link1")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

    // End returns the selection from before the last filtering step.
    sel = dom.Find("a").Parent().End() // same as sel = dom.Find("a")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
    // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    // Attr reads from the first <a> in the selection.
    sel = dom.Find("a")
    fmt.Println(sel.Attr("id"))
    // link1 true
    fmt.Println(sel.AttrOr("noid", "nil"))
    // nil

    sel = dom.Find("a")
    fmt.Println(sel.Attr("class"))
    // sister true
    fmt.Println(sel.AttrOr("noclass", "nil"))
    // nil

    // True if any element in the selection has the class.
    sel = dom.Find("p")
    fmt.Println(sel.HasClass("story"))
    // true

    // Returns the text of all elements in the selection.
    sel = dom.Find("title")
    fmt.Println(sel.Text())
    // The Dormouse's story

    // Length and Size mean the same thing.
    sel = dom.Find("a")
    fmt.Println(sel.Length())
    // 3
    fmt.Println(sel.Size())
    // 3

    // Inner HTML of the first matched element.
    sel = dom.Find("head")
    fmt.Println(sel.Html())
    //<title>The Dormouse&#39;s story</title> <nil>

    // Outer HTML, including the element's own tag.
    sel = dom.Find("head")
    fmt.Println(goquery.OuterHtml(sel))
    // <head><title>The Dormouse&#39;s story</title></head> <nil>

    // Clone returns a copy of the selection; useful when the original must
    // not be modified. Each manipulation example below works on a fresh clone.
    var domCopy *goquery.Selection
    domCopy = dom.Find(".title").Clone()

    // SetAttr replaces the existing class value.
    domCopy = dom.Clone()
    sel = domCopy.Find("a").SetAttr("class", "classA")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class classA} { id link1}]
    // "a" [{ href http://example.com/lacie} { class classA} { id link2}]
    // "a" [{ href http://example.com/tillie} { class classA} { id link3}]
    domCopy = dom.Clone()
    sel = domCopy.Find("a#link2").SetAttr("id", "link4")
    prettyPrint(domCopy.Find("a").Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
    // "a" [{ href http://example.com/lacie} { class sister} { id link4}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

    // AddClass keeps the existing class.
    domCopy = dom.Clone()
    sel = domCopy.Find("a").AddClass("classB")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister  classB} { id link1}]
    // "a" [{ href http://example.com/lacie} { class sister  classB} { id link2}]
    // "a" [{ href http://example.com/tillie} { class sister  classB} { id link3}]

    domCopy = dom.Clone()
    sel = domCopy.Find("p").RemoveClass("story")
    prettyPrint(sel.Nodes)
    // "p" [{ class title}]
    // "p" []
    // "p" []

    domCopy = dom.Clone()
    sel = domCopy.Find("a").ToggleClass("brother").ToggleClass("sister")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class brother} { id link1}]
    // "a" [{ href http://example.com/lacie} { class brother} { id link2}]
    // "a" [{ href http://example.com/tillie} { class brother} { id link3}]

    domCopy = dom.Clone()
    sel = domCopy.Find("a").SetAttr("val", "123")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { class sister} { id link1} { val 123}]
    // "a" [{ href http://example.com/lacie} { class sister} { id link2} { val 123}]
    // "a" [{ href http://example.com/tillie} { class sister} { id link3} { val 123}]

    domCopy = dom.Clone()
    sel = domCopy.Find("a").RemoveAttr("class")
    prettyPrint(sel.Nodes)
    // "a" [{ href http://example.com/elsie} { id link1}]
    // "a" [{ href http://example.com/lacie} { id link2}]
    // "a" [{ href http://example.com/tillie} { id link3}]

    domCopy = dom.Clone()
    sel = domCopy.Find(".title").SetText("123")
    fmt.Println(goquery.OuterHtml(sel))
    // The original <b> has been removed.
    // <p class="title">123</p> <nil>

    domCopy = dom.Clone()
    sel = domCopy.Find(".title").BeforeHtml("<div>before</div>")
    fmt.Println(domCopy.Find("body").Html())
    // <div>before</div><p class="title"><b>The Dormouse&#39;s story</b></p>

    domCopy = dom.Clone()
    sel = domCopy.Find(".title").AfterHtml("<div>after</div>")
    fmt.Println(domCopy.Find("body").Html())
    // <p class="title"><b>The Dormouse&#39;s story</b></p><div>after</div>

    domCopy = dom.Clone()
    sel = domCopy.Find(".title").PrependHtml("<div>prepend</div>")
    fmt.Println(goquery.OuterHtml(sel))
    // <p class="title"><div>prepend</div><b>The Dormouse&#39;s story</b></p>

    domCopy = dom.Clone()
    sel = domCopy.Find(".title").AppendHtml("<div>append</div>")
    fmt.Println(goquery.OuterHtml(sel))
    // <p class="title"><b>The Dormouse&#39;s story</b><div>append</div></p>

    // Prepend inserts the elements matched by its argument at the front of
    // every found element. Per the original author's note, the source
    // element still exists afterwards.
    domCopy = dom.Clone()
    sel = domCopy.Find("a").Prepend("title")
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <p class="title"><b>The Dormouse&#39;s story</b></p>
    //     <p class="story">Once upon a time there were three little sisters; and their names were
    //     <a href="http://example.com/elsie" class="sister" id="link1"><title>The Dormouse&#39;s story</title>Elsie</a>,
    //     <a href="http://example.com/lacie" class="sister" id="link2"><title>The Dormouse&#39;s story</title>Lacie</a> and
    //     <a href="http://example.com/tillie" class="sister" id="link3"><title>The Dormouse&#39;s story</title>Tillie</a>;
    //     and they lived at the bottom of a well.
    // </p>

    // Append inserts the elements matched by its argument at the end of
    // every found element. Per the original author's note, the source
    // element still exists afterwards.
    domCopy = dom.Clone()
    sel = domCopy.Find("a").Append("title")
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <p class="title"><b>The Dormouse&#39;s story</b></p>
    //     <p class="story">Once upon a time there were three little sisters; and their names were
    //     <a href="http://example.com/elsie" class="sister" id="link1">Elsie<title>The Dormouse&#39;s story</title></a>,
    //     <a href="http://example.com/lacie" class="sister" id="link2">Lacie<title>The Dormouse&#39;s story</title></a> and
    //     <a href="http://example.com/tillie" class="sister" id="link3">Tillie<title>The Dormouse&#39;s story</title></a>;
    //     and they lived at the bottom of a well.
    // </p>

    // Empty removes the contents of each matched element.
    domCopy = dom.Clone()
    sel = domCopy.Find("p").Empty()
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <p class="title"></p>
    //     <p class="story"></p>
    // <p class="story"></p>

    // Remove deletes the matched elements themselves.
    domCopy = dom.Clone()
    sel = domCopy.Find("p").Remove()
    fmt.Println(domCopy.Html())
    fmt.Println("=============")
    // <html><head><title>The Dormouse&#39;s story</title></head>
    // <body>
    //
    // </body></html>

    domCopy = dom.Clone()
    sel = domCopy.Find("p").SetHtml("<b>test</b>")
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <p class="title"><b>test</b></p>
    //     <p class="story"><b>test</b></p>
    // <p class="story"><b>test</b></p>

    domCopy = dom.Clone()
    sel = domCopy.Find("a").ReplaceWithHtml("<p></p>")
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <p class="title"><b>The Dormouse&#39;s story</b></p>
    //     <p class="story">Once upon a time there were three little sisters; and their names were
    //     <p></p>,
    //     <p></p> and
    //     <p></p>;
    //     and they lived at the bottom of a well.
    // </p>

    // WrapHtml wraps each matched element separately.
    domCopy = dom.Clone()
    sel = domCopy.Find("a").WrapHtml("<div></div>")
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a></div>,
    // <div><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a></div> and
    // <div><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>;

    // WrapAllHtml gathers all matched elements into a single wrapper.
    domCopy = dom.Clone()
    sel = domCopy.Find("a").WrapAllHtml("<div></div>")
    fmt.Println(domCopy.Find("body").Html())
    fmt.Println("=============")
    // <p class="title"><b>The Dormouse&#39;s story</b></p>
    //     <p class="story">Once upon a time there were three little sisters; and their names were
    //     <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>,
    //      and
    //     ;
    //     and they lived at the bottom of a well.
    // </p>

    // Map collects one string per matched element; here, each link's href,
    // or "None" when the attribute is missing.
    domCopy = dom.Clone()
    strList := domCopy.Find("a").Map(func(i int, s *goquery.Selection) string {
        str := s.AttrOr("href", "None")
        return str
    })
    fmt.Println(strList)
    // [http://example.com/elsie http://example.com/lacie http://example.com/tillie]

    // Is tests the current selection itself against the selector.
    sel = domCopy.Find("p")
    fmt.Println(sel.Is("b"))
    // false

}

// prettyPrint writes one line per node containing the node's Data in Go
// syntax followed by its attribute list, then a blank line and a separator
// line.
func prettyPrint(nodes []*html.Node) {
    for idx := 0; idx < len(nodes); idx++ {
        node := nodes[idx]
        fmt.Printf("%#v %v\n", node.Data, node.Attr)
    }

    // Visually separate this dump from the next one.
    fmt.Println()
    fmt.Println("============")
}

參考

golang goquery selector(选择器) 示例大全

留言