[Go] goquery 教學

程式語言:Go
Package:goquery
官方文件
官方 GitHub

功能:以類似 jQuery 的語法解析 HTML
jQuery Selectors
goquery 只接受 UTF-8 編碼的內容,因此必須先自行將網頁轉成 UTF-8,再交給 goquery 處理
  1. package main
  2.  
  3. import (
  4. "bytes"
  5. "fmt"
  6. "io/ioutil"
  7. "log"
  8. "net/http"
  9.  
  10. "github.com/PuerkitoBio/goquery"
  11. "golang.org/x/text/encoding/traditionalchinese"
  12. "golang.org/x/text/transform"
  13. )
  14.  
  15. func main() {
  16. // Request the HTML page.
  17. res, err := http.Get("https://www.google.com.tw/")
  18. if err != nil {
  19. log.Fatal(err)
  20. }
  21. defer res.Body.Close()
  22.  
  23. if res.StatusCode != 200 {
  24. log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
  25. }
  26.  
  27. b, err := ioutil.ReadAll(res.Body)
  28. // goquery 限定需為 UTF-8
  29. b, _ = DecodeBig5(b)
  30. r := bytes.NewReader(b)
  31. // Load the HTML document
  32. dom, err := goquery.NewDocumentFromReader(r)
  33. if err != nil {
  34. log.Fatal(err)
  35. }
  36.  
  37. data := make(map[string]string)
  38. // Find the link items
  39. dom.Find("a").Each(func(i int, s *goquery.Selection) {
  40. // For each item found, get the band and title
  41. title := s.Text()
  42. url, _ := s.Attr("href")
  43. data[title] = url
  44. })
  45.  
  46. fmt.Printf("%+v", data)
  47. }
  48.  
  49. //convert BIG5 to UTF-8
  50. func DecodeBig5(s []byte) ([]byte, error) {
  51. I := bytes.NewReader(s)
  52. O := transform.NewReader(I, traditionalchinese.Big5.NewDecoder())
  53. b, err := ioutil.ReadAll(O)
  54. if err != nil {
  55. return nil, err
  56. }
  57. return b, nil
  58. }
  59.  
  60. //convert UTF-8 to BIG5
  61. func EncodeBig5(s []byte) ([]byte, error) {
  62. I := bytes.NewReader(s)
  63. O := transform.NewReader(I, traditionalchinese.Big5.NewEncoder())
  64. b, err := ioutil.ReadAll(O)
  65. if err != nil {
  66. return nil, err
  67. }
  68. return b, nil
  69. }
  1. package main
  2.  
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "log"
  8. "net/http"
  9.  
  10. "github.com/PuerkitoBio/goquery"
  11. "github.com/pkg/errors"
  12. "golang.org/x/net/html/charset"
  13. "golang.org/x/text/encoding"
  14. "golang.org/x/text/transform"
  15. )
  16.  
  17. func main() {
  18. // Request the HTML page.
  19. resp, err := http.Get("http://www.zhenai.com/zhenghun")
  20. if err != nil {
  21. panic(err)
  22. }
  23. defer resp.Body.Close()
  24.  
  25. if resp.StatusCode != 200 {
  26. log.Fatalf("status code error: %d %s", resp.StatusCode, resp.Status)
  27. }
  28.  
  29. // 自動判斷編碼,轉為 UTF-8,需注意有時會判斷錯誤
  30. e, _, _, _ := DetermineEncodingFromReader(resp.Body)
  31. utf8Reader := transform.NewReader(resp.Body, e.NewDecoder())
  32.  
  33. // Load the HTML document
  34. dom, err := goquery.NewDocumentFromReader(utf8Reader)
  35. if err != nil {
  36. log.Fatal(err)
  37. }
  38.  
  39. data := make(map[string]string)
  40. // Find the link items
  41. dom.Find("a").Each(func(i int, s *goquery.Selection) {
  42. // For each item found, get the band and title
  43. title := s.Text()
  44. url, _ := s.Attr("href")
  45. data[title] = url
  46. })
  47.  
  48. fmt.Printf("%+v", data)
  49. }
  50.  
  51. // DetermineEncodingFromReader 偵測 reader 的編碼
  52. func DetermineEncodingFromReader(r io.Reader) (encoding.Encoding, string, bool, error) {
  53. b, err := bufio.NewReader(r).Peek(1024)
  54. if err != nil {
  55. fmt.Printf("r: %s : %s", r, err)
  56. err = errors.Wrap(err, "bufio.NewReader")
  57. return nil, "", false, err
  58. }
  59.  
  60. e, name, certain := charset.DetermineEncoding(b, "")
  61. return e, name, certain, nil
  62. }

讀取網頁

  1. package main
  2.  
  3. import (
  4. "fmt"
  5. "log"
  6. "strings"
  7.  
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10.  
  11. func main() {
  12. html := ""
  13.  
  14. // Load the HTML document
  15. dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
  16. if err != nil {
  17. log.Fatal(err)
  18. }
  19.  
  20. str, _ := dom.Html()
  21.  
  22. fmt.Printf("%s", str)
  23. }

使用範例

html 如下
html := `
<html>
<head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
    <p class="story">Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
    and they lived at the bottom of a well.
</p>
<p class="story">...</p>`

// Load the HTML document
dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
    log.Fatal(err)
}

var sel *goquery.Selection

訪問方法

幾乎等同 jQuery,可參考 [jQuery] 基本架構
可將 dom 視為 jQuery 的 $,由此可看出 goquery 與 jQuery 的用法差異極小
  1. sel = dom.Find("a")
  2. prettyPrint(sel.Nodes)
  3. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  4. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  5. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  6.  
  7. sel = dom.Find("a.sister#link1")
  8. prettyPrint(sel.Nodes)
  9. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]

搜尋方法

  1. sel = dom.Find("p").Children()
  2. prettyPrint(sel.Nodes)
  3. // "b" []
  4. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  5. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  6. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  7.  
  8. sel = dom.Find("p").ChildrenFiltered("#link1")
  9. prettyPrint(sel.Nodes)
  10. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  1. sel = dom.Find("p.title").Siblings()
  2. prettyPrint(sel.Nodes)
  3. // "p" [{ class story}]
  4. // "p" [{ class story}]
  5.  
  6. sel = dom.Find("a#link2").Siblings()
  7. prettyPrint(sel.Nodes)
  8. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  9. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  10.  
  11. sel = dom.Find("a#link2").SiblingsFiltered("#link1")
  12. prettyPrint(sel.Nodes)
  13. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  1. // 除 children 外,包含 text nodes
  2. sel = dom.Find("body").Contents()
  3. prettyPrint(sel.Nodes)
  4. // "\n " []
  5. // "p" [{ class title}]
  6. // "\n " []
  7. // "p" [{ class story}]
  8. // "\n " []
  9. // "p" [{ class story}]
  1. // 含自己往上找,找到最接近的 parents
  2. sel = dom.Find("a").Closest("a")
  3. prettyPrint(sel.Nodes)
  4. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  5. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  6. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  7.  
  8. sel = dom.Find("a").Closest("p")
  9. prettyPrint(sel.Nodes)
  10. // "p" [{ class story}]
  1. // 找到其 parent,只往上查找一層,會自動移除重複的
  2. sel = dom.Find("a").Parent()
  3. prettyPrint(sel.Nodes)
  4. // "p" [{ class story}]
  5.  
  6. sel = dom.Find("a").ParentFiltered("body")
  7. prettyPrint(sel.Nodes)
  8. // None
  1. // 找到其 parents,往上查找不停止,找出所有符合條件的,會自動移除重複的
  2. sel = dom.Find("a").Parents()
  3. prettyPrint(sel.Nodes)
  4. // "p" [{ class story}]
  5. // "body" []
  6. // "html" []
  7.  
  8. sel = dom.Find("a").ParentsFiltered("body")
  9. prettyPrint(sel.Nodes)
  10. // "body" []
  1. // 選擇第一個
  2. sel = dom.Find("a").Eq(0)
  3. prettyPrint(sel.Nodes)
  4. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  1. // 可為 string 也可以是 function
  2. sel = dom.Find("p").Filter(".story")
  3. prettyPrint(sel.Nodes)
  4. // "p" [{ class story}]
  5. // "p" [{ class story}]
  6.  
  7. sel = dom.Find("p").FilterFunction(func(i int, s *goquery.Selection) bool { return s.Text() == "..." })
  8. prettyPrint(sel.Nodes)
  9. // "p" [{ class story}]
  1. sel = dom.Find("a").Not("#link1")
  2. prettyPrint(sel.Nodes)
  3. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  4. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  1. sel = dom.Find("a#link2").Next()
  2. prettyPrint(sel.Nodes)
  3. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  4.  
  5. sel = dom.Find("a#link2").NextFiltered("a")
  6. prettyPrint(sel.Nodes)
  7. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  1. sel = dom.Find("a#link2").Prev()
  2. prettyPrint(sel.Nodes)
  3. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  4.  
  5. sel = dom.Find("a#link2").PrevFiltered("a")
  6. prettyPrint(sel.Nodes)
  7. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  1. sel = dom.Find("a#link1").NextAll()
  2. prettyPrint(sel.Nodes)
  3. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  4. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  5.  
  6. sel = dom.Find("a#link1").NextAllFiltered("#link3")
  7. prettyPrint(sel.Nodes)
  8. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  1. sel = dom.Find("a#link3").PrevAll()
  2. prettyPrint(sel.Nodes)
  3. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  4. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  5.  
  6. sel = dom.Find("a#link3").PrevAllFiltered("#link1")
  7. prettyPrint(sel.Nodes)
  8. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  1. // 回傳上一個 filter 結果
  2. sel = dom.Find("a").Parent().End() // 等同 sel = dom.Find("a")
  3. prettyPrint(sel.Nodes)
  4. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  5. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  6. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]

Attributes

  1. // 會以第一個 a 為主
  2. sel = dom.Find("a")
  3. fmt.Println(sel.Attr("id"))
  4. // link1 true
  5. fmt.Println(sel.AttrOr("noid", "nil"))
  6. // nil
  7.  
  8. sel = dom.Find("a")
  9. fmt.Println(sel.Attr("class"))
  10. // sister true
  11. fmt.Println(sel.AttrOr("noclass", "nil"))
  12. // nil
  1. // 只要任何 elements 有就是 True
  2. sel = dom.Find("p")
  3. fmt.Println(sel.HasClass("story"))
  4. // true
  1. // 回傳所有元件的 text
  2. sel = dom.Find("title")
  3. fmt.Println(sel.Text())
  4. // The Dormouse's story

Properties

  1. // 兩者意義一樣
  2. sel = dom.Find("a")
  3. fmt.Println(sel.Length())
  4. // 3
  5. fmt.Println(sel.Size())
  6. // 3

網頁內容

  1. sel = dom.Find("head")
  2. fmt.Println(sel.Html())
  3. //<title>The Dormouse&#39;s story</title> <nil>
  1. sel = dom.Find("head")
  2. fmt.Println(goquery.OuterHtml(sel))
  3. // <head><title>The Dormouse&#39;s story</title></head> <nil>

更改方法

  1. // # 複製原本的內容並回傳,可用在不想被更改的元件上
  2. var domCopy *goquery.Selection
  3. domCopy = dom.Find(".title").Clone()
  1. // # 會將原本的 class 替換掉
  2. domCopy = dom.Clone()
  3. sel = domCopy.Find("a").SetAttr("class", "classA")
  4. prettyPrint(sel.Nodes)
  5. // "a" [{ href http://example.com/elsie} { class classA} { id link1}]
  6. // "a" [{ href http://example.com/lacie} { class classA} { id link2}]
  7. // "a" [{ href http://example.com/tillie} { class classA} { id link3}]
  8. domCopy = dom.Clone()
  9. sel = domCopy.Find("a#link2").SetAttr("id", "link4")
  10. prettyPrint(domCopy.Find("a").Nodes)
  11. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  12. // "a" [{ href http://example.com/lacie} { class sister} { id link4}]
  13. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  1. // # 保留原本的 class
  2. domCopy = dom.Clone()
  3. sel = domCopy.Find("a").AddClass("classB")
  4. prettyPrint(sel.Nodes)
  5. // "a" [{ href http://example.com/elsie} { class sister classB} { id link1}]
  6. // "a" [{ href http://example.com/lacie} { class sister classB} { id link2}]
  7. // "a" [{ href http://example.com/tillie} { class sister classB} { id link3}]
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("p").RemoveClass("story")
  3. prettyPrint(sel.Nodes)
  4. // "p" [{ class title}]
  5. // "p" []
  6. // "p" []
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("a").ToggleClass("brother").ToggleClass("sister")
  3. prettyPrint(sel.Nodes)
  4. // "a" [{ href http://example.com/elsie} { class brother} { id link1}]
  5. // "a" [{ href http://example.com/lacie} { class brother} { id link2}]
  6. // "a" [{ href http://example.com/tillie} { class brother} { id link3}]
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("a").SetAttr("val", "123")
  3. prettyPrint(sel.Nodes)
  4. // "a" [{ href http://example.com/elsie} { class sister} { id link1} { val 123}]
  5. // "a" [{ href http://example.com/lacie} { class sister} { id link2} { val 123}]
  6. // "a" [{ href http://example.com/tillie} { class sister} { id link3} { val 123}]
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("a").RemoveAttr("class")
  3. prettyPrint(sel.Nodes)
  4. // "a" [{ href http://example.com/elsie} { id link1}]
  5. // "a" [{ href http://example.com/lacie} { id link2}]
  6. // "a" [{ href http://example.com/tillie} { id link3}]
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find(".title").SetText("123")
  3. fmt.Println(goquery.OuterHtml(sel))
  4. // # 原來的 <b> 被移除了
  5. // <p class="title">123</p> <nil>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find(".title").BeforeHtml("<div>before</div>")
  3. fmt.Println(domCopy.Find("body").Html())
  4. // <div>before</div><p class="title"><b>The Dormouse&#39;s story</b></p>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find(".title").AfterHtml("<div>after</div>")
  3. fmt.Println(domCopy.Find("body").Html())
  4. // <p class="title"><b>The Dormouse&#39;s story</b></p><div>after</div>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find(".title").PrependHtml("<div>prepend</div>")
  3. fmt.Println(goquery.OuterHtml(sel))
  4. // <p class="title"><div>prepend</div><b>The Dormouse&#39;s story</b></p>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find(".title").AppendHtml("<div>append</div>")
  3. fmt.Println(goquery.OuterHtml(sel))
  4. // <p class="title"><b>The Dormouse&#39;s story</b><div>append</div></p>
  1. // 將選擇器(此處為 "title")找到的元件插入到 Find 結果中每個元件內容的最前面;依 goquery 文件,文件中既有的元件會被「移動」到最後一個目標內,其餘目標放入的是複本
  2. domCopy = dom.Clone()
  3. sel = domCopy.Find("a").Prepend("title")
  4. fmt.Println(domCopy.Find("body").Html())
  5. fmt.Println("=============")
  6. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  7. // <p class="story">Once upon a time there were three little sisters; and their names were
  8. // <a href="http://example.com/elsie" class="sister" id="link1"><title>The Dormouse&#39;s story</title>Elsie</a>,
  9. // <a href="http://example.com/lacie" class="sister" id="link2"><title>The Dormouse&#39;s story</title>Lacie</a> and
  10. // <a href="http://example.com/tillie" class="sister" id="link3"><title>The Dormouse&#39;s story</title>Tillie</a>;
  11. // and they lived at the bottom of a well.
  12. // </p>
  1. // 將選擇器(此處為 "title")找到的元件插入到 Find 結果中每個元件內容的最後面;依 goquery 文件,文件中既有的元件會被「移動」到最後一個目標內,其餘目標放入的是複本
  2. domCopy = dom.Clone()
  3. sel = domCopy.Find("a").Append("title")
  4. fmt.Println(domCopy.Find("body").Html())
  5. fmt.Println("=============")
  6. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  7. // <p class="story">Once upon a time there were three little sisters; and their names were
  8. // <a href="http://example.com/elsie" class="sister" id="link1">Elsie<title>The Dormouse&#39;s story</title></a>,
  9. // <a href="http://example.com/lacie" class="sister" id="link2">Lacie<title>The Dormouse&#39;s story</title></a> and
  10. // <a href="http://example.com/tillie" class="sister" id="link3">Tillie<title>The Dormouse&#39;s story</title></a>;
  11. // and they lived at the bottom of a well.
  12. // </p>
  1. // 清空內容
  2. domCopy = dom.Clone()
  3. sel = domCopy.Find("p").Empty()
  4. fmt.Println(domCopy.Find("body").Html())
  5. fmt.Println("=============")
  6. // <p class="title"></p>
  7. // <p class="story"></p>
  8. // <p class="story"></p>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("p").Remove()
  3. fmt.Println(domCopy.Html())
  4. fmt.Println("=============")
  5. // <html><head><title>The Dormouse&#39;s story</title></head>
  6. // <body>
  7. //
  8. // </body></html>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("p").SetHtml("<b>test</b>")
  3. fmt.Println(domCopy.Find("body").Html())
  4. fmt.Println("=============")
  5. // <p class="title"><b>test</b></p>
  6. // <p class="story"><b>test</b></p>
  7. // <p class="story"><b>test</b></p>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("a").ReplaceWithHtml("<p></p>")
  3. fmt.Println(domCopy.Find("body").Html())
  4. fmt.Println("=============")
  5. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  6. // <p class="story">Once upon a time there were three little sisters; and their names were
  7. // <p></p>,
  8. // <p></p> and
  9. // <p></p>;
  10. // and they lived at the bottom of a well.
  11. // </p>
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("a").WrapHtml("<div></div>")
  3. fmt.Println(domCopy.Find("body").Html())
  4. fmt.Println("=============")
  5. // <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a></div>,
  6. // <div><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a></div> and
  7. // <div><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>;
  1. domCopy = dom.Clone()
  2. sel = domCopy.Find("a").WrapAllHtml("<div></div>")
  3. fmt.Println(domCopy.Find("body").Html())
  4. fmt.Println("=============")
  5. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  6. // <p class="story">Once upon a time there were three little sisters; and their names were
  7. // <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>,
  8. // and
  9. // ;
  10. // and they lived at the bottom of a well.
  11. // </p>

特殊方法

  1. domCopy = dom.Clone()
  2. strList := domCopy.Find("a").Map(func(i int, s *goquery.Selection) string {
  3. str := s.AttrOr("href", "None")
  4. return str
  5. })
  6. fmt.Println(strList)
  7. // [http://example.com/elsie http://example.com/lacie http://example.com/tillie]
  1. // 以當前元件做判斷
  2. sel = domCopy.Find("p")
  3. fmt.Println(sel.Is("b"))
  4. // false
  1. // for loop
  2. sel := dom.Find(".selector")
  3. for i := range sel.Nodes {
  4. single := sel.Eq(i)
  5. // use `single` as a selection of 1 node
  6. }

完整原始碼

  1. package main
  2.  
  3. import (
  4. "fmt"
  5. "log"
  6. "strings"
  7.  
  8. "github.com/PuerkitoBio/goquery"
  9. "golang.org/x/net/html"
  10. )
  11.  
  // main runs every goquery call from this tutorial against one small sample document; the comment lines after each call show the output reported by the tutorial.
  12. func main() {
  13. html := `
  14. <html>
  15. <head><title>The Dormouse's story</title></head>
  16. <body>
  17. <p class="title"><b>The Dormouse's story</b></p>
  18. <p class="story">Once upon a time there were three little sisters; and their names were
  19. <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
  20. <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
  21. <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
  22. and they lived at the bottom of a well.
  23. </p>
  24. <p class="story">...</p>`
  25.  
  26. // Load the HTML document
  27. dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
  28. if err != nil {
  29. log.Fatal(err)
  30. }
  31.  
  32. var sel *goquery.Selection
  33.  
  34. sel = dom.Find("a")
  35. prettyPrint(sel.Nodes)
  36. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  37. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  38. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  39.  
  40. sel = dom.Find("a.sister#link1")
  41. prettyPrint(sel.Nodes)
  42. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  43.  
  44. sel = dom.Find("p").Children()
  45. prettyPrint(sel.Nodes)
  46. // "b" []
  47. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  48. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  49. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  50.  
  51. sel = dom.Find("p").ChildrenFiltered("#link1")
  52. prettyPrint(sel.Nodes)
  53. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  54.  
  55. sel = dom.Find("p.title").Siblings()
  56. prettyPrint(sel.Nodes)
  57. // "p" [{ class story}]
  58. // "p" [{ class story}]
  59.  
  60. sel = dom.Find("a#link2").Siblings()
  61. prettyPrint(sel.Nodes)
  62. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  63. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  64.  
  65. sel = dom.Find("a#link2").SiblingsFiltered("#link1")
  66. prettyPrint(sel.Nodes)
  67. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  68.  
  69. // Besides the child elements, Contents also returns text nodes
  70. sel = dom.Find("body").Contents()
  71. prettyPrint(sel.Nodes)
  72. // "\n " []
  73. // "p" [{ class title}]
  74. // "\n " []
  75. // "p" [{ class story}]
  76. // "\n " []
  77. // "p" [{ class story}]
  78.  
  79. // Starting from the element itself and walking upwards, find the closest match
  80. sel = dom.Find("a").Closest("a")
  81. prettyPrint(sel.Nodes)
  82. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  83. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  84. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  85.  
  86. sel = dom.Find("a").Closest("p")
  87. prettyPrint(sel.Nodes)
  88. // "p" [{ class story}]
  89.  
  90. // Get the parent, going up exactly one level; duplicates are removed automatically
  91. sel = dom.Find("a").Parent()
  92. prettyPrint(sel.Nodes)
  93. // "p" [{ class story}]
  94.  
  95. sel = dom.Find("a").ParentFiltered("body")
  96. prettyPrint(sel.Nodes)
  97. // None
  98.  
  99. // Get all ancestors, walking up without stopping and collecting every match; duplicates are removed automatically
  100. sel = dom.Find("a").Parents()
  101. prettyPrint(sel.Nodes)
  102. // "p" [{ class story}]
  103. // "body" []
  104. // "html" []
  105.  
  106. sel = dom.Find("a").ParentsFiltered("body")
  107. prettyPrint(sel.Nodes)
  108. // "body" []
  109.  
  110. // Select the first one
  111. sel = dom.Find("a").Eq(0)
  112. prettyPrint(sel.Nodes)
  113. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  114.  
  115. // The filter can be a selector string or a function
  116. sel = dom.Find("p").Filter(".story")
  117. prettyPrint(sel.Nodes)
  118. // "p" [{ class story}]
  119. // "p" [{ class story}]
  120.  
  121. sel = dom.Find("p").FilterFunction(func(i int, s *goquery.Selection) bool { return s.Text() == "..." })
  122. prettyPrint(sel.Nodes)
  123. // "p" [{ class story}]
  124.  
  125. sel = dom.Find("a").Not("#link1")
  126. prettyPrint(sel.Nodes)
  127. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  128. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  129.  
  130. sel = dom.Find("a#link2").Next()
  131. prettyPrint(sel.Nodes)
  132. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  133.  
  134. sel = dom.Find("a#link2").NextFiltered("a")
  135. prettyPrint(sel.Nodes)
  136. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  137.  
  138. sel = dom.Find("a#link2").Prev()
  139. prettyPrint(sel.Nodes)
  140. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  141.  
  142. sel = dom.Find("a#link2").PrevFiltered("a")
  143. prettyPrint(sel.Nodes)
  144. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  145.  
  146. sel = dom.Find("a#link1").NextAll()
  147. prettyPrint(sel.Nodes)
  148. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  149. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  150.  
  151. sel = dom.Find("a#link1").NextAllFiltered("#link3")
  152. prettyPrint(sel.Nodes)
  153. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  154.  
  155. sel = dom.Find("a#link3").PrevAll()
  156. prettyPrint(sel.Nodes)
  157. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  158. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  159.  
  160. sel = dom.Find("a#link3").PrevAllFiltered("#link1")
  161. prettyPrint(sel.Nodes)
  162. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  163.  
  164. // Return the result of the previous filter step
  165. sel = dom.Find("a").Parent().End() // same as sel = dom.Find("a")
  166. prettyPrint(sel.Nodes)
  167. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  168. // "a" [{ href http://example.com/lacie} { class sister} { id link2}]
  169. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  170.  
  171. // Only the first <a> is used
  172. sel = dom.Find("a")
  173. fmt.Println(sel.Attr("id"))
  174. // link1 true
  175. fmt.Println(sel.AttrOr("noid", "nil"))
  176. // nil
  177.  
  178. sel = dom.Find("a")
  179. fmt.Println(sel.Attr("class"))
  180. // sister true
  181. fmt.Println(sel.AttrOr("noclass", "nil"))
  182. // nil
  183.  
  184. // True as soon as any of the elements has the class
  185. sel = dom.Find("p")
  186. fmt.Println(sel.HasClass("story"))
  187. // true
  188.  
  189. // Return the text of all elements
  190. sel = dom.Find("title")
  191. fmt.Println(sel.Text())
  192. // The Dormouse's story
  193.  
  194. // Both mean the same thing
  195. sel = dom.Find("a")
  196. fmt.Println(sel.Length())
  197. // 3
  198. fmt.Println(sel.Size())
  199. // 3
  200.  
  201. sel = dom.Find("head")
  202. fmt.Println(sel.Html())
  203. //<title>The Dormouse&#39;s story</title> <nil>
  204.  
  205. sel = dom.Find("head")
  206. fmt.Println(goquery.OuterHtml(sel))
  207. // <head><title>The Dormouse&#39;s story</title></head> <nil>
  208.  
  209. // # Copy the original content and return it; useful for elements that must not be modified
  210. var domCopy *goquery.Selection
  211. domCopy = dom.Find(".title").Clone()
  212.  
  213. // # Replaces the original class
  214. domCopy = dom.Clone()
  215. sel = domCopy.Find("a").SetAttr("class", "classA")
  216. prettyPrint(sel.Nodes)
  217. // "a" [{ href http://example.com/elsie} { class classA} { id link1}]
  218. // "a" [{ href http://example.com/lacie} { class classA} { id link2}]
  219. // "a" [{ href http://example.com/tillie} { class classA} { id link3}]
  220. domCopy = dom.Clone()
  221. sel = domCopy.Find("a#link2").SetAttr("id", "link4")
  222. prettyPrint(domCopy.Find("a").Nodes)
  223. // "a" [{ href http://example.com/elsie} { class sister} { id link1}]
  224. // "a" [{ href http://example.com/lacie} { class sister} { id link4}]
  225. // "a" [{ href http://example.com/tillie} { class sister} { id link3}]
  226.  
  227. // # Keeps the original class
  228. domCopy = dom.Clone()
  229. sel = domCopy.Find("a").AddClass("classB")
  230. prettyPrint(sel.Nodes)
  231. // "a" [{ href http://example.com/elsie} { class sister classB} { id link1}]
  232. // "a" [{ href http://example.com/lacie} { class sister classB} { id link2}]
  233. // "a" [{ href http://example.com/tillie} { class sister classB} { id link3}]
  234.  
  235. domCopy = dom.Clone()
  236. sel = domCopy.Find("p").RemoveClass("story")
  237. prettyPrint(sel.Nodes)
  238. // "p" [{ class title}]
  239. // "p" []
  240. // "p" []
  241.  
  242. domCopy = dom.Clone()
  243. sel = domCopy.Find("a").ToggleClass("brother").ToggleClass("sister")
  244. prettyPrint(sel.Nodes)
  245. // "a" [{ href http://example.com/elsie} { class brother} { id link1}]
  246. // "a" [{ href http://example.com/lacie} { class brother} { id link2}]
  247. // "a" [{ href http://example.com/tillie} { class brother} { id link3}]
  248.  
  249. domCopy = dom.Clone()
  250. sel = domCopy.Find("a").SetAttr("val", "123")
  251. prettyPrint(sel.Nodes)
  252. // "a" [{ href http://example.com/elsie} { class sister} { id link1} { val 123}]
  253. // "a" [{ href http://example.com/lacie} { class sister} { id link2} { val 123}]
  254. // "a" [{ href http://example.com/tillie} { class sister} { id link3} { val 123}]
  255.  
  256. domCopy = dom.Clone()
  257. sel = domCopy.Find("a").RemoveAttr("class")
  258. prettyPrint(sel.Nodes)
  259. // "a" [{ href http://example.com/elsie} { id link1}]
  260. // "a" [{ href http://example.com/lacie} { id link2}]
  261. // "a" [{ href http://example.com/tillie} { id link3}]
  262.  
  263. domCopy = dom.Clone()
  264. sel = domCopy.Find(".title").SetText("123")
  265. fmt.Println(goquery.OuterHtml(sel))
  266. // # The original <b> has been removed
  267. // <p class="title">123</p> <nil>
  268.  
  269. domCopy = dom.Clone()
  270. sel = domCopy.Find(".title").BeforeHtml("<div>before</div>")
  271. fmt.Println(domCopy.Find("body").Html())
  272. // <div>before</div><p class="title"><b>The Dormouse&#39;s story</b></p>
  273.  
  274. domCopy = dom.Clone()
  275. sel = domCopy.Find(".title").AfterHtml("<div>after</div>")
  276. fmt.Println(domCopy.Find("body").Html())
  277. // <p class="title"><b>The Dormouse&#39;s story</b></p><div>after</div>
  278.  
  279. domCopy = dom.Clone()
  280. sel = domCopy.Find(".title").PrependHtml("<div>prepend</div>")
  281. fmt.Println(goquery.OuterHtml(sel))
  282. // <p class="title"><div>prepend</div><b>The Dormouse&#39;s story</b></p>
  283.  
  284. domCopy = dom.Clone()
  285. sel = domCopy.Find(".title").AppendHtml("<div>append</div>")
  286. fmt.Println(goquery.OuterHtml(sel))
  287. // <p class="title"><b>The Dormouse&#39;s story</b><div>append</div></p>
  288.  
  289. // Put the prepended element at the front inside every found element; the tutorial states the original element still exists (NOTE(review): goquery's docs describe in-document nodes as moved into the last target, with clones elsewhere - confirm)
  290. domCopy = dom.Clone()
  291. sel = domCopy.Find("a").Prepend("title")
  292. fmt.Println(domCopy.Find("body").Html())
  293. fmt.Println("=============")
  294. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  295. // <p class="story">Once upon a time there were three little sisters; and their names were
  296. // <a href="http://example.com/elsie" class="sister" id="link1"><title>The Dormouse&#39;s story</title>Elsie</a>,
  297. // <a href="http://example.com/lacie" class="sister" id="link2"><title>The Dormouse&#39;s story</title>Lacie</a> and
  298. // <a href="http://example.com/tillie" class="sister" id="link3"><title>The Dormouse&#39;s story</title>Tillie</a>;
  299. // and they lived at the bottom of a well.
  300. // </p>
  301.  
  302. // Put the appended element at the end inside every found element; the tutorial states the original element still exists (NOTE(review): goquery's docs describe in-document nodes as moved into the last target, with clones elsewhere - confirm)
  303. domCopy = dom.Clone()
  304. sel = domCopy.Find("a").Append("title")
  305. fmt.Println(domCopy.Find("body").Html())
  306. fmt.Println("=============")
  307. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  308. // <p class="story">Once upon a time there were three little sisters; and their names were
  309. // <a href="http://example.com/elsie" class="sister" id="link1">Elsie<title>The Dormouse&#39;s story</title></a>,
  310. // <a href="http://example.com/lacie" class="sister" id="link2">Lacie<title>The Dormouse&#39;s story</title></a> and
  311. // <a href="http://example.com/tillie" class="sister" id="link3">Tillie<title>The Dormouse&#39;s story</title></a>;
  312. // and they lived at the bottom of a well.
  313. // </p>
  314.  
  315. // Empty the content
  316. domCopy = dom.Clone()
  317. sel = domCopy.Find("p").Empty()
  318. fmt.Println(domCopy.Find("body").Html())
  319. fmt.Println("=============")
  320. // <p class="title"></p>
  321. // <p class="story"></p>
  322. // <p class="story"></p>
  323.  
  324. domCopy = dom.Clone()
  325. sel = domCopy.Find("p").Remove()
  326. fmt.Println(domCopy.Html())
  327. fmt.Println("=============")
  328. // <html><head><title>The Dormouse&#39;s story</title></head>
  329. // <body>
  330. //
  331. // </body></html>
  332.  
  333. domCopy = dom.Clone()
  334. sel = domCopy.Find("p").SetHtml("<b>test</b>")
  335. fmt.Println(domCopy.Find("body").Html())
  336. fmt.Println("=============")
  337. // <p class="title"><b>test</b></p>
  338. // <p class="story"><b>test</b></p>
  339. // <p class="story"><b>test</b></p>
  340.  
  341. domCopy = dom.Clone()
  342. sel = domCopy.Find("a").ReplaceWithHtml("<p></p>")
  343. fmt.Println(domCopy.Find("body").Html())
  344. fmt.Println("=============")
  345. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  346. // <p class="story">Once upon a time there were three little sisters; and their names were
  347. // <p></p>,
  348. // <p></p> and
  349. // <p></p>;
  350. // and they lived at the bottom of a well.
  351. // </p>
  352.  
  353. domCopy = dom.Clone()
  354. sel = domCopy.Find("a").WrapHtml("<div></div>")
  355. fmt.Println(domCopy.Find("body").Html())
  356. fmt.Println("=============")
  357. // <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a></div>,
  358. // <div><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a></div> and
  359. // <div><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>;
  360.  
  361. domCopy = dom.Clone()
  362. sel = domCopy.Find("a").WrapAllHtml("<div></div>")
  363. fmt.Println(domCopy.Find("body").Html())
  364. fmt.Println("=============")
  365. // <p class="title"><b>The Dormouse&#39;s story</b></p>
  366. // <p class="story">Once upon a time there were three little sisters; and their names were
  367. // <div><a href="http://example.com/elsie" class="sister" id="link1">Elsie</a><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a><a href="http://example.com/tillie" class="sister" id="link3">Tillie</a></div>,
  368. // and
  369. // ;
  370. // and they lived at the bottom of a well.
  371. // </p>
  372.  
  373. domCopy = dom.Clone()
  374. strList := domCopy.Find("a").Map(func(i int, s *goquery.Selection) string {
  375. str := s.AttrOr("href", "None")
  376. return str
  377. })
  378. fmt.Println(strList)
  379. // [http://example.com/elsie http://example.com/lacie http://example.com/tillie]
  380.  
  381. // Tests the currently selected elements themselves
  382. sel = domCopy.Find("p")
  383. fmt.Println(sel.Is("b"))
  384. // false
  385.  
  386. }
  387.  
  388. func prettyPrint(nodes []*html.Node) {
  389. for _, n := range nodes {
  390. fmt.Printf("%#v %v\n", n.Data, n.Attr)
  391. }
  392. fmt.Println()
  393. fmt.Println("============")
  394. }

參考

golang goquery selector(选择器) 示例大全

留言