Commit Diff
Diff:
4852f411b4f51da99a0ac2323e9c160d2bae4a05
deee939b86f13061b42fc41d4187dd4d4f8a86ed
Commit:
deee939b86f13061b42fc41d4187dd4d4f8a86ed
Tree:
21f52d567eb0bdb9fa7c9d91a0974def58fd94a5
Author:
Omar Polo <op@omarpolo.com>
Date:
Sat Jan 8 15:04:43 2022 UTC
Message:
improve page title fetching: query "head > title" with cascadia instead of walking the HTML tree by hand
commit - 4852f411b4f51da99a0ac2323e9c160d2bae4a05
commit + deee939b86f13061b42fc41d4187dd4d4f8a86ed
blob - f06515a0b0efcfbf239261c81c56a4b26c77d096
blob + bfe51f1f83bd8262e6ab3e94a0b1961a3dbc3b54
--- go.mod
+++ go.mod
@@ -3,6 +3,7 @@ module gemitbot
go 1.16
require (
+ github.com/andybalholm/cascadia v1.3.1
github.com/fluffle/goirc v1.1.1
- golang.org/x/net v0.0.0-20210119194325-5f4716e94777
+ golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
)
blob - 4400525037d9afcf26c5591597d77f854a7968d3
blob + 41d801dc01f379780cd6b0bef1a387d63353bf95
--- go.sum
+++ go.sum
@@ -1,3 +1,5 @@
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/fluffle/goirc v1.1.1 h1:6nO+7rrED3Kp3mngoi9OmQmQHevNwDfjGpYUdWc1s0k=
github.com/fluffle/goirc v1.1.1/go.mod h1:iRzPLv2vkuZEtbns5LioYguJkRh/bvshuWg7izf1yeE=
github.com/golang/mock v1.5.0 h1:jlYHihg//f7RRwuPfptm04yp4s7O6Kw8EZiVYIGcH0g=
@@ -7,15 +9,18 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20210119194325-5f4716e94777 h1:003p0dJM77cxMSyCPFphvZf/Y5/NXf5fzg6ufd1/Oew=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
blob - 476e044795871e2d885340e6a29994c7fd265153
blob + 0f32a038718310378d29fe88c2811aca3466e8a0
--- main.go
+++ main.go
@@ -13,6 +13,7 @@ import (
"regexp"
"strings"
+ "github.com/andybalholm/cascadia"
irc "github.com/fluffle/goirc/client"
"golang.org/x/net/html"
)
@@ -118,29 +119,6 @@ func stringifyNode(node *html.Node) string {
return s
}
-func getPageTitle(node *html.Node) string {
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if child.Type == html.ElementNode && child.Data == "title" {
- text := stringifyNode(node)
- return strings.Trim(text, " \t\n")
- }
- }
- return ""
-}
-
-func getPageHead(node *html.Node) *html.Node {
- if node.Type == html.ElementNode && node.Data == "head" {
- return node
- }
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if n := getPageHead(child); n != nil {
- return n
- }
- }
- return nil
-}
-
func pagetitle(conn *irc.Conn, line *irc.Line) {
log.Println("text is", line.Text())
matches := httplink.FindAllString(line.Text(), -1)
@@ -159,13 +137,14 @@ func pagetitle(conn *irc.Conn, line *irc.Line) {
continue
}
- head := getPageHead(doc)
- if head == nil {
+ sel := cascadia.MustCompile("head > title")
+ n := cascadia.Query(doc, sel)
+
+ if n == nil {
continue
}
- if title := getPageTitle(head); title != "" {
- conn.Privmsg(channel, title)
- }
+
+ conn.Privmsg(channel, stringifyNode(n))
}
}
Omar Polo