commit deee939b86f13061b42fc41d4187dd4d4f8a86ed from: Omar Polo date: Sat Jan 08 15:04:43 2022 UTC improve the title fetching stuff commit - 4852f411b4f51da99a0ac2323e9c160d2bae4a05 commit + deee939b86f13061b42fc41d4187dd4d4f8a86ed blob - f06515a0b0efcfbf239261c81c56a4b26c77d096 blob + bfe51f1f83bd8262e6ab3e94a0b1961a3dbc3b54 --- go.mod +++ go.mod @@ -3,6 +3,7 @@ module gemitbot go 1.16 require ( + github.com/andybalholm/cascadia v1.3.1 github.com/fluffle/goirc v1.1.1 - golang.org/x/net v0.0.0-20210119194325-5f4716e94777 + golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 ) blob - 4400525037d9afcf26c5591597d77f854a7968d3 blob + 41d801dc01f379780cd6b0bef1a387d63353bf95 --- go.sum +++ go.sum @@ -1,3 +1,5 @@ +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/fluffle/goirc v1.1.1 h1:6nO+7rrED3Kp3mngoi9OmQmQHevNwDfjGpYUdWc1s0k= github.com/fluffle/goirc v1.1.1/go.mod h1:iRzPLv2vkuZEtbns5LioYguJkRh/bvshuWg7izf1yeE= github.com/golang/mock v1.5.0 h1:jlYHihg//f7RRwuPfptm04yp4s7O6Kw8EZiVYIGcH0g= @@ -7,15 +9,18 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210119194325-5f4716e94777 h1:003p0dJM77cxMSyCPFphvZf/Y5/NXf5fzg6ufd1/Oew= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= blob - 476e044795871e2d885340e6a29994c7fd265153 blob + 0f32a038718310378d29fe88c2811aca3466e8a0 --- main.go +++ main.go @@ -13,6 +13,7 @@ import ( "regexp" "strings" + "github.com/andybalholm/cascadia" irc "github.com/fluffle/goirc/client" "golang.org/x/net/html" ) @@ -118,29 +119,6 @@ func stringifyNode(node *html.Node) string { return s } -func getPageTitle(node *html.Node) string { - for child := node.FirstChild; child != nil; child = child.NextSibling { - if child.Type == html.ElementNode && child.Data == "title" { - text := stringifyNode(node) - return strings.Trim(text, " \t\n") - } - } - return "" -} - -func getPageHead(node *html.Node) *html.Node { - if node.Type == html.ElementNode && node.Data == "head" { - return node - } - - for child := node.FirstChild; child != nil; child = child.NextSibling { - if n := getPageHead(child); n != nil { - return n - } - } - return nil -} - func pagetitle(conn *irc.Conn, line *irc.Line) { log.Println("text is", line.Text()) matches := httplink.FindAllString(line.Text(), -1) @@ -159,13 +137,14 @@ func pagetitle(conn *irc.Conn, line *irc.Line) { continue } - head := getPageHead(doc) - if head == nil { + sel := cascadia.MustCompile("head > title") + n := cascadia.Query(doc, sel) + + if n == nil { continue } - if title := getPageTitle(head); title != "" { - conn.Privmsg(channel, title) - } + + conn.Privmsg(channel, stringifyNode(n)) } }