commit 5659492e52e116119898d17ce6202a0db3196951
from: Omar Polo
date: Thu Dec 02 14:24:51 2021 UTC

fetch links title and send them back as privmsg

just like the playonbsd bot :)

NOTE(review): the added lines below fix bugs in the originally committed
code, so they no longer hash to the "blob +" id listed here.

commit - 88370dfe7b3c35f7d841ab7fb32434be38812b25
commit + 5659492e52e116119898d17ce6202a0db3196951
blob - bb46e0405388b275de9287aecdea2a0fd347c5a3
blob + cb2b1ede1a2c3ba542ef04b7d6717d807764cb94
--- main.go
+++ main.go
@@ -14,6 +14,7 @@ import (
 	"strings"
 
 	irc "github.com/fluffle/goirc/client"
+	"golang.org/x/net/html"
 )
 
 var (
@@ -24,6 +25,8 @@ var (
 	channel = "#gemini-it"
 
 	tooLongRe = regexp.MustCompile(`full message at (https://libera.ems.host/.*)[)]`)
+
+	httplink = regexp.MustCompile(`https?://[^\s]+`)
 )
 
 func matrix2gemini(conn *irc.Conn, line *irc.Line) {
@@ -101,9 +104,83 @@ func messageTooLong(conn *irc.Conn, line *irc.Line) {
 	}
 }
 
+// getPageTitle returns the text of the first <title> element among the
+// direct children of node, with every run of whitespace collapsed to a
+// single space.  It returns "" if there is no <title> child or if the
+// <title> has no text.
+func getPageTitle(node *html.Node) string {
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		if child.Type != html.ElementNode || child.Data != "title" {
+			continue
+		}
+		// The text is the first child of the <title> element
+		// itself, not the first child of its parent.
+		text := child.FirstChild
+		if text == nil || text.Type != html.TextNode {
+			return ""
+		}
+		// Collapse whitespace so that a title spanning several
+		// lines is sent as a single one.
+		return strings.Join(strings.Fields(text.Data), " ")
+	}
+	return ""
+}
+
+// getPageHead searches the tree rooted at node depth-first and returns
+// the first <head> element found, or nil if there is none.
+func getPageHead(node *html.Node) *html.Node {
+	if node.Type == html.ElementNode && node.Data == "head" {
+		return node
+	}
+
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		// Descend into child: calling getPageHead on node again
+		// would recurse without end.
+		if n := getPageHead(child); n != nil {
+			return n
+		}
+	}
+	return nil
+}
+
+// fetchTitle retrieves link over HTTP and returns the title of the HTML
+// document it serves.  It returns "" if the request fails, the body
+// can't be parsed or no title is found.
+func fetchTitle(link string) string {
+	resp, err := http.Get(link)
+	if err != nil {
+		return ""
+	}
+	// Deferred here rather than in the caller's loop so that every
+	// body is closed as soon as its own link has been processed.
+	defer resp.Body.Close()
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		return ""
+	}
+
+	head := getPageHead(doc)
+	if head == nil {
+		return ""
+	}
+	return getPageTitle(head)
+}
+
+// pagetitle looks for http(s) links in the text of line and sends the
+// title of each linked page to channel as a privmsg.
+func pagetitle(conn *irc.Conn, line *irc.Line) {
+	for _, link := range httplink.FindAllString(line.Text(), -1) {
+		if title := fetchTitle(link); title != "" {
+			conn.Privmsg(channel, title)
+		}
+	}
+}
+
 func dostuff(conn *irc.Conn, line *irc.Line) {
 	matrix2gemini(conn, line)
 	messageTooLong(conn, line)
+	pagetitle(conn, line)
 	// ...
 }
 