Commit Diff


commit - 88370dfe7b3c35f7d841ab7fb32434be38812b25
commit + 5659492e52e116119898d17ce6202a0db3196951
blob - bb46e0405388b275de9287aecdea2a0fd347c5a3
blob + cb2b1ede1a2c3ba542ef04b7d6717d807764cb94
--- main.go
+++ main.go
@@ -14,6 +14,7 @@ import (
 	"strings"
 
 	irc "github.com/fluffle/goirc/client"
+	"golang.org/x/net/html"
 )
 
 var (
@@ -24,6 +25,8 @@ var (
 	channel = "#gemini-it"
 
 	tooLongRe = regexp.MustCompile(`full message at (https://libera.ems.host/.*)[)]`)
+
+	httplink = regexp.MustCompile(`https?://[^\s]+`)
 )
 
 func matrix2gemini(conn *irc.Conn, line *irc.Line) {
@@ -101,9 +104,58 @@ func messageTooLong(conn *irc.Conn, line *irc.Line) {
 	}
 }
 
+// getPageTitle returns the text of the first <title> element found among the
+// direct children of node (the caller passes the document's <head>).
+// Surrounding whitespace is removed and internal runs of whitespace are
+// collapsed to single spaces.  It returns "" when node has no <title> child
+// or when that title holds no text.
+func getPageTitle(node *html.Node) string {
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		if child.Type != html.ElementNode || child.Data != "title" {
+			continue
+		}
+		// The title text is the <title> element's own child, not the first
+		// child of the enclosing node; an empty <title></title> has no
+		// child at all.
+		text := child.FirstChild
+		if text == nil || text.Type != html.TextNode {
+			return ""
+		}
+		// Titles come from arbitrary pages and may span several lines;
+		// flatten them so the result is always a single line.
+		return strings.Join(strings.Fields(text.Data), " ")
+	}
+	return ""
+}
+
+// getPageHead searches the tree rooted at node depth-first and returns the
+// first <head> element it finds, or nil if the tree contains none (or node
+// itself is nil).
+func getPageHead(node *html.Node) *html.Node {
+	if node == nil {
+		return nil
+	}
+	if node.Type == html.ElementNode && node.Data == "head" {
+		return node
+	}
+
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		// Descend into the child: recursing on node itself would repeat
+		// the same call forever.
+		if n := getPageHead(child); n != nil {
+			return n
+		}
+	}
+	return nil
+}
+
+// fetchPageTitle downloads link and returns the title of the HTML page
+// served there.  It returns "" if the request fails, the server does not
+// answer 200 OK, the body cannot be parsed as HTML, or no title is found.
+func fetchPageTitle(link string) string {
+	// NOTE(review): http.Get goes through http.DefaultClient, which sets
+	// no timeout, so a stalled server blocks this call indefinitely.
+	resp, err := http.Get(link)
+	if err != nil {
+		return ""
+	}
+	// Deferred inside this per-link helper so each body is closed as soon
+	// as its page has been handled, instead of piling up until the caller's
+	// loop over all links has finished.
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return ""
+	}
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		return ""
+	}
+
+	head := getPageHead(doc)
+	if head == nil {
+		return ""
+	}
+	return getPageTitle(head)
+}
+
+// pagetitle finds every http(s) link in the text of line and, for each one
+// whose page title can be retrieved, sends that title to channel.
+func pagetitle(conn *irc.Conn, line *irc.Line) {
+	for _, link := range httplink.FindAllString(line.Text(), -1) {
+		if title := fetchPageTitle(link); title != "" {
+			conn.Privmsg(channel, title)
+		}
+	}
+}
+
+// dostuff passes conn and line to each message handler in turn:
+// matrix2gemini, messageTooLong, then pagetitle.
 func dostuff(conn *irc.Conn, line *irc.Line) {
 	matrix2gemini(conn, line)
 	messageTooLong(conn, line)
+	pagetitle(conn, line)
 	// ...
 }