commit cfb62e6e82c03731d922d8f2e54d08ae9616cf8d from: Omar Polo date: Sat Jan 08 15:47:57 2022 UTC fetch the title also for gemini URLs commit - 0c3e95627a6f16e7468e7ec0a01633355122eb89 commit + cfb62e6e82c03731d922d8f2e54d08ae9616cf8d blob - bfe51f1f83bd8262e6ab3e94a0b1961a3dbc3b54 blob + 8ace7de4c43d7500dc95f2fb085cbc6a89f7633f --- go.mod +++ go.mod @@ -3,6 +3,7 @@ module gemitbot go 1.16 require ( + git.sr.ht/~adnano/go-gemini v0.2.2 github.com/andybalholm/cascadia v1.3.1 github.com/fluffle/goirc v1.1.1 golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 blob - /dev/null blob + 85ee086f1d957431da3fb057f6df6ceb5de0fd93 (mode 644) --- /dev/null +++ gemini.go @@ -0,0 +1,59 @@ +package main + +import ( + "bufio" + "context" + "errors" + "regexp" + "strings" + + gemini "git.sr.ht/~adnano/go-gemini" +) + +var ( + ErrNoTitle = errors.New(`no title found`) + + title1 = regexp.MustCompile(`^#[^#]`) + title2 = regexp.MustCompile(`^##[^#]`) + title3 = regexp.MustCompile(`^###`) +) + +func geminiTitle(url string) (string, error) { + client := gemini.Client{} + ctx := context.Background() + resp, err := client.Get(ctx, url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.Status != 20 || !strings.HasPrefix(resp.Meta, "text/gemini") { + return "", ErrNoTitle + } + + var t2, t3 string + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + + switch { + case title1.MatchString(line): + return line, nil + case title2.MatchString(line) && t2 == "": + t2 = line + case title3.MatchString(line) && t3 == "": + t3 = line + } + } + + if t2 != "" { + return t2, nil + } + + if t3 != "" { + return t3, nil + } + + return "", ErrNoTitle +} blob - 41d801dc01f379780cd6b0bef1a387d63353bf95 blob + 1963e717e1fb1c22a859e0aeb889d6977928ea07 --- go.sum +++ go.sum @@ -1,3 +1,5 @@ +git.sr.ht/~adnano/go-gemini v0.2.2 h1:p2owKzrQ1wTgvPS5CZCPYArQyNUL8ZgYOHHrTjH9sdI= +git.sr.ht/~adnano/go-gemini v0.2.2/go.mod h1:hQ75Y0i5jSFL+FQ7AzWVAYr5LQsaFC7v3ZviNyj46dY= github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/fluffle/goirc v1.1.1 h1:6nO+7rrED3Kp3mngoi9OmQmQHevNwDfjGpYUdWc1s0k= @@ -20,6 +22,7 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= blob - ecee4dd688afb8a03ccd28ea8928a58252638d5d blob + 10ec0af5ff92b641100e4475e3534e6930f2a0ec --- main.go +++ main.go @@ -28,6 +28,7 @@ var ( tooLongRe = regexp.MustCompile(`full message at (https://libera.ems.host/.*)[)]`) httplink = regexp.MustCompile(`https?://[^\s)]+`) + gemlink = regexp.MustCompile(`gemini://[^\s)]+`) ) func matrix2gemini(conn *irc.Conn, line *irc.Line) { @@ -119,7 +120,7 @@ func stringifyNode(node *html.Node) string { return s } -func pagetitle(conn *irc.Conn, line *irc.Line) { +func wwwpagetitle(conn *irc.Conn, line *irc.Line) { log.Println("text is", line.Text()) matches := httplink.FindAllString(line.Text(), -1) @@ -152,10 +153,25 @@ func pagetitle(conn *irc.Conn, line *irc.Line) { } } +func gempagetitle(conn *irc.Conn, line *irc.Line) { + matches := gemlink.FindAllString(line.Text(), -1) + + for _, link := range matches { + log.Println("fetching", link, "...") + title, err := geminiTitle(link) + if err != nil { + continue + } + + conn.Privmsg(channel, title) + } +} + func dostuff(conn *irc.Conn, line *irc.Line) { matrix2gemini(conn, line) messageTooLong(conn, line) - pagetitle(conn, line) + wwwpagetitle(conn, line) + gempagetitle(conn, line) // ... }