/*
 * Regular expression source used to recognize the leading part of an
 * absolute URL: one of the listed schemes, then "://", then a run of
 * name characters optionally continued by '.'- or ':'-separated runs.
 * It is anchored at the start of the string and compiled with regcomp
 * elsewhere in this file.
 */
10 char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
/*
 * Fragment of a routine whose header is not visible in this excerpt:
 * allocates a URLwin with emalloc and stores the result of estrdup(url)
 * in its url field (presumably a private copy; confirm against estrdup).
 */
25 u = emalloc(sizeof(URLwin));
28 u->url = estrdup(url);
/*
 * Fragment of a routine whose header is not visible in this excerpt:
 * allocates a Bytes and reads fd in buf-sized chunks for as long as
 * read returns a positive count.
 * NOTE(review): the loop condition treats a read error (negative n) the
 * same as end of file; confirm that is intended.
 */
31 b = emalloc(sizeof(Bytes));
32 while((n = read(fd, buf, sizeof buf)) > 0)
35 return nil; /* empty file */
/*
 * runetobyte: format the first n runes of r into a string with
 * smprint("%.*S"). The "malloc failed" error is presumably raised when
 * smprint returns nil; the guarding test is not visible in this excerpt.
 */
42 runetobyte(Rune *r, int n)
48 s = smprint("%.*S", n, r);
50 error("malloc failed");
/*
 * Presumably the body of closingpunct (which emitword calls; the header
 * is not visible here): non-zero when c is one of the listed closing
 * punctuation characters.
 * NOTE(review): strchr also finds the terminating NUL, so c == 0 would be
 * reported as punctuation; confirm callers never pass 0.
 */
57 return strchr(".,:;'\")]}>!?", c) != nil;
/*
 * emitword: append the nr runes starting at r to b as a single word,
 * breaking the output line first when the word would not fit.
 * Only some lines of the body are visible in this excerpt.
 */
61 emitword(Bytes *b, Rune *r, int nr)
/* Convert the word to a byte string. NOTE(review): no nil check of s is visible here. */
68 s = smprint("%.*S", nr, r);
/*
 * A separating space is wanted only when b is non-empty, does not already
 * end in white space, and the word does not start with closing punctuation.
 */
69 space = (b->n>0) && !isspace(b->b[b->n-1]) && !closingpunct(r[0]);
/*
 * Start a new line if the word (plus separator) would pass width.
 * NOTE(review): the fit test counts runes (nr) while the append below
 * counts bytes (strlen(s)); how col is advanced is not visible, so
 * confirm the two agree for multi-byte text.
 */
70 if(col>0 && col+space+nr > width){
71 growbytes(b, "\n", 1);
79 growbytes(b, s, strlen(s));
/*
 * renderrunes: append the rune string r to b, handing each word to
 * emitword. Only some lines of the body are visible in this excerpt.
 */
86 renderrunes(Bytes *b, Rune *r)
/* Each emitword call below flushes the runes from r[wordi] up to, but not including, r[i]. */
95 emitword(b, r+wordi, i-wordi);
98 break; /* don't start with blank lines */
/* Add a newline unless b already ends in two newlines, so at most one blank line is produced. */
99 if(b->n<2 || b->b[b->n-1]!='\n' || b->b[b->n-2]!='\n')
100 growbytes(b, "\n", 1);
104 emitword(b, r+wordi, i-wordi);
114 emitword(b, r+wordi, i-wordi);
/*
 * renderbytes: print-style front end for rendering into b; formats fmt
 * and its variable arguments into a rune string with runevsmprint.
 * What happens to r afterwards is not visible in this excerpt
 * (presumably it is rendered into b and then freed; confirm).
 */
118 renderbytes(Bytes *b, char *fmt, ...)
124 r = runevsmprint(fmt, arg);
/*
 * Presumably the body of baseurl (which fullurl calls; the header is not
 * visible here). Compiles urlexpr into urlprog, matches url against it,
 * and trims base using the extent of the whole match recorded in rs[0].
 */
139 urlprog = regcomp(urlexpr);
141 error("can't compile URL regexp");
/* Clear the submatch array before matching. */
143 memset(rs, 0, sizeof rs);
/* regexec returning 0 is tested here; the action taken is not visible (presumably "no match, no base"). */
144 if(regexec(urlprog, url, rs, nelem(rs)) == 0)
147 slash = strrchr(base, '/');
/*
 * rs[0].e.ep-rs[0].s.sp is the distance between the end and start
 * pointers of the overall match, used as an offset into base. The test
 * asks whether the last '/' lies at or beyond that offset, i.e. after
 * the matched prefix rather than inside it.
 */
148 if(slash!=nil && slash>=&base[rs[0].e.ep-rs[0].s.sp])
/* Cut base off right after the matched prefix. */
151 base[rs[0].e.ep-rs[0].s.sp] = '\0';
/*
 * fullurl: build a displayable URL string for the link text rhref found
 * in the document shown by u. Every return visible in this excerpt hands
 * back an estrdup'd string, so the caller presumably owns the result.
 * Only some lines of the body are visible here.
 */
156 fullurl(URLwin *u, Rune *rhref)
158 char *base, *href, *hrefbase;
/* Placeholder result; the condition guarding it is not visible (presumably a nil rhref). */
162 return estrdup("NULL URL");
163 href = runetobyte(rhref, runestrlen(rhref));
164 hrefbase = baseurl(href);
/* href has no recognizable base of its own but the window's URL does: resolve href against that base. */
166 if(hrefbase==nil && (base = baseurl(u->url))!=nil){
167 result = estrdup(base);
/*
 * Insert a '/' between base and href unless one side already supplies it.
 * NOTE(review): base[strlen(base)-1] indexes before the buffer if base is
 * empty; confirm baseurl never returns "". The href==nil test also comes
 * after href has already been passed to baseurl above.
 */
168 if(base[strlen(base)-1]!='/' && (href==nil || href[0]!='/'))
169 result = eappend(result, "/", "");
174 result = eappend(result, "", href);
/* Otherwise href is used as is. */
176 result = estrdup(href);
/* Fallback placeholder; the condition guarding it is not visible here. */
180 return estrdup("***unknown***");
/*
 * render: walk the item list starting at items and append a plain-text
 * rendering of each item to t. curanchor is the anchor id currently in
 * effect; it is passed down unchanged into nested content.
 * Only some lines of the body are visible in this excerpt.
 */
185 render(URLwin *u, Bytes *t, Item *items, int curanchor)
202 for(il=items; il!=nil; il=il->next){
/* A newline is emitted for each of the IFbrk and IFbrksp state flags that is set. */
203 if(il->state & IFbrk)
204 renderbytes(t, "\n");
205 if(il->state & IFbrksp)
206 renderbytes(t, "\n");
/* Text item: render its rune string. */
211 renderrunes(t, it->s);
/* Make sure the "=======" separator starts on its own line. */
214 if(t->n>0 && t->b[t->n-1]!='\n')
215 renderbytes(t, "\n");
216 renderbytes(t, "=======\n");
/*
 * Image: show its source URL in brackets.
 * NOTE(review): fullurl returns an allocated string; whether href is
 * freed afterwards is not visible in this excerpt.
 */
223 href = fullurl(u, im->imsrc);
224 renderbytes(t, "[image %s]", href);
230 renderbytes(t, "[formfield]");
/* Table: render the content of every cell in list order by recursing. */
235 for(cell=tab->cells; cell!=nil; cell=cell->next){
236 render(u, t, cell->content, curanchor);
238 if(t->n>0 && t->b[t->n-1]!='\n')
239 renderbytes(t, "\n");
/* Nested item list reached through ifl->item: recurse. */
243 render(u, t, ifl->item, curanchor);
247 if(is->spkind != ISPnull)
251 error("unknown item tag %d\n", il->tag);
/*
 * The item belongs to an anchor different from the current one: look the
 * anchor up by index in the document's anchor list and, when aflag is
 * set, print its resolved href in brackets.
 * NOTE(review): aflag does not change inside the loop, so it could be
 * tested once outside it; the fate of the fullurl result is again not
 * visible here.
 */
253 if(il->anchorid != 0 && il->anchorid!=curanchor){
254 for(a=u->docinfo->anchors; a!=nil; a=a->next)
255 if(aflag && a->index == il->anchorid){
256 href = fullurl(u, a->href);
257 renderbytes(t, "[%s]", href);
261 curanchor = il->anchorid;
/* End the output with a newline if it does not already have one. */
264 if(t->n>0 && t->b[t->n-1]!='\n')
265 renderbytes(t, "\n");
/*
 * Fragment of a routine whose header is not visible in this excerpt:
 * renders all of u's items into a fresh Bytes and writes the resulting
 * bytes to u->outfd.
 * NOTE(review): the value returned by write is not used on the visible
 * line, so a short or failed write would go unnoticed; confirm.
 */
273 t = emalloc(sizeof(Bytes));
275 render(u, t, u->items, 0);
278 write(u->outfd, (char*)t->b, t->n);
284 * Somewhat of a hack. Not a full parse; it just looks for strings near the beginning
285 * of the document (cistrstr only examines some leading portion of the bytes).
/*
 * Presumably the body of charset (which rendertext calls; the header is
 * not visible here): guesses the document character set by searching
 * the text s for a meta tag and a "charset=" string.
 */
290 char *meta, *emeta, *charset;
/* Fallback character set; the condition guarding this assignment is not visible here. */
293 defcharset = ISO_8859_1;
294 meta = cistrstr(s, "<meta");
/* Advance emeta to the '>' closing the tag, or to the end of the string. */
297 for(emeta=meta; *emeta!='>' && *emeta!='\0'; emeta++)
/*
 * NOTE(review): this search starts at s rather than at meta, and emeta is
 * not used on any line visible in this excerpt, so a "charset=" outside
 * the meta tag would also be found; confirm.
 */
299 charset = cistrstr(s, "charset=");
/*
 * NOTE(review): if cistrncmp follows the strncmp convention of returning
 * 0 on a match, this condition is true whenever the text differs from at
 * least one of the two spellings, which is always; "== 0" on each call
 * looks intended. Confirm against cistrncmp's documentation.
 */
305 if(cistrncmp(charset, "utf-8", 5) || cistrncmp(charset, "utf8", 4))
/*
 * rendertext: parse the HTML bytes in b for window u, storing the
 * resulting item list in u->items and the document info in u->docinfo.
 * Only some lines of the body are visible in this excerpt.
 */
311 rendertext(URLwin *u, Bytes *b)
/* The URL is converted with toStr using ISO_8859_1 before being given to the parser. */
315 rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
/*
 * The character set argument comes from scanning the bytes themselves.
 * NOTE(review): b->b is handed to charset as a C string, which assumes
 * it is NUL-terminated; confirm the code that fills b guarantees that.
 */
316 u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
/*
 * freeurlwin: release what u holds; the visible line frees the parsed
 * document info with freedocinfo. Other cleanup steps, if any, are not
 * visible in this excerpt.
 */
324 freeurlwin(URLwin *u)
328 freedocinfo(u->docinfo);