Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <draw.h>
5 #include <regexp.h>
6 #include <html.h>
7 #include <ctype.h>
8 #include "dat.h"
10 char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
11 Reprog *urlprog;
13 int inword = 0;
14 int col = 0;
15 int wordi = 0;
17 char*
18 loadhtml(int fd)
19 {
20 URLwin *u;
21 Bytes *b;
22 int n;
23 char buf[4096];
25 u = emalloc(sizeof(URLwin));
26 u->infd = fd;
27 u->outfd = 1;
28 u->url = estrdup(url);
29 u->type = TextHtml;
31 b = emalloc(sizeof(Bytes));
32 while((n = read(fd, buf, sizeof buf)) > 0)
33 growbytes(b, buf, n);
34 if(b->b == nil)
35 return nil; /* empty file */
36 rendertext(u, b);
37 freeurlwin(u);
38 return nil;
39 }
41 char*
42 runetobyte(Rune *r, int n)
43 {
44 char *s;
46 if(n == 0)
47 return emalloc(1);
48 s = smprint("%.*S", n, r);
49 if(s == nil)
50 error("malloc failed");
51 return s;
52 }
/*
 * Report whether c is one of the closing punctuation characters
 * . , : ; ' " ) ] } > ! ?
 * Returns 1 if so, 0 otherwise.
 *
 * c may be a full rune value, so the comparison is done on the whole
 * int: a byte-oriented lookup would narrow c to a char and wrongly
 * accept runes whose low byte happens to be punctuation (and would
 * accept 0 as matching the string terminator).
 */
int
closingpunct(int c)
{
	switch(c){
	case '.':
	case ',':
	case ':':
	case ';':
	case '\'':
	case '"':
	case ')':
	case ']':
	case '}':
	case '>':
	case '!':
	case '?':
		return 1;
	default:
		return 0;
	}
}
60 void
61 emitword(Bytes *b, Rune *r, int nr)
62 {
63 char *s;
64 int space;
66 if(nr == 0)
67 return;
68 s = smprint("%.*S", nr, r);
69 space = (b->n>0) && !isspace(b->b[b->n-1]) && !closingpunct(r[0]);
70 if(col>0 && col+space+nr > width){
71 growbytes(b, "\n", 1);
72 space = 0;
73 col = 0;
74 }
75 if(space && col>0){
76 growbytes(b, " ", 1);
77 col++;
78 }
79 growbytes(b, s, strlen(s));
80 col += nr;
81 free(s);
82 inword = 0;
83 }
85 void
86 renderrunes(Bytes *b, Rune *r)
87 {
88 int i, n;
90 n = runestrlen(r);
91 for(i=0; i<n; i++){
92 switch(r[i]){
93 case '\n':
94 if(inword)
95 emitword(b, r+wordi, i-wordi);
96 col = 0;
97 if(b->n == 0)
98 break; /* don't start with blank lines */
99 if(b->n<2 || b->b[b->n-1]!='\n' || b->b[b->n-2]!='\n')
100 growbytes(b, "\n", 1);
101 break;
102 case ' ':
103 if(inword)
104 emitword(b, r+wordi, i-wordi);
105 break;
106 default:
107 if(!inword)
108 wordi = i;
109 inword = 1;
110 break;
113 if(inword)
114 emitword(b, r+wordi, i-wordi);
117 void
118 renderbytes(Bytes *b, char *fmt, ...)
120 Rune *r;
121 va_list arg;
123 va_start(arg, fmt);
124 r = runevsmprint(fmt, arg);
125 va_end(arg);
126 renderrunes(b, r);
127 free(r);
130 char*
131 baseurl(char *url)
133 char *base, *slash;
134 Resub rs[10];
136 if(url == nil)
137 return nil;
138 if(urlprog == nil){
139 urlprog = regcomp(urlexpr);
140 if(urlprog == nil)
141 error("can't compile URL regexp");
143 memset(rs, 0, sizeof rs);
144 if(regexec(urlprog, url, rs, nelem(rs)) == 0)
145 return nil;
146 base = estrdup(url);
147 slash = strrchr(base, '/');
148 if(slash!=nil && slash>=&base[rs[0].e.ep-rs[0].s.sp])
149 *slash = '\0';
150 else
151 base[rs[0].e.ep-rs[0].s.sp] = '\0';
152 return base;
155 char*
156 fullurl(URLwin *u, Rune *rhref)
158 char *base, *href, *hrefbase;
159 char *result;
161 if(rhref == nil)
162 return estrdup("NULL URL");
163 href = runetobyte(rhref, runestrlen(rhref));
164 hrefbase = baseurl(href);
165 result = nil;
166 if(hrefbase==nil && (base = baseurl(u->url))!=nil){
167 result = estrdup(base);
168 if(base[strlen(base)-1]!='/' && (href==nil || href[0]!='/'))
169 result = eappend(result, "/", "");
170 free(base);
172 if(href){
173 if(result)
174 result = eappend(result, "", href);
175 else
176 result = estrdup(href);
178 free(hrefbase);
179 if(result == nil)
180 return estrdup("***unknown***");
181 return result;
184 void
185 render(URLwin *u, Bytes *t, Item *items, int curanchor)
187 Item *il;
188 Itext *it;
189 Ifloat *ifl;
190 Ispacer *is;
191 Itable *ita;
192 Iimage *im;
193 Anchor *a;
194 Table *tab;
195 Tablecell *cell;
196 char *href;
198 inword = 0;
199 col = 0;
200 wordi = 0;
202 for(il=items; il!=nil; il=il->next){
203 if(il->state & IFbrk)
204 renderbytes(t, "\n");
205 if(il->state & IFbrksp)
206 renderbytes(t, "\n");
208 switch(il->tag){
209 case Itexttag:
210 it = (Itext*)il;
211 renderrunes(t, it->s);
212 break;
213 case Iruletag:
214 if(t->n>0 && t->b[t->n-1]!='\n')
215 renderbytes(t, "\n");
216 renderbytes(t, "=======\n");
217 break;
218 case Iimagetag:
219 if(!aflag)
220 break;
221 im = (Iimage*)il;
222 if(im->imsrc){
223 href = fullurl(u, im->imsrc);
224 renderbytes(t, "[image %s]", href);
225 free(href);
227 break;
228 case Iformfieldtag:
229 if(aflag)
230 renderbytes(t, "[formfield]");
231 break;
232 case Itabletag:
233 ita = (Itable*)il;
234 tab = ita->table;
235 for(cell=tab->cells; cell!=nil; cell=cell->next){
236 render(u, t, cell->content, curanchor);
238 if(t->n>0 && t->b[t->n-1]!='\n')
239 renderbytes(t, "\n");
240 break;
241 case Ifloattag:
242 ifl = (Ifloat*)il;
243 render(u, t, ifl->item, curanchor);
244 break;
245 case Ispacertag:
246 is = (Ispacer*)il;
247 if(is->spkind != ISPnull)
248 renderbytes(t, " ");
249 break;
250 default:
251 error("unknown item tag %d\n", il->tag);
253 if(il->anchorid != 0 && il->anchorid!=curanchor){
254 for(a=u->docinfo->anchors; a!=nil; a=a->next)
255 if(aflag && a->index == il->anchorid){
256 href = fullurl(u, a->href);
257 renderbytes(t, "[%s]", href);
258 free(href);
259 break;
261 curanchor = il->anchorid;
264 if(t->n>0 && t->b[t->n-1]!='\n')
265 renderbytes(t, "\n");
268 void
269 rerender(URLwin *u)
271 Bytes *t;
273 t = emalloc(sizeof(Bytes));
275 render(u, t, u->items, 0);
277 if(t->n)
278 write(u->outfd, (char*)t->b, t->n);
279 free(t->b);
280 free(t);
283 /*
284 * Somewhat of a hack. Not a full parse, just looks for strings in the beginning
285 * of the document (cistrstr only looks at first somewhat bytes).
286 */
287 int
288 charset(char *s)
290 char *meta, *emeta, *charset;
292 if(defcharset == 0)
293 defcharset = ISO_8859_1;
294 meta = cistrstr(s, "<meta");
295 if(meta == nil)
296 return defcharset;
297 for(emeta=meta; *emeta!='>' && *emeta!='\0'; emeta++)
299 charset = cistrstr(s, "charset=");
300 if(charset == nil)
301 return defcharset;
302 charset += 8;
303 if(*charset == '"')
304 charset++;
305 if(cistrncmp(charset, "utf-8", 5) || cistrncmp(charset, "utf8", 4))
306 return UTF_8;
307 return defcharset;
310 void
311 rendertext(URLwin *u, Bytes *b)
313 Rune *rurl;
315 rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
316 u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
317 /* free(rurl); */
319 rerender(u);
323 void
324 freeurlwin(URLwin *u)
326 freeitems(u->items);
327 u->items = nil;
328 freedocinfo(u->docinfo);
329 u->docinfo = nil;
330 free(u);