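/* Public declarations for the html library: shared enums, item/table/form structures, and entry points. */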


1 #ifndef _HTML_H_
2 #define _HTML_H_ 1
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
7 AUTOLIB(html)
8 /*
9 #pragma lib "libhtml.a"
10 #pragma src "/sys/src/libhtml"
11 */
13 /* UTILS */
14 extern uchar* fromStr(Rune* buf, int n, int chset);
15 extern Rune* toStr(uchar* buf, int n, int chset);
/* Common LEX and BUILD enums */

/* Media types; NMEDIATYPES is the number of entries that precede it */
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES
};
/* HTTP methods */
enum
{
	HGet,
	HPost
};
/* Charsets; NCHARSETS is the number of entries that precede it */
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};
/* Frame Target IDs */
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};
/*
 * Forward typedefs, so the structures below can refer to one
 * another by bare name before their bodies are seen.
 */

/* LEX */
typedef struct Token Token;
typedef struct Attr Attr;

/* BUILD */
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;	/* defined in Layout module */
/* Alignment types */
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};
130 struct Align
132 uchar halign; /* one of ALnone, ALleft, etc. */
133 uchar valign; /* one of ALnone, ALtop, etc. */
134 };
/* A Dimen holds a dimension specification, especially for those */
/* cases when a number can be followed by a % or a * to indicate */
/* percentage of total or relative weight. */
/* Dnone means no dimension was specified */

/* To fit in a word, use top bits to identify kind, rest for value */
enum {
	Dnone =		0,
	Dpixels =	(1<<29),
	Dpercent =	(2<<29),
	Drelative =	(3<<29),
	Dkindmask =	(3<<29),	/* selects the kind bits */
	Dspecmask =	(~Dkindmask)	/* selects everything except the kind bits */
};
/* One dimension: kind (Dpixels, Dpercent, ...) and value packed into one int */
struct Dimen
{
	int	kindspec;	/* kind | spec */
};
156 /* Background is either an image or a color. */
157 /* If both are set, the image has precedence. */
158 struct Background
160 Rune* image; /* url */
161 int color;
162 };
165 /* There are about a half dozen Item variants. */
166 /* The all look like this at the start (using Plan 9 C's */
167 /* anonymous structure member mechanism), */
168 /* and then the tag field dictates what extra fields there are. */
169 struct Item
171 Item* next; /* successor in list of items */
172 int width; /* width in pixels (0 for floating items) */
173 int height; /* height in pixels */
174 Rectangle r;
175 int ascent; /* ascent (from top to baseline) in pixels */
176 int anchorid; /* if nonzero, which anchor we're in */
177 int state; /* flags and values (see below) */
178 Genattr* genattr; /* generic attributes and events */
179 int tag; /* variant discriminator: Itexttag, etc. */
180 };
/* Item variant tags */
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};
193 struct Itext
195 Item item; /* (with tag ==Itexttag) */
196 Rune* s; /* the characters */
197 int fnt; /* style*NumSize+size (see font stuff, below) */
198 int fg; /* Pixel (color) for text */
199 uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
200 uchar ul; /* ULnone, ULunder, or ULmid */
201 };
203 struct Irule
205 Item item; /* (with tag ==Iruletag) */
206 uchar align; /* alignment spec */
207 uchar noshade; /* if true, don't shade */
208 int size; /* size attr (rule height) */
209 Dimen wspec; /* width spec */
210 };
213 struct Iimage
215 Item item; /* (with tag ==Iimagetag) */
216 Rune* imsrc; /* image src url */
217 int imwidth; /* spec width (actual, if no spec) */
218 int imheight; /* spec height (actual, if no spec) */
219 Rune* altrep; /* alternate representation, in absence of image */
220 Map* map; /* if non-nil, client side map */
221 int ctlid; /* if animated */
222 uchar align; /* vertical alignment */
223 uchar hspace; /* in pixels; buffer space on each side */
224 uchar vspace; /* in pixels; buffer space on top and bottom */
225 uchar border; /* in pixels: border width to draw around image */
226 Iimage* nextimage; /* next in list of document's images */
227 void *aux;
228 };
231 struct Iformfield
233 Item item; /* (with tag ==Iformfieldtag) */
234 Formfield* formfield;
235 void *aux;
236 };
239 struct Itable
241 Item item; /* (with tag ==Itabletag) */
242 Table* table;
243 };
246 struct Ifloat
248 Item _item; /* (with tag ==Ifloattag) */
249 Item* item; /* table or image item that floats */
250 int x; /* x coord of top (from right, if ALright) */
251 int y; /* y coord of top */
252 uchar side; /* margin it floats to: ALleft or ALright */
253 uchar infloats; /* true if this has been added to a lay.floats */
254 Ifloat* nextfloat; /* in list of floats */
255 };
258 struct Ispacer
260 Item item; /* (with tag ==Ispacertag) */
261 int spkind; /* ISPnull, etc. */
262 };
/* Item state flags and value fields */
/* IFbrk is a macro rather than an enumerator: its value has the top */
/* bit set, which the original author found too big for an enum */
/* constant on some compilers. */
enum {
/*	IFbrk =		0x80000000,	// forced break before this item */
#define	IFbrk		0x80000000	/* too big for sun */
	IFbrksp =	0x40000000,	/* add 1 line space to break (IFbrk set too) */
	IFnobrk =	0x20000000,	/* break not allowed before this item */
	IFcleft =	0x10000000,	/* clear left floats (IFbrk set too) */
	IFcright =	0x08000000,	/* clear right floats (IFbrk set too) */
	IFwrap =	0x04000000,	/* in a wrapping (non-pre) line */
	IFhang =	0x02000000,	/* in a hanging (into left indent) item */
	IFrjust =	0x01000000,	/* right justify current line */
	IFcjust =	0x00800000,	/* center justify current line */
	IFsmap =	0x00400000,	/* image is server-side map */
	IFindentshift =	8,
	IFindentmask =	(255<<IFindentshift),	/* current indent, in tab stops */
	IFhangmask =	255	/* current hang into left indent, in 1/10th tabstops */
};
/* Bias added to Itext's voff field */
enum { Voffbias = 128 };
/* Spacer kinds */
enum {
	ISPnull,	/* 0 height and width */
	ISPvline,	/* height and ascent of current font */
	ISPhspace,	/* width of space in current font */
	ISPgeneral	/* other purposes (e.g., between markers and list) */
};
293 /* Generic attributes and events (not many elements will have any of these set) */
294 struct Genattr
296 Rune* id;
297 Rune* class;
298 Rune* style;
299 Rune* title;
300 SEvent* events;
301 };
303 struct SEvent
305 SEvent* next; /* in list of events */
306 int type; /* SEonblur, etc. */
307 Rune* script;
308 };
/* Script event types (SEvent.type); Numscriptev is the number of types */
enum {
	SEonblur,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};
/* Form field types */
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};
335 /* Information about a field in a form */
336 struct Formfield
338 Formfield* next; /* in list of fields for a form */
339 int ftype; /* Ftext, Fpassword, etc. */
340 int fieldid; /* serial no. of field within its form */
341 Form* form; /* containing form */
342 Rune* name; /* name attr */
343 Rune* value; /* value attr */
344 int size; /* size attr */
345 int maxlength; /* maxlength attr */
346 int rows; /* rows attr */
347 int cols; /* cols attr */
348 uchar flags; /* FFchecked, etc. */
349 Option* options; /* for Fselect fields */
350 Item* image; /* image item, for Fimage fields */
351 int ctlid; /* identifies control for this field in layout */
352 SEvent* events; /* same as genattr->events of containing item */
353 };
/* Formfield flags */
enum {
	FFchecked =	(1<<7),
	FFmultiple =	(1<<6)
};
360 /* Option holds info about an option in a "select" form field */
361 struct Option
363 Option* next; /* next in list of options for a field */
364 int selected; /* true if selected initially */
365 Rune* value; /* value attr */
366 Rune* display; /* display string */
367 };
369 /* Form holds info about a form */
370 struct Form
372 Form* next; /* in list of forms for document */
373 int formid; /* serial no. of form within its doc */
374 Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */
375 Rune* action; /* action attr */
376 int target; /* target attr as targetid */
377 int method; /* HGet or HPost */
378 int nfields; /* number of fields */
379 Formfield* fields; /* field's forms, in input order */
380 };
/* Flags used in various table structures */
enum {
	TFparsing =	(1<<7),
	TFnowrap =	(1<<6),
	TFisth =	(1<<5)
};
390 /* Information about a table */
391 struct Table
393 Table* next; /* next in list of document's tables */
394 int tableid; /* serial no. of table within its doc */
395 Tablerow* rows; /* array of row specs (list during parsing) */
396 int nrow; /* total number of rows */
397 Tablecol* cols; /* array of column specs */
398 int ncol; /* total number of columns */
399 Tablecell* cells; /* list of unique cells */
400 int ncell; /* total number of cells */
401 Tablecell*** grid; /* 2-D array of cells */
402 Align align; /* alignment spec for whole table */
403 Dimen width; /* width spec for whole table */
404 int border; /* border attr */
405 int cellspacing; /* cellspacing attr */
406 int cellpadding; /* cellpadding attr */
407 Background background; /* table background */
408 Item* caption; /* linked list of Items, giving caption */
409 uchar caption_place; /* ALtop or ALbottom */
410 Lay* caption_lay; /* layout of caption */
411 int totw; /* total width */
412 int toth; /* total height */
413 int caph; /* caption height */
414 int availw; /* used for previous 3 sizes */
415 Token* tabletok; /* token that started the table */
416 uchar flags; /* Lchanged, perhaps */
417 };
420 struct Tablecol
422 int width;
423 Align align;
424 Point pos;
425 };
428 struct Tablerow
430 Tablerow* next; /* Next in list of rows, during parsing */
431 Tablecell* cells; /* Cells in row, linked through nextinrow */
432 int height;
433 int ascent;
434 Align align;
435 Background background;
436 Point pos;
437 uchar flags; /* 0 or TFparsing */
438 };
441 /* A Tablecell is one cell of a table. */
442 /* It may span multiple rows and multiple columns. */
443 /* Cells are linked on two lists: the list for all the cells of */
444 /* a document (the next pointers), and the list of all the */
445 /* cells that start in a given row (the nextinrow pointers) */
446 struct Tablecell
448 Tablecell* next; /* next in list of table's cells */
449 Tablecell* nextinrow; /* next in list of row's cells */
450 int cellid; /* serial no. of cell within table */
451 Item* content; /* contents before layout */
452 Lay* lay; /* layout of cell */
453 int rowspan; /* number of rows spanned by this cell */
454 int colspan; /* number of cols spanned by this cell */
455 Align align; /* alignment spec */
456 uchar flags; /* TFparsing, TFnowrap, TFisth */
457 Dimen wspec; /* suggested width */
458 int hspec; /* suggested height */
459 Background background; /* cell background */
460 int minw; /* minimum possible width */
461 int maxw; /* maximum width */
462 int ascent; /* cell's ascent */
463 int row; /* row of upper left corner */
464 int col; /* col of upper left corner */
465 Point pos; /* nw corner of cell contents, in cell */
466 };
468 /* Anchor is for info about hyperlinks that go somewhere */
469 struct Anchor
471 Anchor* next; /* next in list of document's anchors */
472 int index; /* serial no. of anchor within its doc */
473 Rune* name; /* name attr */
474 Rune* href; /* href attr */
475 int target; /* target attr as targetid */
476 };
479 /* DestAnchor is for info about hyperlinks that are destinations */
480 struct DestAnchor
482 DestAnchor* next; /* next in list of document's destanchors */
483 int index; /* serial no. of anchor within its doc */
484 Rune* name; /* name attr */
485 Item* item; /* the destination */
486 };
489 /* Maps (client side) */
490 struct Map
492 Map* next; /* next in list of document's maps */
493 Rune* name; /* map name */
494 Area* areas; /* list of map areas */
495 };
498 struct Area
500 Area* next; /* next in list of a map's areas */
501 int shape; /* SHrect, etc. */
502 Rune* href; /* associated hypertext link */
503 int target; /* associated target frame */
504 Dimen* coords; /* array of coords for shape */
505 int ncoords; /* size of coords array */
506 };
/* Area shapes */
enum {
	SHrect,
	SHcircle,
	SHpoly
};
/* Fonts are represented by integers: style*NumSize + size */

/* Font styles; NumStyle is the number of styles */
enum {
	FntR,	/* roman */
	FntI,	/* italic */
	FntB,	/* bold */
	FntT,	/* typewriter */
	NumStyle
};
/* Font sizes; NumSize is the number of sizes */
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};
534 enum {
535 NumFnt = (NumStyle*NumSize),
536 DefFnt = (FntR*NumSize+Normal)
537 };
/* Lines are needed through some text items, for underlining or strikethrough */
enum {
	ULnone,
	ULunder,
	ULmid
};
/* Kidinfo flags */
enum {
	FRnoresize =	(1<<0),
	FRnoscroll =	(1<<1),
	FRhscroll =	(1<<2),
	FRvscroll =	(1<<3),
	FRhscrollauto =	(1<<4),
	FRvscrollauto =	(1<<5)
};
554 /* Information about child frame or frameset */
555 struct Kidinfo
557 Kidinfo* next; /* in list of kidinfos for a frameset */
558 int isframeset;
560 /* fields for "frame" */
561 Rune* src; /* only nil if a "dummy" frame or this is frameset */
562 Rune* name; /* always non-empty if this isn't frameset */
563 int marginw;
564 int marginh;
565 int framebd;
566 int flags;
568 /* fields for "frameset" */
569 Dimen* rows; /* array of row dimensions */
570 int nrows; /* length of rows */
571 Dimen* cols; /* array of col dimensions */
572 int ncols; /* length of cols */
573 Kidinfo* kidinfos;
574 Kidinfo* nextframeset; /* parsing stack */
575 };
578 /* Document info (global information about HTML page) */
579 struct Docinfo
581 /* stuff from HTTP headers, doc head, and body tag */
582 Rune* src; /* original source of doc */
583 Rune* base; /* base URL of doc */
584 Rune* doctitle; /* from <title> element */
585 Background background; /* background specification */
586 Iimage* backgrounditem; /* Image Item for doc background image, or nil */
587 int text; /* doc foreground (text) color */
588 int link; /* unvisited hyperlink color */
589 int vlink; /* visited hyperlink color */
590 int alink; /* highlighting hyperlink color */
591 int target; /* target frame default */
592 int chset; /* ISO_8859, etc. */
593 int mediatype; /* TextHtml, etc. */
594 int scripttype; /* TextJavascript, etc. */
595 int hasscripts; /* true if scripts used */
596 Rune* refresh; /* content of <http-equiv=Refresh ...> */
597 Kidinfo* kidinfo; /* if a frameset */
598 int frameid; /* id of document frame */
600 /* info needed to respond to user actions */
601 Anchor* anchors; /* list of href anchors */
602 DestAnchor* dests; /* list of destination anchors */
603 Form* forms; /* list of forms */
604 Table* tables; /* list of tables */
605 Map* maps; /* list of maps */
606 Iimage* images; /* list of image items (through nextimage links) */
607 };
609 extern int dimenkind(Dimen d);
610 extern int dimenspec(Dimen d);
611 extern void freedocinfo(Docinfo* d);
612 extern void freeitems(Item* ithead);
613 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
614 extern void printitems(Item* items, char* msg);
615 extern int targetid(Rune* s);
616 extern Rune* targetname(int targid);
617 extern int validitems(Item* i);
619 /* #pragma varargck type "I" Item* */
/* Control print output */
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;
626 /* To be provided by caller */
627 /* emalloc and erealloc should not return if can't get memory. */
628 /* emalloc should zero its memory. */
629 extern void* emalloc(ulong);
630 extern void* erealloc(void* p, ulong size);
631 #ifdef __cpluspplus
633 #endif
634 #endif