Blob


1 #ifndef _HTML_H_
2 #define _HTML_H_ 1
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
7 AUTOLIB(html)
8 /*
9 #pragma lib "libhtml.a"
10 #pragma src "/sys/src/libhtml"
11 */
13 // UTILS
14 extern uchar* fromStr(Rune* buf, int n, int chset);
15 extern Rune* toStr(uchar* buf, int n, int chset);
17 // Common LEX and BUILD enums
// Media types.
// Values are consecutive from 0; NMEDIATYPES is the number of entries.
// Entries are laid out here by name prefix; the order is significant.
enum
{
	ApplMsword = 0, ApplOctets, ApplPdf, ApplPostscript,
	ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint,

	UnknownType,

	Audio32kadpcm, AudioBasic,

	ImageCgm, ImageG3fax, ImageGif, ImageIef, ImageJpeg, ImagePng,
	ImageTiff, ImageXBit, ImageXBit2, ImageXBitmulti, ImageXXBitmap,

	ModelVrml,

	MultiDigest, MultiMixed,

	TextCss, TextEnriched, TextHtml, TextJavascript, TextPlain,
	TextRichtext, TextSgml, TextTabSeparatedValues, TextXml,

	VideoMpeg, VideoQuicktime,

	NMEDIATYPES
};
// HTTP methods
enum
{
	HGet = 0,
	HPost = 1
};
// Charsets.
// NCHARSETS is the number of entries, including UnknownCharset.
enum
{
	UnknownCharset = 0,
	US_Ascii = 1,
	ISO_8859_1 = 2,
	UTF_8 = 3,
	Unicode = 4,
	NCHARSETS = 5
};
// Frame Target IDs
enum {
	FTtop = 0, FTself, FTparent, FTblank
};
// Forward declarations: every struct tag is also usable as a type name.

// LEX
typedef struct Token		Token;
typedef struct Attr		Attr;

// BUILD

// Item and its variants
typedef struct Item		Item;
typedef struct Itext		Itext;
typedef struct Irule		Irule;
typedef struct Iimage		Iimage;
typedef struct Iformfield	Iformfield;
typedef struct Itable		Itable;
typedef struct Ifloat		Ifloat;
typedef struct Ispacer		Ispacer;

// Attributes, events and forms
typedef struct Genattr		Genattr;
typedef struct SEvent		SEvent;
typedef struct Formfield	Formfield;
typedef struct Option		Option;
typedef struct Form		Form;

// Tables
typedef struct Table		Table;
typedef struct Tablecol		Tablecol;
typedef struct Tablerow		Tablerow;
typedef struct Tablecell	Tablecell;

// Small value types
typedef struct Align		Align;
typedef struct Dimen		Dimen;

// Anchors and client-side maps
typedef struct Anchor		Anchor;
typedef struct DestAnchor	DestAnchor;
typedef struct Map		Map;
typedef struct Area		Area;

// Document-level information and parser state
typedef struct Background	Background;
typedef struct Kidinfo		Kidinfo;
typedef struct Docinfo		Docinfo;
typedef struct Stack		Stack;
typedef struct Pstate		Pstate;
typedef struct ItemSource	ItemSource;

typedef struct Lay		Lay;	// defined in Layout module
// Alignment types
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};
130 struct Align
132 uchar halign; // one of ALnone, ALleft, etc.
133 uchar valign; // one of ALnone, ALtop, etc.
134 };
// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified.

// To fit in a word, the kind lives in bits 29-30 and the remaining
// bits carry the value.
enum {
	Dnone		= 0,
	Dpixels		= 0x20000000,	// 1<<29
	Dpercent	= 0x40000000,	// 2<<29
	Drelative	= 0x60000000,	// 3<<29
	Dkindmask	= 0x60000000,	// selects the kind bits
	Dspecmask	= (~Dkindmask)	// selects everything else
};
// A dimension packed into one int: the Dkindmask bits hold the kind
// (Dnone, Dpixels, Dpercent or Drelative), the Dspecmask bits the value.
struct Dimen
{
	int	kindspec;	// kind | spec
};
156 // Background is either an image or a color.
157 // If both are set, the image has precedence.
158 struct Background
160 Rune* image; // url
161 int color;
162 };
165 // There are about a half dozen Item variants.
166 // The all look like this at the start (using Plan 9 C's
167 // anonymous structure member mechanism),
168 // and then the tag field dictates what extra fields there are.
169 struct Item
171 Item* next; // successor in list of items
172 int width; // width in pixels (0 for floating items)
173 int height; // height in pixels
174 Rectangle r;
175 int ascent; // ascent (from top to baseline) in pixels
176 int anchorid; // if nonzero, which anchor we're in
177 int state; // flags and values (see below)
178 Genattr* genattr; // generic attributes and events
179 int tag; // variant discriminator: Itexttag, etc.
180 };
// Item variant tags (values stored in Item.tag)
enum {
	Itexttag	= 0,
	Iruletag	= 1,
	Iimagetag	= 2,
	Iformfieldtag	= 3,
	Itabletag	= 4,
	Ifloattag	= 5,
	Ispacertag	= 6
};
193 struct Itext
195 Item item; // (with tag ==Itexttag)
196 Rune* s; // the characters
197 int fnt; // style*NumSize+size (see font stuff, below)
198 int fg; // Pixel (color) for text
199 uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
200 uchar ul; // ULnone, ULunder, or ULmid
201 };
203 struct Irule
205 Item item; // (with tag ==Iruletag)
206 uchar align; // alignment spec
207 uchar noshade; // if true, don't shade
208 int size; // size attr (rule height)
209 Dimen wspec; // width spec
210 };
213 struct Iimage
215 Item item; // (with tag ==Iimagetag)
216 Rune* imsrc; // image src url
217 int imwidth; // spec width (actual, if no spec)
218 int imheight; // spec height (actual, if no spec)
219 Rune* altrep; // alternate representation, in absence of image
220 Map* map; // if non-nil, client side map
221 int ctlid; // if animated
222 uchar align; // vertical alignment
223 uchar hspace; // in pixels; buffer space on each side
224 uchar vspace; // in pixels; buffer space on top and bottom
225 uchar border; // in pixels: border width to draw around image
226 Iimage* nextimage; // next in list of document's images
227 void *aux;
228 };
231 struct Iformfield
233 Item item; // (with tag ==Iformfieldtag)
234 Formfield* formfield;
235 void *aux;
236 };
239 struct Itable
241 Item item; // (with tag ==Itabletag)
242 Table* table;
243 };
246 struct Ifloat
248 Item _item; // (with tag ==Ifloattag)
249 Item* item; // table or image item that floats
250 int x; // x coord of top (from right, if ALright)
251 int y; // y coord of top
252 uchar side; // margin it floats to: ALleft or ALright
253 uchar infloats; // true if this has been added to a lay.floats
254 Ifloat* nextfloat; // in list of floats
255 };
258 struct Ispacer
260 Item item; // (with tag ==Ispacertag)
261 int spkind; // ISPnull, etc.
262 };
// Item state flags and value fields (stored in Item.state)

// IFbrk is a macro rather than an enumerator: the value was noted as
// "too big for sun" when written as an enum constant.
#define IFbrk 0x80000000 /* forced break before this item */

enum {
	IFbrksp		= (1<<30),	// add 1 line space to break (IFbrk set too)
	IFnobrk		= (1<<29),	// break not allowed before this item
	IFcleft		= (1<<28),	// clear left floats (IFbrk set too)
	IFcright	= (1<<27),	// clear right floats (IFbrk set too)
	IFwrap		= (1<<26),	// in a wrapping (non-pre) line
	IFhang		= (1<<25),	// in a hanging (into left indent) item
	IFrjust		= (1<<24),	// right justify current line
	IFcjust		= (1<<23),	// center justify current line
	IFsmap		= (1<<22),	// image is server-side map

	IFindentshift	= 8,
	IFindentmask	= (0xFF<<IFindentshift),	// current indent, in tab stops
	IFhangmask	= 0xFF		// current hang into left indent, in 1/10th tabstops
};
// Bias added to Itext's voff field
enum {
	Voffbias = 128
};
// Spacer kinds (values of Ispacer.spkind)
enum {
	ISPnull		= 0,	// 0 height and width
	ISPvline	= 1,	// height and ascent of current font
	ISPhspace	= 2,	// width of space in current font
	ISPgeneral	= 3	// other purposes (e.g., between markers and list)
};
293 // Generic attributes and events (not many elements will have any of these set)
294 struct Genattr
296 Rune* id;
297 Rune* class;
298 Rune* style;
299 Rune* title;
300 SEvent* events;
301 };
303 struct SEvent
305 SEvent* next; // in list of events
306 int type; // SEonblur, etc.
307 Rune* script;
308 };
// Script event types (values of SEvent.type).
// Numscriptev is the number of event types.
enum {
	SEonblur = 0,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};
// Form field types (values of Formfield.ftype)
enum {
	Ftext = 0, Fpassword, Fcheckbox, Fradio,
	Fsubmit, Fhidden, Fimage, Freset,
	Ffile, Fbutton, Fselect, Ftextarea
};
335 // Information about a field in a form
336 struct Formfield
338 Formfield* next; // in list of fields for a form
339 int ftype; // Ftext, Fpassword, etc.
340 int fieldid; // serial no. of field within its form
341 Form* form; // containing form
342 Rune* name; // name attr
343 Rune* value; // value attr
344 int size; // size attr
345 int maxlength; // maxlength attr
346 int rows; // rows attr
347 int cols; // cols attr
348 uchar flags; // FFchecked, etc.
349 Option* options; // for Fselect fields
350 Item* image; // image item, for Fimage fields
351 int ctlid; // identifies control for this field in layout
352 SEvent* events; // same as genattr->events of containing item
353 };
// Formfield.flags bits
enum {
	FFchecked	= 0x80,	// 1<<7
	FFmultiple	= 0x40	// 1<<6
};
360 // Option holds info about an option in a "select" form field
361 struct Option
363 Option* next; // next in list of options for a field
364 int selected; // true if selected initially
365 Rune* value; // value attr
366 Rune* display; // display string
367 };
369 // Form holds info about a form
370 struct Form
372 Form* next; // in list of forms for document
373 int formid; // serial no. of form within its doc
374 Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
375 Rune* action; // action attr
376 int target; // target attr as targetid
377 int method; // HGet or HPost
378 int nfields; // number of fields
379 Formfield* fields; // field's forms, in input order
380 };
// Flags used in various table structures
enum {
	TFparsing	= 0x80,	// 1<<7
	TFnowrap	= 0x40,	// 1<<6
	TFisth		= 0x20	// 1<<5
};
390 // Information about a table
391 struct Table
393 Table* next; // next in list of document's tables
394 int tableid; // serial no. of table within its doc
395 Tablerow* rows; // array of row specs (list during parsing)
396 int nrow; // total number of rows
397 Tablecol* cols; // array of column specs
398 int ncol; // total number of columns
399 Tablecell* cells; // list of unique cells
400 int ncell; // total number of cells
401 Tablecell*** grid; // 2-D array of cells
402 Align align; // alignment spec for whole table
403 Dimen width; // width spec for whole table
404 int border; // border attr
405 int cellspacing; // cellspacing attr
406 int cellpadding; // cellpadding attr
407 Background background; // table background
408 Item* caption; // linked list of Items, giving caption
409 uchar caption_place; // ALtop or ALbottom
410 Lay* caption_lay; // layout of caption
411 int totw; // total width
412 int toth; // total height
413 int caph; // caption height
414 int availw; // used for previous 3 sizes
415 Token* tabletok; // token that started the table
416 uchar flags; // Lchanged, perhaps
417 };
420 struct Tablecol
422 int width;
423 Align align;
424 Point pos;
425 };
428 struct Tablerow
430 Tablerow* next; // Next in list of rows, during parsing
431 Tablecell* cells; // Cells in row, linked through nextinrow
432 int height;
433 int ascent;
434 Align align;
435 Background background;
436 Point pos;
437 uchar flags; // 0 or TFparsing
438 };
441 // A Tablecell is one cell of a table.
442 // It may span multiple rows and multiple columns.
443 // Cells are linked on two lists: the list for all the cells of
444 // a document (the next pointers), and the list of all the
445 // cells that start in a given row (the nextinrow pointers)
446 struct Tablecell
448 Tablecell* next; // next in list of table's cells
449 Tablecell* nextinrow; // next in list of row's cells
450 int cellid; // serial no. of cell within table
451 Item* content; // contents before layout
452 Lay* lay; // layout of cell
453 int rowspan; // number of rows spanned by this cell
454 int colspan; // number of cols spanned by this cell
455 Align align; // alignment spec
456 uchar flags; // TFparsing, TFnowrap, TFisth
457 Dimen wspec; // suggested width
458 int hspec; // suggested height
459 Background background; // cell background
460 int minw; // minimum possible width
461 int maxw; // maximum width
462 int ascent; // cell's ascent
463 int row; // row of upper left corner
464 int col; // col of upper left corner
465 Point pos; // nw corner of cell contents, in cell
466 };
468 // Anchor is for info about hyperlinks that go somewhere
469 struct Anchor
471 Anchor* next; // next in list of document's anchors
472 int index; // serial no. of anchor within its doc
473 Rune* name; // name attr
474 Rune* href; // href attr
475 int target; // target attr as targetid
476 };
479 // DestAnchor is for info about hyperlinks that are destinations
480 struct DestAnchor
482 DestAnchor* next; // next in list of document's destanchors
483 int index; // serial no. of anchor within its doc
484 Rune* name; // name attr
485 Item* item; // the destination
486 };
489 // Maps (client side)
490 struct Map
492 Map* next; // next in list of document's maps
493 Rune* name; // map name
494 Area* areas; // list of map areas
495 };
498 struct Area
500 Area* next; // next in list of a map's areas
501 int shape; // SHrect, etc.
502 Rune* href; // associated hypertext link
503 int target; // associated target frame
504 Dimen* coords; // array of coords for shape
505 int ncoords; // size of coords array
506 };
// Area shapes (values of Area.shape)
enum {
	SHrect = 0,
	SHcircle = 1,
	SHpoly = 2
};
// Fonts are represented by integers: style*NumSize + size

// Font styles; NumStyle is the number of styles.
enum {
	FntR = 0,	// roman
	FntI = 1,	// italic
	FntB = 2,	// bold
	FntT = 3,	// typewriter
	NumStyle = 4
};
// Font sizes; NumSize is the number of sizes.
enum {
	Tiny = 0,
	Small = 1,
	Normal = 2,
	Large = 3,
	Verylarge = 4,
	NumSize = 5
};
534 enum {
535 NumFnt = (NumStyle*NumSize),
536 DefFnt = (FntR*NumSize+Normal)
537 };
// Lines are needed through some text items, for underlining or strikethrough
// (values of Itext.ul)
enum {
	ULnone = 0,
	ULunder = 1,
	ULmid = 2
};
// Kidinfo flags (bit values)
enum {
	FRnoresize	= 0x01,
	FRnoscroll	= 0x02,
	FRhscroll	= 0x04,
	FRvscroll	= 0x08,
	FRhscrollauto	= 0x10,
	FRvscrollauto	= 0x20
};
554 // Information about child frame or frameset
555 struct Kidinfo
557 Kidinfo* next; // in list of kidinfos for a frameset
558 int isframeset;
560 // fields for "frame"
561 Rune* src; // only nil if a "dummy" frame or this is frameset
562 Rune* name; // always non-empty if this isn't frameset
563 int marginw;
564 int marginh;
565 int framebd;
566 int flags;
568 // fields for "frameset"
569 Dimen* rows; // array of row dimensions
570 int nrows; // length of rows
571 Dimen* cols; // array of col dimensions
572 int ncols; // length of cols
573 Kidinfo* kidinfos;
574 Kidinfo* nextframeset; // parsing stack
575 };
578 // Document info (global information about HTML page)
579 struct Docinfo
581 // stuff from HTTP headers, doc head, and body tag
582 Rune* src; // original source of doc
583 Rune* base; // base URL of doc
584 Rune* doctitle; // from <title> element
585 Background background; // background specification
586 Iimage* backgrounditem; // Image Item for doc background image, or nil
587 int text; // doc foreground (text) color
588 int link; // unvisited hyperlink color
589 int vlink; // visited hyperlink color
590 int alink; // highlighting hyperlink color
591 int target; // target frame default
592 int chset; // ISO_8859, etc.
593 int mediatype; // TextHtml, etc.
594 int scripttype; // TextJavascript, etc.
595 int hasscripts; // true if scripts used
596 Rune* refresh; // content of <http-equiv=Refresh ...>
597 Kidinfo* kidinfo; // if a frameset
598 int frameid; // id of document frame
600 // info needed to respond to user actions
601 Anchor* anchors; // list of href anchors
602 DestAnchor* dests; // list of destination anchors
603 Form* forms; // list of forms
604 Table* tables; // list of tables
605 Map* maps; // list of maps
606 Iimage* images; // list of image items (through nextimage links)
607 };
609 extern int dimenkind(Dimen d);
610 extern int dimenspec(Dimen d);
611 extern void freedocinfo(Docinfo* d);
612 extern void freeitems(Item* ithead);
613 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
614 extern void printitems(Item* items, char* msg);
615 extern int targetid(Rune* s);
616 extern Rune* targetname(int targid);
617 extern int validitems(Item* i);
/* #pragma varargck type "I" Item* */

// Control print output
extern int warn, dbglex, dbgbuild;
626 // To be provided by caller
627 // emalloc and erealloc should not return if can't get memory.
628 // emalloc should zero its memory.
629 extern void* emalloc(ulong);
630 extern void* erealloc(void* p, ulong size);
// Close the extern "C" block opened under #ifdef __cplusplus at the top
// of this header. The macro name must match the opening test exactly.
#ifdef __cplusplus
}
#endif
634 #endif