Blob


1 #ifndef _HTML_H_
2 #define _HTML_H_ 1
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
7 AUTOLIB(html)
8 /*
9 #pragma lib "libhtml.a"
10 #pragma src "/sys/src/libhtml"
11 */
13 // UTILS
14 extern uchar* fromStr(Rune* buf, int n, int chset);
15 extern Rune* toStr(uchar* buf, int n, int chset);
// Common LEX and BUILD enums

// Media types.
// Enumerators are numbered consecutively from 0, so the trailing
// NMEDIATYPES equals the number of media types listed before it.
enum
{
	ApplMsword = 0, ApplOctets, ApplPdf, ApplPostscript,
	ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint,

	UnknownType,

	Audio32kadpcm, AudioBasic,

	ImageCgm, ImageG3fax, ImageGif, ImageIef, ImageJpeg,
	ImagePng, ImageTiff, ImageXBit, ImageXBit2,
	ImageXBitmulti, ImageXXBitmap,

	ModelVrml,

	MultiDigest, MultiMixed,

	TextCss, TextEnriched, TextHtml, TextJavascript, TextPlain,
	TextRichtext, TextSgml, TextTabSeparatedValues, TextXml,

	VideoMpeg, VideoQuicktime,

	NMEDIATYPES
};
// HTTP methods
enum
{
	HGet	= 0,
	HPost	= 1
};
// Charsets.
// NCHARSETS is last, so it equals the number of charsets above it.
enum
{
	UnknownCharset = 0,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,

	NCHARSETS
};
// Frame Target IDs
enum {
	FTtop = 0, FTself, FTparent, FTblank
};
// LEX
// Forward declarations only; the struct bodies are not part of
// this section of the header.
typedef struct Attr	Attr;
typedef struct Token	Token;
// BUILD
// Forward typedefs for every structure tag used by the build side,
// so the definitions that follow can refer to one another freely.

// items and their variants
typedef struct Item		Item;
typedef struct Itext		Itext;
typedef struct Irule		Irule;
typedef struct Iimage		Iimage;
typedef struct Iformfield	Iformfield;
typedef struct Itable		Itable;
typedef struct Ifloat		Ifloat;
typedef struct Ispacer		Ispacer;

// generic attributes and script events
typedef struct Genattr		Genattr;
typedef struct SEvent		SEvent;

// forms
typedef struct Form		Form;
typedef struct Formfield	Formfield;
typedef struct Option		Option;

// tables
typedef struct Table		Table;
typedef struct Tablecol		Tablecol;
typedef struct Tablerow		Tablerow;
typedef struct Tablecell	Tablecell;

// small value types
typedef struct Align		Align;
typedef struct Dimen		Dimen;
typedef struct Background	Background;

// anchors and client-side maps
typedef struct Anchor		Anchor;
typedef struct DestAnchor	DestAnchor;
typedef struct Map		Map;
typedef struct Area		Area;

// document-level information
typedef struct Kidinfo		Kidinfo;
typedef struct Docinfo		Docinfo;

// not defined in this section of the header
typedef struct Stack		Stack;
typedef struct Pstate		Pstate;
typedef struct ItemSource	ItemSource;
typedef struct Lay		Lay;	// defined in Layout module
// Alignment types
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};
130 struct Align
132 uchar halign; // one of ALnone, ALleft, etc.
133 uchar valign; // one of ALnone, ALtop, etc.
134 };
// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified.

// To fit in a word, use top bits to identify kind, rest for value
enum {
	Dnone		= 0,
	Dpixels		= 0x20000000,	// 1<<29
	Dpercent	= 0x40000000,	// 2<<29
	Drelative	= 0x60000000,	// 3<<29
	Dkindmask	= 0x60000000,	// the two bits that hold the kind
	Dspecmask	= ~Dkindmask	// every bit outside Dkindmask
};
// A dimension packed into one int: the kind (Dnone, Dpixels,
// Dpercent or Drelative) lives in the Dkindmask bits and the
// value in the Dspecmask bits.
struct Dimen
{
	int	kindspec;	// kind | spec
};
156 // Background is either an image or a color.
157 // If both are set, the image has precedence.
158 struct Background
160 Rune* image; // url
161 int color;
162 };
165 // There are about a half dozen Item variants.
166 // The all look like this at the start (using Plan 9 C's
167 // anonymous structure member mechanism),
168 // and then the tag field dictates what extra fields there are.
169 struct Item
171 Item* next; // successor in list of items
172 int width; // width in pixels (0 for floating items)
173 int height; // height in pixels
174 int ascent; // ascent (from top to baseline) in pixels
175 int anchorid; // if nonzero, which anchor we're in
176 int state; // flags and values (see below)
177 Genattr* genattr; // generic attributes and events
178 int tag; // variant discriminator: Itexttag, etc.
179 };
// Item variant tags (stored in Item.tag)
enum {
	Itexttag = 0,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};
192 struct Itext
194 Item item; // (with tag ==Itexttag)
195 Rune* s; // the characters
196 int fnt; // style*NumSize+size (see font stuff, below)
197 int fg; // Pixel (color) for text
198 uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
199 uchar ul; // ULnone, ULunder, or ULmid
200 };
202 struct Irule
204 Item item; // (with tag ==Iruletag)
205 uchar align; // alignment spec
206 uchar noshade; // if true, don't shade
207 int size; // size attr (rule height)
208 Dimen wspec; // width spec
209 };
212 struct Iimage
214 Item item; // (with tag ==Iimagetag)
215 Rune* imsrc; // image src url
216 int imwidth; // spec width (actual, if no spec)
217 int imheight; // spec height (actual, if no spec)
218 Rune* altrep; // alternate representation, in absence of image
219 Map* map; // if non-nil, client side map
220 int ctlid; // if animated
221 uchar align; // vertical alignment
222 uchar hspace; // in pixels; buffer space on each side
223 uchar vspace; // in pixels; buffer space on top and bottom
224 uchar border; // in pixels: border width to draw around image
225 Iimage* nextimage; // next in list of document's images
226 };
229 struct Iformfield
231 Item item; // (with tag ==Iformfieldtag)
232 Formfield* formfield;
233 };
236 struct Itable
238 Item item; // (with tag ==Itabletag)
239 Table* table;
240 };
243 struct Ifloat
245 Item _item; // (with tag ==Ifloattag)
246 Item* item; // table or image item that floats
247 int x; // x coord of top (from right, if ALright)
248 int y; // y coord of top
249 uchar side; // margin it floats to: ALleft or ALright
250 uchar infloats; // true if this has been added to a lay.floats
251 Ifloat* nextfloat; // in list of floats
252 };
255 struct Ispacer
257 Item item; // (with tag ==Ispacertag)
258 int spkind; // ISPnull, etc.
259 };
// Item state flags and value fields

// Forced break before this item.  This one is a macro rather than
// an enumerator because the value is too big for sun.
#define IFbrk		0x80000000

enum {
	IFbrksp		= (1<<30),	// add 1 line space to break (IFbrk set too)
	IFnobrk		= (1<<29),	// break not allowed before this item
	IFcleft		= (1<<28),	// clear left floats (IFbrk set too)
	IFcright	= (1<<27),	// clear right floats (IFbrk set too)
	IFwrap		= (1<<26),	// in a wrapping (non-pre) line
	IFhang		= (1<<25),	// in a hanging (into left indent) item
	IFrjust		= (1<<24),	// right justify current line
	IFcjust		= (1<<23),	// center justify current line
	IFsmap		= (1<<22),	// image is server-side map

	// value fields held in the low 16 bits
	IFindentshift	= 8,
	IFindentmask	= (0xFF<<IFindentshift),	// current indent, in tab stops
	IFhangmask	= 0xFF		// current hang into left indent, in 1/10th tabstops
};
// Bias added to Itext's voff field
enum {
	Voffbias = 0x80
};
// Spacer kinds (stored in Ispacer.spkind)
enum {
	ISPnull		= 0,	// 0 height and width
	ISPvline	= 1,	// height and ascent of current font
	ISPhspace	= 2,	// width of space in current font
	ISPgeneral	= 3	// other purposes (e.g., between markers and list)
};
290 // Generic attributes and events (not many elements will have any of these set)
291 struct Genattr
293 Rune* id;
294 Rune* class;
295 Rune* style;
296 Rune* title;
297 SEvent* events;
298 };
300 struct SEvent
302 SEvent* next; // in list of events
303 int type; // SEonblur, etc.
304 Rune* script;
305 };
// Script event types (SEvent.type).
// Numscriptev is last, so it equals the number of event types.
enum {
	SEonblur = 0,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,

	Numscriptev
};
// Form field types (Formfield.ftype)
enum {
	Ftext = 0, Fpassword, Fcheckbox, Fradio,
	Fsubmit, Fhidden, Fimage, Freset,
	Ffile, Fbutton, Fselect, Ftextarea
};
332 // Information about a field in a form
333 struct Formfield
335 Formfield* next; // in list of fields for a form
336 int ftype; // Ftext, Fpassword, etc.
337 int fieldid; // serial no. of field within its form
338 Form* form; // containing form
339 Rune* name; // name attr
340 Rune* value; // value attr
341 int size; // size attr
342 int maxlength; // maxlength attr
343 int rows; // rows attr
344 int cols; // cols attr
345 uchar flags; // FFchecked, etc.
346 Option* options; // for Fselect fields
347 Item* image; // image item, for Fimage fields
348 int ctlid; // identifies control for this field in layout
349 SEvent* events; // same as genattr->events of containing item
350 };
// Formfield.flags bits
enum {
	FFchecked	= 0x80,	// 1<<7
	FFmultiple	= 0x40	// 1<<6
};
357 // Option holds info about an option in a "select" form field
358 struct Option
360 Option* next; // next in list of options for a field
361 int selected; // true if selected initially
362 Rune* value; // value attr
363 Rune* display; // display string
364 };
366 // Form holds info about a form
367 struct Form
369 Form* next; // in list of forms for document
370 int formid; // serial no. of form within its doc
371 Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
372 Rune* action; // action attr
373 int target; // target attr as targetid
374 int method; // HGet or HPost
375 int nfields; // number of fields
376 Formfield* fields; // field's forms, in input order
377 };
// Flags used in various table structures
enum {
	TFparsing	= 0x80,	// 1<<7
	TFnowrap	= 0x40,	// 1<<6
	TFisth		= 0x20	// 1<<5
};
387 // Information about a table
388 struct Table
390 Table* next; // next in list of document's tables
391 int tableid; // serial no. of table within its doc
392 Tablerow* rows; // array of row specs (list during parsing)
393 int nrow; // total number of rows
394 Tablecol* cols; // array of column specs
395 int ncol; // total number of columns
396 Tablecell* cells; // list of unique cells
397 int ncell; // total number of cells
398 Tablecell*** grid; // 2-D array of cells
399 Align align; // alignment spec for whole table
400 Dimen width; // width spec for whole table
401 int border; // border attr
402 int cellspacing; // cellspacing attr
403 int cellpadding; // cellpadding attr
404 Background background; // table background
405 Item* caption; // linked list of Items, giving caption
406 uchar caption_place; // ALtop or ALbottom
407 Lay* caption_lay; // layout of caption
408 int totw; // total width
409 int toth; // total height
410 int caph; // caption height
411 int availw; // used for previous 3 sizes
412 Token* tabletok; // token that started the table
413 uchar flags; // Lchanged, perhaps
414 };
417 struct Tablecol
419 int width;
420 Align align;
421 Point pos;
422 };
425 struct Tablerow
427 Tablerow* next; // Next in list of rows, during parsing
428 Tablecell* cells; // Cells in row, linked through nextinrow
429 int height;
430 int ascent;
431 Align align;
432 Background background;
433 Point pos;
434 uchar flags; // 0 or TFparsing
435 };
438 // A Tablecell is one cell of a table.
439 // It may span multiple rows and multiple columns.
440 // Cells are linked on two lists: the list for all the cells of
441 // a document (the next pointers), and the list of all the
442 // cells that start in a given row (the nextinrow pointers)
443 struct Tablecell
445 Tablecell* next; // next in list of table's cells
446 Tablecell* nextinrow; // next in list of row's cells
447 int cellid; // serial no. of cell within table
448 Item* content; // contents before layout
449 Lay* lay; // layout of cell
450 int rowspan; // number of rows spanned by this cell
451 int colspan; // number of cols spanned by this cell
452 Align align; // alignment spec
453 uchar flags; // TFparsing, TFnowrap, TFisth
454 Dimen wspec; // suggested width
455 int hspec; // suggested height
456 Background background; // cell background
457 int minw; // minimum possible width
458 int maxw; // maximum width
459 int ascent; // cell's ascent
460 int row; // row of upper left corner
461 int col; // col of upper left corner
462 Point pos; // nw corner of cell contents, in cell
463 };
465 // Anchor is for info about hyperlinks that go somewhere
466 struct Anchor
468 Anchor* next; // next in list of document's anchors
469 int index; // serial no. of anchor within its doc
470 Rune* name; // name attr
471 Rune* href; // href attr
472 int target; // target attr as targetid
473 };
476 // DestAnchor is for info about hyperlinks that are destinations
477 struct DestAnchor
479 DestAnchor* next; // next in list of document's destanchors
480 int index; // serial no. of anchor within its doc
481 Rune* name; // name attr
482 Item* item; // the destination
483 };
486 // Maps (client side)
487 struct Map
489 Map* next; // next in list of document's maps
490 Rune* name; // map name
491 Area* areas; // list of map areas
492 };
495 struct Area
497 Area* next; // next in list of a map's areas
498 int shape; // SHrect, etc.
499 Rune* href; // associated hypertext link
500 int target; // associated target frame
501 Dimen* coords; // array of coords for shape
502 int ncoords; // size of coords array
503 };
// Area shapes (Area.shape)
enum {
	SHrect = 0,
	SHcircle,
	SHpoly
};
// Fonts are represented by integers: style*NumSize + size

// Font styles (NumStyle is the number of styles)
enum {
	FntR = 0,	// roman
	FntI = 1,	// italic
	FntB = 2,	// bold
	FntT = 3,	// typewriter

	NumStyle
};
// Font sizes (NumSize is the number of sizes)
enum {
	Tiny = 0, Small, Normal, Large, Verylarge,

	NumSize
};
531 enum {
532 NumFnt = (NumStyle*NumSize),
533 DefFnt = (FntR*NumSize+Normal)
534 };
// Lines are needed through some text items, for underlining or strikethrough
enum {
	ULnone = 0,
	ULunder,
	ULmid
};
// Kidinfo flags (one bit each)
enum {
	FRnoresize	= 0x01,
	FRnoscroll	= 0x02,
	FRhscroll	= 0x04,
	FRvscroll	= 0x08,
	FRhscrollauto	= 0x10,
	FRvscrollauto	= 0x20
};
551 // Information about child frame or frameset
552 struct Kidinfo
554 Kidinfo* next; // in list of kidinfos for a frameset
555 int isframeset;
557 // fields for "frame"
558 Rune* src; // only nil if a "dummy" frame or this is frameset
559 Rune* name; // always non-empty if this isn't frameset
560 int marginw;
561 int marginh;
562 int framebd;
563 int flags;
565 // fields for "frameset"
566 Dimen* rows; // array of row dimensions
567 int nrows; // length of rows
568 Dimen* cols; // array of col dimensions
569 int ncols; // length of cols
570 Kidinfo* kidinfos;
571 Kidinfo* nextframeset; // parsing stack
572 };
575 // Document info (global information about HTML page)
576 struct Docinfo
578 // stuff from HTTP headers, doc head, and body tag
579 Rune* src; // original source of doc
580 Rune* base; // base URL of doc
581 Rune* doctitle; // from <title> element
582 Background background; // background specification
583 Iimage* backgrounditem; // Image Item for doc background image, or nil
584 int text; // doc foreground (text) color
585 int link; // unvisited hyperlink color
586 int vlink; // visited hyperlink color
587 int alink; // highlighting hyperlink color
588 int target; // target frame default
589 int chset; // ISO_8859, etc.
590 int mediatype; // TextHtml, etc.
591 int scripttype; // TextJavascript, etc.
592 int hasscripts; // true if scripts used
593 Rune* refresh; // content of <http-equiv=Refresh ...>
594 Kidinfo* kidinfo; // if a frameset
595 int frameid; // id of document frame
597 // info needed to respond to user actions
598 Anchor* anchors; // list of href anchors
599 DestAnchor* dests; // list of destination anchors
600 Form* forms; // list of forms
601 Table* tables; // list of tables
602 Map* maps; // list of maps
603 Iimage* images; // list of image items (through nextimage links)
604 };
606 extern int dimenkind(Dimen d);
607 extern int dimenspec(Dimen d);
608 extern void freedocinfo(Docinfo* d);
609 extern void freeitems(Item* ithead);
610 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
611 extern void printitems(Item* items, char* msg);
612 extern int targetid(Rune* s);
613 extern Rune* targetname(int targid);
614 extern int validitems(Item* i);
616 /* #pragma varargck type "I" Item* */
// Control print output
extern int	warn, dbglex, dbgbuild;
623 // To be provided by caller
624 // emalloc and erealloc should not return if can't get memory.
625 // emalloc should zero its memory.
626 extern void* emalloc(ulong);
627 extern void* erealloc(void* p, ulong size);
/* Close the extern "C" block opened at the top of this header.
 * The guard must test the same macro as the opener (__cplusplus);
 * otherwise a C++ compiler never sees the closing brace. */
#ifdef __cplusplus
}
#endif
631 #endif