// html.h: public declarations for libhtml (common enums, LEX and BUILD types).


1 #ifndef _HTML_H_
2 #define _HTML_H_ 1
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
7 /*
8 #pragma lib "libhtml.a"
9 #pragma src "/sys/src/libhtml"
10 */
12 // UTILS
13 extern uchar* fromStr(Rune* buf, int n, int chset);
14 extern Rune* toStr(uchar* buf, int n, int chset);
// Common LEX and BUILD enums

// Media types.
// Values are sequential starting at 0; NMEDIATYPES is the number of entries.
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES
};
// HTTP methods
enum
{
	HGet,
	HPost
};
// Charsets.
// NCHARSETS is the number of entries, including UnknownCharset.
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};
// Frame Target IDs
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};
// Forward typedefs, so the structures below can refer to one another
// by bare name.

// LEX
typedef struct Token Token;
typedef struct Attr Attr;

// BUILD
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;	// defined in Layout module
// Alignment types (used for both horizontal and vertical alignment)
enum {
	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
};
129 struct Align
131 uchar halign; // one of ALnone, ALleft, etc.
132 uchar valign; // one of ALnone, ALtop, etc.
133 };
// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified

// To fit in a word, use top bits to identify kind, rest for value
enum {
	Dnone =		0,
	Dpixels =	(1<<29),
	Dpercent =	(2<<29),
	Drelative =	(3<<29),
	Dkindmask =	(3<<29),	// selects the kind bits
	Dspecmask =	(~Dkindmask)	// selects the value bits
};
// One packed dimension: kind bits (Dpixels, Dpercent, ...) or'ed with the value.
struct Dimen
{
	int	kindspec;	// kind | spec
};
155 // Background is either an image or a color.
156 // If both are set, the image has precedence.
157 struct Background
159 Rune* image; // url
160 int color;
161 };
164 // There are about a half dozen Item variants.
165 // The all look like this at the start (using Plan 9 C's
166 // anonymous structure member mechanism),
167 // and then the tag field dictates what extra fields there are.
168 struct Item
170 Item* next; // successor in list of items
171 int width; // width in pixels (0 for floating items)
172 int height; // height in pixels
173 int ascent; // ascent (from top to baseline) in pixels
174 int anchorid; // if nonzero, which anchor we're in
175 int state; // flags and values (see below)
176 Genattr* genattr; // generic attributes and events
177 int tag; // variant discriminator: Itexttag, etc.
178 };
// Item variant tags (values of Item.tag)
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};
191 struct Itext
193 Item _item; // (with tag ==Itexttag)
194 Rune* s; // the characters
195 int fnt; // style*NumSize+size (see font stuff, below)
196 int fg; // Pixel (color) for text
197 uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
198 uchar ul; // ULnone, ULunder, or ULmid
199 };
201 struct Irule
203 Item _item; // (with tag ==Iruletag)
204 uchar align; // alignment spec
205 uchar noshade; // if true, don't shade
206 int size; // size attr (rule height)
207 Dimen wspec; // width spec
208 };
211 struct Iimage
213 Item _item; // (with tag ==Iimagetag)
214 Rune* imsrc; // image src url
215 int imwidth; // spec width (actual, if no spec)
216 int imheight; // spec height (actual, if no spec)
217 Rune* altrep; // alternate representation, in absence of image
218 Map* map; // if non-nil, client side map
219 int ctlid; // if animated
220 uchar align; // vertical alignment
221 uchar hspace; // in pixels; buffer space on each side
222 uchar vspace; // in pixels; buffer space on top and bottom
223 uchar border; // in pixels: border width to draw around image
224 Iimage* nextimage; // next in list of document's images
225 };
228 struct Iformfield
230 Item _item; // (with tag ==Iformfieldtag)
231 Formfield* formfield;
232 };
235 struct Itable
237 Item _item; // (with tag ==Itabletag)
238 Table* table;
239 };
242 struct Ifloat
244 Item _item; // (with tag ==Ifloattag)
245 Item* item; // table or image item that floats
246 int x; // x coord of top (from right, if ALright)
247 int y; // y coord of top
248 uchar side; // margin it floats to: ALleft or ALright
249 uchar infloats; // true if this has been added to a lay.floats
250 Ifloat* nextfloat; // in list of floats
251 };
254 struct Ispacer
256 Item _item; // (with tag ==Ispacertag)
257 int spkind; // ISPnull, etc.
258 };
// Item state flags and value fields (stored in Item.state).
// The high bits are single-bit flags; the low 16 bits hold two 8-bit
// values: the indent (bits 8-15) and the hang (bits 0-7).
// NOTE(review): 0x80000000 does not fit in an int, so IFbrk relies on
// the compiler accepting out-of-range enum constants (pre-C23 ISO C
// requires enum constants to be representable as int).
enum {
	IFbrk =		0x80000000,	// forced break before this item
	IFbrksp =	0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk =	0x20000000,	// break not allowed before this item
	IFcleft =	0x10000000,	// clear left floats (IFbrk set too)
	IFcright =	0x08000000,	// clear right floats (IFbrk set too)
	IFwrap =	0x04000000,	// in a wrapping (non-pre) line
	IFhang =	0x02000000,	// in a hanging (into left indent) item
	IFrjust =	0x01000000,	// right justify current line
	IFcjust =	0x00800000,	// center justify current line
	IFsmap =	0x00400000,	// image is server-side map
	IFindentshift =	8,
	IFindentmask =	(255<<IFindentshift),	// current indent, in tab stops
	IFhangmask =	255			// current hang into left indent, in 1/10th tabstops
};
// Bias added to Itext's voff field
enum { Voffbias = 128 };
// Spacer kinds (values of Ispacer.spkind)
enum {
	ISPnull,	// 0 height and width
	ISPvline,	// height and ascent of current font
	ISPhspace,	// width of space in current font
	ISPgeneral	// other purposes (e.g., between markers and list)
};
288 // Generic attributes and events (not many elements will have any of these set)
289 struct Genattr
291 Rune* id;
292 Rune* class;
293 Rune* style;
294 Rune* title;
295 SEvent* events;
296 };
298 struct SEvent
300 SEvent* next; // in list of events
301 int type; // SEonblur, etc.
302 Rune* script;
303 };
// Script event types (values of SEvent.type); Numscriptev is the count.
enum {
	SEonblur, SEonchange, SEonclick, SEondblclick,
	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
	SEonmousedown, SEonmousemove, SEonmouseout,
	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
	SEonsubmit, SEonunload,
	Numscriptev
};
// Form field types (values of Formfield.ftype)
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};
330 // Information about a field in a form
331 struct Formfield
333 Formfield* next; // in list of fields for a form
334 int ftype; // Ftext, Fpassword, etc.
335 int fieldid; // serial no. of field within its form
336 Form* form; // containing form
337 Rune* name; // name attr
338 Rune* value; // value attr
339 int size; // size attr
340 int maxlength; // maxlength attr
341 int rows; // rows attr
342 int cols; // cols attr
343 uchar flags; // FFchecked, etc.
344 Option* options; // for Fselect fields
345 Item* image; // image item, for Fimage fields
346 int ctlid; // identifies control for this field in layout
347 SEvent* events; // same as genattr->events of containing item
348 };
// Formfield.flags bits
enum {
	FFchecked =	(1<<7),
	FFmultiple =	(1<<6)
};
355 // Option holds info about an option in a "select" form field
356 struct Option
358 Option* next; // next in list of options for a field
359 int selected; // true if selected initially
360 Rune* value; // value attr
361 Rune* display; // display string
362 };
364 // Form holds info about a form
365 struct Form
367 Form* next; // in list of forms for document
368 int formid; // serial no. of form within its doc
369 Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
370 Rune* action; // action attr
371 int target; // target attr as targetid
372 int method; // HGet or HPost
373 int nfields; // number of fields
374 Formfield* fields; // field's forms, in input order
375 };
// Flags used in various table structures
enum {
	TFparsing =	(1<<7),
	TFnowrap =	(1<<6),
	TFisth =	(1<<5)
};
385 // Information about a table
386 struct Table
388 Table* next; // next in list of document's tables
389 int tableid; // serial no. of table within its doc
390 Tablerow* rows; // array of row specs (list during parsing)
391 int nrow; // total number of rows
392 Tablecol* cols; // array of column specs
393 int ncol; // total number of columns
394 Tablecell* cells; // list of unique cells
395 int ncell; // total number of cells
396 Tablecell*** grid; // 2-D array of cells
397 Align align; // alignment spec for whole table
398 Dimen width; // width spec for whole table
399 int border; // border attr
400 int cellspacing; // cellspacing attr
401 int cellpadding; // cellpadding attr
402 Background background; // table background
403 Item* caption; // linked list of Items, giving caption
404 uchar caption_place; // ALtop or ALbottom
405 Lay* caption_lay; // layout of caption
406 int totw; // total width
407 int toth; // total height
408 int caph; // caption height
409 int availw; // used for previous 3 sizes
410 Token* tabletok; // token that started the table
411 uchar flags; // Lchanged, perhaps
412 };
415 struct Tablecol
417 int width;
418 Align align;
419 Point pos;
420 };
423 struct Tablerow
425 Tablerow* next; // Next in list of rows, during parsing
426 Tablecell* cells; // Cells in row, linked through nextinrow
427 int height;
428 int ascent;
429 Align align;
430 Background background;
431 Point pos;
432 uchar flags; // 0 or TFparsing
433 };
436 // A Tablecell is one cell of a table.
437 // It may span multiple rows and multiple columns.
438 // Cells are linked on two lists: the list for all the cells of
439 // a document (the next pointers), and the list of all the
440 // cells that start in a given row (the nextinrow pointers)
441 struct Tablecell
443 Tablecell* next; // next in list of table's cells
444 Tablecell* nextinrow; // next in list of row's cells
445 int cellid; // serial no. of cell within table
446 Item* content; // contents before layout
447 Lay* lay; // layout of cell
448 int rowspan; // number of rows spanned by this cell
449 int colspan; // number of cols spanned by this cell
450 Align align; // alignment spec
451 uchar flags; // TFparsing, TFnowrap, TFisth
452 Dimen wspec; // suggested width
453 int hspec; // suggested height
454 Background background; // cell background
455 int minw; // minimum possible width
456 int maxw; // maximum width
457 int ascent; // cell's ascent
458 int row; // row of upper left corner
459 int col; // col of upper left corner
460 Point pos; // nw corner of cell contents, in cell
461 };
463 // Anchor is for info about hyperlinks that go somewhere
464 struct Anchor
466 Anchor* next; // next in list of document's anchors
467 int index; // serial no. of anchor within its doc
468 Rune* name; // name attr
469 Rune* href; // href attr
470 int target; // target attr as targetid
471 };
474 // DestAnchor is for info about hyperlinks that are destinations
475 struct DestAnchor
477 DestAnchor* next; // next in list of document's destanchors
478 int index; // serial no. of anchor within its doc
479 Rune* name; // name attr
480 Item* item; // the destination
481 };
484 // Maps (client side)
485 struct Map
487 Map* next; // next in list of document's maps
488 Rune* name; // map name
489 Area* areas; // list of map areas
490 };
493 struct Area
495 Area* next; // next in list of a map's areas
496 int shape; // SHrect, etc.
497 Rune* href; // associated hypertext link
498 int target; // associated target frame
499 Dimen* coords; // array of coords for shape
500 int ncoords; // size of coords array
501 };
// Area shapes (values of Area.shape)
enum {
	SHrect, SHcircle, SHpoly
};
// Fonts are represented by integers: style*NumSize + size

// Font styles; NumStyle is the number of styles.
enum {
	FntR,	// roman
	FntI,	// italic
	FntB,	// bold
	FntT,	// typewriter
	NumStyle
};

// Font sizes; NumSize is the number of sizes.
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};

// NumFnt: total number of style/size combinations.
// DefFnt: the default font (roman, normal size).
enum {
	NumFnt = (NumStyle*NumSize),
	DefFnt = (FntR*NumSize+Normal)
};
// Lines are needed through some text items, for underlining or strikethrough
enum {
	ULnone, ULunder, ULmid
};
// Kidinfo flags (bits of Kidinfo.flags)
enum {
	FRnoresize =	(1<<0),
	FRnoscroll =	(1<<1),
	FRhscroll =	(1<<2),
	FRvscroll =	(1<<3),
	FRhscrollauto =	(1<<4),
	FRvscrollauto =	(1<<5)
};
549 // Information about child frame or frameset
550 struct Kidinfo
552 Kidinfo* next; // in list of kidinfos for a frameset
553 int isframeset;
555 // fields for "frame"
556 Rune* src; // only nil if a "dummy" frame or this is frameset
557 Rune* name; // always non-empty if this isn't frameset
558 int marginw;
559 int marginh;
560 int framebd;
561 int flags;
563 // fields for "frameset"
564 Dimen* rows; // array of row dimensions
565 int nrows; // length of rows
566 Dimen* cols; // array of col dimensions
567 int ncols; // length of cols
568 Kidinfo* kidinfos;
569 Kidinfo* nextframeset; // parsing stack
570 };
573 // Document info (global information about HTML page)
574 struct Docinfo
576 // stuff from HTTP headers, doc head, and body tag
577 Rune* src; // original source of doc
578 Rune* base; // base URL of doc
579 Rune* doctitle; // from <title> element
580 Background background; // background specification
581 Iimage* backgrounditem; // Image Item for doc background image, or nil
582 int text; // doc foreground (text) color
583 int link; // unvisited hyperlink color
584 int vlink; // visited hyperlink color
585 int alink; // highlighting hyperlink color
586 int target; // target frame default
587 int chset; // ISO_8859, etc.
588 int mediatype; // TextHtml, etc.
589 int scripttype; // TextJavascript, etc.
590 int hasscripts; // true if scripts used
591 Rune* refresh; // content of <http-equiv=Refresh ...>
592 Kidinfo* kidinfo; // if a frameset
593 int frameid; // id of document frame
595 // info needed to respond to user actions
596 Anchor* anchors; // list of href anchors
597 DestAnchor* dests; // list of destination anchors
598 Form* forms; // list of forms
599 Table* tables; // list of tables
600 Map* maps; // list of maps
601 Iimage* images; // list of image items (through nextimage links)
602 };
604 extern int dimenkind(Dimen d);
605 extern int dimenspec(Dimen d);
606 extern void freedocinfo(Docinfo* d);
607 extern void freeitems(Item* ithead);
608 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
609 extern void printitems(Item* items, char* msg);
610 extern int targetid(Rune* s);
611 extern Rune* targetname(int targid);
612 extern int validitems(Item* i);
614 #pragma varargck type "I" Item*
// Control print output
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;
621 // To be provided by caller
622 // emalloc and erealloc should not return if can't get memory.
623 // emalloc should zero its memory.
624 extern void* emalloc(ulong);
625 extern void* erealloc(void* p, ulong size);
626 #ifdef __cpluspplus
628 #endif
629 #endif