9 #pragma lib "libhtml.a"
10 #pragma src "/sys/src/libhtml"
14 extern uchar* fromStr(Rune* buf, int n, int chset); // Rune buffer -> byte string; n is presumably the number of Runes in buf and chset a charset id (ISO_8859, etc.) -- confirm in libhtml; who frees the result is not shown here
15 extern Rune* toStr(uchar* buf, int n, int chset); // byte buffer -> Rune string; n is presumably the number of bytes in buf and chset the charset they are in -- confirm; who frees the result is not shown here
17 // Common LEX and BUILD enums
54 TextTabSeparatedValues,
88 typedef struct Token Token; // forward typedefs: the structures below refer to one another by bare name
89 typedef struct Attr Attr;
93 typedef struct Item Item; // common start of every item variant; its tag field selects the variant
94 typedef struct Itext Itext; // Item variant, tag == Itexttag
95 typedef struct Irule Irule; // Item variant, tag == Iruletag
96 typedef struct Iimage Iimage; // Item variant, tag == Iimagetag
97 typedef struct Iformfield Iformfield; // Item variant, tag == Iformfieldtag
98 typedef struct Itable Itable; // Item variant, tag == Itabletag
99 typedef struct Ifloat Ifloat; // Item variant, tag == Ifloattag
100 typedef struct Ispacer Ispacer; // Item variant, tag == Ispacertag
101 typedef struct Genattr Genattr; // generic attributes and events
102 typedef struct SEvent SEvent; // entry in a list of events (SEonblur, etc.)
103 typedef struct Formfield Formfield; // a field in a form
104 typedef struct Option Option; // an option in a "select" form field
105 typedef struct Form Form; // a form
106 typedef struct Table Table; // a table
107 typedef struct Tablecol Tablecol;
108 typedef struct Tablerow Tablerow;
109 typedef struct Tablecell Tablecell; // one cell of a table; may span rows and columns
110 typedef struct Align Align; // alignment spec
111 typedef struct Dimen Dimen; // dimension spec: a number optionally followed by % or *
112 typedef struct Anchor Anchor; // hyperlink that goes somewhere
113 typedef struct DestAnchor DestAnchor; // hyperlink destination
114 typedef struct Map Map; // client-side map
115 typedef struct Area Area; // one area of a Map
116 typedef struct Background Background; // image or color; the image has precedence
117 typedef struct Kidinfo Kidinfo; // child frame or frameset
118 typedef struct Docinfo Docinfo; // global information about an HTML page
119 typedef struct Stack Stack;
120 typedef struct Pstate Pstate;
121 typedef struct ItemSource ItemSource;
122 typedef struct Lay Lay; // defined in Layout module
126 ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
127 ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
132 uchar halign; // one of ALnone, ALleft, etc.
133 uchar valign; // one of ALnone, ALtop, etc.
136 // A Dimen holds a dimension specification, especially for those
137 // cases when a number can be followed by a % or a * to indicate
138 // percentage of total or relative weight.
139 // Dnone means no dimension was specified
141 // To fit in a word, use top bits to identify kind, rest for value
148 Dspecmask = (~Dkindmask)
153 int kindspec; // kind | spec
156 // Background is either an image or a color.
157 // If both are set, the image has precedence.
165 // There are about a half dozen Item variants.
166 // They all look like this at the start (using Plan 9 C's
167 // anonymous structure member mechanism),
168 // and then the tag field dictates what extra fields there are.
171 Item* next; // successor in list of items
172 int width; // width in pixels (0 for floating items)
173 int height; // height in pixels
174 int ascent; // ascent (from top to baseline) in pixels
175 int anchorid; // if nonzero, which anchor we're in
176 int state; // flags and values (see below)
177 Genattr* genattr; // generic attributes and events
178 int tag; // variant discriminator: Itexttag, etc.
194 Item item; // (with tag ==Itexttag)
195 Rune* s; // the characters
196 int fnt; // style*NumSize+size (see font stuff, below)
197 int fg; // Pixel (color) for text
198 uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
199 uchar ul; // ULnone, ULunder, or ULmid
204 Item item; // (with tag ==Iruletag)
205 uchar align; // alignment spec
206 uchar noshade; // if true, don't shade
207 int size; // size attr (rule height)
208 Dimen wspec; // width spec
214 Item item; // (with tag ==Iimagetag)
215 Rune* imsrc; // image src url
216 int imwidth; // spec width (actual, if no spec)
217 int imheight; // spec height (actual, if no spec)
218 Rune* altrep; // alternate representation, in absence of image
219 Map* map; // if non-nil, client side map
220 int ctlid; // if animated
221 uchar align; // vertical alignment
222 uchar hspace; // in pixels; buffer space on each side
223 uchar vspace; // in pixels; buffer space on top and bottom
224 uchar border; // in pixels: border width to draw around image
225 Iimage* nextimage; // next in list of document's images
231 Item item; // (with tag ==Iformfieldtag)
232 Formfield* formfield;
238 Item item; // (with tag ==Itabletag)
245 Item _item; // (with tag ==Ifloattag)
246 Item* item; // table or image item that floats
247 int x; // x coord of top (from right, if ALright)
248 int y; // y coord of top
249 uchar side; // margin it floats to: ALleft or ALright
250 uchar infloats; // true if this has been added to a lay.floats
251 Ifloat* nextfloat; // in list of floats
257 Item item; // (with tag ==Ispacertag)
258 int spkind; // ISPnull, etc.
261 // Item state flags and value fields
263 // IFbrk = 0x80000000, // forced break before this item
264 #define IFbrk 0x80000000 /* too big for sun */
265 IFbrksp = 0x40000000, // add 1 line space to break (IFbrk set too)
266 IFnobrk = 0x20000000, // break not allowed before this item
267 IFcleft = 0x10000000, // clear left floats (IFbrk set too)
268 IFcright = 0x08000000, // clear right floats (IFbrk set too)
269 IFwrap = 0x04000000, // in a wrapping (non-pre) line
270 IFhang = 0x02000000, // in a hanging (into left indent) item
271 IFrjust = 0x01000000, // right justify current line
272 IFcjust = 0x00800000, // center justify current line
273 IFsmap = 0x00400000, // image is server-side map
275 IFindentmask = (255<<IFindentshift), // current indent, in tab stops
276 IFhangmask = 255 // current hang into left indent, in 1/10th tabstops
279 // Bias added to Itext's voff field: voff is a uchar holding Voffbias + offset from baseline (+ve == down)
280 enum { Voffbias = 128 }; // presumably lets the uchar also carry upward (negative) offsets -- confirm
284 ISPnull, // 0 height and width
285 ISPvline, // height and ascent of current font
286 ISPhspace, // width of space in current font
287 ISPgeneral // other purposes (e.g., between markers and list)
290 // Generic attributes and events (not many elements will have any of these set)
302 SEvent* next; // in list of events
303 int type; // SEonblur, etc.
308 SEonblur, SEonchange, SEonclick, SEondblclick,
309 SEonfocus, SEonkeypress, SEonkeyup, SEonload,
310 SEonmousedown, SEonmousemove, SEonmouseout,
311 SEonmouseover, SEonmouseup, SEonreset, SEonselect,
312 SEonsubmit, SEonunload,
332 // Information about a field in a form
335 Formfield* next; // in list of fields for a form
336 int ftype; // Ftext, Fpassword, etc.
337 int fieldid; // serial no. of field within its form
338 Form* form; // containing form
339 Rune* name; // name attr
340 Rune* value; // value attr
341 int size; // size attr
342 int maxlength; // maxlength attr
343 int rows; // rows attr
344 int cols; // cols attr
345 uchar flags; // FFchecked, etc.
346 Option* options; // for Fselect fields
347 Item* image; // image item, for Fimage fields
348 int ctlid; // identifies control for this field in layout
349 SEvent* events; // same as genattr->events of containing item
357 // Option holds info about an option in a "select" form field
360 Option* next; // next in list of options for a field
361 int selected; // true if selected initially
362 Rune* value; // value attr
363 Rune* display; // display string
366 // Form holds info about a form
369 Form* next; // in list of forms for document
370 int formid; // serial no. of form within its doc
371 Rune* name; // name or id attr (Netscape uses name, HTML 4.0 uses id)
372 Rune* action; // action attr
373 int target; // target attr as targetid
374 int method; // HGet or HPost
375 int nfields; // number of fields
376 Formfield* fields; // form's fields, in input order (Formfield has a next link for this list)
379 // Flags used in various table structures
387 // Information about a table
390 Table* next; // next in list of document's tables
391 int tableid; // serial no. of table within its doc
392 Tablerow* rows; // array of row specs (list during parsing)
393 int nrow; // total number of rows
394 Tablecol* cols; // array of column specs
395 int ncol; // total number of columns
396 Tablecell* cells; // list of unique cells
397 int ncell; // total number of cells
398 Tablecell*** grid; // 2-D array of cells
399 Align align; // alignment spec for whole table
400 Dimen width; // width spec for whole table
401 int border; // border attr
402 int cellspacing; // cellspacing attr
403 int cellpadding; // cellpadding attr
404 Background background; // table background
405 Item* caption; // linked list of Items, giving caption
406 uchar caption_place; // ALtop or ALbottom
407 Lay* caption_lay; // layout of caption
408 int totw; // total width
409 int toth; // total height
410 int caph; // caption height
411 int availw; // used for previous 3 sizes
412 Token* tabletok; // token that started the table
413 uchar flags; // Lchanged, perhaps
427 Tablerow* next; // Next in list of rows, during parsing
428 Tablecell* cells; // Cells in row, linked through nextinrow
432 Background background;
434 uchar flags; // 0 or TFparsing
438 // A Tablecell is one cell of a table.
439 // It may span multiple rows and multiple columns.
440 // Cells are linked on two lists: the list for all the cells of
441 // a table (the next pointers), and the list of all the
442 // cells that start in a given row (the nextinrow pointers)
445 Tablecell* next; // next in list of table's cells
446 Tablecell* nextinrow; // next in list of row's cells
447 int cellid; // serial no. of cell within table
448 Item* content; // contents before layout
449 Lay* lay; // layout of cell
450 int rowspan; // number of rows spanned by this cell
451 int colspan; // number of cols spanned by this cell
452 Align align; // alignment spec
453 uchar flags; // TFparsing, TFnowrap, TFisth
454 Dimen wspec; // suggested width
455 int hspec; // suggested height
456 Background background; // cell background
457 int minw; // minimum possible width
458 int maxw; // maximum width
459 int ascent; // cell's ascent
460 int row; // row of upper left corner
461 int col; // col of upper left corner
462 Point pos; // nw corner of cell contents, in cell
465 // Anchor is for info about hyperlinks that go somewhere
468 Anchor* next; // next in list of document's anchors
469 int index; // serial no. of anchor within its doc
470 Rune* name; // name attr
471 Rune* href; // href attr
472 int target; // target attr as targetid
476 // DestAnchor is for info about hyperlinks that are destinations
479 DestAnchor* next; // next in list of document's destanchors
480 int index; // serial no. of anchor within its doc
481 Rune* name; // name attr
482 Item* item; // the destination
486 // Maps (client side)
489 Map* next; // next in list of document's maps
490 Rune* name; // map name
491 Area* areas; // list of map areas
497 Area* next; // next in list of a map's areas
498 int shape; // SHrect, etc.
499 Rune* href; // associated hypertext link
500 int target; // associated target frame
501 Dimen* coords; // array of coords for shape
502 int ncoords; // size of coords array
507 SHrect, SHcircle, SHpoly
510 // Fonts are represented by integers: style*NumSize + size
532 NumFnt = (NumStyle*NumSize),
533 DefFnt = (FntR*NumSize+Normal)
536 // Lines are needed through some text items, for underlining or strikethrough
538 ULnone, ULunder, ULmid
547 FRhscrollauto = (1<<4),
548 FRvscrollauto = (1<<5)
551 // Information about child frame or frameset
554 Kidinfo* next; // in list of kidinfos for a frameset
557 // fields for "frame"
558 Rune* src; // only nil if a "dummy" frame or this is frameset
559 Rune* name; // always non-empty if this isn't frameset
565 // fields for "frameset"
566 Dimen* rows; // array of row dimensions
567 int nrows; // length of rows
568 Dimen* cols; // array of col dimensions
569 int ncols; // length of cols
571 Kidinfo* nextframeset; // parsing stack
575 // Document info (global information about HTML page)
578 // stuff from HTTP headers, doc head, and body tag
579 Rune* src; // original source of doc
580 Rune* base; // base URL of doc
581 Rune* doctitle; // from <title> element
582 Background background; // background specification
583 Iimage* backgrounditem; // Image Item for doc background image, or nil
584 int text; // doc foreground (text) color
585 int link; // unvisited hyperlink color
586 int vlink; // visited hyperlink color
587 int alink; // highlighting hyperlink color
588 int target; // target frame default
589 int chset; // ISO_8859, etc.
590 int mediatype; // TextHtml, etc.
591 int scripttype; // TextJavascript, etc.
592 int hasscripts; // true if scripts used
593 Rune* refresh; // content of <http-equiv=Refresh ...>
594 Kidinfo* kidinfo; // if a frameset
595 int frameid; // id of document frame
597 // info needed to respond to user actions
598 Anchor* anchors; // list of href anchors
599 DestAnchor* dests; // list of destination anchors
600 Form* forms; // list of forms
601 Table* tables; // list of tables
602 Map* maps; // list of maps
603 Iimage* images; // list of image items (through nextimage links)
606 extern int dimenkind(Dimen d); // presumably the kind bits of d's kindspec (Dnone, etc.) -- confirm in libhtml
607 extern int dimenspec(Dimen d); // presumably the value bits of d's kindspec (the part under Dspecmask) -- confirm
608 extern void freedocinfo(Docinfo* d); // by its name, releases a Docinfo; whether nil is accepted is not shown here
609 extern void freeitems(Item* ithead); // by its name, releases the Item list headed by ithead (items link through next)
610 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); // data/datalen: the document bytes; src, mtype, chset presumably correspond to Docinfo's src, mediatype, chset; *pdi presumably receives the Docinfo -- confirm
611 extern void printitems(Item* items, char* msg); // by its name, debugging dump of an Item list; how msg is used is not shown here
612 extern int targetid(Rune* s); // target name -> int id, the form stored in the "target attr as targetid" fields
613 extern Rune* targetname(int targid); // presumably the inverse of targetid -- confirm
614 extern int validitems(Item* i); // presumably a consistency check over an Item list; return convention not shown here
616 /* #pragma varargck type "I" Item* */
618 // Control print output
623 // To be provided by caller
624 // emalloc and erealloc should not return if they can't get memory.
625 // emalloc should zero its memory.
626 extern void* emalloc(ulong); // argument is presumably the byte count, as for erealloc's size
627 extern void* erealloc(void* p, ulong size); // not stated above whether memory added by growing p must be zeroed