op public repos

Blob

Date:: Fri Mar 26 01:59:35 2004 UTC
Message:: SunOS can rot in hell.
Actions:: History | Blame | Raw File
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <memdraw.h>
5 
6 int drawdebug;
7 static int	tablesbuilt;
8 
/*
 * perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256.
 * The weights are fixed-point with 19 fractional bits, hence the >>19.
 */
#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)

/*
 * Division by 255 without a divide instruction.
 * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
 * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
 * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole.
 * ("The last one" is the commented-out variant below that needs a scratch
 * variable tmp; the active definition is the second form.)
 */
/* #define DIV255(x) (((x)*257+256)>>16)  */
#define DIV255(x) ((((x)+1)*257)>>16)
/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */

/*
 * MUL(x, y, t): x*y/255 with rounding, computed as t = x*y+128 followed by
 * (t+(t>>8))>>8.  t is a caller-supplied scratch lvalue that gets overwritten.
 */
#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)
/* byte-lane masks for a 32-bit pixel: bytes 1 and 3, and bytes 0 and 2 */
#define MASK13	0xFF00FF00
#define MASK02	0x00FF00FF
/*
 * MUL13/MUL02 apply the MUL computation to two byte lanes of x at once:
 * MUL13 takes bytes 1 and 3 (shifted down into lanes 0 and 2), MUL02 takes
 * bytes 0 and 2.  Both leave their results in lanes 0 and 2.
 * MUL0123 shifts the MUL13 result back up and ORs in the MUL02 result, so
 * all four bytes of x are scaled by a.  s and t are scratch lvalues.
 */
#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))

/*
 * MUL2: (u*v + x*y)/255 in the style of MUL.
 * NOTE(review): unlike MUL, the scratch variable t is not a macro parameter;
 * a variable named t must be in scope wherever this is expanded.
 */
#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)
29 
30 static void mktables(void);
31 typedef int Subdraw(Memdrawparam*);
32 static Subdraw chardraw, alphadraw, memoptdraw;
33 
34 static Memimage*	memones;
35 static Memimage*	memzeros;
36 Memimage *memwhite;
37 Memimage *memblack;
38 Memimage *memtransparent;
39 Memimage *memopaque;
40 
41 int	__ifmt(Fmt*);
42 
43 void
44 memimageinit(void)
45 {
46 	static int didinit = 0;
47 
48 	if(didinit)
49 		return;
50 
51 	didinit = 1;
52 
53 	mktables();
54 	_memmkcmap();
55 
56 	fmtinstall('R', Rfmt); 
57 	fmtinstall('P', Pfmt);
58 	fmtinstall('b', __ifmt);
59 
60 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
61 	memones->flags |= Frepl;
62 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
63 	*byteaddr(memones, ZP) = ~0;
64 
65 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
66 	memzeros->flags |= Frepl;
67 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
68 	*byteaddr(memzeros, ZP) = 0;
69 
70 	if(memones == nil || memzeros == nil)
71 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
72 
73 	memwhite = memones;
74 	memblack = memzeros;
75 	memopaque = memones;
76 	memtransparent = memzeros;
77 }
78 
79 u32int _imgtorgba(Memimage*, u32int);
80 u32int _rgbatoimg(Memimage*, u32int);
81 u32int _pixelbits(Memimage*, Point);
82 
83 #define DBG if(0)
84 static Memdrawparam par;
85 
86 Memdrawparam*
87 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
88 {
89 	if(mask == nil)
90 		mask = memopaque;
91 
92 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
93 
94 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
95 //		if(drawdebug)
96 //			iprint("empty clipped rectangle\n");
97 		return nil;
98 	}
99 
100 	if(op < Clear || op > SoverD){
101 //		if(drawdebug)
102 //			iprint("op out of range: %d\n", op);
103 		return nil;
104 	}
105 
106 	par.op = op;
107 	par.dst = dst;
108 	par.r = r;
109 	par.src = src;
110 	/* par.sr set by drawclip */
111 	par.mask = mask;
112 	/* par.mr set by drawclip */
113 
114 	par.state = 0;
115 	if(src->flags&Frepl){
116 		par.state |= Replsrc;
117 		if(Dx(src->r)==1 && Dy(src->r)==1){
118 			par.sval = pixelbits(src, src->r.min);
119 			par.state |= Simplesrc;
120 			par.srgba = _imgtorgba(src, par.sval);
121 			par.sdval = _rgbatoimg(dst, par.srgba);
122 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
123 //				if (drawdebug) iprint("fill with transparent source\n");
124 				return nil;	/* no-op successfully handled */
125 			}
126 		}
127 	}
128 
129 	if(mask->flags & Frepl){
130 		par.state |= Replmask;
131 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
132 			par.mval = pixelbits(mask, mask->r.min);
133 			if(par.mval == 0 && (op&DoutS)){
134 //				if(drawdebug) iprint("fill with zero mask\n");
135 				return nil;	/* no-op successfully handled */
136 			}
137 			par.state |= Simplemask;
138 			if(par.mval == ~0)
139 				par.state |= Fullmask;
140 			par.mrgba = _imgtorgba(mask, par.mval);
141 		}
142 	}
143 
144 //	if(drawdebug)
145 //		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
146 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
147 
148 	return &par;
149 }
150 
151 void
152 _memimagedraw(Memdrawparam *par)
153 {
154 	/*
155 	 * Now that we've clipped the parameters down to be consistent, we 
156 	 * simply try sub-drawing routines in order until we find one that was able
157 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
158 	 * unable to satisfy the request, so we do not return.
159 	 */
160 
161 	/*
162 	 * Hardware support.  Each video driver provides this function,
163 	 * which checks to see if there is anything it can help with.
164 	 * There could be an if around this checking to see if dst is in video memory.
165 	 */
166 DBG print("test hwdraw\n");
167 	if(hwdraw(par)){
168 //if(drawdebug) iprint("hw handled\n");
169 DBG print("hwdraw handled\n");
170 		return;
171 	}
172 	/*
173 	 * Optimizations using memmove and memset.
174 	 */
175 DBG print("test memoptdraw\n");
176 	if(memoptdraw(par)){
177 //if(drawdebug) iprint("memopt handled\n");
178 DBG print("memopt handled\n");
179 		return;
180 	}
181 
182 	/*
183 	 * Character drawing.
184 	 * Solid source color being painted through a boolean mask onto a high res image.
185 	 */
186 DBG print("test chardraw\n");
187 	if(chardraw(par)){
188 //if(drawdebug) iprint("chardraw handled\n");
189 DBG print("chardraw handled\n");
190 		return;
191 	}
192 
193 	/*
194 	 * General calculation-laden case that does alpha for each pixel.
195 	 */
196 DBG print("do alphadraw\n");
197 	alphadraw(par);
198 //if(drawdebug) iprint("alphadraw handled\n");
199 DBG print("alphadraw handled\n");
200 }
201 #undef DBG
202 
203 /*
204  * Clip the destination rectangle further based on the properties of the 
205  * source and mask rectangles.  Once the destination rectangle is properly
206  * clipped, adjust the source and mask rectangles to be the same size.
207  * Then if source or mask is replicated, move its clipped rectangle
208  * so that its minimum point falls within the repl rectangle.
209  *
210  * Return zero if the final rectangle is null.
211  */
212 int
213 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
214 {
215 	Point rmin, delta;
216 	int splitcoords;
217 	Rectangle omr;
218 
219 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
220 		return 0;
221 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
222 	/* clip to destination */
223 	rmin = r->min;
224 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
225 		return 0;
226 	/* move mask point */
227 	p1->x += r->min.x-rmin.x;
228 	p1->y += r->min.y-rmin.y;
229 	/* move source point */
230 	p0->x += r->min.x-rmin.x;
231 	p0->y += r->min.y-rmin.y;
232 	/* map destination rectangle into source */
233 	sr->min = *p0;
234 	sr->max.x = p0->x+Dx(*r);
235 	sr->max.y = p0->y+Dy(*r);
236 	/* sr is r in source coordinates; clip to source */
237 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
238 		return 0;
239 	if(!rectclip(sr, src->clipr))
240 		return 0;
241 	/* compute and clip rectangle in mask */
242 	if(splitcoords){
243 		/* move mask point with source */
244 		p1->x += sr->min.x-p0->x;
245 		p1->y += sr->min.y-p0->y;
246 		mr->min = *p1;
247 		mr->max.x = p1->x+Dx(*sr);
248 		mr->max.y = p1->y+Dy(*sr);
249 		omr = *mr;
250 		/* mr is now rectangle in mask; clip it */
251 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
252 			return 0;
253 		if(!rectclip(mr, mask->clipr))
254 			return 0;
255 		/* reflect any clips back to source */
256 		sr->min.x += mr->min.x-omr.min.x;
257 		sr->min.y += mr->min.y-omr.min.y;
258 		sr->max.x += mr->max.x-omr.max.x;
259 		sr->max.y += mr->max.y-omr.max.y;
260 		*p1 = mr->min;
261 	}else{
262 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
263 			return 0;
264 		if(!rectclip(sr, mask->clipr))
265 			return 0;
266 		*p1 = sr->min;
267 	}
268 
269 	/* move source clipping back to destination */
270 	delta.x = r->min.x - p0->x;
271 	delta.y = r->min.y - p0->y;
272 	r->min.x = sr->min.x + delta.x;
273 	r->min.y = sr->min.y + delta.y;
274 	r->max.x = sr->max.x + delta.x;
275 	r->max.y = sr->max.y + delta.y;
276 
277 	/* move source rectangle so sr->min is in src->r */
278 	if(src->flags&Frepl) {
279 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
280 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
281 		sr->min.x += delta.x;
282 		sr->min.y += delta.y;
283 		sr->max.x += delta.x;
284 		sr->max.y += delta.y;
285 	}
286 	*p0 = sr->min;
287 
288 	/* move mask point so it is in mask->r */
289 	*p1 = drawrepl(mask->r, *p1);
290 	mr->min = *p1;
291 	mr->max.x = p1->x+Dx(*sr);
292 	mr->max.y = p1->y+Dy(*sr);
293 
294 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
295 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
296 	assert(ptinrect(*p0, src->r));
297 	assert(ptinrect(*p1, mask->r));
298 	assert(ptinrect(r->min, dst->r));
299 
300 	return 1;
301 }
302 
303 /*
304  * Conversion tables.
305  */
306 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
307 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
308 static uchar conv28[256][4];		/* ... */
309 static uchar conv48[256][2];
310 
311 /*
312  * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
313  * the X's are where to put the bottom (ones) bit of the n-bit pattern.
314  * only the top 8 bits of the result are actually used.
315  * (the lower 8 bits are needed to get bits in the right place
316  * when n is not a divisor of 8.)
317  *
318  * Should check to see if its easier to just refer to replmul than
319  * use the precomputed values in replbit.  On PCs it may well
320  * be; on machines with slow multiply instructions it probably isn't.
321  */
/*
 * Temporary macros that let each table row be written as a picture of a
 * 16-bit binary number: `a' opens sixteen parentheses around 0, and every
 * following X or _ closes one while doubling the value, X also adding 1.
 * Reading left to right, X is a 1 bit and _ is a 0 bit.
 */
#define a ((((((((((((((((0
#define X *2+1)
#define _ *2)
/* replmul[n] is the multiplier for n-bit values; entry 0 is unused filler */
static int replmul[1+8] = {
	0,
	a X X X X X X X X X X X X X X X X,
	a _ X _ X _ X _ X _ X _ X _ X _ X,
	a _ _ X _ _ X _ _ X _ _ X _ _ X _,
	a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,
	a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,
	a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _, 
	a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,
	a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,
};
/* the picture macros are only meant for the table above */
#undef a
#undef X
#undef _
339 
340 static void
341 mktables(void)
342 {
343 	int i, j, mask, sh, small;
344 		
345 	if(tablesbuilt)
346 		return;
347 
348 	fmtinstall('R', Rfmt);
349 	fmtinstall('P', Pfmt);
350 	tablesbuilt = 1;
351 
352 	/* bit replication up to 8 bits */
353 	for(i=0; i<256; i++){
354 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
355 			small = i & ((1<<j)-1);
356 			replbit[j][i] = (small*replmul[j])>>8;
357 		}
358 	}
359 
360 	/* bit unpacking up to 8 bits, only powers of 2 */
361 	for(i=0; i<256; i++){
362 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
363 			conv18[i][j] = replbit[1][(i>>sh)&mask];
364 
365 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
366 			conv28[i][j] = replbit[2][(i>>sh)&mask];
367 
368 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
369 			conv48[i][j] = replbit[4][(i>>sh)&mask];
370 	}
371 }
372 
373 static uchar ones = 0xff;
374 
375 /*
376  * General alpha drawing case.  Can handle anything.
377  */
378 typedef struct	Buffer	Buffer;
379 struct Buffer {
380 	/* used by most routines */
381 	uchar	*red;
382 	uchar	*grn;
383 	uchar	*blu;
384 	uchar	*alpha;
385 	uchar	*grey;
386 	u32int	*rgba;
387 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
388 
389 	/* used by boolcalc* for mask data */
390 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
391 	int		mskip;	/* no. of left bits to skip in *m */
392 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
393 	int		bmskip;	/* no. of left bits to skip in *bm */
394 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
395 	int		emskip;	/* no. of right bits to skip in *em */
396 };
397 
398 typedef struct	Param	Param;
399 typedef Buffer	Readfn(Param*, uchar*, int);
400 typedef void	Writefn(Param*, uchar*, Buffer);
401 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
402 
403 enum {
404 	MAXBCACHE = 16
405 };
406 
407 /* giant rathole to customize functions with */
408 struct Param {
409 	Readfn	*replcall;
410 	Readfn	*greymaskcall;	
411 	Readfn	*convreadcall;
412 	Writefn	*convwritecall;
413 
414 	Memimage *img;
415 	Rectangle	r;
416 	int	dx;	/* of r */
417 	int	needbuf;
418 	int	convgrey;
419 	int	alphaonly;
420 
421 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
422 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
423 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
424 	int		bwidth;
425 
426 	int	replcache;	/* if set, cache buffers */
427 	Buffer	bcache[MAXBCACHE];
428 	u32int	bfilled;
429 	uchar	*bufbase;
430 	int	bufoff;
431 	int	bufdelta;
432 
433 	int	dir;
434 
435 	int	convbufoff;
436 	uchar	*convbuf;
437 	Param	*convdpar;
438 	int	convdx;
439 };
440 
441 static uchar *drawbuf;
442 static int	ndrawbuf;
443 static int	mdrawbuf;
444 static Param spar, mpar, dpar;	/* easier on the stacks */
445 static Readfn	greymaskread, replread, readptr;
446 static Writefn	nullwrite;
447 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
448 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
449 
450 static Readfn*	readfn(Memimage*);
451 static Readfn*	readalphafn(Memimage*);
452 static Writefn*	writefn(Memimage*);
453 
454 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
455 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*);
456 
457 static Calcfn *alphacalc[Ncomp] = 
458 {
459 	alphacalc0,		/* Clear */
460 	alphacalc14,		/* DoutS */
461 	alphacalc2810,		/* SoutD */
462 	alphacalc3679,		/* DxorS */
463 	alphacalc14,		/* DinS */
464 	alphacalc5,		/* D */
465 	alphacalc3679,		/* DatopS */
466 	alphacalc3679,		/* DoverS */
467 	alphacalc2810,		/* SinD */
468 	alphacalc3679,		/* SatopD */
469 	alphacalc2810,		/* S */
470 	alphacalc11,		/* SoverD */
471 };
472 
473 static Calcfn *boolcalc[Ncomp] =
474 {
475 	alphacalc0,		/* Clear */
476 	boolcalc14,		/* DoutS */
477 	boolcalc236789,		/* SoutD */
478 	boolcalc236789,		/* DxorS */
479 	boolcalc14,		/* DinS */
480 	alphacalc5,		/* D */
481 	boolcalc236789,		/* DatopS */
482 	boolcalc236789,		/* DoverS */
483 	boolcalc236789,		/* SinD */
484 	boolcalc236789,		/* SatopD */
485 	boolcalc1011,		/* S */
486 	boolcalc1011,		/* SoverD */
487 };
488 
489 static int
490 allocdrawbuf(void)
491 {
492 	uchar *p;
493 
494 	if(ndrawbuf > mdrawbuf){
495 		p = realloc(drawbuf, ndrawbuf);
496 		if(p == nil){
497 			werrstr("memimagedraw out of memory");
498 			return -1;
499 		}
500 		drawbuf = p;
501 		mdrawbuf = ndrawbuf;
502 	}
503 	return 0;
504 }
505 
506 static Param
507 getparam(Memimage *img, Rectangle r, int convgrey, int needbuf)
508 {
509 	Param p;
510 	int nbuf;
511 
512 	memset(&p, 0, sizeof p);
513 
514 	p.img = img;
515 	p.r = r;
516 	p.dx = Dx(r);
517 	p.needbuf = needbuf;
518 	p.convgrey = convgrey;
519 
520 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
521 
522 	p.bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
523 	p.bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
524 	p.bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
525 	p.bwidth = sizeof(u32int)*img->width;
526 
527 	assert(p.bytey0s <= p.bytermin && p.bytermin <= p.bytey0e);
528 
529 	if(p.r.min.x == p.img->r.min.x)
530 		assert(p.bytermin == p.bytey0s);
531 
532 	nbuf = 1;
533 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
534 		p.replcache = 1;
535 		nbuf = Dy(img->r);
536 	}
537 	p.bufdelta = 4*p.dx;
538 	p.bufoff = ndrawbuf;
539 	ndrawbuf += p.bufdelta*nbuf;
540 
541 	return p;
542 }
543 
544 static void
545 clipy(Memimage *img, int *y)
546 {
547 	int dy;
548 
549 	dy = Dy(img->r);
550 	if(*y == dy)
551 		*y = 0;
552 	else if(*y == -1)
553 		*y = dy-1;
554 	assert(0 <= *y && *y < dy);
555 }
556 
557 static void
558 dumpbuf(char *s, Buffer b, int n)
559 {
560 	int i;
561 	uchar *p;
562 	
563 	print("%s", s);
564 	for(i=0; i<n; i++){
565 		print(" ");
566 		if(p=b.grey){
567 			print(" k%.2uX", *p);
568 			b.grey += b.delta;
569 		}else{	
570 			if(p=b.red){
571 				print(" r%.2uX", *p);
572 				b.red += b.delta;
573 			}
574 			if(p=b.grn){
575 				print(" g%.2uX", *p);
576 				b.grn += b.delta;
577 			}
578 			if(p=b.blu){
579 				print(" b%.2uX", *p);
580 				b.blu += b.delta;
581 			}
582 		}
583 		if((p=b.alpha) != &ones){
584 			print(" α%.2uX", *p);
585 			b.alpha += b.delta;
586 		}
587 	}
588 	print("\n");
589 }
590 
591 /*
592  * For each scan line, we expand the pixels from source, mask, and destination
593  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
594  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
595  * the readers need not copy the data: they can simply return pointers to the data.
596  * If the destination image is grey and the source is not, it is converted using the NTSC
597  * formula.
598  *
599  * Once we have all the channels, we call either rgbcalc or greycalc, depending on 
600  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
601  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
602  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
603  * the calculator, and that buffer is passed to a function to write it to the destination.
604  * If the buffer is already pointing at the destination, the writing function is a no-op.
605  */
606 #define DBG if(0)
607 static int
608 alphadraw(Memdrawparam *par)
609 {
610 	int isgrey, starty, endy, op;
611 	int needbuf, dsty, srcy, masky;
612 	int y, dir, dx, dy;
613 	Buffer bsrc, bdst, bmask;
614 	Readfn *rdsrc, *rdmask, *rddst;
615 	Calcfn *calc;
616 	Writefn *wrdst;
617 	Memimage *src, *mask, *dst;
618 	Rectangle r, sr, mr;
619 
620 	r = par->r;
621 	dx = Dx(r);
622 	dy = Dy(r);
623 
624 	ndrawbuf = 0;
625 
626 	src = par->src;
627 	mask = par->mask;	
628 	dst = par->dst;
629 	sr = par->sr;
630 	mr = par->mr;
631 	op = par->op;
632 
633 	isgrey = dst->flags&Fgrey;
634 
635 	/*
636 	 * Buffering when src and dst are the same bitmap is sufficient but not 
637 	 * necessary.  There are stronger conditions we could use.  We could
638 	 * check to see if the rectangles intersect, and if simply moving in the
639 	 * correct y direction can avoid the need to buffer.
640 	 */
641 	needbuf = (src->data == dst->data);
642 
643 	spar = getparam(src, sr, isgrey, needbuf);
644 	dpar = getparam(dst, r, isgrey, needbuf);
645 	mpar = getparam(mask, mr, 0, needbuf);
646 
647 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
648 	spar.dir = mpar.dir = dpar.dir = dir;
649 
650 	/*
651 	 * If the mask is purely boolean, we can convert from src to dst format
652 	 * when we read src, and then just copy it to dst where the mask tells us to.
653 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
654 	 *
655 	 * The computation is accomplished by assigning the function pointers as follows:
656 	 *	rdsrc - read and convert source into dst format in a buffer
657 	 * 	rdmask - convert mask to bytes, set pointer to it
658 	 * 	rddst - fill with pointer to real dst data, but do no reads
659 	 *	calc - copy src onto dst when mask says to.
660 	 *	wrdst - do nothing
661 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
662 	 * but it avoids a fair amount of code duplication to make this a case here
663 	 * rather than have a separate booldraw.
664 	 */
665 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
666 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
667 //if(drawdebug) iprint("boolcopy...");
668 		rdsrc = convfn(dst, &dpar, src, &spar);
669 		rddst = readptr;
670 		rdmask = readfn(mask);
671 		calc = boolcopyfn(dst, mask);
672 		wrdst = nullwrite;
673 	}else{
674 		/* usual alphadraw parameter fetching */
675 		rdsrc = readfn(src);
676 		rddst = readfn(dst);
677 		wrdst = writefn(dst);
678 		calc = alphacalc[op];
679 
680 		/*
681 		 * If there is no alpha channel, we'll ask for a grey channel
682 		 * and pretend it is the alpha.
683 		 */
684 		if(mask->flags&Falpha){
685 			rdmask = readalphafn(mask);
686 			mpar.alphaonly = 1;
687 		}else{
688 			mpar.greymaskcall = readfn(mask);
689 			mpar.convgrey = 1;
690 			rdmask = greymaskread;
691 
692 			/*
693 			 * Should really be above, but then boolcopyfns would have
694 			 * to deal with bit alignment, and I haven't written that.
695 			 *
696 			 * This is a common case for things like ellipse drawing.
697 			 * When there's no alpha involved and the mask is boolean,
698 			 * we can avoid all the division and multiplication.
699 			 */
700 			if(mask->chan == GREY1 && !(src->flags&Falpha))
701 				calc = boolcalc[op];
702 			else if(op == SoverD && !(src->flags&Falpha))
703 				calc = alphacalcS;
704 		}
705 	}
706 
707 	/*
708 	 * If the image has a small enough repl rectangle,
709 	 * we can just read each line once and cache them.
710 	 */
711 	if(spar.replcache){
712 		spar.replcall = rdsrc;
713 		rdsrc = replread;
714 	}
715 	if(mpar.replcache){
716 		mpar.replcall = rdmask;
717 		rdmask = replread;
718 	}
719 
720 	if(allocdrawbuf() < 0)
721 		return 0;
722 
723 	/*
724 	 * Before we were saving only offsets from drawbuf in the parameter
725 	 * structures; now that drawbuf has been grown to accomodate us,
726 	 * we can fill in the pointers.
727 	 */
728 	spar.bufbase = drawbuf+spar.bufoff;
729 	mpar.bufbase = drawbuf+mpar.bufoff;
730 	dpar.bufbase = drawbuf+dpar.bufoff;
731 	spar.convbuf = drawbuf+spar.convbufoff;
732 
733 	if(dir == 1){
734 		starty = 0;
735 		endy = dy;
736 	}else{
737 		starty = dy-1;
738 		endy = -1;
739 	}
740 
741 	/*
742 	 * srcy, masky, and dsty are offsets from the top of their
743 	 * respective Rectangles.  they need to be contained within
744 	 * the rectangles, so clipy can keep them there without division.
745  	 */
746 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
747 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
748 	dsty = starty + r.min.y - dst->r.min.y;
749 
750 	assert(0 <= srcy && srcy < Dy(src->r));
751 	assert(0 <= masky && masky < Dy(mask->r));
752 	assert(0 <= dsty && dsty < Dy(dst->r));
753 
754 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
755 		clipy(src, &srcy);
756 		clipy(dst, &dsty);
757 		clipy(mask, &masky);
758 
759 		bsrc = rdsrc(&spar, spar.bufbase, srcy);
760 DBG print("[");
761 		bmask = rdmask(&mpar, mpar.bufbase, masky);
762 DBG print("]\n");
763 		bdst = rddst(&dpar, dpar.bufbase, dsty);
764 DBG		dumpbuf("src", bsrc, dx);
765 DBG		dumpbuf("mask", bmask, dx);
766 DBG		dumpbuf("dst", bdst, dx);
767 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
768 		wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
769 	}
770 
771 	return 1;
772 }
773 #undef DBG
774 
775 static Buffer
776 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
777 {
778 	USED(grey);
779 	USED(op);
780 	memset(bdst.rgba, 0, dx*bdst.delta);
781 	return bdst;
782 }
783 
784 static Buffer
785 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
786 {
787 	Buffer obdst;
788 	int fd, sadelta;
789 	int i, sa, ma, q;
790 	u32int s, t;
791 
792 	obdst = bdst;
793 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
794 	q = bsrc.delta == 4 && bdst.delta == 4;
795 
796 	for(i=0; i<dx; i++){
797 		sa = *bsrc.alpha;
798 		ma = *bmask.alpha;
799 		fd = MUL(sa, ma, t);
800 		if(op == DoutS)
801 			fd = 255-fd;
802 
803 		if(grey){
804 			*bdst.grey = MUL(fd, *bdst.grey, t);
805 			bsrc.grey += bsrc.delta;
806 			bdst.grey += bdst.delta;
807 		}else{
808 			if(q){
809 				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);
810 				bsrc.rgba++;
811 				bdst.rgba++;
812 				bsrc.alpha += sadelta;
813 				bmask.alpha += bmask.delta;
814 				continue;
815 			}
816 			*bdst.red = MUL(fd, *bdst.red, t);
817 			*bdst.grn = MUL(fd, *bdst.grn, t);
818 			*bdst.blu = MUL(fd, *bdst.blu, t);
819 			bsrc.red += bsrc.delta;
820 			bsrc.blu += bsrc.delta;
821 			bsrc.grn += bsrc.delta;
822 			bdst.red += bdst.delta;
823 			bdst.blu += bdst.delta;
824 			bdst.grn += bdst.delta;
825 		}
826 		if(bdst.alpha != &ones){
827 			*bdst.alpha = MUL(fd, *bdst.alpha, t);
828 			bdst.alpha += bdst.delta;
829 		}
830 		bmask.alpha += bmask.delta;
831 		bsrc.alpha += sadelta;
832 	}
833 	return obdst;
834 }
835 
836 static Buffer
837 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
838 {
839 	Buffer obdst;
840 	int fs, sadelta;
841 	int i, ma, da, q;
842 	u32int s, t;
843 
844 	obdst = bdst;
845 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
846 	q = bsrc.delta == 4 && bdst.delta == 4;
847 
848 	for(i=0; i<dx; i++){
849 		ma = *bmask.alpha;
850 		da = *bdst.alpha;
851 		if(op == SoutD)
852 			da = 255-da;
853 		fs = ma;
854 		if(op != S)
855 			fs = MUL(fs, da, t);
856 
857 		if(grey){
858 			*bdst.grey = MUL(fs, *bsrc.grey, t);
859 			bsrc.grey += bsrc.delta;
860 			bdst.grey += bdst.delta;
861 		}else{
862 			if(q){
863 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);
864 				bsrc.rgba++;
865 				bdst.rgba++;
866 				bmask.alpha += bmask.delta;
867 				bdst.alpha += bdst.delta;
868 				continue;
869 			}
870 			*bdst.red = MUL(fs, *bsrc.red, t);
871 			*bdst.grn = MUL(fs, *bsrc.grn, t);
872 			*bdst.blu = MUL(fs, *bsrc.blu, t);
873 			bsrc.red += bsrc.delta;
874 			bsrc.blu += bsrc.delta;
875 			bsrc.grn += bsrc.delta;
876 			bdst.red += bdst.delta;
877 			bdst.blu += bdst.delta;
878 			bdst.grn += bdst.delta;
879 		}
880 		if(bdst.alpha != &ones){
881 			*bdst.alpha = MUL(fs, *bsrc.alpha, t);
882 			bdst.alpha += bdst.delta;
883 		}
884 		bmask.alpha += bmask.delta;
885 		bsrc.alpha += sadelta;
886 	}
887 	return obdst;
888 }
889 
890 static Buffer
891 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
892 {
893 	Buffer obdst;
894 	int fs, fd, sadelta;
895 	int i, sa, ma, da, q;
896 	u32int s, t, u, v;
897 
898 	obdst = bdst;
899 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
900 	q = bsrc.delta == 4 && bdst.delta == 4;
901 
902 	for(i=0; i<dx; i++){
903 		sa = *bsrc.alpha;
904 		ma = *bmask.alpha;
905 		da = *bdst.alpha;
906 		if(op == SatopD)
907 			fs = MUL(ma, da, t);
908 		else
909 			fs = MUL(ma, 255-da, t);
910 		if(op == DoverS)
911 			fd = 255;
912 		else{
913 			fd = MUL(sa, ma, t);
914 			if(op != DatopS)
915 				fd = 255-fd;
916 		}
917 
918 		if(grey){
919 			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
920 			bsrc.grey += bsrc.delta;
921 			bdst.grey += bdst.delta;
922 		}else{
923 			if(q){
924 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
925 				bsrc.rgba++;
926 				bdst.rgba++;
927 				bsrc.alpha += sadelta;
928 				bmask.alpha += bmask.delta;
929 				bdst.alpha += bdst.delta;
930 				continue;
931 			}
932 			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
933 			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
934 			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
935 			bsrc.red += bsrc.delta;
936 			bsrc.blu += bsrc.delta;
937 			bsrc.grn += bsrc.delta;
938 			bdst.red += bdst.delta;
939 			bdst.blu += bdst.delta;
940 			bdst.grn += bdst.delta;
941 		}
942 		if(bdst.alpha != &ones){
943 			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
944 			bdst.alpha += bdst.delta;
945 		}
946 		bmask.alpha += bmask.delta;
947 		bsrc.alpha += sadelta;
948 	}
949 	return obdst;
950 }
951 
952 static Buffer
953 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
954 {
955 	USED(dx);
956 	USED(grey);
957 	USED(op);
958 	return bdst;
959 }
960 
961 static Buffer
962 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
963 {
964 	Buffer obdst;
965 	int fd, sadelta;
966 	int i, sa, ma, q;
967 	u32int s, t, u, v;
968 
969 	USED(op);
970 	obdst = bdst;
971 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
972 	q = bsrc.delta == 4 && bdst.delta == 4;
973 
974 	for(i=0; i<dx; i++){
975 		sa = *bsrc.alpha;
976 		ma = *bmask.alpha;
977 		fd = 255-MUL(sa, ma, t);
978 
979 		if(grey){
980 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
981 			bsrc.grey += bsrc.delta;
982 			bdst.grey += bdst.delta;
983 		}else{
984 			if(q){
985 				*bdst.rgba = MUL0123(ma, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
986 				bsrc.rgba++;
987 				bdst.rgba++;
988 				bsrc.alpha += sadelta;
989 				bmask.alpha += bmask.delta;
990 				continue;
991 			}
992 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
993 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
994 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
995 			bsrc.red += bsrc.delta;
996 			bsrc.blu += bsrc.delta;
997 			bsrc.grn += bsrc.delta;
998 			bdst.red += bdst.delta;
999 			bdst.blu += bdst.delta;
1000 			bdst.grn += bdst.delta;
1001 		}
1002 		if(bdst.alpha != &ones){
1003 			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);
1004 			bdst.alpha += bdst.delta;
1005 		}
1006 		bmask.alpha += bmask.delta;
1007 		bsrc.alpha += sadelta;
1008 	}
1009 	return obdst;
1010 }
1011 
1012 /*
1013 not used yet
1014 source and mask alpha 1
1015 static Buffer
1016 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1017 {
1018 	Buffer obdst;
1019 	int i;
1020 
1021 	USED(op);
1022 	obdst = bdst;
1023 	if(bsrc.delta == bdst.delta){
1024 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1025 		return obdst;
1026 	}
1027 	for(i=0; i<dx; i++){
1028 		if(grey){
1029 			*bdst.grey = *bsrc.grey;
1030 			bsrc.grey += bsrc.delta;
1031 			bdst.grey += bdst.delta;
1032 		}else{
1033 			*bdst.red = *bsrc.red;
1034 			*bdst.grn = *bsrc.grn;
1035 			*bdst.blu = *bsrc.blu;
1036 			bsrc.red += bsrc.delta;
1037 			bsrc.blu += bsrc.delta;
1038 			bsrc.grn += bsrc.delta;
1039 			bdst.red += bdst.delta;
1040 			bdst.blu += bdst.delta;
1041 			bdst.grn += bdst.delta;
1042 		}
1043 		if(bdst.alpha != &ones){
1044 			*bdst.alpha = 255;
1045 			bdst.alpha += bdst.delta;
1046 		}
1047 	}
1048 	return obdst;
1049 }
1050 */
1051 
1052 /* source alpha 1 */
1053 static Buffer
1054 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1055 {
1056 	Buffer obdst;
1057 	int fd;
1058 	int i, ma;
1059 	u32int s, t;
1060 
1061 	USED(op);
1062 	obdst = bdst;
1063 
1064 	for(i=0; i<dx; i++){
1065 		ma = *bmask.alpha;
1066 		fd = 255-ma;
1067 
1068 		if(grey){
1069 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1070 			bsrc.grey += bsrc.delta;
1071 			bdst.grey += bdst.delta;
1072 		}else{
1073 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1074 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1075 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1076 			bsrc.red += bsrc.delta;
1077 			bsrc.blu += bsrc.delta;
1078 			bsrc.grn += bsrc.delta;
1079 			bdst.red += bdst.delta;
1080 			bdst.blu += bdst.delta;
1081 			bdst.grn += bdst.delta;
1082 		}
1083 		if(bdst.alpha != &ones){
1084 			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);
1085 			bdst.alpha += bdst.delta;
1086 		}
1087 		bmask.alpha += bmask.delta;
1088 	}
1089 	return obdst;
1090 }
1091 
/*
 * Calc routine for ops that can only clear destination pixels.
 * A pixel is zeroed when its mask byte is nonzero and op is DoutS, or when
 * its mask byte is zero and op is DinS; in every other case it is left
 * untouched.  The second Buffer argument (b1) is never read.
 * Returns the destination Buffer as passed in.
 * NOTE(review): the "14" in the name presumably refers to the numeric
 * values of the ops handled here - confirm against the Drawop enum.
 */
1092 static Buffer
1093 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1094 {
1095 	Buffer obdst;
1096 	int i, ma, zero;
1097 
1098 	obdst = bdst;
1099 
1100 	for(i=0; i<dx; i++){
1101 		ma = *bmask.alpha;
1102 		zero = ma ? op == DoutS : op == DinS;	/* should this pixel be cleared? */
1103 
1104 		if(grey){
1105 			if(zero)
1106 				*bdst.grey = 0;
1107 			bdst.grey += bdst.delta;
1108 		}else{
1109 			if(zero)
1110 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1111 			bdst.red += bdst.delta;
1112 			bdst.blu += bdst.delta;
1113 			bdst.grn += bdst.delta;
1114 		}
1115 		bmask.alpha += bmask.delta;
1116 		if(bdst.alpha != &ones){	/* only when the destination stores alpha */
1117 			if(zero)
1118 				*bdst.alpha = 0;
1119 			bdst.alpha += bdst.delta;
1120 		}
1121 	}
1122 	return obdst;
1123 }
1124 
/*
 * Calc routine for a boolean (zero/nonzero) mask whose blend factors are
 * selected by the low three bits of op:
 *	op&1 clear: pixels whose mask byte is zero are cleared to 0
 *	op&2 set:   source factor fs is 255-da instead of da
 *	op&4 set:   destination factor fd is 255 instead of 0
 * where da is the destination alpha byte.  Where the mask byte is nonzero
 * each channel becomes MUL(fs, src)+MUL(fd, dst) and the stored alpha
 * becomes fs+MUL(fd, da).  Returns the destination Buffer as passed in.
 * NOTE(review): the digits in the name presumably list the op values
 * routed here - confirm against the dispatch table.
 */
1125 static Buffer
1126 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1127 {
1128 	Buffer obdst;
1129 	int fs, fd;
1130 	int i, ma, da, zero;
1131 	u32int s, t;	/* scratch temporaries required by MUL */
1132 
1133 	obdst = bdst;
1134 	zero = !(op&1);
1135 
1136 	for(i=0; i<dx; i++){
1137 		ma = *bmask.alpha;
1138 		da = *bdst.alpha;	/* read even when bdst.alpha is &ones */
1139 		fs = da;
1140 		if(op&2)
1141 			fs = 255-da;
1142 		fd = 0;
1143 		if(op&4)
1144 			fd = 255;
1145 
1146 		if(grey){
1147 			if(ma)
1148 				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1149 			else if(zero)
1150 				*bdst.grey = 0;
1151 			bsrc.grey += bsrc.delta;
1152 			bdst.grey += bdst.delta;
1153 		}else{
1154 			if(ma){
1155 				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1156 				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1157 				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1158 			}
1159 			else if(zero)
1160 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1161 			bsrc.red += bsrc.delta;
1162 			bsrc.blu += bsrc.delta;
1163 			bsrc.grn += bsrc.delta;
1164 			bdst.red += bdst.delta;
1165 			bdst.blu += bdst.delta;
1166 			bdst.grn += bdst.delta;
1167 		}
1168 		bmask.alpha += bmask.delta;
1169 		if(bdst.alpha != &ones){	/* only when the destination stores alpha */
1170 			if(ma)
1171 				*bdst.alpha = fs+MUL(fd, da, t);
1172 			else if(zero)
1173 				*bdst.alpha = 0;
1174 			bdst.alpha += bdst.delta;
1175 		}
1176 	}
1177 	return obdst;
1178 }
1179 
/*
 * Calc routine for a boolean (zero/nonzero) mask that copies the source.
 * Where the mask byte is nonzero the source colour is copied and the
 * destination alpha (if stored) is set to 255.  Where it is zero the pixel
 * is cleared to 0 when op&1 is clear, and left alone otherwise.
 * Returns the destination Buffer as passed in.
 * NOTE(review): the "1011" in the name presumably means op values 10 and
 * 11 - confirm against the dispatch table.
 */
1180 static Buffer
1181 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1182 {
1183 	Buffer obdst;
1184 	int i, ma, zero;
1185 
1186 	obdst = bdst;
1187 	zero = !(op&1);
1188 
1189 	for(i=0; i<dx; i++){
1190 		ma = *bmask.alpha;
1191 
1192 		if(grey){
1193 			if(ma)
1194 				*bdst.grey = *bsrc.grey;
1195 			else if(zero)
1196 				*bdst.grey = 0;
1197 			bsrc.grey += bsrc.delta;
1198 			bdst.grey += bdst.delta;
1199 		}else{
1200 			if(ma){
1201 				*bdst.red = *bsrc.red;
1202 				*bdst.grn = *bsrc.grn;
1203 				*bdst.blu = *bsrc.blu;
1204 			}
1205 			else if(zero)
1206 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1207 			bsrc.red += bsrc.delta;
1208 			bsrc.blu += bsrc.delta;
1209 			bsrc.grn += bsrc.delta;
1210 			bdst.red += bdst.delta;
1211 			bdst.blu += bdst.delta;
1212 			bdst.grn += bdst.delta;
1213 		}
1214 		bmask.alpha += bmask.delta;
1215 		if(bdst.alpha != &ones){	/* only when the destination stores alpha */
1216 			if(ma)
1217 				*bdst.alpha = 255;
1218 			else if(zero)
1219 				*bdst.alpha = 0;
1220 			bdst.alpha += bdst.delta;
1221 		}
1222 	}
1223 	return obdst;
1224 }
1225 /*
1226  * Memoized scan line read for replicated images.  The first request for row y
1227  * calls p->replcall and saves its Buffer in p->bcache[y]; later requests reuse it.
1228  */
1229 static Buffer
1230 replread(Param *p, uchar *s, int y)
1231 {
1232 	Buffer *slot;
1233 
1234 	USED(s);
1235 	slot = &p->bcache[y];
1236 	if(p->bfilled & (1<<y))	/* row already read: hand back the cached Buffer */
1237 		return *slot;
1238 	p->bfilled |= 1<<y;	/* mark before reading, as the row gets its own buffer slice */
1239 	*slot = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1240 	return *slot;
1241 }
1242 
1243 /*
1244  * Mask reader for grey masks: run p->greymaskcall, then expose its grey bytes as alpha.
1245  */
1246 static Buffer
1247 greymaskread(Param *p, uchar *buf, int y)
1248 {
1249 	Buffer out;
1250 
1251 	out = p->greymaskcall(p, buf, y);
1252 	out.alpha = out.grey;	/* the calc routines read the mask through .alpha */
1253 	return out;
1254 }
1255 
1256 #define DBG if(0)
/*
 * Read one scan line (row y) of an image whose depth is below 8 bits,
 * expanding every packed pixel to one byte in buf via the replbit[depth]
 * table.  The returned Buffer has grey/red/grn/blu all pointing at buf,
 * alpha pointing at ones, and delta 1.
 *
 * p->dx pixels are produced in up to three stages:
 *	1. from p->r.min.x to the image's right edge (or dx pixels, if fewer);
 *	2. from the image's left edge up to p->r.min.x;
 *	3. by copying what is already in buf, once a full image row is there.
 * Stages 2 and 3 only run when dx reaches past the right edge.
 */
1257 static Buffer
1258 readnbit(Param *p, uchar *buf, int y)
1259 {
1260 	Buffer b;
1261 	Memimage *img;
1262 	uchar *repl, *r, *w, *ow, bits;
1263 	int i, n, sh, depth, x, dx, npack, nbits;
1264 
1265 	b.rgba = (u32int*)buf;
1266 	b.grey = w = buf;
1267 	b.red = b.blu = b.grn = w;
1268 	b.alpha = &ones;
1269 	b.delta = 1;
1270 
1271 	dx = p->dx;
1272 	img = p->img;
1273 	depth = img->depth;
1274 	repl = &replbit[depth][0];
1275 	npack = 8/depth;	/* pixels per byte */
1276 	sh = 8-depth;	/* shift that brings the leftmost pixel of a byte to the bottom */
1277 
1278 	/* copy from p->r.min.x until end of repl rectangle */
1279 	x = p->r.min.x;
1280 	n = dx;
1281 	if(n > p->img->r.max.x - x)
1282 		n = p->img->r.max.x - x;
1283 
1284 	r = p->bytermin + y*p->bwidth;
1285 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1286 	bits = *r++;
1287 	nbits = 8;
1288 	if(i=x&(npack-1)){	/* start is not byte aligned: shift out the i pixels to its left */
1289 DBG print("throwaway %d...", i);
1290 		bits <<= depth*i;
1291 		nbits -= depth*i;
1292 	}
1293 	for(i=0; i<n; i++){
1294 		if(nbits == 0){	/* current byte used up: fetch the next one */
1295 DBG print("(%.2ux)...", *r);
1296 			bits = *r++;
1297 			nbits = 8;
1298 		}
1299 		*w++ = repl[bits>>sh];
1300 DBG print("bit %x...", repl[bits>>sh]);
1301 		bits <<= depth;
1302 		nbits -= depth;
1303 	}
1304 	dx -= n;
1305 	if(dx == 0)
1306 		return b;
1307 
1308 	assert(x+i == p->img->r.max.x);
1309 
1310 	/* copy from beginning of repl rectangle until where we were before. */
1311 	x = p->img->r.min.x;
1312 	n = dx;
1313 	if(n > p->r.min.x - x)
1314 		n = p->r.min.x - x;
1315 
1316 	r = p->bytey0s + y*p->bwidth;
1317 DBG print("x=%d r=%p...", x, r);
1318 	bits = *r++;
1319 	nbits = 8;
1320 	if(i=x&(npack-1)){	/* same alignment fix-up as above, for the image's left edge */
1321 		bits <<= depth*i;
1322 		nbits -= depth*i;
1323 	}
1324 DBG print("nbits=%d...", nbits);
1325 	for(i=0; i<n; i++){
1326 		if(nbits == 0){
1327 			bits = *r++;
1328 			nbits = 8;
1329 		}
1330 		*w++ = repl[bits>>sh];
1331 DBG print("bit %x...", repl[bits>>sh]);
1332 		bits <<= depth;
1333 		nbits -= depth;
1334 DBG print("bits %x nbits %d...", bits, nbits);
1335 	}
1336 	dx -= n;
1337 	if(dx == 0)
1338 		return b;
1339 
1340 	assert(dx > 0);
1341 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1342 	ow = buf;
1343 	while(dx--)
1344 		*w++ = *ow++;
1345 
1346 	return b;
1347 }
1348 #undef DBG
1349 
1350 #define DBG if(0)
/*
 * Pack p->dx one-byte-per-pixel values from src.grey into the packed
 * scan line at w, for an image whose depth is below 8 bits.  Only the
 * top `depth' bits of each source byte are kept.  Pixels that share the
 * first or last destination byte but lie outside [p->r.min.x, p->r.min.x+p->dx)
 * are preserved.  Requires src.grey to be non-nil and src.delta to be 1.
 */
1351 static void
1352 writenbit(Param *p, uchar *w, Buffer src)
1353 {
1354 	uchar *r;
1355 	u32int bits;
1356 	int i, sh, depth, npack, nbits, x, ex;
1357 
1358 	assert(src.grey != nil && src.delta == 1);
1359 
1360 	x = p->r.min.x;
1361 	ex = x+p->dx;
1362 	depth = p->img->depth;
1363 	npack = 8/depth;	/* pixels per byte */
1364 
1365 	i=x&(npack-1);	/* pixels already in the first byte, left of the start */
1366 	bits = i ? (*w >> (8-depth*i)) : 0;	/* seed the accumulator with those pixels */
1367 	nbits = depth*i;
1368 	sh = 8-depth;
1369 	r = src.grey;
1370 
1371 	for(; x<ex; x++){
1372 		bits <<= depth;
1373 DBG print(" %x", *r);
1374 		bits |= (*r++ >> sh);
1375 		nbits += depth;
1376 		if(nbits == 8){	/* a full byte has been assembled */
1377 			*w++ = bits;
1378 			nbits = 0;
1379 		}
1380 	}
1381 
1382 	if(nbits){	/* partial last byte: merge with the pixels already to its right */
1383 		sh = 8-nbits;
1384 		bits <<= sh;
1385 		bits |= *w & ((1<<sh)-1);
1386 		*w = bits;
1387 	}
1388 DBG print("\n");
1389 	return;
1390 }
1391 #undef DBG
1392 
/*
 * Read row y of a colour-mapped image into buf, translating each map
 * index through p->img->cmap->cmap2rgb (three bytes per entry).
 * Output per pixel is [alpha] followed by either one grey byte
 * (when p->convgrey is set, computed with RGB2K) or blue, green, red.
 * The alpha byte is present only when the image has Falpha set, in which
 * case source pixels are two bytes wide and the alpha/map byte positions
 * come from img->shift[]/8.  The read pointer wraps from `end' back to
 * `begin', so a request wider than the row repeats it.
 * The returned Buffer points into buf; alpha is &ones when no alpha
 * byte was copied.
 */
1393 static Buffer
1394 readcmap(Param *p, uchar *buf, int y)
1395 {
1396 	Buffer b;
1397 	int a, convgrey, copyalpha, dx, i, m;
1398 	uchar *q, *cmap, *begin, *end, *r, *w;
1399 
1400 	begin = p->bytey0s + y*p->bwidth;
1401 	r = p->bytermin + y*p->bwidth;
1402 	end = p->bytey0e + y*p->bwidth;
1403 	cmap = p->img->cmap->cmap2rgb;
1404 	convgrey = p->convgrey;
1405 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1406 
1407 	w = buf;
1408 	dx = p->dx;
1409 	if(copyalpha){
1410 		b.alpha = buf++;	/* alpha is byte 0 of each output pixel; buf now marks the colour bytes */
1411 		a = p->img->shift[CAlpha]/8;	/* byte offset of alpha within a source pixel */
1412 		m = p->img->shift[CMap]/8;	/* byte offset of the map index within a source pixel */
1413 		for(i=0; i<dx; i++){
1414 			*w++ = r[a];
1415 			q = cmap+r[m]*3;
1416 			r += 2;
1417 			if(r == end)
1418 				r = begin;
1419 			if(convgrey){
1420 				*w++ = RGB2K(q[0], q[1], q[2]);
1421 			}else{
1422 				*w++ = q[2];	/* blue */
1423 				*w++ = q[1];	/* green */
1424 				*w++ = q[0];	/* red */
1425 			}
1426 		}
1427 	}else{
1428 		b.alpha = &ones;
1429 		for(i=0; i<dx; i++){
1430 			q = cmap+*r++*3;
1431 			if(r == end)
1432 				r = begin;
1433 			if(convgrey){
1434 				*w++ = RGB2K(q[0], q[1], q[2]);
1435 			}else{
1436 				*w++ = q[2];	/* blue */
1437 				*w++ = q[1];	/* green */
1438 				*w++ = q[0];	/* red */
1439 			}
1440 		}
1441 	}
1442 
1443 	b.rgba = (u32int*)(buf-copyalpha);	/* undo the buf++ above: start of the output */
1444 
1445 	if(convgrey){
1446 		b.grey = buf;
1447 		b.red = b.blu = b.grn = buf;
1448 		b.delta = 1+copyalpha;
1449 	}else{
1450 		b.blu = buf;
1451 		b.grn = buf+1;
1452 		b.red = buf+2;
1453 		b.grey = nil;
1454 		b.delta = 3+copyalpha;
1455 	}
1456 	return b;
1457 }
1458 
/*
 * Store p->dx pixels from src as colour-map indices at w.
 * Each index is looked up in p->img->cmap->rgb2cmap using the top four
 * bits of red, green and blue: (r>>4)*256 + (g>>4)*16 + (b>>4).
 */
1459 static void
1460 writecmap(Param *p, uchar *w, Buffer src)
1461 {
1462 	uchar *tab;
1463 	int k, n, off;
1464 
1465 	tab = p->img->cmap->rgb2cmap;
1466 	n = p->dx;
1467 
1468 	for(k = 0; k < n; k++){
1469 		off = k*src.delta;	/* byte offset of pixel k within the source Buffer */
1470 		w[k] = tab[(src.red[off]>>4)*256+(src.grn[off]>>4)*16+(src.blu[off]>>4)];
1471 	}
1475 }
1476 
1477 #define DBG if(0)
/*
 * Read row y of an image whose pixels are whole bytes (depth >= 8).
 *
 * Fast path: if the image is not replicated, no grey conversion is asked
 * for, and the image has Fbytes set, no unpacking is done.  The returned
 * Buffer points straight at the image row (or at a copy in buf when
 * p->needbuf is set), with channel pointers at img->shift[]/8 and delta
 * equal to the pixel size in bytes.
 *
 * Otherwise each pixel is assembled into a 32-bit value and unpacked into
 * buf as [alpha] followed by grey, or by blue, green, red; every channel
 * is widened to 8 bits through replbit[nbits].  With p->alphaonly set on a
 * non-grey image only the alpha byte (if any) is written.  The read
 * pointer wraps from `end' to `begin' so replicated rows repeat.
 *
 * The pixel is always fetched as four bytes even when nb < 4.
 * NOTE(review): that reads up to three bytes past the last pixel of the
 * row - presumably the image allocation leaves room; confirm.
 */
1478 static Buffer
1479 readbyte(Param *p, uchar *buf, int y)
1480 {
1481 	Buffer b;
1482 	Memimage *img;
1483 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1484 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1485 	uchar ured, ugrn, ublu;
1486 	u32int u;
1487 
1488 	img = p->img;
1489 	begin = p->bytey0s + y*p->bwidth;
1490 	r = p->bytermin + y*p->bwidth;
1491 	end = p->bytey0e + y*p->bwidth;
1492 
1493 	w = buf;
1494 	dx = p->dx;
1495 	nb = img->depth/8;	/* bytes per pixel */
1496 
1497 	convgrey = p->convgrey;	/* convert rgb to grey */
1498 	isgrey = img->flags&Fgrey;
1499 	alphaonly = p->alphaonly;
1500 	copyalpha = (img->flags&Falpha) ? 1 : 0;
1501 
1502 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1503 	/* if we can, avoid processing everything */
1504 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1505 		memset(&b, 0, sizeof b);
1506 		if(p->needbuf){
1507 			memmove(buf, r, dx*nb);
1508 			r = buf;
1509 		}
1510 		b.rgba = (u32int*)r;
1511 		if(copyalpha)
1512 			b.alpha = r+img->shift[CAlpha]/8;
1513 		else
1514 			b.alpha = &ones;
1515 		if(isgrey){
1516 			b.grey = r+img->shift[CGrey]/8;
1517 			b.red = b.grn = b.blu = b.grey;
1518 		}else{
1519 			b.red = r+img->shift[CRed]/8;
1520 			b.grn = r+img->shift[CGreen]/8;
1521 			b.blu = r+img->shift[CBlue]/8;
1522 		}
1523 		b.delta = nb;
1524 		return b;
1525 	}
1526 
1527 DBG print("2\n");
1528 	rrepl = replbit[img->nbits[CRed]];
1529 	grepl = replbit[img->nbits[CGreen]];
1530 	brepl = replbit[img->nbits[CBlue]];
1531 	arepl = replbit[img->nbits[CAlpha]];
1532 	krepl = replbit[img->nbits[CGrey]];
1533 
1534 	for(i=0; i<dx; i++){
		/* widen r[3] before shifting: as a promoted int, a byte >= 0x80 shifted by 24 overflows */
1535 		u = r[0] | (r[1]<<8) | (r[2]<<16) | ((u32int)r[3]<<24);
1536 		if(copyalpha) {
1537 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1538 DBG print("a %x\n", w[-1]);
1539 		}
1540 
1541 		if(isgrey)
1542 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1543 		else if(!alphaonly){
1544 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1545 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1546 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1547 			if(convgrey){
1548 DBG print("g %x %x %x\n", ured, ugrn, ublu);
1549 				*w++ = RGB2K(ured, ugrn, ublu);
1550 DBG print("%x\n", w[-1]);
1551 			}else{
				/* reuse the channels unpacked above instead of repeating the lookups */
1552 				*w++ = ublu;
1553 				*w++ = ugrn;
1554 				*w++ = ured;
1555 			}
1556 		}
1557 		r += nb;
1558 		if(r == end)
1559 			r = begin;
1560 	}
1561 	
1562 	b.alpha = copyalpha ? buf : &ones;
1563 	b.rgba = (u32int*)buf;
1564 	if(alphaonly){
1565 		b.red = b.grn = b.blu = b.grey = nil;
1566 		if(!copyalpha)
1567 			b.rgba = nil;
1568 		b.delta = 1;
1569 	}else if(isgrey || convgrey){
1570 		b.grey = buf+copyalpha;
1571 		b.red = b.grn = b.blu = buf+copyalpha;
1572 		b.delta = copyalpha+1;
1573 DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
1574 	}else{
1575 		b.blu = buf+copyalpha;
1576 		b.grn = buf+copyalpha+1;
1577 		b.grey = nil;
1578 		b.red = buf+copyalpha+2;
1579 		b.delta = copyalpha+3;
1580 	}
1581 	return b;
1582 }
1583 #undef DBG
1584 
1585 #define DBG if(0)
/*
 * Pack p->dx pixels from src into the row at w, for an image whose pixels
 * are whole bytes.  Each 8-bit channel is cut down to the image's
 * nbits[] for that channel and placed at shift[].  If the image has an
 * alpha channel but src carries none (alpha is nil or &ones), 0xFF is
 * written as alpha for every pixel.
 *
 * Every pixel is read and written back as four bytes; `mask' keeps the
 * bits above the pixel's depth, so for nb < 4 the neighbouring bytes are
 * rewritten with their old values.
 * NOTE(review): at the end of a row this touches up to three bytes past
 * the last pixel - presumably the allocation leaves room; confirm.
 *
 * All packing shifts are done in u32int: the operands are otherwise
 * promoted to int, and shifting an 8-bit value into bit 31 would overflow.
 */
1586 static void
1587 writebyte(Param *p, uchar *w, Buffer src)
1588 {
1589 	Memimage *img;
1590 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1591 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1592 	u32int u, mask;
1593 
1594 	img = p->img;
1595 
1596 	red = src.red;
1597 	grn = src.grn;
1598 	blu = src.blu;
1599 	alpha = src.alpha;
1600 	delta = src.delta;
1601 	grey = src.grey;
1602 	dx = p->dx;
1603 
1604 	nb = img->depth/8;	/* bytes per pixel */
1605 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);	/* bits of the 4-byte fetch that belong to following pixels */
1606 
1607 	isalpha = img->flags&Falpha;
1608 	isgrey = img->flags&Fgrey;
1609 	adelta = src.delta;
1610 
1611 	if(isalpha && (alpha == nil || alpha == &ones)){
1612 		ff = 0xFF;
1613 		alpha = &ff;
1614 		adelta = 0;	/* keep pointing at the single constant byte */
1615 	}
1616 
1617 	for(i=0; i<dx; i++){
1618 		u = w[0] | (w[1]<<8) | (w[2]<<16) | ((u32int)w[3]<<24);
1619 DBG print("u %.8lux...", u);
1620 		u &= mask;
1621 DBG print("&mask %.8lux...", u);
1622 		if(isgrey){
1623 			u |= (u32int)((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1624 DBG print("|grey %.8lux...", u);
1625 			grey += delta;
1626 		}else{
1627 			u |= (u32int)((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1628 			u |= (u32int)((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1629 			u |= (u32int)((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1630 			red += delta;
1631 			grn += delta;
1632 			blu += delta;
1633 DBG print("|rgb %.8lux...", u);
1634 		}
1635 
1636 		if(isalpha){
1637 			u |= (u32int)((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1638 			alpha += adelta;
1639 DBG print("|alpha %.8lux...", u);
1640 		}
1641 
1642 		w[0] = u;
1643 		w[1] = u>>8;
1644 		w[2] = u>>16;
1645 		w[3] = u>>24;
1646 		w += nb;
1647 	}
1648 }
1649 #undef DBG
1650 
/*
 * Pick the scan line reader for img: readnbit below 8 bits deep,
 * readcmap when the map channel is 8 bits wide, readbyte otherwise.
 */
1651 static Readfn*
1652 readfn(Memimage *img)
1653 {
1654 	return img->depth < 8 ? readnbit
1655 		: img->nbits[CMap] == 8 ? readcmap
1656 		: readbyte;
1659 }
1660 
/*
 * Pick the reader used to fetch alpha from an image.
 * The image is not consulted: readbyte is returned unconditionally.
 */
1661 static Readfn*
1662 readalphafn(Memimage *m)
1663 {
1664 	USED(m);
1665 	return readbyte;
1666 }
1667 
/*
 * Pick the scan line writer for img: writenbit below 8 bits deep,
 * writecmap for CMAP8, writebyte otherwise.
 */
1668 static Writefn*
1669 writefn(Memimage *img)
1670 {
1671 	return img->depth < 8 ? writenbit
1672 		: img->chan == CMAP8 ? writecmap
1673 		: writebyte;
1676 }
1677 
/*
 * Writer that does nothing: a Writefn-shaped placeholder that discards
 * its arguments.
 */
1678 static void
1679 nullwrite(Param *p, uchar *s, Buffer b)
1680 {
1681 	USED(p);
1682 	USED(s);
1683 }
1684 
/*
 * Reader that copies nothing: the returned Buffer's red and rgba fields
 * point directly at row y of the image data, the other channel pointers
 * are nil, and delta is the pixel size in bytes.  s is not used.
 */
1685 static Buffer
1686 readptr(Param *p, uchar *s, int y)
1687 {
1688 	Buffer out;
1689 	uchar *row;
1690 
1691 	USED(s);
1692 	row = p->bytermin + y*p->bwidth;
1693 	out.rgba = (u32int*)row;
1694 	out.red = row;	/* ptr to data */
1695 	out.alpha = out.grey = out.blu = out.grn = nil;
1696 	out.delta = p->img->depth/8;
1697 	return out;
1698 }
1699 
/*
 * Calc routine that copies a whole run with memmove: dx*bdst.delta bytes
 * from bsrc.red to bdst.red.  The mask Buffer (b1) and the last two
 * arguments are not consulted.  Returns bdst.
 */
1700 static Buffer
1701 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1702 {
1703 	USED(i);
1704 	USED(o);
1705 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1706 	return bdst;
1707 }
1708 
/*
 * Masked copy of dx one-byte pixels: pixel k goes from bsrc.red to
 * bdst.red wherever byte k of bmask.grey is nonzero.
 */
1709 static Buffer
1710 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1711 {
1712 	uchar *mask, *src, *dst;
1713 	int k;
1714 
1715 	USED(i);
1716 	USED(o);
1717 	mask = bmask.grey;
1718 	dst = bdst.red;
1719 	src = bsrc.red;
1720 	for(k = 0; k < dx; k++)
1721 		if(mask[k])
1722 			dst[k] = src[k];
1723 	return bdst;	/* not used */
1724 }
1725 
/*
 * Masked copy of dx two-byte pixels: pixel k goes from bsrc.red to
 * bdst.red (both accessed as ushort) wherever byte k of bmask.grey is
 * nonzero.
 */
1726 static Buffer
1727 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1728 {
1729 	uchar *mask;
1730 	ushort *src, *dst;
1731 	int k;
1732 
1733 	USED(i);
1734 	USED(o);
1735 	mask = bmask.grey;
1736 	dst = (ushort*)bdst.red;
1737 	src = (ushort*)bsrc.red;
1738 	for(k = 0; k < dx; k++)
1739 		if(mask[k])
1740 			dst[k] = src[k];
1741 	return bdst;	/* not used */
1742 }
1743 
/*
 * Masked copy of dx three-byte pixels: the three bytes of pixel k go from
 * bsrc.red to bdst.red wherever byte k of bmask.grey is nonzero.
 */
1744 static Buffer
1745 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1746 {
1747 	uchar *mask, *src, *dst;
1748 	int k;
1749 
1750 	USED(i);
1751 	USED(o);
1752 	mask = bmask.grey;
1753 	dst = bdst.red;
1754 	src = bsrc.red;
1755 	for(k = 0; k < dx; k++, dst += 3, src += 3){
1756 		if(mask[k] == 0)
1757 			continue;	/* masked out: leave the destination pixel alone */
1758 		dst[0] = src[0];
1759 		dst[1] = src[1];
1760 		dst[2] = src[2];
1761 	}
1766 	return bdst;	/* not used */
1767 }
1768 
/*
 * Masked copy of dx four-byte pixels: pixel k goes from bsrc.red to
 * bdst.red (both accessed as u32int) wherever byte k of bmask.grey is
 * nonzero.
 */
1769 static Buffer
1770 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1771 {
1772 	uchar *mask;
1773 	u32int *src, *dst;
1774 	int k;
1775 
1776 	USED(i);
1777 	USED(o);
1778 	mask = bmask.grey;
1779 	dst = (u32int*)bdst.red;
1780 	src = (u32int*)bsrc.red;
1781 	for(k = 0; k < dx; k++)
1782 		if(mask[k])
1783 			dst[k] = src[k];
1784 	return bdst;	/* not used */
1785 }
1786 
/*
 * General converting reader.  Row y is read with p->convreadcall into
 * p->convbuf, then written with p->convwritecall (using the Param in
 * p->convdpar) into buf, so buf ends up holding the row in that Param's
 * pixel format.  If p->convdx is set, the p->dx converted pixels are then
 * repeated along buf until nb*p->convdx bytes are filled.
 * The returned Buffer has red and rgba pointing at buf, the other channel
 * pointers nil, and delta 0.
 */
1787 static Buffer
1788 genconv(Param *p, uchar *buf, int y)
1789 {
1790 	Buffer b;
1791 	int nb;
1792 	uchar *r, *w, *ew;
1793 
1794 	/* read from source into RGB format in convbuf */
1795 	b = p->convreadcall(p, p->convbuf, y);
1796 
1797 	/* write RGB format into dst format in buf */
1798 	p->convwritecall(p->convdpar, buf, b);
1799 
1800 	if(p->convdx){
1801 		nb = p->convdpar->img->depth/8;	/* bytes per converted pixel */
1802 		r = buf;
1803 		w = buf+nb*p->dx;	/* first byte after the converted pixels */
1804 		ew = buf+nb*p->convdx;
1805 		while(w<ew)	/* byte-by-byte forward copy: r trails w, so the row repeats */
1806 			*w++ = *r++;
1807 	}
1808 
1809 	b.red = buf;
1810 	b.blu = b.grn = b.grey = b.alpha = nil;
1811 	b.rgba = (u32int*)buf;
1812 	b.delta = 0;
1813 	
1814 	return b;
1815 }
1816 
/*
 * Choose the reader that delivers src pixels in dst's pixel format.
 *	- same channel descriptor and src not replicated: readptr (no copy);
 *	- dst is CMAP8 and src is GREY1, GREY2 or GREY4: readnbit;
 *	- otherwise genconv, after filling in spar's conv* fields.
 * The genconv case reserves spar->dx*4 bytes of conversion buffer by
 * recording the current ndrawbuf in spar->convbufoff and advancing
 * ndrawbuf.  If spar->dx is wider than the source image, the requested
 * width is remembered in spar->convdx and spar->dx is cut to the image
 * width.
 */
1817 static Readfn*
1818 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
1819 {
1820 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1821 //if(drawdebug) iprint("readptr...");
1822 		return readptr;
1823 	}
1824 
1825 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1826 		/* cheat because we know the replicated value is exactly the color map entry. */
1827 //if(drawdebug) iprint("Readnbit...");
1828 		return readnbit;
1829 	}
1830 
1831 	spar->convreadcall = readfn(src);
1832 	spar->convwritecall = writefn(dst);
1833 	spar->convdpar = dpar;
1834 
1835 	/* allocate a conversion buffer */
1836 	spar->convbufoff = ndrawbuf;
1837 	ndrawbuf += spar->dx*4;
1838 
1839 	if(spar->dx > Dx(spar->img->r)){
1840 		spar->convdx = spar->dx;
1841 		spar->dx = Dx(spar->img->r);
1842 	}
1843 
1844 //if(drawdebug) iprint("genconv...");
1845 	return genconv;
1846 }
1847 
1848 /*
1849  * Do NOT call this directly.  pixelbits is a wrapper
1850  * around this that fetches the bits from the X server
1851  * when necessary.
1852  */
/*
 * Return the raw pixel value of image i at pt, repeated through all
 * 32 bits of the result (a 1-bit pixel becomes 0 or ~0, an 8-bit pixel
 * is repeated four times, and so on).  Multi-byte pixels are assembled
 * low byte first.
 *
 * The position of a sub-byte pixel within its byte is taken with
 * pt.x&(npack-1), as readnbit and writenbit do, so that a negative x
 * still yields 0..npack-1; C's % would give a negative remainder there
 * and an out-of-range shift.  The top byte of a 32-bit pixel is widened
 * before shifting so the shift cannot overflow int.
 */
1853 u32int
1854 _pixelbits(Memimage *i, Point pt)
1855 {
1856 	uchar *p;
1857 	u32int val;
1858 	int off, bpp, npack;
1859 
1860 	val = 0;
1861 	p = byteaddr(i, pt);
1862 	switch(bpp=i->depth){
1863 	case 1:
1864 	case 2:
1865 	case 4:
1866 		npack = 8/bpp;	/* pixels per byte */
1867 		off = pt.x&(npack-1);	/* pixel index within the byte, counted from the left */
1868 		val = p[0] >> bpp*(npack-1-off);
1869 		val &= (1<<bpp)-1;
1870 		break;
1871 	case 8:
1872 		val = p[0];
1873 		break;
1874 	case 16:
1875 		val = p[0]|(p[1]<<8);
1876 		break;
1877 	case 24:
1878 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1879 		break;
1880 	case 32:
1881 		val = p[0]|(p[1]<<8)|(p[2]<<16)|((u32int)p[3]<<24);
1882 		break;
1883 	}
1884 	while(bpp<32){	/* replicate the pixel upward until the word is full */
1885 		val |= val<<bpp;
1886 		bpp *= 2;
1887 	}
1888 	return val;
1889 }
1890 
/*
 * Choose the masked-copy routine for destination img.
 * A replicated 1x1 mask whose pixel reads as all ones lets everything
 * through, so boolmemmove is used; otherwise the routine is picked by
 * img's depth (8, 16, 24 or 32).  Any other depth trips the assert.
 */
1891 static Calcfn*
1892 boolcopyfn(Memimage *img, Memimage *mask)
1893 {
1894 	if((mask->flags&Frepl) && Dx(mask->r)==1 && Dy(mask->r)==1)
1895 	if(pixelbits(mask, mask->r.min)==~0)
1896 		return boolmemmove;
1897 
1898 	if(img->depth == 8)
1899 		return boolcopy8;
1900 	if(img->depth == 16)
1901 		return boolcopy16;
1902 	if(img->depth == 24)
1903 		return boolcopy24;
1904 	if(img->depth == 32)
1905 		return boolcopy32;
1907 	assert(0 /* boolcopyfn */);
1909 	return 0;
1910 }
1911 
1912 /*
1913  * Optimized draw for filling and scrolling; uses memset and memmove.
1914  */
/* Store val into n consecutive ushorts starting at vp. */
1915 static void
1916 memsets(void *vp, ushort val, int n)
1917 {
1918 	ushort *dst;
1919 	int k;
1920 
1921 	dst = vp;
1922 	for(k = 0; k < n; k++)
1923 		dst[k] = val;
1924 }
1925 
/* Store val into n consecutive u32ints starting at vp. */
1926 static void
1927 memsetl(void *vp, u32int val, int n)
1928 {
1929 	u32int *dst;
1930 	int k;
1931 
1932 	dst = vp;
1933 	for(k = 0; k < n; k++)
1934 		dst[k] = val;
1935 }
1936 
/*
 * Fill n three-byte pixels starting at vp with the low 24 bits of val,
 * least significant byte first.
 */
1937 static void
1938 memset24(void *vp, u32int val, int n)
1939 {
1940 	uchar *dst;
1941 	int k;
1942 
1943 	dst = vp;
1944 	for(k = 0; k < n; k++, dst += 3){
1945 		dst[0] = val;
1946 		dst[1] = val>>8;
1947 		dst[2] = val>>16;
1948 	}
1953 }
1954 
/*
 * Convert val, a pixel value in img's channel format, to a 32-bit colour
 * laid out as red<<24 | green<<16 | blue<<8 | alpha.
 *
 * img->chan is walked one 8-bit channel descriptor at a time, lowest
 * first; each descriptor consumes NBITS(chan) bits from the bottom of val.
 * Channels narrower than 8 bits are widened by repeating their bit
 * pattern.  A CMap channel is looked up in img->cmap->cmap2rgb using the
 * unwidened index.  Alpha defaults to 0xFF; colour channels the format
 * does not mention keep the 0xAA filler.
 *
 * Red is widened to u32int before the final shift: as a promoted int,
 * a value >= 0x80 shifted left by 24 would overflow.
 */
1955 u32int
1956 _imgtorgba(Memimage *img, u32int val)
1957 {
1958 	uchar r, g, b, a;
1959 	int nb, ov, v;
1960 	u32int chan;
1961 	uchar *p;
1962 
1963 	a = 0xFF;
1964 	r = g = b = 0xAA;	/* garbage */
1965 	for(chan=img->chan; chan; chan>>=8){
1966 		nb = NBITS(chan);
1967 		ov = v = val&((1<<nb)-1);	/* ov keeps the raw field for the colour map lookup */
1968 		val >>= nb;
1969 
1970 		while(nb < 8){	/* repeat the bit pattern until at least 8 bits wide */
1971 			v |= v<<nb;
1972 			nb *= 2;
1973 		}
1974 		v >>= (nb-8);	/* keep the top 8 bits */
1975 
1976 		switch(TYPE(chan)){
1977 		case CRed:
1978 			r = v;
1979 			break;
1980 		case CGreen:
1981 			g = v;
1982 			break;
1983 		case CBlue:
1984 			b = v;
1985 			break;
1986 		case CAlpha:
1987 			a = v;
1988 			break;
1989 		case CGrey:
1990 			r = g = b = v;
1991 			break;
1992 		case CMap:
1993 			p = img->cmap->cmap2rgb+3*ov;
1994 			r = *p++;
1995 			g = *p++;	
1996 			b = *p;
1997 			break;
1998 		}
1999 	}
2000 	return ((u32int)r<<24)|(g<<16)|(b<<8)|a;	
2001 }
2002 
/*
 * Convert rgba (red<<24 | green<<16 | blue<<8 | alpha) to a pixel value
 * in img's channel format.
 *
 * img->chan is walked one 8-bit channel descriptor at a time, lowest
 * first; each channel contributes the top NBITS(chan) bits of its 8-bit
 * value at the running bit offset d.  A CMap channel is looked up in
 * img->cmap->rgb2cmap from the top four bits of r, g and b; a CGrey
 * channel uses RGB2K(r,g,b).
 *
 * Each field is widened to u32int before being shifted into place: the
 * operands are otherwise promoted to int, and shifting an 8-bit value
 * to offset 24 would overflow.
 */
2003 u32int
2004 _rgbatoimg(Memimage *img, u32int rgba)
2005 {
2006 	u32int chan;
2007 	int d, nb;
2008 	u32int v;
2009 	uchar *p, r, g, b, a, m;
2010 
2011 	v = 0;
2012 	r = rgba>>24;
2013 	g = rgba>>16;
2014 	b = rgba>>8;
2015 	a = rgba;
2016 	d = 0;	/* bit offset of the current channel within the pixel */
2017 	for(chan=img->chan; chan; chan>>=8){
2018 		nb = NBITS(chan);
2019 		switch(TYPE(chan)){
2020 		case CRed:
2021 			v |= (u32int)(r>>(8-nb))<<d;
2022 			break;
2023 		case CGreen:
2024 			v |= (u32int)(g>>(8-nb))<<d;
2025 			break;
2026 		case CBlue:
2027 			v |= (u32int)(b>>(8-nb))<<d;
2028 			break;
2029 		case CAlpha:
2030 			v |= (u32int)(a>>(8-nb))<<d;
2031 			break;
2032 		case CMap:
2033 			p = img->cmap->rgb2cmap;
2034 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2035 			v |= (u32int)(m>>(8-nb))<<d;
2036 			break;
2037 		case CGrey:
2038 			m = RGB2K(r,g,b);
2039 			v |= (u32int)(m>>(8-nb))<<d;
2040 			break;
2041 		}
2042 		d += nb;
2043 	}
2044 //	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2045 	return v;
2046 }
2047 
2048 #define DBG if(0)
/*
 * Try to perform the draw described by par with one of three special
 * cases.  Returns 1 if the draw was done here, 0 if none of the cases
 * applied (nothing has been drawn in that event).
 *
 *	1. Solid fill: simple source and mask, full mask, opaque source
 *	   colour (low byte of par->srgba is 0xFF), op S or SoverD.  The
 *	   destination-format pixel par->sdval is stored across the rectangle.
 *	2. Straight copy: simple full mask, source not replicated, source
 *	   depth >= 8, identical channel formats, no source alpha, op S or
 *	   SoverD.  Rows are moved with memmove, bottom-up when source and
 *	   destination share data and the destination lies later in memory.
 *	3. 1-bit source, mask and destination whose x origins agree modulo 8:
 *	   bytes are combined with dst ^= (dst ^ src) & mask.
 *
 * Row strides are width*sizeof(u32int) bytes throughout.
 */
2049 static int
2050 memoptdraw(Memdrawparam *par)
2051 {
2052 	int m, y, dy, dx, op;
2053 	u32int v;
2054 	Memimage *src;
2055 	Memimage *dst;
2056 
2057 	dx = Dx(par->r);
2058 	dy = Dy(par->r);
2059 	src = par->src;
2060 	dst = par->dst;
2061 	op = par->op;
2062 
2063 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2064 	/*
2065 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2066 	 * destination format and just replicate with memset.
2067 	 */
2068 	m = Simplesrc|Simplemask|Fullmask;
2069 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2070 		uchar *dp, p[4];
2071 		int d, dwid, ppb, np, nb;
2072 		uchar lm, rm;
2073 
2074 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2075 		dwid = dst->width*sizeof(u32int);
2076 		dp = byteaddr(dst, par->r.min);
2077 		v = par->sdval;
2078 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2079 		switch(dst->depth){
2080 		case 1:
2081 		case 2:
2082 		case 4:
2083 			for(d=dst->depth; d<8; d*=2)	/* repeat the pixel to fill a whole byte */
2084 				v |= (v<<d);
2085 			ppb = 8/dst->depth;	/* pixels per byte */
2086 			m = ppb-1;
2087 			/* left edge */
2088 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2089 			dx -= (ppb-np);
2090 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2091 			lm = (1<<nb)-1;
2092 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);	
2093 
2094 			/* right edge */
2095 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2096 			dx -= np;
2097 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2098 			rm = ~((1<<nb)-1);
2099 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);	
2100 
2101 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2102 			/* lm, rm are masks that are 1 where we should touch the bits */
2103 			if(dx < 0){	/* just one byte */
2104 				lm &= rm;
2105 				for(y=0; y<dy; y++, dp+=dwid)
2106 					*dp ^= (v ^ *dp) & lm;	/* replace only the bits selected by lm */
2107 			}else if(dx == 0){	/* no full bytes */
2108 				if(lm)
2109 					dwid--;	/* dp is advanced by one inside the loop */
2110 
2111 				for(y=0; y<dy; y++, dp+=dwid){
2112 					if(lm){
2113 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2114 						*dp ^= (v ^ *dp) & lm;
2115 						dp++;
2116 					}
2117 					*dp ^= (v ^ *dp) & rm;	/* leaves the byte's value unchanged when rm is 0 */
2118 				}
2119 			}else{		/* full bytes in middle */
2120 				dx /= ppb;	/* dx now counts whole bytes */
2121 				if(lm)
2122 					dwid--;
2123 				dwid -= dx;	/* stride minus the bytes the loop body steps over */
2124 
2125 				for(y=0; y<dy; y++, dp+=dwid){
2126 					if(lm){
2127 						*dp ^= (v ^ *dp) & lm;
2128 						dp++;
2129 					}
2130 					memset(dp, v, dx);
2131 					dp += dx;
2132 					*dp ^= (v ^ *dp) & rm;	/* leaves the byte's value unchanged when rm is 0 */
2133 				}
2134 			}
2135 			return 1;
2136 		case 8:
2137 			for(y=0; y<dy; y++, dp+=dwid)
2138 				memset(dp, v, dx);
2139 			return 1;
2140 		case 16:
2141 			p[0] = v;		/* make little endian */
2142 			p[1] = v>>8;
2143 			v = *(ushort*)p;	/* NOTE(review): reads uchar p[4] through a ushort pointer; relies on p being suitably aligned and on the compiler tolerating the type pun - confirm */
2144 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2145 	dp, dx, dy, dwid);
2146 			for(y=0; y<dy; y++, dp+=dwid)
2147 				memsets(dp, v, dx);
2148 			return 1;
2149 		case 24:
2150 			for(y=0; y<dy; y++, dp+=dwid)
2151 				memset24(dp, v, dx);
2152 			return 1;
2153 		case 32:
2154 			p[0] = v;		/* make little endian */
2155 			p[1] = v>>8;
2156 			p[2] = v>>16;
2157 			p[3] = v>>24;
2158 			v = *(u32int*)p;	/* NOTE(review): same type-punned load as the 16-bit case - confirm */
2159 			for(y=0; y<dy; y++, dp+=dwid)
2160 				memsetl(dp, v, dx);
2161 			return 1;
2162 		default:
2163 			assert(0 /* bad dest depth in memoptdraw */);
2164 		}
2165 	}
2166 
2167 	/*
2168 	 * If no source alpha, an opaque mask, we can just copy the
2169 	 * source onto the destination.  If the channels are the same and
2170 	 * the source is not replicated, memmove suffices.
2171 	 */
2172 	m = Simplemask|Fullmask;
2173 	if((par->state&(m|Replsrc))==m && src->depth >= 8 
2174 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2175 		uchar *sp, *dp;
2176 		long swid, dwid, nb;
2177 		int dir;
2178 
		/* overlapping copy within one image: go bottom-up if the destination is later in memory */
2179 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2180 			dir = -1;
2181 		else
2182 			dir = 1;
2183 
2184 		swid = src->width*sizeof(u32int);
2185 		dwid = dst->width*sizeof(u32int);
2186 		sp = byteaddr(src, par->sr.min);
2187 		dp = byteaddr(dst, par->r.min);
2188 		if(dir == -1){
2189 			sp += (dy-1)*swid;
2190 			dp += (dy-1)*dwid;
2191 			swid = -swid;
2192 			dwid = -dwid;
2193 		}
2194 		nb = (dx*src->depth)/8;	/* bytes per row to move */
2195 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2196 			memmove(dp, sp, nb);
2197 		return 1;
2198 	}
2199 
2200 	/*
2201 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2202 	 * they're all bit aligned, we can just use bit operators.  This happens
2203 	 * when we're manipulating boolean masks, e.g. in the arc code.
2204 	 */
2205 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0 
2206 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1 
2207 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2208 		uchar *sp, *dp, *mp;
2209 		uchar lm, rm;
2210 		long swid, dwid, mwid;
2211 		int i, x, dir;
2212 
2213 		sp = byteaddr(src, par->sr.min);
2214 		dp = byteaddr(dst, par->r.min);
2215 		mp = byteaddr(par->mask, par->mr.min);
2216 		swid = src->width*sizeof(u32int);
2217 		dwid = dst->width*sizeof(u32int);
2218 		mwid = par->mask->width*sizeof(u32int);
2219 
2220 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2221 			dir = -1;
2222 		}else
2223 			dir = 1;
2224 
2225 		lm = 0xFF>>(par->r.min.x&7);	/* bits of the first byte inside the rectangle */
2226 		rm = 0xFF<<(8-(par->r.max.x&7));	/* bits of the last byte inside; 0 when max.x is byte aligned */
2227 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);	/* pixels left for whole middle bytes */
2228 
2229 		if(dx < 0){	/* one byte wide */
2230 			lm &= rm;
2231 			if(dir == -1){
2232 				dp += dwid*(dy-1);
2233 				sp += swid*(dy-1);
2234 				mp += mwid*(dy-1);
2235 				dwid = -dwid;
2236 				swid = -swid;
2237 				mwid = -mwid;
2238 			}
2239 			for(y=0; y<dy; y++){
2240 				*dp ^= (*dp ^ *sp) & *mp & lm;
2241 				dp += dwid;
2242 				sp += swid;
2243 				mp += mwid;
2244 			}
2245 			return 1;
2246 		}
2247 
2248 		dx /= 8;	/* dx now counts whole middle bytes */
2249 		if(dir == 1){
2250 			i = (lm!=0)+dx+(rm!=0);	/* bytes touched per row */
2251 			mwid -= i;
2252 			swid -= i;
2253 			dwid -= i;
2254 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2255 				if(lm){
2256 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2257 					dp++;
2258 				}
2259 				for(x=0; x<dx; x++){
2260 					*dp ^= (*dp ^ *sp++) & *mp++;
2261 					dp++;
2262 				}
2263 				if(rm){
2264 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2265 					dp++;
2266 				}
2267 			}
2268 			return 1;
2269 		}else{
2270 		/* dir == -1 */
2271 			i = (lm!=0)+dx+(rm!=0);
2272 			dp += dwid*(dy-1)+i-1;	/* start at the last byte of the last row */
2273 			sp += swid*(dy-1)+i-1;
2274 			mp += mwid*(dy-1)+i-1;
2275 			dwid = -dwid+i;
2276 			swid = -swid+i;
2277 			mwid = -mwid+i;
2278 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2279 				if(rm){
2280 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2281 					dp--;
2282 				}
2283 				for(x=0; x<dx; x++){
2284 					*dp ^= (*dp ^ *sp--) & *mp--;
2285 					dp--;
2286 				}
2287 				if(lm){
2288 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2289 					dp--;
2290 				}
2291 			}
2292 		}
2293 		return 1;
2294 	}
2295 	return 0;	
2296 }
2297 #undef DBG
2298 
2299 /*
2300  * Boolean character drawing.
2301  * Solid opaque color through a 1-bit greyscale mask.
2302  */
/*
 * Returns 0 without drawing unless all of these hold: the source is a
 * replicated simple source, the mask is not replicated and is 1 bit deep,
 * the source has no alpha, the destination is at least 8 bits deep and
 * does not share data with the source, and op is SoverD.
 * Otherwise stores the destination-format pixel par->sdval at every
 * destination pixel whose mask bit is 1, and returns 1.
 */
2303 #define DBG if(0)
2304 static int
2305 chardraw(Memdrawparam *par)
2306 {
2307 	u32int bits;
2308 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2309 	u32int v, maskwid, dstwid;
2310 	uchar *wp, *rp, *q, *wc;
2311 	ushort *ws;
2312 	u32int *wl;
2313 	uchar sp[4];
2314 	Rectangle r, mr;
2315 	Memimage *mask, *src, *dst;
2316 
2317 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2318 		par->mask->flags, par->mask->depth, par->src->flags, 
2319 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2320 
2321 	mask = par->mask;
2322 	src = par->src;
2323 	dst = par->dst;
2324 	r = par->r;
2325 	mr = par->mr;
2326 	op = par->op;
2327 
2328 	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2329 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2330 	|| op != SoverD)
2331 		return 0;
2332 
2333 //if(drawdebug) iprint("chardraw...");
2334 
2335 	depth = mask->depth;
2336 	maskwid = mask->width*sizeof(u32int);	/* mask row stride in bytes */
2337 	rp = byteaddr(mask, mr.min);
2338 	npack = 8/depth;
2339 	bsh = (mr.min.x % npack) * depth;	/* bit offset of the first mask pixel within its byte */
2340 
2341 	wp = byteaddr(dst, r.min);
2342 	dstwid = dst->width*sizeof(u32int);	/* destination row stride in bytes */
2343 DBG print("bsh %d\n", bsh);
2344 	dy = Dy(r);
2345 	dx = Dx(r);
2346 
2347 	ddepth = dst->depth;
2348 
2349 	/*
2350 	 * for loop counts from bsh to bsh+dx
2351 	 *
2352 	 * we want the bottom bits to be the amount
2353 	 * to shift the pixels down, so for n≡0 (mod 8) we want 
2354 	 * bottom bits 7.  for n≡1, 6, etc.
2355 	 * the bits come from -n-1.
2356 	 */
2357 
2358 	bx = -bsh-1;
2359 	ex = -bsh-1-dx;
2360 	SET(bits);
2361 	v = par->sdval;
2362 
2363 	/* make little endian */
2364 	sp[0] = v;
2365 	sp[1] = v>>8;
2366 	sp[2] = v>>16;
2367 	sp[3] = v>>24;
2368 
2369 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2370 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2371 		q = rp;
2372 		if(bsh)	/* row starts mid-byte: preload that byte, since the loops only fetch when x&7 is 7 */
2373 			bits = *q++;
2374 		switch(ddepth){
2375 		case 8:
2376 //if(drawdebug) iprint("8loop...");
2377 			wc = wp;
2378 			for(x=bx; x>ex; x--, wc++){
2379 				i = x&7;	/* shift that brings this pixel's mask bit to bit 0 */
2380 				if(i == 8-1)	/* first pixel of a mask byte: fetch it */
2381 					bits = *q++;
2382 DBG print("bits %lux sh %d...", bits, i);
2383 				if((bits>>i)&1)
2384 					*wc = v;
2385 			}
2386 			break;
2387 		case 16:
2388 			ws = (ushort*)wp;
2389 			v = *(ushort*)sp;	/* NOTE(review): reads uchar sp[4] through a ushort pointer; relies on sp being suitably aligned and on the compiler tolerating the type pun - confirm */
2390 			for(x=bx; x>ex; x--, ws++){
2391 				i = x&7;
2392 				if(i == 8-1)
2393 					bits = *q++;
2394 DBG print("bits %lux sh %d...", bits, i);
2395 				if((bits>>i)&1)
2396 					*ws = v;
2397 			}
2398 			break;
2399 		case 24:
2400 			wc = wp;
2401 			for(x=bx; x>ex; x--, wc+=3){
2402 				i = x&7;
2403 				if(i == 8-1)
2404 					bits = *q++;
2405 DBG print("bits %lux sh %d...", bits, i);
2406 				if((bits>>i)&1){
2407 					wc[0] = sp[0];
2408 					wc[1] = sp[1];
2409 					wc[2] = sp[2];
2410 				}
2411 			}
2412 			break;
2413 		case 32:
2414 			wl = (u32int*)wp;
2415 			v = *(u32int*)sp;	/* NOTE(review): same type-punned load as the 16-bit case - confirm */
2416 			for(x=bx; x>ex; x--, wl++){
2417 				i = x&7;
2418 				if(i == 8-1)
2419 					bits = *q++;
2420 DBG iprint("bits %lux sh %d...", bits, i);
2421 				if((bits>>i)&1)
2422 					*wl = v;
2423 			}
2424 			break;
2425 		}
2426 	}
2427 
2428 DBG print("\n");	
2429 	return 1;	
2430 }
2431 #undef DBG
2432 
2433 
2434 /*
2435  * Fill entire byte with replicated (if necessary) copy of source pixel,
2436  * assuming destination ldepth is >= source ldepth.
2437  *
2438  * This code is just plain wrong for >8bpp.
2439  *
2440 u32int
2441 membyteval(Memimage *src)
2442 {
2443 	int i, val, bpp;
2444 	uchar uc;
2445 
2446 	unloadmemimage(src, src->r, &uc, 1);
2447 	bpp = src->depth;
2448 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2449 	uc &= ~(0xFF>>bpp);
2450 	* pixel value is now in high part of byte. repeat throughout byte 
2451 	val = uc;
2452 	for(i=bpp; i<8; i<<=1)
2453 		val |= val>>i;
2454 	return val;
2455 }
2456  * 
2457  */
2458 
2459 void
2460 _memfillcolor(Memimage *i, u32int val)
2461 {
2462 	u32int bits;
2463 	int d, y;
2464 	uchar p[4];
2465 
2466 	if(val == DNofill)
2467 		return;
2468 
2469 	bits = _rgbatoimg(i, val);
2470 	switch(i->depth){
2471 	case 24:	/* 24-bit images suck */
2472 		for(y=i->r.min.y; y<i->r.max.y; y++)
2473 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2474 		break;
2475 	default:	/* 1, 2, 4, 8, 16, 32 */
2476 		for(d=i->depth; d<32; d*=2)
2477 			bits = (bits << d) | bits;
2478 		p[0] = bits;		/* make little endian */
2479 		p[1] = bits>>8;
2480 		p[2] = bits>>16;
2481 		p[3] = bits>>24;
2482 		bits = *(u32int*)p;
2483 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2484 		break;
2485 	}
2486 }
2487