op public repos

Blob

Date:: Sun Jun 25 18:58:06 2006 UTC
Message:: separate out
Actions:: History | Blame | Raw File
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <memdraw.h>
5 
int drawdebug;			/* nonzero enables the print() tracing in this file */
static int	tablesbuilt;	/* set by mktables once the conversion tables are filled */

/* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)

/*
 * Division by 255 without a divide.
 * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
 * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
 * the shift-and-add form is also perfect for all x up to 1<<16 and avoids
 * the multiply, but needs a caller-supplied temporary.
 */
/* #define DIV255(x) (((x)*257+256)>>16)  */
#define DIV255(x) ((((x)+1)*257)>>16)
/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */

/*
 * MUL(x, y, t): x*y/255 rounded to nearest, for 0 ≤ x,y ≤ 255.
 * t is a caller-supplied scratch lvalue.
 */
#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)

/*
 * Two-lanes-at-a-time versions of MUL for a packed 32-bit pixel.
 * MASK13 selects bytes 1 and 3, MASK02 bytes 0 and 2.  Each selected byte
 * sits in its own 16-bit lane, so the rounding bias has to be added to
 * both lanes (0x00800080), not just the low one; with a bias of plain 128
 * the upper lane truncated and multiplying by 255 lost one unit there.
 * t (and s) must be unsigned 32-bit scratch lvalues; a must be in 0..255.
 */
#define MASK13	0xFF00FF00
#define MASK02	0x00FF00FF
#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+0x00800080, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+0x00800080, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))

/* (u*v + x*y)/255 rounded; NOTE: uses a variable named t from the calling scope */
#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)
29 
30 static void mktables(void);
31 typedef int Subdraw(Memdrawparam*);
32 static Subdraw chardraw, alphadraw, memoptdraw;
33 
34 static Memimage*	memones;
35 static Memimage*	memzeros;
36 Memimage *memwhite;
37 Memimage *memblack;
38 Memimage *memtransparent;
39 Memimage *memopaque;
40 
41 int	__ifmt(Fmt*);
42 
43 void
44 memimageinit(void)
45 {
46 	static int didinit = 0;
47 
48 	if(didinit)
49 		return;
50 
51 	didinit = 1;
52 
53 	mktables();
54 	_memmkcmap();
55 
56 	fmtinstall('R', Rfmt); 
57 	fmtinstall('P', Pfmt);
58 	fmtinstall('b', __ifmt);
59 
60 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
61 	memones->flags |= Frepl;
62 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
63 	*byteaddr(memones, ZP) = ~0;
64 
65 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
66 	memzeros->flags |= Frepl;
67 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
68 	*byteaddr(memzeros, ZP) = 0;
69 
70 	if(memones == nil || memzeros == nil)
71 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
72 
73 	memwhite = memones;
74 	memblack = memzeros;
75 	memopaque = memones;
76 	memtransparent = memzeros;
77 }
78 
79 u32int _imgtorgba(Memimage*, u32int);
80 u32int _rgbatoimg(Memimage*, u32int);
81 u32int _pixelbits(Memimage*, Point);
82 
83 #define DBG if(drawdebug)
84 static Memdrawparam par;
85 
86 Memdrawparam*
87 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
88 {
89 	if(mask == nil)
90 		mask = memopaque;
91 
92 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
93 
94 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
95 /*		if(drawdebug) */
96 /*			iprint("empty clipped rectangle\n"); */
97 		return nil;
98 	}
99 
100 	if(op < Clear || op > SoverD){
101 /*		if(drawdebug) */
102 /*			iprint("op out of range: %d\n", op); */
103 		return nil;
104 	}
105 
106 	par.op = op;
107 	par.dst = dst;
108 	par.r = r;
109 	par.src = src;
110 	/* par.sr set by drawclip */
111 	par.mask = mask;
112 	/* par.mr set by drawclip */
113 
114 	par.state = 0;
115 	if(src->flags&Frepl){
116 		par.state |= Replsrc;
117 		if(Dx(src->r)==1 && Dy(src->r)==1){
118 			par.sval = pixelbits(src, src->r.min);
119 			par.state |= Simplesrc;
120 			par.srgba = _imgtorgba(src, par.sval);
121 			par.sdval = _rgbatoimg(dst, par.srgba);
122 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
123 /*				if (drawdebug) iprint("fill with transparent source\n"); */
124 				return nil;	/* no-op successfully handled */
125 			}
126 			if((par.srgba&0xFF) == 0xFF)
127 				par.state |= Fullsrc;
128 		}
129 	}
130 
131 	if(mask->flags & Frepl){
132 		par.state |= Replmask;
133 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
134 			par.mval = pixelbits(mask, mask->r.min);
135 			if(par.mval == 0 && (op&DoutS)){
136 /*				if(drawdebug) iprint("fill with zero mask\n"); */
137 				return nil;	/* no-op successfully handled */
138 			}
139 			par.state |= Simplemask;
140 			if(par.mval == ~0)
141 				par.state |= Fullmask;
142 			par.mrgba = _imgtorgba(mask, par.mval);
143 		}
144 	}
145 
146 /*	if(drawdebug) */
147 /*		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr); */
148 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
149 
150 	return &par;
151 }
152 
153 void
154 _memimagedraw(Memdrawparam *par)
155 {
156 	/*
157 	 * Now that we've clipped the parameters down to be consistent, we 
158 	 * simply try sub-drawing routines in order until we find one that was able
159 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
160 	 * unable to satisfy the request, so we do not return.
161 	 */
162 
163 	/*
164 	 * Hardware support.  Each video driver provides this function,
165 	 * which checks to see if there is anything it can help with.
166 	 * There could be an if around this checking to see if dst is in video memory.
167 	 */
168 DBG print("test hwdraw\n");
169 	if(hwdraw(par)){
170 /*if(drawdebug) iprint("hw handled\n"); */
171 DBG print("hwdraw handled\n");
172 		return;
173 	}
174 	/*
175 	 * Optimizations using memmove and memset.
176 	 */
177 DBG print("test memoptdraw\n");
178 	if(memoptdraw(par)){
179 /*if(drawdebug) iprint("memopt handled\n"); */
180 DBG print("memopt handled\n");
181 		return;
182 	}
183 
184 	/*
185 	 * Character drawing.
186 	 * Solid source color being painted through a boolean mask onto a high res image.
187 	 */
188 DBG print("test chardraw\n");
189 	if(chardraw(par)){
190 /*if(drawdebug) iprint("chardraw handled\n"); */
191 DBG print("chardraw handled\n");
192 		return;
193 	}
194 
195 	/*
196 	 * General calculation-laden case that does alpha for each pixel.
197 	 */
198 DBG print("do alphadraw\n");
199 	alphadraw(par);
200 /*if(drawdebug) iprint("alphadraw handled\n"); */
201 DBG print("alphadraw handled\n");
202 }
203 #undef DBG
204 
205 /*
206  * Clip the destination rectangle further based on the properties of the 
207  * source and mask rectangles.  Once the destination rectangle is properly
208  * clipped, adjust the source and mask rectangles to be the same size.
209  * Then if source or mask is replicated, move its clipped rectangle
210  * so that its minimum point falls within the repl rectangle.
211  *
212  * Return zero if the final rectangle is null.
213  */
214 int
215 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
216 {
217 	Point rmin, delta;
218 	int splitcoords;
219 	Rectangle omr;
220 
221 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
222 		return 0;
223 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
224 	/* clip to destination */
225 	rmin = r->min;
226 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
227 		return 0;
228 	/* move mask point */
229 	p1->x += r->min.x-rmin.x;
230 	p1->y += r->min.y-rmin.y;
231 	/* move source point */
232 	p0->x += r->min.x-rmin.x;
233 	p0->y += r->min.y-rmin.y;
234 	/* map destination rectangle into source */
235 	sr->min = *p0;
236 	sr->max.x = p0->x+Dx(*r);
237 	sr->max.y = p0->y+Dy(*r);
238 	/* sr is r in source coordinates; clip to source */
239 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
240 		return 0;
241 	if(!rectclip(sr, src->clipr))
242 		return 0;
243 	/* compute and clip rectangle in mask */
244 	if(splitcoords){
245 		/* move mask point with source */
246 		p1->x += sr->min.x-p0->x;
247 		p1->y += sr->min.y-p0->y;
248 		mr->min = *p1;
249 		mr->max.x = p1->x+Dx(*sr);
250 		mr->max.y = p1->y+Dy(*sr);
251 		omr = *mr;
252 		/* mr is now rectangle in mask; clip it */
253 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
254 			return 0;
255 		if(!rectclip(mr, mask->clipr))
256 			return 0;
257 		/* reflect any clips back to source */
258 		sr->min.x += mr->min.x-omr.min.x;
259 		sr->min.y += mr->min.y-omr.min.y;
260 		sr->max.x += mr->max.x-omr.max.x;
261 		sr->max.y += mr->max.y-omr.max.y;
262 		*p1 = mr->min;
263 	}else{
264 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
265 			return 0;
266 		if(!rectclip(sr, mask->clipr))
267 			return 0;
268 		*p1 = sr->min;
269 	}
270 
271 	/* move source clipping back to destination */
272 	delta.x = r->min.x - p0->x;
273 	delta.y = r->min.y - p0->y;
274 	r->min.x = sr->min.x + delta.x;
275 	r->min.y = sr->min.y + delta.y;
276 	r->max.x = sr->max.x + delta.x;
277 	r->max.y = sr->max.y + delta.y;
278 
279 	/* move source rectangle so sr->min is in src->r */
280 	if(src->flags&Frepl) {
281 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
282 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
283 		sr->min.x += delta.x;
284 		sr->min.y += delta.y;
285 		sr->max.x += delta.x;
286 		sr->max.y += delta.y;
287 	}
288 	*p0 = sr->min;
289 
290 	/* move mask point so it is in mask->r */
291 	*p1 = drawrepl(mask->r, *p1);
292 	mr->min = *p1;
293 	mr->max.x = p1->x+Dx(*sr);
294 	mr->max.y = p1->y+Dy(*sr);
295 
296 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
297 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
298 	assert(ptinrect(*p0, src->r));
299 	assert(ptinrect(*p1, mask->r));
300 	assert(ptinrect(r->min, dst->r));
301 
302 	return 1;
303 }
304 
305 /*
306  * Conversion tables.
307  */
308 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
309 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
310 static uchar conv28[256][4];		/* ... */
311 static uchar conv48[256][2];
312 
/*
 * Multipliers that replicate an n-bit pattern to fill 8 bits, for 1 ≤ n ≤ 8.
 * In the pictures, X marks each place the bottom (ones) bit of the n-bit
 * pattern is put; the product is 16 bits wide and only its top 8 bits
 * are used.  (The lower 8 bits are needed to get bits in the right place
 * when n is not a divisor of 8.)
 *
 * Should check to see if it's easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
static int replmul[1+8] = {
	0,		/* n=0: unused */
	0xFFFF,		/* n=1: X X X X X X X X X X X X X X X X */
	0x5555,		/* n=2: _ X _ X _ X _ X _ X _ X _ X _ X */
	0x2492,		/* n=3: _ _ X _ _ X _ _ X _ _ X _ _ X _ */
	0x1111,		/* n=4: _ _ _ X _ _ _ X _ _ _ X _ _ _ X */
	0x0842,		/* n=5: _ _ _ _ X _ _ _ _ X _ _ _ _ X _ */
	0x0410,		/* n=6: _ _ _ _ _ X _ _ _ _ _ X _ _ _ _ */
	0x0204,		/* n=7: _ _ _ _ _ _ X _ _ _ _ _ _ X _ _ */
	0x0101,		/* n=8: _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X */
};
341 
342 static void
343 mktables(void)
344 {
345 	int i, j, mask, sh, small;
346 		
347 	if(tablesbuilt)
348 		return;
349 
350 	fmtinstall('R', Rfmt);
351 	fmtinstall('P', Pfmt);
352 	tablesbuilt = 1;
353 
354 	/* bit replication up to 8 bits */
355 	for(i=0; i<256; i++){
356 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
357 			small = i & ((1<<j)-1);
358 			replbit[j][i] = (small*replmul[j])>>8;
359 		}
360 	}
361 
362 	/* bit unpacking up to 8 bits, only powers of 2 */
363 	for(i=0; i<256; i++){
364 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
365 			conv18[i][j] = replbit[1][(i>>sh)&mask];
366 
367 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
368 			conv28[i][j] = replbit[2][(i>>sh)&mask];
369 
370 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
371 			conv48[i][j] = replbit[4][(i>>sh)&mask];
372 	}
373 }
374 
375 static uchar ones = 0xff;
376 
377 /*
378  * General alpha drawing case.  Can handle anything.
379  */
380 typedef struct	Buffer	Buffer;
381 struct Buffer {
382 	/* used by most routines */
383 	uchar	*red;
384 	uchar	*grn;
385 	uchar	*blu;
386 	uchar	*alpha;
387 	uchar	*grey;
388 	u32int	*rgba;
389 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
390 
391 	/* used by boolcalc* for mask data */
392 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
393 	int		mskip;	/* no. of left bits to skip in *m */
394 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
395 	int		bmskip;	/* no. of left bits to skip in *bm */
396 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
397 	int		emskip;	/* no. of right bits to skip in *em */
398 };
399 
400 typedef struct	Param	Param;
401 typedef Buffer	Readfn(Param*, uchar*, int);
402 typedef void	Writefn(Param*, uchar*, Buffer);
403 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
404 
405 enum {
406 	MAXBCACHE = 16
407 };
408 
409 /* giant rathole to customize functions with */
410 struct Param {
411 	Readfn	*replcall;
412 	Readfn	*greymaskcall;	
413 	Readfn	*convreadcall;
414 	Writefn	*convwritecall;
415 
416 	Memimage *img;
417 	Rectangle	r;
418 	int	dx;	/* of r */
419 	int	needbuf;
420 	int	convgrey;
421 	int	alphaonly;
422 
423 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
424 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
425 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
426 	int		bwidth;
427 
428 	int	replcache;	/* if set, cache buffers */
429 	Buffer	bcache[MAXBCACHE];
430 	u32int	bfilled;
431 	uchar	*bufbase;
432 	int	bufoff;
433 	int	bufdelta;
434 
435 	int	dir;
436 
437 	int	convbufoff;
438 	uchar	*convbuf;
439 	Param	*convdpar;
440 	int	convdx;
441 };
442 
443 static uchar *drawbuf;
444 static int	ndrawbuf;
445 static int	mdrawbuf;
446 static Param spar, mpar, dpar;	/* easier on the stacks */
447 static Readfn	greymaskread, replread, readptr;
448 static Writefn	nullwrite;
449 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
450 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
451 
452 static Readfn*	readfn(Memimage*);
453 static Readfn*	readalphafn(Memimage*);
454 static Writefn*	writefn(Memimage*);
455 
456 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
457 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*);
458 
459 static Calcfn *alphacalc[Ncomp] = 
460 {
461 	alphacalc0,		/* Clear */
462 	alphacalc14,		/* DoutS */
463 	alphacalc2810,		/* SoutD */
464 	alphacalc3679,		/* DxorS */
465 	alphacalc14,		/* DinS */
466 	alphacalc5,		/* D */
467 	alphacalc3679,		/* DatopS */
468 	alphacalc3679,		/* DoverS */
469 	alphacalc2810,		/* SinD */
470 	alphacalc3679,		/* SatopD */
471 	alphacalc2810,		/* S */
472 	alphacalc11,		/* SoverD */
473 };
474 
475 static Calcfn *boolcalc[Ncomp] =
476 {
477 	alphacalc0,		/* Clear */
478 	boolcalc14,		/* DoutS */
479 	boolcalc236789,		/* SoutD */
480 	boolcalc236789,		/* DxorS */
481 	boolcalc14,		/* DinS */
482 	alphacalc5,		/* D */
483 	boolcalc236789,		/* DatopS */
484 	boolcalc236789,		/* DoverS */
485 	boolcalc236789,		/* SinD */
486 	boolcalc236789,		/* SatopD */
487 	boolcalc1011,		/* S */
488 	boolcalc1011,		/* SoverD */
489 };
490 
491 static int
492 allocdrawbuf(void)
493 {
494 	uchar *p;
495 
496 	if(ndrawbuf > mdrawbuf){
497 		p = realloc(drawbuf, ndrawbuf);
498 		if(p == nil){
499 			werrstr("memimagedraw out of memory");
500 			return -1;
501 		}
502 		drawbuf = p;
503 		mdrawbuf = ndrawbuf;
504 	}
505 	return 0;
506 }
507 
508 static void
509 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf)
510 {
511 	int nbuf;
512 
513 	memset(p, 0, sizeof *p);
514 
515 	p->img = img;
516 	p->r = r;
517 	p->dx = Dx(r);
518 	p->needbuf = needbuf;
519 	p->convgrey = convgrey;
520 
521 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
522 
523 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
524 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
525 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
526 	p->bwidth = sizeof(u32int)*img->width;
527 
528 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
529 
530 	if(p->r.min.x == p->img->r.min.x)
531 		assert(p->bytermin == p->bytey0s);
532 
533 	nbuf = 1;
534 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
535 		p->replcache = 1;
536 		nbuf = Dy(img->r);
537 	}
538 	p->bufdelta = 4*p->dx;
539 	p->bufoff = ndrawbuf;
540 	ndrawbuf += p->bufdelta*nbuf;
541 }
542 
543 static void
544 clipy(Memimage *img, int *y)
545 {
546 	int dy;
547 
548 	dy = Dy(img->r);
549 	if(*y == dy)
550 		*y = 0;
551 	else if(*y == -1)
552 		*y = dy-1;
553 	assert(0 <= *y && *y < dy);
554 }
555 
556 static void
557 dumpbuf(char *s, Buffer b, int n)
558 {
559 	int i;
560 	uchar *p;
561 	
562 	print("%s", s);
563 	for(i=0; i<n; i++){
564 		print(" ");
565 		if(p=b.grey){
566 			print(" k%.2uX", *p);
567 			b.grey += b.delta;
568 		}else{	
569 			if(p=b.red){
570 				print(" r%.2uX", *p);
571 				b.red += b.delta;
572 			}
573 			if(p=b.grn){
574 				print(" g%.2uX", *p);
575 				b.grn += b.delta;
576 			}
577 			if(p=b.blu){
578 				print(" b%.2uX", *p);
579 				b.blu += b.delta;
580 			}
581 		}
582 		if((p=b.alpha) != &ones){
583 			print(" α%.2uX", *p);
584 			b.alpha += b.delta;
585 		}
586 	}
587 	print("\n");
588 }
589 
590 /*
591  * For each scan line, we expand the pixels from source, mask, and destination
592  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
593  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
594  * the readers need not copy the data: they can simply return pointers to the data.
595  * If the destination image is grey and the source is not, it is converted using the NTSC
596  * formula.
597  *
598  * Once we have all the channels, we call either rgbcalc or greycalc, depending on 
599  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
600  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
601  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
602  * the calculator, and that buffer is passed to a function to write it to the destination.
603  * If the buffer is already pointing at the destination, the writing function is a no-op.
604  */
605 #define DBG if(drawdebug)
606 static int
607 alphadraw(Memdrawparam *par)
608 {
609 	int isgrey, starty, endy, op;
610 	int needbuf, dsty, srcy, masky;
611 	int y, dir, dx, dy;
612 	Buffer bsrc, bdst, bmask;
613 	Readfn *rdsrc, *rdmask, *rddst;
614 	Calcfn *calc;
615 	Writefn *wrdst;
616 	Memimage *src, *mask, *dst;
617 	Rectangle r, sr, mr;
618 
619 	if(drawdebug)
620 		print("alphadraw %R\n", par->r);
621 	r = par->r;
622 	dx = Dx(r);
623 	dy = Dy(r);
624 
625 	ndrawbuf = 0;
626 
627 	src = par->src;
628 	mask = par->mask;	
629 	dst = par->dst;
630 	sr = par->sr;
631 	mr = par->mr;
632 	op = par->op;
633 
634 	isgrey = dst->flags&Fgrey;
635 
636 	/*
637 	 * Buffering when src and dst are the same bitmap is sufficient but not 
638 	 * necessary.  There are stronger conditions we could use.  We could
639 	 * check to see if the rectangles intersect, and if simply moving in the
640 	 * correct y direction can avoid the need to buffer.
641 	 */
642 	needbuf = (src->data == dst->data);
643 
644 	getparam(&spar, src, sr, isgrey, needbuf);
645 	getparam(&dpar, dst, r, isgrey, needbuf);
646 	getparam(&mpar, mask, mr, 0, needbuf);
647 
648 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
649 	spar.dir = mpar.dir = dpar.dir = dir;
650 
651 	/*
652 	 * If the mask is purely boolean, we can convert from src to dst format
653 	 * when we read src, and then just copy it to dst where the mask tells us to.
654 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
655 	 *
656 	 * The computation is accomplished by assigning the function pointers as follows:
657 	 *	rdsrc - read and convert source into dst format in a buffer
658 	 * 	rdmask - convert mask to bytes, set pointer to it
659 	 * 	rddst - fill with pointer to real dst data, but do no reads
660 	 *	calc - copy src onto dst when mask says to.
661 	 *	wrdst - do nothing
662 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
663 	 * but it avoids a fair amount of code duplication to make this a case here
664 	 * rather than have a separate booldraw.
665 	 */
666 /*if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth); */
667 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
668 /*if(drawdebug) iprint("boolcopy..."); */
669 		rdsrc = convfn(dst, &dpar, src, &spar);
670 		rddst = readptr;
671 		rdmask = readfn(mask);
672 		calc = boolcopyfn(dst, mask);
673 		wrdst = nullwrite;
674 	}else{
675 		/* usual alphadraw parameter fetching */
676 		rdsrc = readfn(src);
677 		rddst = readfn(dst);
678 		wrdst = writefn(dst);
679 		calc = alphacalc[op];
680 
681 		/*
682 		 * If there is no alpha channel, we'll ask for a grey channel
683 		 * and pretend it is the alpha.
684 		 */
685 		if(mask->flags&Falpha){
686 			rdmask = readalphafn(mask);
687 			mpar.alphaonly = 1;
688 		}else{
689 			mpar.greymaskcall = readfn(mask);
690 			mpar.convgrey = 1;
691 			rdmask = greymaskread;
692 
693 			/*
694 			 * Should really be above, but then boolcopyfns would have
695 			 * to deal with bit alignment, and I haven't written that.
696 			 *
697 			 * This is a common case for things like ellipse drawing.
698 			 * When there's no alpha involved and the mask is boolean,
699 			 * we can avoid all the division and multiplication.
700 			 */
701 			if(mask->chan == GREY1 && !(src->flags&Falpha))
702 				calc = boolcalc[op];
703 			else if(op == SoverD && !(src->flags&Falpha))
704 				calc = alphacalcS;
705 		}
706 	}
707 
708 	/*
709 	 * If the image has a small enough repl rectangle,
710 	 * we can just read each line once and cache them.
711 	 */
712 	if(spar.replcache){
713 		spar.replcall = rdsrc;
714 		rdsrc = replread;
715 	}
716 	if(mpar.replcache){
717 		mpar.replcall = rdmask;
718 		rdmask = replread;
719 	}
720 
721 	if(allocdrawbuf() < 0)
722 		return 0;
723 
724 	/*
725 	 * Before we were saving only offsets from drawbuf in the parameter
726 	 * structures; now that drawbuf has been grown to accomodate us,
727 	 * we can fill in the pointers.
728 	 */
729 	spar.bufbase = drawbuf+spar.bufoff;
730 	mpar.bufbase = drawbuf+mpar.bufoff;
731 	dpar.bufbase = drawbuf+dpar.bufoff;
732 	spar.convbuf = drawbuf+spar.convbufoff;
733 
734 	if(dir == 1){
735 		starty = 0;
736 		endy = dy;
737 	}else{
738 		starty = dy-1;
739 		endy = -1;
740 	}
741 
742 	/*
743 	 * srcy, masky, and dsty are offsets from the top of their
744 	 * respective Rectangles.  they need to be contained within
745 	 * the rectangles, so clipy can keep them there without division.
746  	 */
747 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
748 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
749 	dsty = starty + r.min.y - dst->r.min.y;
750 
751 	assert(0 <= srcy && srcy < Dy(src->r));
752 	assert(0 <= masky && masky < Dy(mask->r));
753 	assert(0 <= dsty && dsty < Dy(dst->r));
754 
755 	if(drawdebug)
756 		print("alphadraw: rdsrc=%p rdmask=%p rddst=%p calc=%p wrdst=%p\n",
757 			rdsrc, rdmask, rddst, calc, wrdst);
758 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
759 		clipy(src, &srcy);
760 		clipy(dst, &dsty);
761 		clipy(mask, &masky);
762 
763 		bsrc = rdsrc(&spar, spar.bufbase, srcy);
764 DBG print("[");
765 		bmask = rdmask(&mpar, mpar.bufbase, masky);
766 DBG print("]\n");
767 		bdst = rddst(&dpar, dpar.bufbase, dsty);
768 DBG		dumpbuf("src", bsrc, dx);
769 DBG		dumpbuf("mask", bmask, dx);
770 DBG		dumpbuf("dst", bdst, dx);
771 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
772 DBG		dumpbuf("bdst", bdst, dx);
773 		wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
774 	}
775 
776 	return 1;
777 }
778 #undef DBG
779 
780 static Buffer
781 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
782 {
783 	USED(grey);
784 	USED(op);
785 	memset(bdst.rgba, 0, dx*bdst.delta);
786 	return bdst;
787 }
788 
789 static Buffer
790 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
791 {
792 	Buffer obdst;
793 	int fd, sadelta;
794 	int i, sa, ma, q;
795 	u32int s, t;
796 
797 	obdst = bdst;
798 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
799 	q = bsrc.delta == 4 && bdst.delta == 4;
800 
801 	for(i=0; i<dx; i++){
802 		sa = *bsrc.alpha;
803 		ma = *bmask.alpha;
804 		fd = MUL(sa, ma, t);
805 		if(op == DoutS)
806 			fd = 255-fd;
807 
808 		if(grey){
809 			*bdst.grey = MUL(fd, *bdst.grey, t);
810 			bsrc.grey += bsrc.delta;
811 			bdst.grey += bdst.delta;
812 		}else{
813 			if(q){
814 				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);
815 				bsrc.rgba++;
816 				bdst.rgba++;
817 				bsrc.alpha += sadelta;
818 				bmask.alpha += bmask.delta;
819 				continue;
820 			}
821 			*bdst.red = MUL(fd, *bdst.red, t);
822 			*bdst.grn = MUL(fd, *bdst.grn, t);
823 			*bdst.blu = MUL(fd, *bdst.blu, t);
824 			bsrc.red += bsrc.delta;
825 			bsrc.blu += bsrc.delta;
826 			bsrc.grn += bsrc.delta;
827 			bdst.red += bdst.delta;
828 			bdst.blu += bdst.delta;
829 			bdst.grn += bdst.delta;
830 		}
831 		if(bdst.alpha != &ones){
832 			*bdst.alpha = MUL(fd, *bdst.alpha, t);
833 			bdst.alpha += bdst.delta;
834 		}
835 		bmask.alpha += bmask.delta;
836 		bsrc.alpha += sadelta;
837 	}
838 	return obdst;
839 }
840 
841 static Buffer
842 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
843 {
844 	Buffer obdst;
845 	int fs, sadelta;
846 	int i, ma, da, q;
847 	u32int s, t;
848 
849 	obdst = bdst;
850 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
851 	q = bsrc.delta == 4 && bdst.delta == 4;
852 
853 	for(i=0; i<dx; i++){
854 		ma = *bmask.alpha;
855 		da = *bdst.alpha;
856 		if(op == SoutD)
857 			da = 255-da;
858 		fs = ma;
859 		if(op != S)
860 			fs = MUL(fs, da, t);
861 
862 		if(grey){
863 			*bdst.grey = MUL(fs, *bsrc.grey, t);
864 			bsrc.grey += bsrc.delta;
865 			bdst.grey += bdst.delta;
866 		}else{
867 			if(q){
868 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);
869 				bsrc.rgba++;
870 				bdst.rgba++;
871 				bmask.alpha += bmask.delta;
872 				bdst.alpha += bdst.delta;
873 				continue;
874 			}
875 			*bdst.red = MUL(fs, *bsrc.red, t);
876 			*bdst.grn = MUL(fs, *bsrc.grn, t);
877 			*bdst.blu = MUL(fs, *bsrc.blu, t);
878 			bsrc.red += bsrc.delta;
879 			bsrc.blu += bsrc.delta;
880 			bsrc.grn += bsrc.delta;
881 			bdst.red += bdst.delta;
882 			bdst.blu += bdst.delta;
883 			bdst.grn += bdst.delta;
884 		}
885 		if(bdst.alpha != &ones){
886 			*bdst.alpha = MUL(fs, *bsrc.alpha, t);
887 			bdst.alpha += bdst.delta;
888 		}
889 		bmask.alpha += bmask.delta;
890 		bsrc.alpha += sadelta;
891 	}
892 	return obdst;
893 }
894 
895 static Buffer
896 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
897 {
898 	Buffer obdst;
899 	int fs, fd, sadelta;
900 	int i, sa, ma, da, q;
901 	u32int s, t, u, v;
902 
903 	obdst = bdst;
904 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
905 	q = bsrc.delta == 4 && bdst.delta == 4;
906 
907 	for(i=0; i<dx; i++){
908 		sa = *bsrc.alpha;
909 		ma = *bmask.alpha;
910 		da = *bdst.alpha;
911 		if(op == SatopD)
912 			fs = MUL(ma, da, t);
913 		else
914 			fs = MUL(ma, 255-da, t);
915 		if(op == DoverS)
916 			fd = 255;
917 		else{
918 			fd = MUL(sa, ma, t);
919 			if(op != DatopS)
920 				fd = 255-fd;
921 		}
922 
923 		if(grey){
924 			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
925 			bsrc.grey += bsrc.delta;
926 			bdst.grey += bdst.delta;
927 		}else{
928 			if(q){
929 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
930 				bsrc.rgba++;
931 				bdst.rgba++;
932 				bsrc.alpha += sadelta;
933 				bmask.alpha += bmask.delta;
934 				bdst.alpha += bdst.delta;
935 				continue;
936 			}
937 			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
938 			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
939 			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
940 			bsrc.red += bsrc.delta;
941 			bsrc.blu += bsrc.delta;
942 			bsrc.grn += bsrc.delta;
943 			bdst.red += bdst.delta;
944 			bdst.blu += bdst.delta;
945 			bdst.grn += bdst.delta;
946 		}
947 		if(bdst.alpha != &ones){
948 			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
949 			bdst.alpha += bdst.delta;
950 		}
951 		bmask.alpha += bmask.delta;
952 		bsrc.alpha += sadelta;
953 	}
954 	return obdst;
955 }
956 
957 static Buffer
958 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
959 {
960 	USED(dx);
961 	USED(grey);
962 	USED(op);
963 	return bdst;
964 }
965 
966 static Buffer
967 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
968 {
969 	Buffer obdst;
970 	int fd, sadelta;
971 	int i, sa, ma, q;
972 	u32int s, t, u, v;
973 
974 	USED(op);
975 	obdst = bdst;
976 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
977 	q = bsrc.delta == 4 && bdst.delta == 4;
978 
979 	for(i=0; i<dx; i++){
980 		sa = *bsrc.alpha;
981 		ma = *bmask.alpha;
982 		fd = 255-MUL(sa, ma, t);
983 
984 		if(grey){
985 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
986 			bsrc.grey += bsrc.delta;
987 			bdst.grey += bdst.delta;
988 		}else{
989 			if(q){
990 				*bdst.rgba = MUL0123(ma, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
991 				bsrc.rgba++;
992 				bdst.rgba++;
993 				bsrc.alpha += sadelta;
994 				bmask.alpha += bmask.delta;
995 				continue;
996 			}
997 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
998 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
999 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1000 			bsrc.red += bsrc.delta;
1001 			bsrc.blu += bsrc.delta;
1002 			bsrc.grn += bsrc.delta;
1003 			bdst.red += bdst.delta;
1004 			bdst.blu += bdst.delta;
1005 			bdst.grn += bdst.delta;
1006 		}
1007 		if(bdst.alpha != &ones){
1008 			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);
1009 			bdst.alpha += bdst.delta;
1010 		}
1011 		bmask.alpha += bmask.delta;
1012 		bsrc.alpha += sadelta;
1013 	}
1014 	return obdst;
1015 }
1016 
1017 /*
1018 not used yet
1019 source and mask alpha 1
1020 static Buffer
1021 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1022 {
1023 	Buffer obdst;
1024 	int i;
1025 
1026 	USED(op);
1027 	obdst = bdst;
1028 	if(bsrc.delta == bdst.delta){
1029 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1030 		return obdst;
1031 	}
1032 	for(i=0; i<dx; i++){
1033 		if(grey){
1034 			*bdst.grey = *bsrc.grey;
1035 			bsrc.grey += bsrc.delta;
1036 			bdst.grey += bdst.delta;
1037 		}else{
1038 			*bdst.red = *bsrc.red;
1039 			*bdst.grn = *bsrc.grn;
1040 			*bdst.blu = *bsrc.blu;
1041 			bsrc.red += bsrc.delta;
1042 			bsrc.blu += bsrc.delta;
1043 			bsrc.grn += bsrc.delta;
1044 			bdst.red += bdst.delta;
1045 			bdst.blu += bdst.delta;
1046 			bdst.grn += bdst.delta;
1047 		}
1048 		if(bdst.alpha != &ones){
1049 			*bdst.alpha = 255;
1050 			bdst.alpha += bdst.delta;
1051 		}
1052 	}
1053 	return obdst;
1054 }
1055 */
1056 
1057 /* source alpha 1 */
1058 static Buffer
1059 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1060 {
1061 	Buffer obdst;
1062 	int fd;
1063 	int i, ma;
1064 	u32int s, t;
1065 
1066 	USED(op);
1067 	obdst = bdst;
1068 
1069 	for(i=0; i<dx; i++){
1070 		ma = *bmask.alpha;
1071 		fd = 255-ma;
1072 
1073 		if(grey){
1074 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1075 			bsrc.grey += bsrc.delta;
1076 			bdst.grey += bdst.delta;
1077 		}else{
1078 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1079 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1080 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1081 			bsrc.red += bsrc.delta;
1082 			bsrc.blu += bsrc.delta;
1083 			bsrc.grn += bsrc.delta;
1084 			bdst.red += bdst.delta;
1085 			bdst.blu += bdst.delta;
1086 			bdst.grn += bdst.delta;
1087 		}
1088 		if(bdst.alpha != &ones){
1089 			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);
1090 			bdst.alpha += bdst.delta;
1091 		}
1092 		bmask.alpha += bmask.delta;
1093 	}
1094 	return obdst;
1095 }
1096 
1097 static Buffer
1098 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1099 {
1100 	Buffer obdst;
1101 	int i, ma, zero;
1102 
1103 	obdst = bdst;
1104 
1105 	for(i=0; i<dx; i++){
1106 		ma = *bmask.alpha;
1107 		zero = ma ? op == DoutS : op == DinS;
1108 
1109 		if(grey){
1110 			if(zero)
1111 				*bdst.grey = 0;
1112 			bdst.grey += bdst.delta;
1113 		}else{
1114 			if(zero)
1115 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1116 			bdst.red += bdst.delta;
1117 			bdst.blu += bdst.delta;
1118 			bdst.grn += bdst.delta;
1119 		}
1120 		bmask.alpha += bmask.delta;
1121 		if(bdst.alpha != &ones){
1122 			if(zero)
1123 				*bdst.alpha = 0;
1124 			bdst.alpha += bdst.delta;
1125 		}
1126 	}
1127 	return obdst;
1128 }
1129 
1130 static Buffer
1131 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1132 {
1133 	Buffer obdst;
1134 	int fs, fd;
1135 	int i, ma, da, zero;
1136 	u32int s, t;
1137 
1138 	obdst = bdst;
1139 	zero = !(op&1);
1140 
1141 	for(i=0; i<dx; i++){
1142 		ma = *bmask.alpha;
1143 		da = *bdst.alpha;
1144 		fs = da;
1145 		if(op&2)
1146 			fs = 255-da;
1147 		fd = 0;
1148 		if(op&4)
1149 			fd = 255;
1150 
1151 		if(grey){
1152 			if(ma)
1153 				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1154 			else if(zero)
1155 				*bdst.grey = 0;
1156 			bsrc.grey += bsrc.delta;
1157 			bdst.grey += bdst.delta;
1158 		}else{
1159 			if(ma){
1160 				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1161 				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1162 				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1163 			}
1164 			else if(zero)
1165 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1166 			bsrc.red += bsrc.delta;
1167 			bsrc.blu += bsrc.delta;
1168 			bsrc.grn += bsrc.delta;
1169 			bdst.red += bdst.delta;
1170 			bdst.blu += bdst.delta;
1171 			bdst.grn += bdst.delta;
1172 		}
1173 		bmask.alpha += bmask.delta;
1174 		if(bdst.alpha != &ones){
1175 			if(ma)
1176 				*bdst.alpha = fs+MUL(fd, da, t);
1177 			else if(zero)
1178 				*bdst.alpha = 0;
1179 			bdst.alpha += bdst.delta;
1180 		}
1181 	}
1182 	return obdst;
1183 }
1184 
1185 static Buffer
1186 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1187 {
1188 	Buffer obdst;
1189 	int i, ma, zero;
1190 
1191 	obdst = bdst;
1192 	zero = !(op&1);
1193 
1194 	for(i=0; i<dx; i++){
1195 		ma = *bmask.alpha;
1196 
1197 		if(grey){
1198 			if(ma)
1199 				*bdst.grey = *bsrc.grey;
1200 			else if(zero)
1201 				*bdst.grey = 0;
1202 			bsrc.grey += bsrc.delta;
1203 			bdst.grey += bdst.delta;
1204 		}else{
1205 			if(ma){
1206 				*bdst.red = *bsrc.red;
1207 				*bdst.grn = *bsrc.grn;
1208 				*bdst.blu = *bsrc.blu;
1209 			}
1210 			else if(zero)
1211 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1212 			bsrc.red += bsrc.delta;
1213 			bsrc.blu += bsrc.delta;
1214 			bsrc.grn += bsrc.delta;
1215 			bdst.red += bdst.delta;
1216 			bdst.blu += bdst.delta;
1217 			bdst.grn += bdst.delta;
1218 		}
1219 		bmask.alpha += bmask.delta;
1220 		if(bdst.alpha != &ones){
1221 			if(ma)
1222 				*bdst.alpha = 255;
1223 			else if(zero)
1224 				*bdst.alpha = 0;
1225 			bdst.alpha += bdst.delta;
1226 		}
1227 	}
1228 	return obdst;
1229 }
1230 /*
1231  * Replicated cached scan line read.  Call the function listed in the Param,
1232  * but cache the result so that for replicated images we only do the work once.
1233  */
1234 static Buffer
1235 replread(Param *p, uchar *s, int y)
1236 {
1237 	Buffer *b;
1238 
1239 	USED(s);
1240 	b = &p->bcache[y];
1241 	if((p->bfilled & (1<<y)) == 0){
1242 		p->bfilled |= 1<<y;
1243 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1244 	}
1245 	return *b;
1246 }
1247 
1248 /*
1249  * Alpha reading function that simply relabels the grey pointer.
1250  */
1251 static Buffer
1252 greymaskread(Param *p, uchar *buf, int y)
1253 {
1254 	Buffer b;
1255 
1256 	b = p->greymaskcall(p, buf, y);
1257 	b.alpha = b.grey;
1258 	return b;
1259 }
1260 
1261 #define DBG if(0)
/*
 * Read one scan line (line y) of an image whose pixels are packed
 * several to a byte, expanding every pixel to one byte in buf through
 * the replbit[depth] table.
 *
 * The returned Buffer has grey, red, grn and blu all pointing at buf,
 * alpha pointing at the shared `ones` byte and delta 1.
 *
 * p->dx output bytes are produced in up to three passes:
 *  1. from p->r.min.x to the right edge of the image (img->r.max.x);
 *  2. from the left edge of the image up to p->r.min.x;
 *  3. by copying what is already in buf until dx bytes exist.
 * Passes 2 and 3 only run when dx is larger than what the previous
 * pass could deliver.
 */
1262 static Buffer
1263 readnbit(Param *p, uchar *buf, int y)
1264 {
1265 	Buffer b;
1266 	Memimage *img;
1267 	uchar *repl, *r, *w, *ow, bits;
1268 	int i, n, sh, depth, x, dx, npack, nbits;
1269 
1270 	memset(&b, 0, sizeof b);
1271 	b.rgba = (u32int*)buf;
1272 	b.grey = w = buf;
1273 	b.red = b.blu = b.grn = w;
1274 	b.alpha = &ones;
1275 	b.delta = 1;
1276 
1277 	dx = p->dx;
1278 	img = p->img;
1279 	depth = img->depth;
	/* repl maps a depth-bit pixel value to its one-byte expansion */
1280 	repl = &replbit[depth][0];
	/* npack pixels share a byte; sh moves the top pixel of a byte down to bit 0 */
1281 	npack = 8/depth;
1282 	sh = 8-depth;
1283 
1284 	/* copy from p->r.min.x until end of repl rectangle */
1285 	x = p->r.min.x;
1286 	n = dx;
1287 	if(n > p->img->r.max.x - x)
1288 		n = p->img->r.max.x - x;
1289 
1290 	r = p->bytermin + y*p->bwidth;
1291 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1292 	bits = *r++;
1293 	nbits = 8;
	/* x is not the first pixel of its byte: shift out the i pixels to its left */
1294 	if(i=x&(npack-1)){
1295 DBG print("throwaway %d...", i);
1296 		bits <<= depth*i;
1297 		nbits -= depth*i;
1298 	}
1299 	for(i=0; i<n; i++){
		/* current byte used up: fetch the next one */
1300 		if(nbits == 0){
1301 DBG print("(%.2ux)...", *r);
1302 			bits = *r++;
1303 			nbits = 8;
1304 		}
		/* the top depth bits of bits are the current pixel */
1305 		*w++ = repl[bits>>sh];
1306 DBG print("bit %x...", repl[bits>>sh]);
1307 		bits <<= depth;
1308 		nbits -= depth;
1309 	}
1310 	dx -= n;
1311 	if(dx == 0)
1312 		return b;
1313 
	/* more was asked for than the image had to the right of x, so pass 1 ended at the image's right edge */
1314 	assert(x+i == p->img->r.max.x);
1315 
1316 	/* copy from beginning of repl rectangle until where we were before. */
1317 	x = p->img->r.min.x;
1318 	n = dx;
1319 	if(n > p->r.min.x - x)
1320 		n = p->r.min.x - x;
1321 
1322 	r = p->bytey0s + y*p->bwidth;
1323 DBG print("x=%d r=%p...", x, r);
1324 	bits = *r++;
1325 	nbits = 8;
1326 	if(i=x&(npack-1)){
1327 		bits <<= depth*i;
1328 		nbits -= depth*i;
1329 	}
1330 DBG print("nbits=%d...", nbits);
1331 	for(i=0; i<n; i++){
1332 		if(nbits == 0){
1333 			bits = *r++;
1334 			nbits = 8;
1335 		}
1336 		*w++ = repl[bits>>sh];
1337 DBG print("bit %x...", repl[bits>>sh]);
1338 		bits <<= depth;
1339 		nbits -= depth;
1340 DBG print("bits %x nbits %d...", bits, nbits);
1341 	}
1342 	dx -= n;
1343 	if(dx == 0)
1344 		return b;
1345 
1346 	assert(dx > 0);
1347 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
	/* ow trails w through buf, so bytes written here are themselves copied again later */
1348 	ow = buf;
1349 	while(dx--)
1350 		*w++ = *ow++;
1351 
1352 	return b;
1353 }
1354 #undef DBG
1355 
1356 #define DBG if(0)
1357 static void
1358 writenbit(Param *p, uchar *w, Buffer src)
1359 {
1360 	uchar *r;
1361 	u32int bits;
1362 	int i, sh, depth, npack, nbits, x, ex;
1363 
1364 	assert(src.grey != nil && src.delta == 1);
1365 
1366 	x = p->r.min.x;
1367 	ex = x+p->dx;
1368 	depth = p->img->depth;
1369 	npack = 8/depth;
1370 
1371 	i=x&(npack-1);
1372 	bits = i ? (*w >> (8-depth*i)) : 0;
1373 	nbits = depth*i;
1374 	sh = 8-depth;
1375 	r = src.grey;
1376 
1377 	for(; x<ex; x++){
1378 		bits <<= depth;
1379 DBG print(" %x", *r);
1380 		bits |= (*r++ >> sh);
1381 		nbits += depth;
1382 		if(nbits == 8){
1383 			*w++ = bits;
1384 			nbits = 0;
1385 		}
1386 	}
1387 
1388 	if(nbits){
1389 		sh = 8-nbits;
1390 		bits <<= sh;
1391 		bits |= *w & ((1<<sh)-1);
1392 		*w = bits;
1393 	}
1394 DBG print("\n");
1395 	return;
1396 }
1397 #undef DBG
1398 
1399 static Buffer
1400 readcmap(Param *p, uchar *buf, int y)
1401 {
1402 	Buffer b;
1403 	int a, convgrey, copyalpha, dx, i, m;
1404 	uchar *q, *cmap, *begin, *end, *r, *w;
1405 
1406 	memset(&b, 0, sizeof b);
1407 	begin = p->bytey0s + y*p->bwidth;
1408 	r = p->bytermin + y*p->bwidth;
1409 	end = p->bytey0e + y*p->bwidth;
1410 	cmap = p->img->cmap->cmap2rgb;
1411 	convgrey = p->convgrey;
1412 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1413 
1414 	w = buf;
1415 	dx = p->dx;
1416 	if(copyalpha){
1417 		b.alpha = buf++;
1418 		a = p->img->shift[CAlpha]/8;
1419 		m = p->img->shift[CMap]/8;
1420 		for(i=0; i<dx; i++){
1421 			*w++ = r[a];
1422 			q = cmap+r[m]*3;
1423 			r += 2;
1424 			if(r == end)
1425 				r = begin;
1426 			if(convgrey){
1427 				*w++ = RGB2K(q[0], q[1], q[2]);
1428 			}else{
1429 				*w++ = q[2];	/* blue */
1430 				*w++ = q[1];	/* green */
1431 				*w++ = q[0];	/* red */
1432 			}
1433 		}
1434 	}else{
1435 		b.alpha = &ones;
1436 		for(i=0; i<dx; i++){
1437 			q = cmap+*r++*3;
1438 			if(r == end)
1439 				r = begin;
1440 			if(convgrey){
1441 				*w++ = RGB2K(q[0], q[1], q[2]);
1442 			}else{
1443 				*w++ = q[2];	/* blue */
1444 				*w++ = q[1];	/* green */
1445 				*w++ = q[0];	/* red */
1446 			}
1447 		}
1448 	}
1449 
1450 	b.rgba = (u32int*)(buf-copyalpha);
1451 
1452 	if(convgrey){
1453 		b.grey = buf;
1454 		b.red = b.blu = b.grn = buf;
1455 		b.delta = 1+copyalpha;
1456 	}else{
1457 		b.blu = buf;
1458 		b.grn = buf+1;
1459 		b.red = buf+2;
1460 		b.grey = nil;
1461 		b.delta = 3+copyalpha;
1462 	}
1463 	return b;
1464 }
1465 
1466 static void
1467 writecmap(Param *p, uchar *w, Buffer src)
1468 {
1469 	uchar *cmap, *red, *grn, *blu;
1470 	int i, dx, delta;
1471 
1472 	cmap = p->img->cmap->rgb2cmap;
1473 	
1474 	delta = src.delta;
1475 	red= src.red;
1476 	grn = src.grn;
1477 	blu = src.blu;
1478 
1479 	dx = p->dx;
1480 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1481 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1482 }
1483 
1484 #define DBG if(drawdebug)
1485 static Buffer
1486 readbyte(Param *p, uchar *buf, int y)
1487 {
1488 	Buffer b;
1489 	Memimage *img;
1490 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1491 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1492 	uchar ured, ugrn, ublu;
1493 	u32int u;
1494 
1495 	img = p->img;
1496 	begin = p->bytey0s + y*p->bwidth;
1497 	r = p->bytermin + y*p->bwidth;
1498 	end = p->bytey0e + y*p->bwidth;
1499 
1500 	w = buf;
1501 	dx = p->dx;
1502 	nb = img->depth/8;
1503 
1504 	convgrey = p->convgrey;	/* convert rgb to grey */
1505 	isgrey = img->flags&Fgrey;
1506 	alphaonly = p->alphaonly;
1507 	copyalpha = (img->flags&Falpha) ? 1 : 0;
1508 
1509 	/* if we can, avoid processing everything */
1510 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1511 		memset(&b, 0, sizeof b);
1512 		if(p->needbuf){
1513 			memmove(buf, r, dx*nb);
1514 			r = buf;
1515 		}
1516 		b.rgba = (u32int*)r;
1517 		if(copyalpha)
1518 			b.alpha = r+img->shift[CAlpha]/8;
1519 		else
1520 			b.alpha = &ones;
1521 		if(isgrey){
1522 			b.grey = r+img->shift[CGrey]/8;
1523 			b.red = b.grn = b.blu = b.grey;
1524 		}else{
1525 			b.red = r+img->shift[CRed]/8;
1526 			b.grn = r+img->shift[CGreen]/8;
1527 			b.blu = r+img->shift[CBlue]/8;
1528 		}
1529 		b.delta = nb;
1530 		return b;
1531 	}
1532 
1533 	rrepl = replbit[img->nbits[CRed]];
1534 	grepl = replbit[img->nbits[CGreen]];
1535 	brepl = replbit[img->nbits[CBlue]];
1536 	arepl = replbit[img->nbits[CAlpha]];
1537 	krepl = replbit[img->nbits[CGrey]];
1538 
1539 	for(i=0; i<dx; i++){
1540 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1541 		if(copyalpha)
1542 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1543 
1544 		if(isgrey)
1545 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1546 		else if(!alphaonly){
1547 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1548 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1549 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1550 			if(convgrey){
1551 				*w++ = RGB2K(ured, ugrn, ublu);
1552 			}else{
1553 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1554 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1555 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1556 			}
1557 		}
1558 		r += nb;
1559 		if(r == end)
1560 			r = begin;
1561 	}
1562 	
1563 	b.alpha = copyalpha ? buf : &ones;
1564 	b.rgba = (u32int*)buf;
1565 	if(alphaonly){
1566 		b.red = b.grn = b.blu = b.grey = nil;
1567 		if(!copyalpha)
1568 			b.rgba = nil;
1569 		b.delta = 1;
1570 	}else if(isgrey || convgrey){
1571 		b.grey = buf+copyalpha;
1572 		b.red = b.grn = b.blu = buf+copyalpha;
1573 		b.delta = copyalpha+1;
1574 	}else{
1575 		b.blu = buf+copyalpha;
1576 		b.grn = buf+copyalpha+1;
1577 		b.grey = nil;
1578 		b.red = buf+copyalpha+2;
1579 		b.delta = copyalpha+3;
1580 	}
1581 	return b;
1582 }
1583 #undef DBG
1584 
1585 #define DBG if(drawdebug)
1586 static void
1587 writebyte(Param *p, uchar *w, Buffer src)
1588 {
1589 	Memimage *img;
1590 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1591 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1592 	u32int u, mask;
1593 
1594 	img = p->img;
1595 
1596 	red = src.red;
1597 	grn = src.grn;
1598 	blu = src.blu;
1599 	alpha = src.alpha;
1600 	delta = src.delta;
1601 	grey = src.grey;
1602 	dx = p->dx;
1603 
1604 	nb = img->depth/8;
1605 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1606 
1607 	isalpha = img->flags&Falpha;
1608 	isgrey = img->flags&Fgrey;
1609 	adelta = src.delta;
1610 
1611 	if(isalpha && (alpha == nil || alpha == &ones)){
1612 		ff = 0xFF;
1613 		alpha = &ff;
1614 		adelta = 0;
1615 	}
1616 
1617 	for(i=0; i<dx; i++){
1618 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1619 DBG print("u %.8lux...", u);
1620 		u &= mask;
1621 DBG print("&mask %.8lux...", u);
1622 		if(isgrey){
1623 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1624 DBG print("|grey %.8lux...", u);
1625 			grey += delta;
1626 		}else{
1627 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1628 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1629 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1630 			red += delta;
1631 			grn += delta;
1632 			blu += delta;
1633 DBG print("|rgb %.8lux...", u);
1634 		}
1635 
1636 		if(isalpha){
1637 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1638 			alpha += adelta;
1639 DBG print("|alpha %.8lux...", u);
1640 		}
1641 
1642 		w[0] = u;
1643 		w[1] = u>>8;
1644 		w[2] = u>>16;
1645 		w[3] = u>>24;
1646 DBG print("write back %.8lux...", u);
1647 		w += nb;
1648 	}
1649 }
1650 #undef DBG
1651 
1652 static Readfn*
1653 readfn(Memimage *img)
1654 {
1655 	if(img->depth < 8)
1656 		return readnbit;
1657 	if(img->nbits[CMap] == 8)
1658 		return readcmap;
1659 	return readbyte;
1660 }
1661 
1662 static Readfn*
1663 readalphafn(Memimage *m)
1664 {
1665 	USED(m);
1666 	return readbyte;
1667 }
1668 
1669 static Writefn*
1670 writefn(Memimage *img)
1671 {
1672 	if(img->depth < 8)
1673 		return writenbit;
1674 	if(img->chan == CMAP8)
1675 		return writecmap;
1676 	return writebyte;
1677 }
1678 
1679 static void
1680 nullwrite(Param *p, uchar *s, Buffer b)
1681 {
1682 	USED(p);
1683 	USED(s);
1684 }
1685 
1686 static Buffer
1687 readptr(Param *p, uchar *s, int y)
1688 {
1689 	Buffer b;
1690 	uchar *q;
1691 
1692 	USED(s);
1693 	memset(&b, 0, sizeof b);
1694 	q = p->bytermin + y*p->bwidth;
1695 	b.red = q;	/* ptr to data */
1696 	b.grn = b.blu = b.grey = b.alpha = nil;
1697 	b.rgba = (u32int*)q;
1698 	b.delta = p->img->depth/8;
1699 	return b;
1700 }
1701 
1702 static Buffer
1703 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1704 {
1705 	USED(i);
1706 	USED(o);
1707 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1708 	return bdst;
1709 }
1710 
1711 static Buffer
1712 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1713 {
1714 	uchar *m, *r, *w, *ew;
1715 
1716 	USED(i);
1717 	USED(o);
1718 	m = bmask.grey;
1719 	w = bdst.red;
1720 	r = bsrc.red;
1721 	ew = w+dx;
1722 	for(; w < ew; w++,r++)
1723 		if(*m++)
1724 			*w = *r;
1725 	return bdst;	/* not used */
1726 }
1727 
1728 static Buffer
1729 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1730 {
1731 	uchar *m;
1732 	ushort *r, *w, *ew;
1733 
1734 	USED(i);
1735 	USED(o);
1736 	m = bmask.grey;
1737 	w = (ushort*)bdst.red;
1738 	r = (ushort*)bsrc.red;
1739 	ew = w+dx;
1740 	for(; w < ew; w++,r++)
1741 		if(*m++)
1742 			*w = *r;
1743 	return bdst;	/* not used */
1744 }
1745 
1746 static Buffer
1747 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1748 {
1749 	uchar *m;
1750 	uchar *r, *w, *ew;
1751 
1752 	USED(i);
1753 	USED(o);
1754 	m = bmask.grey;
1755 	w = bdst.red;
1756 	r = bsrc.red;
1757 	ew = w+dx*3;
1758 	while(w < ew){
1759 		if(*m++){
1760 			*w++ = *r++;
1761 			*w++ = *r++;
1762 			*w++ = *r++;
1763 		}else{
1764 			w += 3;
1765 			r += 3;
1766 		}
1767 	}
1768 	return bdst;	/* not used */
1769 }
1770 
1771 static Buffer
1772 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1773 {
1774 	uchar *m;
1775 	u32int *r, *w, *ew;
1776 
1777 	USED(i);
1778 	USED(o);
1779 	m = bmask.grey;
1780 	w = (u32int*)bdst.red;
1781 	r = (u32int*)bsrc.red;
1782 	ew = w+dx;
1783 	for(; w < ew; w++,r++)
1784 		if(*m++)
1785 			*w = *r;
1786 	return bdst;	/* not used */
1787 }
1788 
1789 static Buffer
1790 genconv(Param *p, uchar *buf, int y)
1791 {
1792 	Buffer b;
1793 	int nb;
1794 	uchar *r, *w, *ew;
1795 
1796 	/* read from source into RGB format in convbuf */
1797 	b = p->convreadcall(p, p->convbuf, y);
1798 
1799 	/* write RGB format into dst format in buf */
1800 	p->convwritecall(p->convdpar, buf, b);
1801 
1802 	if(p->convdx){
1803 		nb = p->convdpar->img->depth/8;
1804 		r = buf;
1805 		w = buf+nb*p->dx;
1806 		ew = buf+nb*p->convdx;
1807 		while(w<ew)
1808 			*w++ = *r++;
1809 	}
1810 
1811 	b.red = buf;
1812 	b.blu = b.grn = b.grey = b.alpha = nil;
1813 	b.rgba = (u32int*)buf;
1814 	b.delta = 0;
1815 	
1816 	return b;
1817 }
1818 
1819 static Readfn*
1820 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
1821 {
1822 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1823 /*if(drawdebug) iprint("readptr..."); */
1824 		return readptr;
1825 	}
1826 
1827 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1828 		/* cheat because we know the replicated value is exactly the color map entry. */
1829 /*if(drawdebug) iprint("Readnbit..."); */
1830 		return readnbit;
1831 	}
1832 
1833 	spar->convreadcall = readfn(src);
1834 	spar->convwritecall = writefn(dst);
1835 	spar->convdpar = dpar;
1836 
1837 	/* allocate a conversion buffer */
1838 	spar->convbufoff = ndrawbuf;
1839 	ndrawbuf += spar->dx*4;
1840 
1841 	if(spar->dx > Dx(spar->img->r)){
1842 		spar->convdx = spar->dx;
1843 		spar->dx = Dx(spar->img->r);
1844 	}
1845 
1846 /*if(drawdebug) iprint("genconv..."); */
1847 	return genconv;
1848 }
1849 
1850 /*
1851  * Do NOT call this directly.  pixelbits is a wrapper
1852  * around this that fetches the bits from the X server
1853  * when necessary.
1854  */
1855 u32int
1856 _pixelbits(Memimage *i, Point pt)
1857 {
1858 	uchar *p;
1859 	u32int val;
1860 	int off, bpp, npack;
1861 
1862 	val = 0;
1863 	p = byteaddr(i, pt);
1864 	switch(bpp=i->depth){
1865 	case 1:
1866 	case 2:
1867 	case 4:
1868 		npack = 8/bpp;
1869 		off = pt.x%npack;
1870 		val = p[0] >> bpp*(npack-1-off);
1871 		val &= (1<<bpp)-1;
1872 		break;
1873 	case 8:
1874 		val = p[0];
1875 		break;
1876 	case 16:
1877 		val = p[0]|(p[1]<<8);
1878 		break;
1879 	case 24:
1880 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1881 		break;
1882 	case 32:
1883 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1884 		break;
1885 	}
1886 	while(bpp<32){
1887 		val |= val<<bpp;
1888 		bpp *= 2;
1889 	}
1890 	return val;
1891 }
1892 
1893 static Calcfn*
1894 boolcopyfn(Memimage *img, Memimage *mask)
1895 {
1896 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1897 		return boolmemmove;
1898 
1899 	switch(img->depth){
1900 	case 8:
1901 		return boolcopy8;
1902 	case 16:
1903 		return boolcopy16;
1904 	case 24:
1905 		return boolcopy24;
1906 	case 32:
1907 		return boolcopy32;
1908 	default:
1909 		assert(0 /* boolcopyfn */);
1910 	}
1911 	return 0;
1912 }
1913 
1914 /*
1915  * Optimized draw for filling and scrolling; uses memset and memmove.
1916  */
1917 static void
1918 memsets(void *vp, ushort val, int n)
1919 {
1920 	ushort *p, *ep;
1921 
1922 	p = vp;
1923 	ep = p+n;
1924 	while(p<ep)
1925 		*p++ = val;
1926 }
1927 
1928 static void
1929 memsetl(void *vp, u32int val, int n)
1930 {
1931 	u32int *p, *ep;
1932 
1933 	p = vp;
1934 	ep = p+n;
1935 	while(p<ep)
1936 		*p++ = val;
1937 }
1938 
1939 static void
1940 memset24(void *vp, u32int val, int n)
1941 {
1942 	uchar *p, *ep;
1943 	uchar a,b,c;
1944 
1945 	p = vp;
1946 	ep = p+3*n;
1947 	a = val;
1948 	b = val>>8;
1949 	c = val>>16;
1950 	while(p<ep){
1951 		*p++ = a;
1952 		*p++ = b;
1953 		*p++ = c;
1954 	}
1955 }
1956 
1957 u32int
1958 _imgtorgba(Memimage *img, u32int val)
1959 {
1960 	uchar r, g, b, a;
1961 	int nb, ov, v;
1962 	u32int chan;
1963 	uchar *p;
1964 
1965 	a = 0xFF;
1966 	r = g = b = 0xAA;	/* garbage */
1967 	for(chan=img->chan; chan; chan>>=8){
1968 		nb = NBITS(chan);
1969 		ov = v = val&((1<<nb)-1);
1970 		val >>= nb;
1971 
1972 		while(nb < 8){
1973 			v |= v<<nb;
1974 			nb *= 2;
1975 		}
1976 		v >>= (nb-8);
1977 
1978 		switch(TYPE(chan)){
1979 		case CRed:
1980 			r = v;
1981 			break;
1982 		case CGreen:
1983 			g = v;
1984 			break;
1985 		case CBlue:
1986 			b = v;
1987 			break;
1988 		case CAlpha:
1989 			a = v;
1990 			break;
1991 		case CGrey:
1992 			r = g = b = v;
1993 			break;
1994 		case CMap:
1995 			p = img->cmap->cmap2rgb+3*ov;
1996 			r = *p++;
1997 			g = *p++;	
1998 			b = *p;
1999 			break;
2000 		}
2001 	}
2002 	return (r<<24)|(g<<16)|(b<<8)|a;	
2003 }
2004 
2005 u32int
2006 _rgbatoimg(Memimage *img, u32int rgba)
2007 {
2008 	u32int chan;
2009 	int d, nb;
2010 	u32int v;
2011 	uchar *p, r, g, b, a, m;
2012 
2013 	v = 0;
2014 	r = rgba>>24;
2015 	g = rgba>>16;
2016 	b = rgba>>8;
2017 	a = rgba;
2018 	d = 0;
2019 	for(chan=img->chan; chan; chan>>=8){
2020 		nb = NBITS(chan);
2021 		switch(TYPE(chan)){
2022 		case CRed:
2023 			v |= (r>>(8-nb))<<d;
2024 			break;
2025 		case CGreen:
2026 			v |= (g>>(8-nb))<<d;
2027 			break;
2028 		case CBlue:
2029 			v |= (b>>(8-nb))<<d;
2030 			break;
2031 		case CAlpha:
2032 			v |= (a>>(8-nb))<<d;
2033 			break;
2034 		case CMap:
2035 			p = img->cmap->rgb2cmap;
2036 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2037 			v |= (m>>(8-nb))<<d;
2038 			break;
2039 		case CGrey:
2040 			m = RGB2K(r,g,b);
2041 			v |= (m>>(8-nb))<<d;
2042 			break;
2043 		}
2044 		d += nb;
2045 	}
2046 /*	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v); */
2047 	return v;
2048 }
2049 
2050 #define DBG if(0)
/*
 * Try to perform the draw described by par with one of three
 * special-case loops instead of the general machinery.
 * Returns 1 when the draw was carried out here, 0 when none of the
 * cases applied and nothing was drawn.
 *
 * The cases, tested in this order:
 *  1. solid fill: simple source, simple full mask, opaque source
 *     colour, op S or SoverD;
 *  2. plain copy: simple full mask, non-replicated source of depth >= 8
 *     with the same channels as the destination and no alpha, op S or
 *     SoverD;
 *  3. 1-bit source, mask and destination, none of them simple or
 *     replicated, whose starting x coordinates share the same bit
 *     position within a byte.
 */
2051 static int
2052 memoptdraw(Memdrawparam *par)
2053 {
2054 	int m, y, dy, dx, op;
2055 	u32int v;
2056 	Memimage *src;
2057 	Memimage *dst;
2058 
2059 	dx = Dx(par->r);
2060 	dy = Dy(par->r);
2061 	src = par->src;
2062 	dst = par->dst;
2063 	op = par->op;
2064 
2065 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2066 	/*
2067 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2068 	 * destination format and just replicate with memset.
2069 	 */
2070 	m = Simplesrc|Simplemask|Fullmask;
2071 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2072 		uchar *dp, p[4];
2073 		int d, dwid, ppb, np, nb;
2074 		uchar lm, rm;
2075 
2076 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
		/* dst->width is counted in u32int words; dwid is the line stride in bytes */
2077 		dwid = dst->width*sizeof(u32int);
2078 		dp = byteaddr(dst, par->r.min);
		/* par->sdval is the value stored into the destination; presumably the source colour already converted to dst's format -- TODO confirm */
2079 		v = par->sdval;
2080 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2081 		switch(dst->depth){
2082 		case 1:
2083 		case 2:
2084 		case 4:
			/* replicate the pixel value until it fills a byte */
2085 			for(d=dst->depth; d<8; d*=2)
2086 				v |= (v<<d);
2087 			ppb = 8/dst->depth;	/* pixels per byte */
2088 			m = ppb-1;
2089 			/* left edge */
2090 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2091 			dx -= (ppb-np);
2092 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2093 			lm = (1<<nb)-1;
2094 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);	
2095 
2096 			/* right edge */
2097 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2098 			dx -= np;
2099 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2100 			rm = ~((1<<nb)-1);
2101 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);	
2102 
2103 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2104 			/* lm, rm are masks that are 1 where we should touch the bits */
			/* x ^= (v ^ x) & mask sets the masked bits of x to those of v and keeps the rest */
2105 			if(dx < 0){	/* just one byte */
2106 				lm &= rm;
2107 				for(y=0; y<dy; y++, dp+=dwid)
2108 					*dp ^= (v ^ *dp) & lm;
2109 			}else if(dx == 0){	/* no full bytes */
				/* dp is advanced once inside the loop, so the per-line stride shrinks by one */
2110 				if(lm)
2111 					dwid--;
2112 
2113 				for(y=0; y<dy; y++, dp+=dwid){
2114 					if(lm){
2115 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2116 						*dp ^= (v ^ *dp) & lm;
2117 						dp++;
2118 					}
2119 					*dp ^= (v ^ *dp) & rm;
2120 				}
2121 			}else{		/* full bytes in middle */
				/* dx now counts whole bytes between the two edge bytes */
2122 				dx /= ppb;
2123 				if(lm)
2124 					dwid--;
2125 				dwid -= dx;
2126 
2127 				for(y=0; y<dy; y++, dp+=dwid){
2128 					if(lm){
2129 						*dp ^= (v ^ *dp) & lm;
2130 						dp++;
2131 					}
2132 					memset(dp, v, dx);
2133 					dp += dx;
2134 					*dp ^= (v ^ *dp) & rm;
2135 				}
2136 			}
2137 			return 1;
2138 		case 8:
2139 			for(y=0; y<dy; y++, dp+=dwid)
2140 				memset(dp, v, dx);
2141 			return 1;
2142 		case 16:
2143 			p[0] = v;		/* make little endian */
2144 			p[1] = v>>8;
			/* reload through p so the stored ushort has v's low byte first in memory */
2145 			v = *(ushort*)p;
2146 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2147 	dp, dx, dy, dwid);
2148 			for(y=0; y<dy; y++, dp+=dwid)
2149 				memsets(dp, v, dx);
2150 			return 1;
2151 		case 24:
2152 			for(y=0; y<dy; y++, dp+=dwid)
2153 				memset24(dp, v, dx);
2154 			return 1;
2155 		case 32:
2156 			p[0] = v;		/* make little endian */
2157 			p[1] = v>>8;
2158 			p[2] = v>>16;
2159 			p[3] = v>>24;
2160 			v = *(u32int*)p;
2161 			for(y=0; y<dy; y++, dp+=dwid)
2162 				memsetl(dp, v, dx);
2163 			return 1;
2164 		default:
2165 			assert(0 /* bad dest depth in memoptdraw */);
2166 		}
2167 	}
2168 
2169 	/*
2170 	 * If no source alpha, an opaque mask, we can just copy the
2171 	 * source onto the destination.  If the channels are the same and
2172 	 * the source is not replicated, memmove suffices.
2173 	 */
2174 	m = Simplemask|Fullmask;
2175 	if((par->state&(m|Replsrc))==m && src->depth >= 8 
2176 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2177 		uchar *sp, *dp;
2178 		long swid, dwid, nb;
2179 		int dir;
2180 
		/* same storage and the destination starts after the source: copy the lines last to first */
2181 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2182 			dir = -1;
2183 		else
2184 			dir = 1;
2185 
2186 		swid = src->width*sizeof(u32int);
2187 		dwid = dst->width*sizeof(u32int);
2188 		sp = byteaddr(src, par->sr.min);
2189 		dp = byteaddr(dst, par->r.min);
2190 		if(dir == -1){
			/* start on the last line and step backwards */
2191 			sp += (dy-1)*swid;
2192 			dp += (dy-1)*dwid;
2193 			swid = -swid;
2194 			dwid = -dwid;
2195 		}
		/* bytes per line of the rectangle */
2196 		nb = (dx*src->depth)/8;
2197 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2198 			memmove(dp, sp, nb);
2199 		return 1;
2200 	}
2201 
2202 	/*
2203 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2204 	 * they're all bit aligned, we can just use bit operators.  This happens
2205 	 * when we're manipulating boolean masks, e.g. in the arc code.
2206 	 */
2207 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0 
2208 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1 
2209 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2210 		uchar *sp, *dp, *mp;
2211 		uchar lm, rm;
2212 		long swid, dwid, mwid;
2213 		int i, x, dir;
2214 
2215 		sp = byteaddr(src, par->sr.min);
2216 		dp = byteaddr(dst, par->r.min);
2217 		mp = byteaddr(par->mask, par->mr.min);
2218 		swid = src->width*sizeof(u32int);
2219 		dwid = dst->width*sizeof(u32int);
2220 		mwid = par->mask->width*sizeof(u32int);
2221 
2222 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2223 			dir = -1;
2224 		}else
2225 			dir = 1;
2226 
		/* lm selects the bits of the first byte from r.min.x rightwards; rm the bits of the last byte left of r.max.x */
2227 		lm = 0xFF>>(par->r.min.x&7);
2228 		rm = 0xFF<<(8-(par->r.max.x&7));
		/* remove the bits covered by the two edge bytes; what is left is whole bytes */
2229 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2230 
2231 		if(dx < 0){	/* one byte wide */
2232 			lm &= rm;
2233 			if(dir == -1){
2234 				dp += dwid*(dy-1);
2235 				sp += swid*(dy-1);
2236 				mp += mwid*(dy-1);
2237 				dwid = -dwid;
2238 				swid = -swid;
2239 				mwid = -mwid;
2240 			}
2241 			for(y=0; y<dy; y++){
				/* take the source bit wherever both the mask and the edge mask are 1 */
2242 				*dp ^= (*dp ^ *sp) & *mp & lm;
2243 				dp += dwid;
2244 				sp += swid;
2245 				mp += mwid;
2246 			}
2247 			return 1;
2248 		}
2249 
2250 		dx /= 8;
2251 		if(dir == 1){
			/* i bytes are consumed per line; the strides become the distance from line end to next line start */
2252 			i = (lm!=0)+dx+(rm!=0);
2253 			mwid -= i;
2254 			swid -= i;
2255 			dwid -= i;
2256 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2257 				if(lm){
2258 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2259 					dp++;
2260 				}
2261 				for(x=0; x<dx; x++){
2262 					*dp ^= (*dp ^ *sp++) & *mp++;
2263 					dp++;
2264 				}
2265 				if(rm){
2266 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2267 					dp++;
2268 				}
2269 			}
2270 			return 1;
2271 		}else{
2272 		/* dir == -1 */
			/* start at the last byte of the last line and walk right to left, bottom to top */
2273 			i = (lm!=0)+dx+(rm!=0);
2274 			dp += dwid*(dy-1)+i-1;
2275 			sp += swid*(dy-1)+i-1;
2276 			mp += mwid*(dy-1)+i-1;
2277 			dwid = -dwid+i;
2278 			swid = -swid+i;
2279 			mwid = -mwid+i;
2280 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2281 				if(rm){
2282 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2283 					dp--;
2284 				}
2285 				for(x=0; x<dx; x++){
2286 					*dp ^= (*dp ^ *sp--) & *mp--;
2287 					dp--;
2288 				}
2289 				if(lm){
2290 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2291 					dp--;
2292 				}
2293 			}
2294 		}
2295 		return 1;
2296 	}
2297 	return 0;	
2298 }
2299 #undef DBG
2300 
2301 /*
2302  * Boolean character drawing.
2303  * Solid opaque color through a 1-bit greyscale mask.
2304  */
/*
 * Returns 0 without drawing unless all of the following hold: the
 * state has Replsrc, Simplesrc and Fullsrc set and Replmask clear,
 * the mask is 1 bit deep, the destination is at least 8 bits deep,
 * source and destination do not share data, and op is SoverD.
 * Otherwise, for every 1 bit of the mask inside par->mr, the matching
 * destination pixel in par->r is set to par->sdval, and 1 is returned.
 */
2305 #define DBG if(0)
2306 static int
2307 chardraw(Memdrawparam *par)
2308 {
2309 	u32int bits;
2310 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2311 	u32int v, maskwid, dstwid;
2312 	uchar *wp, *rp, *q, *wc;
2313 	ushort *ws;
2314 	u32int *wl;
2315 	uchar sp[4];
2316 	Rectangle r, mr;
2317 	Memimage *mask, *src, *dst;
2318 
2319 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2320 		par->mask->flags, par->mask->depth, par->src->flags, 
2321 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2322 
2323 	mask = par->mask;
2324 	src = par->src;
2325 	dst = par->dst;
2326 	r = par->r;
2327 	mr = par->mr;
2328 	op = par->op;
2329 
2330 	if((par->state&(Replsrc|Simplesrc|Fullsrc|Replmask)) != (Replsrc|Simplesrc|Fullsrc)
2331 	|| mask->depth != 1 || dst->depth<8 || dst->data==src->data
2332 	|| op != SoverD)
2333 		return 0;
2334 
2335 /*if(drawdebug) iprint("chardraw..."); */
2336 
2337 	depth = mask->depth;
	/* line strides in bytes; width is counted in u32int words */
2338 	maskwid = mask->width*sizeof(u32int);
2339 	rp = byteaddr(mask, mr.min);
2340 	npack = 8/depth;
	/* bsh: number of mask bits to the left of mr.min.x in its byte */
2341 	bsh = (mr.min.x % npack) * depth;
2342 
2343 	wp = byteaddr(dst, r.min);
2344 	dstwid = dst->width*sizeof(u32int);
2345 DBG print("bsh %d\n", bsh);
2346 	dy = Dy(r);
2347 	dx = Dx(r);
2348 
2349 	ddepth = dst->depth;
2350 
2351 	/*
2352 	 * for loop counts from bsh to bsh+dx
2353 	 *
2354 	 * we want the bottom bits to be the amount
2355 	 * to shift the pixels down, so for n≡0 (mod 8) we want 
2356 	 * bottom bits 7.  for n≡1, 6, etc.
2357 	 * the bits come from -n-1.
2358 	 */
2359 
2360 	bx = -bsh-1;
2361 	ex = -bsh-1-dx;
	/* bits is loaded inside the loops before it is used; SET silences the used-before-set warning */
2362 	SET(bits);
2363 	v = par->sdval;
2364 
2365 	/* make little endian */
2366 	sp[0] = v;
2367 	sp[1] = v>>8;
2368 	sp[2] = v>>16;
2369 	sp[3] = v>>24;
2370 
2371 /*print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]); */
2372 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2373 		q = rp;
		/* starting mid-byte: the loops below only fetch at byte boundaries, so load the first byte here */
2374 		if(bsh)
2375 			bits = *q++;
2376 		switch(ddepth){
2377 		case 8:
2378 /*if(drawdebug) iprint("8loop..."); */
2379 			wc = wp;
2380 			for(x=bx; x>ex; x--, wc++){
				/* i is the shift that brings the current mask bit to bit 0; 7 marks the first pixel of a byte */
2381 				i = x&7;
2382 				if(i == 8-1)
2383 					bits = *q++;
2384 DBG print("bits %lux sh %d...", bits, i);
2385 				if((bits>>i)&1)
2386 					*wc = v;
2387 			}
2388 			break;
2389 		case 16:
2390 			ws = (ushort*)wp;
			/* the 16-bit value whose bytes in memory are sp[0], sp[1] */
2391 			v = *(ushort*)sp;
2392 			for(x=bx; x>ex; x--, ws++){
2393 				i = x&7;
2394 				if(i == 8-1)
2395 					bits = *q++;
2396 DBG print("bits %lux sh %d...", bits, i);
2397 				if((bits>>i)&1)
2398 					*ws = v;
2399 			}
2400 			break;
2401 		case 24:
2402 			wc = wp;
2403 			for(x=bx; x>ex; x--, wc+=3){
2404 				i = x&7;
2405 				if(i == 8-1)
2406 					bits = *q++;
2407 DBG print("bits %lux sh %d...", bits, i);
2408 				if((bits>>i)&1){
2409 					wc[0] = sp[0];
2410 					wc[1] = sp[1];
2411 					wc[2] = sp[2];
2412 				}
2413 			}
2414 			break;
2415 		case 32:
2416 			wl = (u32int*)wp;
			/* the 32-bit value whose bytes in memory are sp[0..3] */
2417 			v = *(u32int*)sp;
2418 			for(x=bx; x>ex; x--, wl++){
2419 				i = x&7;
2420 				if(i == 8-1)
2421 					bits = *q++;
2422 DBG iprint("bits %lux sh %d...", bits, i);
2423 				if((bits>>i)&1)
2424 					*wl = v;
2425 			}
2426 			break;
2427 		}
2428 	}
2429 
2430 DBG print("\n");	
2431 	return 1;	
2432 }
2433 #undef DBG
2434 
2435 
2436 /*
2437  * Fill entire byte with replicated (if necessary) copy of source pixel,
2438  * assuming destination ldepth is >= source ldepth.
2439  *
2440  * This code is just plain wrong for >8bpp.
2441  *
2442 u32int
2443 membyteval(Memimage *src)
2444 {
2445 	int i, val, bpp;
2446 	uchar uc;
2447 
2448 	unloadmemimage(src, src->r, &uc, 1);
2449 	bpp = src->depth;
2450 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2451 	uc &= ~(0xFF>>bpp);
2452 	* pixel value is now in high part of byte. repeat throughout byte 
2453 	val = uc;
2454 	for(i=bpp; i<8; i<<=1)
2455 		val |= val>>i;
2456 	return val;
2457 }
2458  * 
2459  */
2460 
2461 void
2462 _memfillcolor(Memimage *i, u32int val)
2463 {
2464 	u32int bits;
2465 	int d, y;
2466 	uchar p[4];
2467 
2468 	if(val == DNofill)
2469 		return;
2470 
2471 	bits = _rgbatoimg(i, val);
2472 	switch(i->depth){
2473 	case 24:	/* 24-bit images suck */
2474 		for(y=i->r.min.y; y<i->r.max.y; y++)
2475 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2476 		break;
2477 	default:	/* 1, 2, 4, 8, 16, 32 */
2478 		for(d=i->depth; d<32; d*=2)
2479 			bits = (bits << d) | bits;
2480 		p[0] = bits;		/* make little endian */
2481 		p[1] = bits>>8;
2482 		p[2] = bits>>16;
2483 		p[3] = bits>>24;
2484 		bits = *(u32int*)p;
2485 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2486 		break;
2487 	}
2488 }
2489