/**************************************************************************
 *
 * Copyright 2006 Thomas Hellstrom. 
 * Copyright (c) Intel Corp. 2007.
 * All Rights Reserved.
 *
 * Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 * develop this driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/time.h>
#include <picturestr.h>
#include <psb_reg.h>
#include "psb_accel.h"
#include "psb_driver.h"

/* NOTE(review): presumably a bounce buffer size in bytes; not used in the code visible here. */
#define PSB_EXA_BOUNCE_SIZE (512*1024)
/* Non-repeating pictures with fewer pixels (width * height) are not composited by hardware. */
#define PSB_EXA_MIN_COMPOSITE 64       /* Needs tuning */
/* Copies are refused when the source or destination pixmap has fewer pixels than this. */
#define PSB_EXA_MIN_COPY 256	       /* Needs tuning */
/* Threshold for the wrap-around marker comparison in psbAccelWaitMarker(). */
#define PSB_MARKER_WRAP (1 << 24)
/* Busy-wait limit in microseconds before psbEngineHang() is called. */
#define PSB_TIMEOUT_USEC 990000
/* Number of slots in the psbCompFormats lookup table. */
#define PSB_FMT_HASH_SIZE 256
/* Number of rows in the psbFormats table. */
#define PSB_NUM_COMP_FORMATS 9

/* When defined, blits that are exactly 8 pixels wide are emitted as two 4 pixel wide blits. */
#define PSB_FIX_BUG_W8
/* When defined, psbExaCopy() recomputes the copy direction for every blit. */
#define PSB_FIX_BUG_OVERLAP

/* Map a picture format code to a psbCompFormats index in the range 0..255. */
#define PSB_FMT_HASH(arg) (((((arg) >> 1) + (arg)) >> 8) & 0xFF)

/*
 * One slot of the psbCompFormats lookup table. It ties a picture format
 * code to the blitter format codes used when a picture of that format acts
 * as destination, pattern or source surface.
 */
typedef struct _PsbFormat
{
    unsigned pictFormat;	       /* Picture format stored in this slot; lookups compare against it. */
    Bool dstSupported;		       /* Usable as a destination surface. */
    Bool patSupported;		       /* Usable as a pattern surface (checked for repeating pictures). */
    Bool srcSupported;		       /* Usable as a source surface (checked for non-repeating pictures). */
    CARD32 dstFormat;		       /* Format code used for the destination mode. */
    CARD32 patFormat;		       /* Format code used when the picture repeats. */
    CARD32 srcFormat;		       /* Format code used when the picture does not repeat. */
} PsbFormatRec, *PsbFormatPointer;

/*
 * Format lookup table, indexed by PSB_FMT_HASH(pictFormat).
 * NOTE(review): the code that fills this table is not visible here;
 * presumably it is populated from psbFormats.
 */
static PsbFormatRec psbCompFormats[PSB_FMT_HASH_SIZE];

/*
 * Static description of the picture formats known to the blitter.
 * Each row holds the picture format, followed by the DST, PAT and SRC
 * format codes and three 0/1 flags.
 * NOTE(review): the flags presumably mean "supported as dst / pat / src";
 * the consumer of this table is not visible here, so confirm the column
 * order there.
 */
static const unsigned psbFormats[PSB_NUM_COMP_FORMATS][7] = {
    {PICT_a8, 0x00, PSB_2D_PAT_8_ALPHA, PSB_2D_SRC_8_ALPHA, 0, 0, 1},
    {PICT_a4, 0x00, PSB_2D_PAT_4_ALPHA, PSB_2D_SRC_4_ALPHA, 0, 0, 1},
    {PICT_r3g3b2, PSB_2D_DST_332RGB, PSB_2D_PAT_332RGB, PSB_2D_SRC_332RGB, 1,
	1, 1},
    {PICT_a4r4g4b4, PSB_2D_DST_4444ARGB, PSB_2D_PAT_4444ARGB,
	PSB_2D_SRC_4444ARGB, 1, 1, 1},
    {PICT_x1r5g5b5, PSB_2D_DST_555RGB, PSB_2D_PAT_555RGB, PSB_2D_SRC_555RGB,
	1, 1, 1},
    {PICT_a1r5g5b5, PSB_2D_DST_1555ARGB, PSB_2D_PAT_1555ARGB,
	PSB_2D_SRC_1555ARGB, 1, 1, 1},
    {PICT_r5g6b5, PSB_2D_DST_565RGB, PSB_2D_PAT_565RGB, PSB_2D_SRC_565RGB, 1,
	1, 1},
    {PICT_x8r8g8b8, PSB_2D_DST_0888ARGB, PSB_2D_PAT_0888ARGB,
	PSB_2D_SRC_0888ARGB, 1, 1, 1},
    {PICT_a8r8g8b8, PSB_2D_DST_8888ARGB, PSB_2D_PAT_8888ARGB,
	PSB_2D_SRC_8888ARGB, 1, 1, 1}
};

/*
 * Raster operation code for each of the 16 alu values, used for copies
 * (indexed by the alu argument of psbExaPrepareCopy()).
 */
static const int psbCopyROP[] =
    { 0x00, 0x88, 0x44, 0xCC, 0x22, 0xAA, 0x66, 0xEE, 0x11,
    0x99, 0x55, 0xDD, 0x33, 0xBB, 0x77, 0xFF
};
/*
 * Raster operation code for each of the 16 alu values, used for solid
 * fills (indexed by the alu argument of psbExaPrepareSolid()).
 */
static const int psbPatternROP[] =
    { 0x00, 0xA0, 0x50, 0xF0, 0x0A, 0xAA, 0x5A, 0xFA, 0x05,
    0xA5, 0x55, 0xF5, 0x0F, 0xAF, 0x5F, 0xFF
};

/*
 * Return the time elapsed from "then" to "now" in microseconds.
 *
 * Both the seconds and the microseconds fields are taken into account, so
 * the result keeps growing once more than one second has passed. The
 * result is 0 if "now" is earlier than "then", and saturates at the
 * largest unsigned value for very long intervals.
 */
static unsigned
psbTimeDiff(struct timeval *now, struct timeval *then)
{
    const unsigned maxDiff = ~0U;
    long sec = (long)(now->tv_sec - then->tv_sec);
    long usec = (long)(now->tv_usec - then->tv_usec);

    /* Borrow one second when the microsecond part went backwards. */
    if (usec < 0) {
	usec += 1000000;
	sec -= 1;
    }

    if (sec < 0)
	return 0;

    /* Saturate before the multiplication below could overflow. */
    if ((unsigned long)sec >= maxDiff / 1000000U)
	return maxDiff;

    return (unsigned)sec * 1000000U + (unsigned)usec;
}

/*
 * Wait until the value read from PSB_CR_2D_SOCIF is at least "size".
 *
 * The register is polled for up to PSB_TIMEOUT_USEC microseconds. If the
 * requested amount is still not available after one final read,
 * psbEngineHang() is called.
 */
static void
psb2DSlavePortSpace(PsbTwodBufferPtr cb, unsigned size)
{
    PsbDevicePtr pDevice = cb->pDevice;
    struct timeval start, current;
    CARD32 freeSpace;

    freeSpace = PSB_RSGX32(PSB_CR_2D_SOCIF);
    if (freeSpace >= size)
	return;

    if (gettimeofday(&start, NULL))
	FatalError("Gettimeofday error.\n");

    for (;;) {
	freeSpace = PSB_RSGX32(PSB_CR_2D_SOCIF);
	if (gettimeofday(&current, NULL))
	    FatalError("Gettimeofday error.\n");
	if (freeSpace >= size)
	    break;
	if (psbTimeDiff(&current, &start) >= PSB_TIMEOUT_USEC)
	    break;
    }

    /* One last look at the register before giving up. */
    if (freeSpace < size)
	freeSpace = PSB_RSGX32(PSB_CR_2D_SOCIF);

    if (freeSpace < size)
	psbEngineHang(cb->pScrn);
}

/*
 * Write all dwords queued in the 2D command buffer to the slave port and
 * reset the buffer to empty.
 *
 * The data is written in chunks of at most PSB_FLUSH_CHUNK dwords. Before
 * each chunk psb2DSlavePortSpace() waits for room, and after each chunk
 * the last written slave port location is read back.
 */
static void
psbFlushTwodBuffer(PsbTwodBufferPtr cb)
{
    PsbDevicePtr pDevice = cb->pDevice;
    CARD32 *cur = cb->buf;
    int remaining = cb->cur - cb->buf;

    while (remaining != 0) {
	int chunk = (remaining > PSB_FLUSH_CHUNK) ? PSB_FLUSH_CHUNK : remaining;
	int chunkBytes;
	int byteOff;

	remaining -= chunk;
	psb2DSlavePortSpace(cb, chunk);

	/* The slave port is addressed in bytes, four per dword. */
	chunkBytes = chunk << 2;
	for (byteOff = 0; byteOff < chunkBytes; byteOff += 4) {
	    PSB_WSLAVE32(byteOff, *cur++);
	}

	if (chunkBytes)
	    (void)PSB_RSLAVE32(byteOff - 4);
    }

    cb->cur = cb->buf;
}

/*
 * Copy ROP with the pattern acting as planemask: the copy operation for
 * "xRop" is kept where the pattern bits of the code are selected, and the
 * destination is kept elsewhere.
 */

static inline int
psbCopyROP_PM(int xRop)
{
    const int keepDst = PSB_2D_ROP3_DST & ~PSB_2D_ROP3_PAT;
    const int applyRop = psbCopyROP[xRop] & PSB_2D_ROP3_PAT;

    return applyRop | keepDst;
}

/*
 * Source as planemask.
 */

static inline int
psbPatternROP_PM(int xRop)
{
    return (psbCopyROP[xRop] & PSB_2D_ROP3_SRC) | (PSB_2D_ROP3_DST &
	~PSB_2D_ROP3_SRC);
}

/*
 * Helper for bitdepth expansion.
 *
 * Convert a colour component that is "bits" wide to 8 bits.
 * For components of up to 8 bits the value is shifted to the top of the
 * byte and the freed low bits are filled with copies of the component's
 * least significant bit. Components wider than 8 bits are reduced to their
 * 8 most significant bits instead of shifting by a negative amount.
 */

static CARD32
psbBitExpandHelper(CARD32 component, CARD32 bits)
{
    CARD32 tmp, mask;

    if (bits > 8) {
	CARD32 drop = bits - 8;

	/* A shift by 32 or more is undefined; nothing would be left anyway. */
	return (drop < 32) ? (component >> drop) : 0;
    }

    mask = (1 << (8 - bits)) - 1;
    tmp = component << (8 - bits);
    return ((component & 1) ? tmp | mask : tmp);
}

/*
 * Extract the components from a pixel of format "format" to an
 * argb8888 pixel. This is used to extract data from one-pixel repeat pixmaps.
 * Assumes little endian.
 *
 * format:   picture format code describing the pixel at pixelP.
 * pixelP:   points to the pixel; 1, 2 or 4 bytes are read depending on the
 *           bits per pixel of the format.
 * argb8888: receives the expanded pixel. Formats without alpha bits get an
 *           alpha of 0xFF. Nothing is written for format types other than
 *           A, ARGB and ABGR.
 */

static void
psbPixelARGB8888(unsigned format, void *pixelP, CARD32 * argb8888)
{
    CARD32 bits, shift, pixel, bpp;

    bpp = PICT_FORMAT_BPP(format);

    if (bpp <= 8) {
	pixel = *((CARD8 *) pixelP);
    } else if (bpp <= 16) {
	pixel = *((CARD16 *) pixelP);
    } else {
	pixel = *((CARD32 *) pixelP);
    }

    switch (PICT_FORMAT_TYPE(format)) {
    case PICT_TYPE_A:
	bits = PICT_FORMAT_A(format);
	*argb8888 = psbBitExpandHelper(pixel & ((1 << bits) - 1), bits) << 24;
	return;
    case PICT_TYPE_ARGB:
	/* Fields from the least significant bit upwards: B, G, R, A. */
	shift = 0;
	bits = PICT_FORMAT_B(format);
	*argb8888 = psbBitExpandHelper(pixel & ((1 << bits) - 1), bits);
	shift += bits;
	bits = PICT_FORMAT_G(format);
	*argb8888 |=
	    psbBitExpandHelper((pixel >> shift) & ((1 << bits) - 1),
	    bits) << 8;
	shift += bits;
	bits = PICT_FORMAT_R(format);
	*argb8888 |=
	    psbBitExpandHelper((pixel >> shift) & ((1 << bits) - 1),
	    bits) << 16;
	shift += bits;
	bits = PICT_FORMAT_A(format);
	*argb8888 |= ((bits) ?
	    psbBitExpandHelper((pixel >> shift) & ((1 << bits) - 1),
		bits) : 0xFF) << 24;
	return;
    case PICT_TYPE_ABGR:
	/*
	 * Fields from the least significant bit upwards: R, G, B, A.
	 * The width of each field must be taken from the component that
	 * actually lives there, which matters for formats whose red and
	 * blue widths differ.
	 */
	shift = 0;
	bits = PICT_FORMAT_R(format);
	*argb8888 = psbBitExpandHelper(pixel & ((1 << bits) - 1), bits) << 16;
	shift += bits;
	bits = PICT_FORMAT_G(format);
	*argb8888 |=
	    psbBitExpandHelper((pixel >> shift) & ((1 << bits) - 1),
	    bits) << 8;
	shift += bits;
	bits = PICT_FORMAT_B(format);
	*argb8888 |=
	    psbBitExpandHelper((pixel >> shift) & ((1 << bits) - 1), bits);
	shift += bits;
	bits = PICT_FORMAT_A(format);
	*argb8888 |= ((bits) ?
	    psbBitExpandHelper((pixel >> shift) & ((1 << bits) - 1),
		bits) : 0xFF) << 24;
	return;
    default:
	break;
    }
    return;
}

/*
 * Check if the above function will work.
 */

static Bool
psbExpandablePixel(int format)
{
    int formatType = PICT_FORMAT_TYPE(format);

    return (formatType == PICT_TYPE_A ||
	formatType == PICT_TYPE_ABGR || formatType == PICT_TYPE_ARGB);
}

/*
 * Select the source and destination surface modes for the given pixmap
 * depths and expand "pix" to argb8888 in tdc->fixPat.
 *
 * Depths 8, 15, 16 and 24 map to the 332, 555, 565 and 0888 modes; any
 * other depth selects the 8888 mode. The pixel is interpreted in the
 * picture format that matches the destination depth.
 */
static void
psbAccelSetMode(PsbTwodContextPtr tdc, int sdepth, int ddepth, Pixel pix)
{
    unsigned fillFormat;

    if (sdepth == 8)
	tdc->sMode = PSB_2D_SRC_332RGB;
    else if (sdepth == 15)
	tdc->sMode = PSB_2D_SRC_555RGB;
    else if (sdepth == 16)
	tdc->sMode = PSB_2D_SRC_565RGB;
    else if (sdepth == 24)
	tdc->sMode = PSB_2D_SRC_0888ARGB;
    else
	tdc->sMode = PSB_2D_SRC_8888ARGB;

    if (ddepth == 8) {
	tdc->dMode = PSB_2D_DST_332RGB;
	fillFormat = PICT_r3g3b2;
    } else if (ddepth == 15) {
	tdc->dMode = PSB_2D_DST_555RGB;
	fillFormat = PICT_x1r5g5b5;
    } else if (ddepth == 16) {
	tdc->dMode = PSB_2D_DST_565RGB;
	fillFormat = PICT_r5g6b5;
    } else if (ddepth == 24) {
	tdc->dMode = PSB_2D_DST_0888ARGB;
	fillFormat = PICT_x8r8g8b8;
    } else {
	tdc->dMode = PSB_2D_DST_8888ARGB;
	fillFormat = PICT_a8r8g8b8;
    }

    psbPixelARGB8888(fillFormat, &pix, &tdc->fixPat);
}

/*
 * This function sets up the source surface to a two-color palette format with
 * the same color in both palette entries. This is used to give a constant source
 * color, which is used in masked fills. Typically the source surface is set to
 * the same offset and stride as the dst surface.
 *
 * Two words are queued in the command buffer "cb": a source surface word
 * carrying the 1-bit palette format and "stride", and a source palette
 * word carrying "palOffset".
 *
 * NOTE(review): x, y and srcOffset are accepted but not used by the body;
 * no source offset word is emitted here. Confirm whether that is intended.
 */

static void
psbAccelPaletteHelper(PsbTwodBufferPtr cb, int x, int y, unsigned srcOffset,
    unsigned stride, unsigned palOffset)
{
    PSB_2D_SPACE(2);
    PSB_2D_OUT(PSB_2D_SRC_SURF_BH |
	PSB_2D_SRC_1_PAL |
	((stride << PSB_2D_SRC_STRIDE_SHIFT) & PSB_2D_SRC_STRIDE_MASK));
    PSB_2D_OUT(PSB_2D_SRC_PAL_BH |
	((palOffset << PSB_2D_SRCPAL_ADDR_SHIFT) & PSB_2D_SRCPAL_ADDR_MASK));
    PSB_2D_DONE;
}

/*
 * Allocate and initialise the 2D command buffer "cb" for screen "pScrn".
 *
 * The buffer holds PSB_2D_BUFFER_SIZE zero-initialised dwords. Returns
 * TRUE on success and FALSE if the allocation failed.
 */
Bool
psbAccelSetup2DBuffer(ScrnInfoPtr pScrn, PsbTwodBufferPtr cb)
{
    cb->buf = xcalloc(sizeof(CARD32), PSB_2D_BUFFER_SIZE);

    if (cb->buf == NULL)
	return FALSE;

    /* Start out empty: the write pointer sits at the start of the buffer. */
    cb->cur = cb->buf;
    cb->dWords = PSB_2D_BUFFER_SIZE;
    cb->pScrn = pScrn;
    cb->pDevice = psbDevicePTR(psbPTR(pScrn));

    return TRUE;
}

/*
 * Release the storage of the 2D command buffer "cb", if any was allocated.
 */
void
psbAccelTakeDown2DBuffer(PsbTwodBufferPtr cb)
{
    if (cb->buf == NULL)
	return;

    xfree(cb->buf);
}

/*
 * Queue a solid fill of the w x h rectangle at (x, y) in the command
 * buffer "cb".
 *
 * offset, mode and stride describe the destination surface, fg is the
 * fill value emitted after the blit command word "cmd".
 *
 * With PSB_FIX_BUG_W8 defined, a fill that is exactly 8 pixels wide is
 * emitted as two 4 pixel wide fills, the second one starting 4 pixels to
 * the right.
 */
static void
psbAccelSolidHelper(PsbTwodBufferPtr cb, int x, int y, int w, int h,
    unsigned offset, unsigned mode, unsigned stride, CARD32 fg, unsigned cmd)
{
#ifdef PSB_FIX_BUG_W8
    int origW = w;

    if (w == 8)
	w = 4;
    /* 7 words for the first fill plus 5 for the optional second half. */
    PSB_2D_SPACE(12);
#else
    PSB_2D_SPACE(7);
#endif

    PSB_2D_OUT(PSB_2D_FENCE_BH);
    PSB_2D_OUT(PSB_2D_DST_SURF_BH |
	(mode & PSB_2D_DST_FORMAT_MASK) |
	((stride & PSB_2D_DST_STRIDE_MASK) << PSB_2D_DST_STRIDE_SHIFT));
    PSB_2D_OUT(offset);
    PSB_2D_OUT(cmd);
    PSB_2D_OUT(fg);
    PSB_2D_OUT(((x << PSB_2D_DST_XSTART_SHIFT) & PSB_2D_DST_XSTART_MASK) |
	((y << PSB_2D_DST_YSTART_SHIFT) & PSB_2D_DST_YSTART_MASK));
    PSB_2D_OUT(((w << PSB_2D_DST_XSIZE_SHIFT) & PSB_2D_DST_XSIZE_MASK) |
	((h << PSB_2D_DST_YSIZE_SHIFT) & PSB_2D_DST_YSIZE_MASK));

#ifdef PSB_FIX_BUG_W8
    if (origW == 8) {

	/* Second half; the destination surface set up above is reused. */
	x += 4;

	PSB_2D_OUT(PSB_2D_FENCE_BH);
	PSB_2D_OUT(cmd);
	PSB_2D_OUT(fg);
	PSB_2D_OUT(((x << PSB_2D_DST_XSTART_SHIFT) & PSB_2D_DST_XSTART_MASK) |
	    ((y << PSB_2D_DST_YSTART_SHIFT) & PSB_2D_DST_YSTART_MASK));
	PSB_2D_OUT(((4 << PSB_2D_DST_XSIZE_SHIFT) & PSB_2D_DST_XSIZE_MASK) |
	    ((h << PSB_2D_DST_YSIZE_SHIFT) & PSB_2D_DST_YSIZE_MASK));
    }
#endif
    PSB_2D_DONE;
}

/*
 * Queue a copy of a w x h rectangle from (xs, ys) on the source surface to
 * (xd, yd) on the destination surface in the command buffer "cb".
 *
 * srcOffset/srcMode/srcStride and dstOffset/dstMode/dstStride describe the
 * two surfaces, "cmd" is the blit command word (including the copy order)
 * and "fg" is the word emitted right after it.
 *
 * With PSB_FIX_BUG_W8 defined, a copy that is exactly 8 pixels wide is
 * emitted as two 4 pixel wide copies. For the right-to-left copy orders
 * the first half starts 4 pixels to the left of the given coordinates.
 * NOTE(review): in that case the left half is emitted before the right
 * half; confirm this is safe for overlapping copies.
 */
static void
psbAccelCopyHelper(PsbTwodBufferPtr cb, int xs, int ys, int xd, int yd,
    int w, int h, unsigned srcOffset, unsigned dstOffset,
    unsigned srcMode, unsigned dstMode,
    unsigned srcStride, unsigned dstStride, unsigned fg, unsigned cmd)
{
#ifdef PSB_FIX_BUG_W8
    int origW = w;

    if (w == 8) {
	unsigned copyOrder = cmd & PSB_2D_COPYORDER_MASK;

	w = 4;
	if (copyOrder == PSB_2D_COPYORDER_TR2BL ||
	    copyOrder == PSB_2D_COPYORDER_BR2TL) {
	    xs -= 4;
	    xd -= 4;
	}
    }
    /* 10 words for the first copy plus 6 for the optional second half. */
    PSB_2D_SPACE(16);
#else
    PSB_2D_SPACE(10);
#endif

    PSB_2D_OUT(PSB_2D_FENCE_BH);
    PSB_2D_OUT(PSB_2D_DST_SURF_BH |
	(dstMode & PSB_2D_DST_FORMAT_MASK) |
	((dstStride << PSB_2D_DST_STRIDE_SHIFT) & PSB_2D_DST_STRIDE_MASK));
    PSB_2D_OUT(dstOffset);
    /* The source stride is masked with the source surface mask. */
    PSB_2D_OUT(PSB_2D_SRC_SURF_BH |
	(srcMode & PSB_2D_SRC_FORMAT_MASK) |
	((srcStride << PSB_2D_SRC_STRIDE_SHIFT) & PSB_2D_SRC_STRIDE_MASK));
    PSB_2D_OUT(srcOffset);

    PSB_2D_OUT(PSB_2D_SRC_OFF_BH |
	((xs << PSB_2D_SRCOFF_XSTART_SHIFT) & PSB_2D_SRCOFF_XSTART_MASK) |
	((ys << PSB_2D_SRCOFF_YSTART_SHIFT) & PSB_2D_SRCOFF_YSTART_MASK));
    PSB_2D_OUT(cmd);
    PSB_2D_OUT(fg);
    PSB_2D_OUT(((xd << PSB_2D_DST_XSTART_SHIFT) & PSB_2D_DST_XSTART_MASK) |
	((yd << PSB_2D_DST_YSTART_SHIFT) & PSB_2D_DST_YSTART_MASK));
    PSB_2D_OUT(((w << PSB_2D_DST_XSIZE_SHIFT) & PSB_2D_DST_XSIZE_MASK) |
	((h << PSB_2D_DST_YSIZE_SHIFT) & PSB_2D_DST_YSIZE_MASK));

#ifdef PSB_FIX_BUG_W8
    if (origW == 8) {

	/* Second half, 4 pixels to the right; surfaces are reused. */
	xs += 4;
	xd += 4;

	PSB_2D_OUT(PSB_2D_FENCE_BH);
	PSB_2D_OUT(PSB_2D_SRC_OFF_BH |
	    ((xs << PSB_2D_SRCOFF_XSTART_SHIFT) & PSB_2D_SRCOFF_XSTART_MASK) |
	    ((ys << PSB_2D_SRCOFF_YSTART_SHIFT) & PSB_2D_SRCOFF_YSTART_MASK));
	PSB_2D_OUT(cmd);
	PSB_2D_OUT(fg);
	PSB_2D_OUT(((xd << PSB_2D_DST_XSTART_SHIFT) & PSB_2D_DST_XSTART_MASK)
	    | ((yd << PSB_2D_DST_YSTART_SHIFT) & PSB_2D_DST_YSTART_MASK));
	PSB_2D_OUT(((4 << PSB_2D_DST_XSIZE_SHIFT) & PSB_2D_DST_XSIZE_MASK) |
	    ((h << PSB_2D_DST_YSIZE_SHIFT) & PSB_2D_DST_YSIZE_MASK));
    }
#endif

    PSB_2D_DONE;
}

/*
 * Source offset adjustment applied to the first half of a split 8 pixel
 * wide composite: -4 in x for a 180 degree rotation, -4 in y for a 90
 * degree rotation, and no adjustment otherwise.
 */
static void
psbAccelCompositeBugDelta0(unsigned rotation, int *xDelta, int *yDelta)
{
    int dx = 0;
    int dy = 0;

    if (rotation == PSB_2D_ROT_180DEGS)
	dx = -4;
    else if (rotation == PSB_2D_ROT_90DEGS)
	dy = -4;

    *xDelta = dx;
    *yDelta = dy;
}

static void
psbAccelCompositeBugDelta1(unsigned rotation, int *xDelta, int *yDelta)
{
    switch (rotation) {
    case PSB_2D_ROT_270DEGS:
    case PSB_2D_ROT_90DEGS:
	*xDelta = 0;
	*yDelta = 4;
	break;
    default:
	*xDelta = 4;
	*yDelta = 0;
	break;
    }
}

#ifdef PSB_FIX_BUG_W8
/*
 * Move a pattern start coordinate by "delta" and wrap the result into
 * the range [0, size). The arithmetic is signed so that negative deltas
 * wrap correctly for any pattern size. A non-positive size leaves the
 * coordinate unchanged.
 */
static unsigned
psbWrapPatCoord(unsigned coord, int delta, int size)
{
    int wrapped;

    if (size <= 0)
	return coord;

    wrapped = ((int)coord + delta) % size;
    if (wrapped < 0)
	wrapped += size;

    return (unsigned)wrapped;
}
#endif

/*
 * Queue a composite blit of a w x h rectangle to (xd, yd) in the command
 * buffer "cb".
 *
 * xs, ys:         start coordinates on the source surface.
 * xp, yp, wp, hp: start coordinates and size of the pattern; only emitted
 *                 when usePat is set and cmd contains PSB_2D_USE_PAT.
 * *Offset, *Mode, *Stride: description of the source, pattern and
 *                 destination surfaces.
 * fg:             word emitted after the command when cmd does not
 *                 contain PSB_2D_USE_PAT.
 * cmd:            blit command word, including the rotation.
 * alpha1, alpha2: alpha control words, emitted when cmd contains
 *                 PSB_2D_ALPHA_ENABLE.
 *
 * With PSB_FIX_BUG_W8 defined, a blit that is exactly 8 pixels wide is
 * emitted as two 4 pixel wide blits; source and pattern start coordinates
 * are shifted for each half according to the rotation, and the pattern
 * coordinates are wrapped to the pattern width (x) and height (y).
 *
 * NOTE(review): the pattern surface word below is built from srcStride;
 * patStride is accepted but not used. Confirm against the callers whether
 * patStride should be used there.
 */
static void
psbAccelCompositeHelper(PsbTwodBufferPtr cb, int xs, int ys, unsigned xp,
    unsigned yp,
    int xd, int yd, int wp, int hp,
    int w, int h, unsigned srcOffset, unsigned patOffset,
    unsigned dstOffset, unsigned srcMode, unsigned patMode,
    unsigned dstMode, unsigned srcStride, unsigned patStride,
    unsigned dstStride, unsigned fg, unsigned cmd,
    unsigned alpha1, unsigned alpha2, Bool usePat)
{
#ifdef PSB_FIX_BUG_W8
    int origW = w;
    int xDelta;
    int yDelta;
    unsigned rot = cmd & PSB_2D_ROT_MASK;

    if (origW == 8) {
	w = 4;
	psbAccelCompositeBugDelta0(rot, &xDelta, &yDelta);
	xs += xDelta;
	ys += yDelta;
	if (usePat && (cmd & PSB_2D_USE_PAT)) {
	    xp = psbWrapPatCoord(xp, xDelta, wp);
	    yp = psbWrapPatCoord(yp, yDelta, hp);
	}
    }
    PSB_2D_SPACE(23);
#else
    PSB_2D_SPACE(16);
#endif

    PSB_2D_OUT(PSB_2D_FENCE_BH);
    PSB_2D_OUT(PSB_2D_DST_SURF_BH |
	(dstMode & PSB_2D_DST_FORMAT_MASK) |
	((dstStride << PSB_2D_DST_STRIDE_SHIFT) & PSB_2D_DST_STRIDE_MASK));
    PSB_2D_OUT(dstOffset);
    /* The source stride is masked with the source surface mask. */
    PSB_2D_OUT(PSB_2D_SRC_SURF_BH |
	(srcMode & PSB_2D_SRC_FORMAT_MASK) |
	((srcStride << PSB_2D_SRC_STRIDE_SHIFT) & PSB_2D_SRC_STRIDE_MASK));
    PSB_2D_OUT(srcOffset);

    PSB_2D_OUT(PSB_2D_SRC_OFF_BH |
	((xs << PSB_2D_SRCOFF_XSTART_SHIFT) & PSB_2D_SRCOFF_XSTART_MASK) |
	((ys << PSB_2D_SRCOFF_YSTART_SHIFT) & PSB_2D_SRCOFF_YSTART_MASK));

    if (usePat && (cmd & PSB_2D_USE_PAT)) {
	PSB_2D_OUT(PSB_2D_PAT_SURF_BH |
	    (patMode & PSB_2D_PAT_FORMAT_MASK) |
	    ((srcStride << PSB_2D_PAT_STRIDE_SHIFT) &
		PSB_2D_PAT_STRIDE_MASK));
	PSB_2D_OUT(patOffset);
	PSB_2D_OUT(PSB_2D_PAT_BH |
	    ((hp << PSB_2D_PAT_HEIGHT_SHIFT) &
		PSB_2D_PAT_HEIGHT_MASK) |
	    ((wp << PSB_2D_PAT_WIDTH_SHIFT) &
		PSB_2D_PAT_WIDTH_MASK) |
	    ((xp << PSB_2D_PAT_XSTART_SHIFT) &
		PSB_2D_PAT_XSTART_MASK) |
	    ((yp << PSB_2D_PAT_YSTART_SHIFT) & PSB_2D_PAT_YSTART_MASK));
    }

    if (cmd & PSB_2D_ALPHA_ENABLE) {
	PSB_2D_OUT(PSB_2D_CTRL_BH | PSB_2D_ALPHA_CTRL);
	PSB_2D_OUT(alpha1);
	PSB_2D_OUT(alpha2);
    }

    PSB_2D_OUT(cmd);

    if (!(cmd & PSB_2D_USE_PAT))
	PSB_2D_OUT(fg);

    PSB_2D_OUT(((xd << PSB_2D_DST_XSTART_SHIFT) & PSB_2D_DST_XSTART_MASK) |
	((yd << PSB_2D_DST_YSTART_SHIFT) & PSB_2D_DST_YSTART_MASK));
    PSB_2D_OUT(((w << PSB_2D_DST_XSIZE_SHIFT) & PSB_2D_DST_XSIZE_MASK) |
	((h << PSB_2D_DST_YSIZE_SHIFT) & PSB_2D_DST_YSIZE_MASK));

#ifdef PSB_FIX_BUG_W8
    if (origW == 8) {

	/* Second half: destination moves 4 pixels to the right. */
	psbAccelCompositeBugDelta1(rot, &xDelta, &yDelta);

	PSB_2D_OUT(PSB_2D_FENCE_BH);

	xs += xDelta;
	ys += yDelta;
	xd += 4;
	if (usePat && (cmd & PSB_2D_USE_PAT)) {
	    xp = psbWrapPatCoord(xp, xDelta, wp);
	    yp = psbWrapPatCoord(yp, yDelta, hp);
	    PSB_2D_OUT(PSB_2D_PAT_BH |
		((hp << PSB_2D_PAT_HEIGHT_SHIFT) &
		    PSB_2D_PAT_HEIGHT_MASK) |
		((wp << PSB_2D_PAT_WIDTH_SHIFT) &
		    PSB_2D_PAT_WIDTH_MASK) |
		((xp << PSB_2D_PAT_XSTART_SHIFT) &
		    PSB_2D_PAT_XSTART_MASK) |
		((yp << PSB_2D_PAT_YSTART_SHIFT) & PSB_2D_PAT_YSTART_MASK));
	}
	PSB_2D_OUT(PSB_2D_SRC_OFF_BH |
	    ((xs << PSB_2D_SRCOFF_XSTART_SHIFT) & PSB_2D_SRCOFF_XSTART_MASK) |
	    ((ys << PSB_2D_SRCOFF_YSTART_SHIFT) & PSB_2D_SRCOFF_YSTART_MASK));
	PSB_2D_OUT(cmd);

	if (!(cmd & PSB_2D_USE_PAT))
	    PSB_2D_OUT(fg);

	PSB_2D_OUT(((xd << PSB_2D_DST_XSTART_SHIFT) & PSB_2D_DST_XSTART_MASK)
	    | ((yd << PSB_2D_DST_YSTART_SHIFT) & PSB_2D_DST_YSTART_MASK));
	PSB_2D_OUT(((4 << PSB_2D_DST_XSIZE_SHIFT) & PSB_2D_DST_XSIZE_MASK) |
	    ((h << PSB_2D_DST_YSIZE_SHIFT) & PSB_2D_DST_YSIZE_MASK));
    }
#endif

    PSB_2D_DONE;
}

/*
 * Return TRUE if the data pointer of pixmap "p" lies inside one of the
 * buffers tracked in the driver's buffer list, FALSE otherwise.
 */
static Bool
psbExaPixmapIsOffscreen(PixmapPtr p)
{
    ScrnInfoPtr pScrn = xf86Screens[p->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);

    if (psbInBuffer(&pPsb->buffers, p->devPrivate.ptr) == NULL)
	return FALSE;

    return TRUE;
}

/*
 * Compute the offset of pixmap "p" as seen by the 2D engine and store it
 * in *offset.
 *
 * The pixmap's address is formed from its EXA offset and the virtual
 * address of the EXA buffer, and looked up in the driver's buffer list.
 * The buffer found is validated first if it has not been validated yet.
 * Returns FALSE, after logging an error, if the address is in no known
 * buffer or the validation fails; TRUE otherwise.
 */
static Bool
psbExaGetPixmapOffset(PixmapPtr p, unsigned long *offset)
{
    ScrnInfoPtr pScrn = xf86Screens[p->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbBufListPtr item;
    unsigned long addr;

    addr = exaGetPixmapOffset(p) +
	(unsigned long)mmBufVirtual(pPsb->pPsbExa->exaBuf.buf);

    item = psbInBuffer(&pPsb->buffers, (void *)addr);
    if (item == NULL) {
	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
	    "[EXA] Illegal pixmap pointer.\n");
	return FALSE;
    }

    if (!item->validated) {
	struct _MMBuffer *buf = item->buf;

	if (buf->man->validateBuffer(buf, 0, 0, 0)) {
	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
		"[EXA] Could not validate buffer.\n");
	    return FALSE;
	}
	item->validated = TRUE;
    }

    /*
     * SGX 2D uses only the lower 28 bits of mmBufOffset();
     */

    *offset = mmBufOffset(item->buf) +
	addr - (unsigned long)mmBufVirtual(item->buf);

    return TRUE;
}

/*
 * Shut down EXA for "pScreen" and free "pPsbExa" together with what it
 * owns: the EXA driver record and the exa, scratch and tmp buffer items.
 * A NULL pPsbExa is accepted and ignored.
 */
void
psbExaClose(PsbExaPtr pPsbExa, ScreenPtr pScreen)
{
    PSB_DEBUG(pScreen->myNum, 2, "psbExaClose\n");

    if (pPsbExa == NULL)
	return;

    /* Only tear down the EXA layer if it was brought up. */
    if (pPsbExa->exaUp) {
	exaDriverFini(pScreen);
	pPsbExa->exaUp = FALSE;
    }

    if (pPsbExa->pExa != NULL) {
	xfree(pPsbExa->pExa);
	pPsbExa->pExa = NULL;
    }

    psbClearBufItem(&pPsbExa->exaBuf);
    psbClearBufItem(&pPsbExa->scratchBuf);
    psbClearBufItem(&pPsbExa->tmpBuf);

    xfree(pPsbExa);
}

/*
 * Create the EXA pixmap buffer and the EXA scratch buffer and add them to
 * the driver's buffer list. The tmp buffer is left unallocated.
 *
 * Both buffers are created readable and writable with MM_FLAG_MEM_TT,
 * and cached if pPsb->exaCached is set. Returns FALSE as soon as one of
 * the buffer items ends up without a buffer, TRUE otherwise.
 */
static Bool
psbExaAllocBuffers(ScrnInfoPtr pScrn, PsbExaPtr pPsbExa)
{
    PsbPtr pPsb = psbPTR(pScrn);
    PsbDevicePtr pDevice = psbDevicePTR(pPsb);
    struct _MMBuffer *created;

    mmInitListHead(&pPsbExa->exaBuf.head);
    mmInitListHead(&pPsbExa->scratchBuf.head);
    mmInitListHead(&pPsbExa->tmpBuf.head);

    created = pDevice->man->createBuf(pDevice->man, pPsb->exaSize, 0,
	MM_FLAG_READ |
	MM_FLAG_WRITE |
	MM_FLAG_MEM_TT |
	((pPsb->exaCached) ? MM_FLAG_CACHED : 0), MM_HINT_DONT_FENCE);
    psbAddBufItem(&pPsb->buffers, &pPsbExa->exaBuf, created);
    if (!pPsbExa->exaBuf.buf)
	return FALSE;

    created = pDevice->man->createBuf(pDevice->man, pPsb->exaScratchSize, 0,
	MM_FLAG_READ |
	MM_FLAG_WRITE |
	MM_FLAG_MEM_TT |
	((pPsb->exaCached) ? MM_FLAG_CACHED : 0), MM_HINT_DONT_FENCE);
    psbAddBufItem(&pPsb->buffers, &pPsbExa->scratchBuf, created);
    if (!pPsbExa->scratchBuf.buf)
	return FALSE;

    pPsbExa->tmpBuf.buf = NULL;

    return TRUE;
}

static CARD32
psbAccelCopyDirection(int xdir, int ydir)
{
    if (xdir < 0)
	return ((ydir < 0) ? PSB_2D_COPYORDER_BR2TL : PSB_2D_COPYORDER_TR2BL);
    else
	return ((ydir < 0) ? PSB_2D_COPYORDER_BL2TR : PSB_2D_COPYORDER_TL2BR);
}

/*
 * Set up the 2D context for a series of copies from pSrcPixmap to
 * pDstPixmap.
 *
 * Returns FALSE without taking the lock if either pixmap has fewer than
 * PSB_EXA_MIN_COPY pixels. Otherwise the DRI lock is taken and the copy
 * direction, raster operation, surface modes, offsets and strides are
 * stored in the context. If a pixmap offset cannot be obtained the lock
 * is dropped again and FALSE is returned. On success TRUE is returned
 * with the lock still held.
 */
static Bool
psbExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
    int ydir, int alu, Pixel planeMask)
{
    ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodContextPtr tdc = &pPsb->td;
    int srcArea = pSrcPixmap->drawable.width * pSrcPixmap->drawable.height;
    int dstArea = pDstPixmap->drawable.width * pDstPixmap->drawable.height;
    int rop;

    if (srcArea < PSB_EXA_MIN_COPY || dstArea < PSB_EXA_MIN_COPY)
	return FALSE;

    psbDRILock(pScrn, 0);
    tdc->direction = psbAccelCopyDirection(xdir, ydir);

    /* A partial planemask is applied through the pattern. */
    if (EXA_PM_IS_SOLID(&pDstPixmap->drawable, planeMask))
	rop = psbCopyROP[alu];
    else
	rop = psbCopyROP_PM(alu);

    psbAccelSetMode(tdc, pSrcPixmap->drawable.depth,
	pDstPixmap->drawable.depth, planeMask);
    tdc->cmd = PSB_2D_BLIT_BH | PSB_2D_ROT_NONE |
	tdc->direction |
	PSB_2D_DSTCK_DISABLE |
	PSB_2D_SRCCK_DISABLE |
	PSB_2D_USE_FILL |
	((rop << PSB_2D_ROP3B_SHIFT) & PSB_2D_ROP3B_MASK) |
	((rop << PSB_2D_ROP3A_SHIFT) & PSB_2D_ROP3A_MASK);

    if (!psbExaGetPixmapOffset(pSrcPixmap, &tdc->sOffset) ||
	!psbExaGetPixmapOffset(pDstPixmap, &tdc->dOffset)) {
	psbDRIUnlock(pScrn);
	return FALSE;
    }

    tdc->sStride = exaGetPixmapPitch(pSrcPixmap);
    tdc->dStride = exaGetPixmapPitch(pDstPixmap);

    tdc->sBPP = pSrcPixmap->drawable.bitsPerPixel >> 3;

    return TRUE;
}

/*
 * Queue one copy of a width x height rectangle from (srcX, srcY) to
 * (dstX, dstY), using the state prepared by psbExaPrepareCopy().
 *
 * With PSB_FIX_BUG_OVERLAP defined, the copy order is chosen anew for
 * each copy: top-left to bottom-right when source and destination
 * offsets differ, otherwise derived from the relative position of the
 * two rectangles. For right-to-left and bottom-to-top orders the start
 * coordinates are moved to the last column and row respectively.
 */
static void
psbExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY,
    int width, int height)
{
    ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodBufferPtr cb = &pPsb->cb;
    PsbTwodContextPtr tdc = &pPsb->td;
    Bool rightToLeft;
    Bool bottomToTop;

#ifdef PSB_FIX_BUG_OVERLAP
    tdc->cmd &= PSB_2D_COPYORDER_CLRMASK;
    if (tdc->sOffset != tdc->dOffset)
	tdc->direction = PSB_2D_COPYORDER_TL2BR;
    else
	tdc->direction = psbAccelCopyDirection(srcX - dstX, srcY - dstY);
    tdc->cmd |= tdc->direction;
#endif

    rightToLeft = (tdc->direction == PSB_2D_COPYORDER_BR2TL ||
	tdc->direction == PSB_2D_COPYORDER_TR2BL);
    bottomToTop = (tdc->direction == PSB_2D_COPYORDER_BR2TL ||
	tdc->direction == PSB_2D_COPYORDER_BL2TR);

    if (rightToLeft) {
	srcX += width - 1;
	dstX += width - 1;
    }
    if (bottomToTop) {
	srcY += height - 1;
	dstY += height - 1;
    }

    psbAccelCopyHelper(cb, srcX, srcY, dstX, dstY,
	width, height, tdc->sOffset, tdc->dOffset,
	tdc->sMode, tdc->dMode,
	tdc->sStride, tdc->dStride, tdc->fixPat, tdc->cmd);
}
/* Forward declaration; the function is defined after psbExaSolid(). */
static void
psbExaDoneSolid(PixmapPtr pPixmap);

/*
 * Set up the 2D context for a series of solid fills of pPixmap with the
 * colour "fg" and raster operation "alu".
 *
 * Returns FALSE without taking the lock when the planemask is not solid,
 * and also for GXcopy, which is left to software because that is much
 * faster. Otherwise the DRI lock is taken and TRUE is returned with the
 * lock held, unless the pixmap offset cannot be obtained, in which case
 * the lock is dropped and FALSE is returned.
 */
static Bool
psbExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg)
{
    ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodContextPtr tdc = &pPsb->td;
    int fillRop = psbPatternROP[alu];

    if (!EXA_PM_IS_SOLID(&pPixmap->drawable, planeMask) || alu == GXcopy)
	return FALSE;

    psbDRILock(pScrn, 0);
    psbAccelSetMode(tdc, pPixmap->drawable.depth, pPixmap->drawable.depth,
	fg);
    tdc->cmd = PSB_2D_BLIT_BH |
	PSB_2D_ROT_NONE |
	PSB_2D_COPYORDER_TL2BR |
	PSB_2D_DSTCK_DISABLE |
	PSB_2D_SRCCK_DISABLE |
	PSB_2D_USE_FILL |
	((fillRop << PSB_2D_ROP3B_SHIFT) & PSB_2D_ROP3B_MASK) |
	((fillRop << PSB_2D_ROP3A_SHIFT) & PSB_2D_ROP3A_MASK);

    if (!psbExaGetPixmapOffset(pPixmap, &tdc->dOffset)) {
	psbDRIUnlock(pScrn);
	return FALSE;
    }

    tdc->dStride = exaGetPixmapPitch(pPixmap);

    return TRUE;
}

/*
 * Queue a solid fill of the rectangle spanning (x1, y1) to (x2, y2),
 * exclusive of x2 and y2, using the state prepared by
 * psbExaPrepareSolid().
 */
static void
psbExaSolid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
{
    ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodBufferPtr cb = &pPsb->cb;
    PsbTwodContextPtr tdc = &pPsb->td;

    psbAccelSolidHelper(cb, x1, y1, x2 - x1, y2 - y1, tdc->dOffset,
	tdc->dMode, tdc->dStride, tdc->fixPat, tdc->cmd);
}

/*
 * Finish a series of 2D operations on the screen of pPixmap: flush the
 * queued commands, fence the buffers and drop the DRI lock.
 */
static void
psbExaDoneSolid(PixmapPtr pPixmap)
{
    ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);

    psbFlushTwodBuffer(&pPsb->cb);
    psbFenceBuffers(&pPsb->buffers);
    psbDRIUnlock(pScrn);
}

/*
 * Wait until the marker value in memory has reached "marker".
 *
 * The marker is considered reached when the wrapped distance between the
 * value at markerAddr and "marker" is below PSB_MARKER_WRAP. The memory
 * location is polled for up to PSB_TIMEOUT_USEC microseconds; if the
 * marker is still outstanding after one final check, an error is logged
 * and psbEngineHang() is called.
 */
void
psbAccelWaitMarker(ScreenPtr pScreen, int marker)
{
    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbExaPtr pPsbExa = pPsb->pPsbExa;
    CARD32 uMarker = marker;
    struct timeval start, current;
    Bool busy;

    if ((*pPsbExa->markerAddr - uMarker) < PSB_MARKER_WRAP)
	return;

    if (gettimeofday(&start, NULL))
	FatalError("Gettimeofday error.\n");

    for (;;) {
	busy = ((*pPsbExa->markerAddr - uMarker) > PSB_MARKER_WRAP);
	if (gettimeofday(&current, NULL))
	    FatalError("Gettimeofday error.\n");
	if (!busy)
	    break;
	if (psbTimeDiff(&current, &start) >= PSB_TIMEOUT_USEC)
	    break;
    }

    /* Give the engine one last chance after the timeout. */
    if (busy)
	busy = ((*pPsbExa->markerAddr - uMarker) > PSB_MARKER_WRAP);

    if (busy) {
	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "[EXA] waitmarker error:\n"
	    "\tExpecting: 0x%08x Received: 0x%08x\n",
	    (unsigned)uMarker, (unsigned)*pPsbExa->markerAddr);

	psbEngineHang(pScrn);
    }
}

/*
 * Queue a new sync marker and return its value.
 *
 * Under the DRI lock, the EXA buffer is validated if needed and a 1x1
 * solid fill is queued that writes the incremented marker counter to the
 * marker location inside the EXA buffer. The command buffer is then
 * flushed and the buffers are fenced. The returned value is meant to be
 * passed to psbAccelWaitMarker(). A validation failure is fatal.
 */
int
psbAccelMarkSync(ScreenPtr pScreen)
{
    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodBufferPtr cb = &pPsb->cb;
    PsbExaPtr pPsbExa = pPsb->pPsbExa;
    struct _MMBuffer *buf;
    unsigned long offset;

    buf = pPsbExa->exaBuf.buf;
    psbDRILock(pScrn, 0);

    if (!pPsbExa->exaBuf.validated) {

	if (buf->man->validateBuffer(buf, 0, 0, 0))
	    FatalError("Could not validate sync buffer\n");
	pPsbExa->exaBuf.validated = TRUE;
    }

    /*
     * SGX 2D uses only the lower 28 bits of mmBufOffset();
     */

    offset = mmBufOffset(buf) + pPsbExa->markerSpace->offset;

    /* Extra fence ahead of the marker write. */
    PSB_2D_SPACE(1);
    PSB_2D_OUT(PSB_2D_FENCE_BH);
    PSB_2D_DONE;

    /* The marker is written as a one pixel 8888 fill with a 4 byte stride. */
    psbAccelSolidHelper(cb, 0, 0, 1, 1,
	offset, PSB_2D_DST_8888ARGB, 4,
	++pPsbExa->curMarker,
	PSB_2D_BLIT_BH |
	PSB_2D_ROT_NONE |
	PSB_2D_COPYORDER_TL2BR |
	PSB_2D_DSTCK_DISABLE |
	PSB_2D_SRCCK_DISABLE | PSB_2D_USE_FILL | PSB_2D_ROP3_PATCOPY);

    psbFlushTwodBuffer(cb);
    psbFenceBuffers(&pPsb->buffers);
    psbDRIUnlock(pScrn);
    return pPsbExa->curMarker;
}

/*
 * Classify a picture transform by the upper-left 2x2 part of its matrix.
 *
 * Returns 0 for a NULL transform, one of the PSB_2D_ROT_* codes when the
 * 2x2 part is the identity or a rotation by 90, 180 or 270 degrees, and
 * -1 for anything else. Only these four matrix elements are inspected.
 */
static int
psbExaCheckTransform(PictTransformPtr tr)
{
    const xFixed one = IntToxFixed(1);
    const xFixed zero = IntToxFixed(0);
    const xFixed minusOne = IntToxFixed(-1);
    xFixed m00, m01, m10, m11;

    if (tr == NULL)
	return 0;

    m00 = tr->matrix[0][0];
    m01 = tr->matrix[0][1];
    m10 = tr->matrix[1][0];
    m11 = tr->matrix[1][1];

    /* Diagonal matrices: identity or half turn. */
    if (m01 == zero && m10 == zero) {
	if (m00 == one && m11 == one)
	    return PSB_2D_ROT_NONE;
	if (m00 == minusOne && m11 == minusOne)
	    return PSB_2D_ROT_180DEGS;
    }

    /* Anti-diagonal matrices: quarter turns. */
    if (m00 == zero && m11 == zero) {
	if (m01 == minusOne && m10 == one)
	    return PSB_2D_ROT_270DEGS;
	if (m01 == one && m10 == minusOne)
	    return PSB_2D_ROT_90DEGS;
    }

    /*
     * We don't support scaling etc. at this point.
     */

    return -1;
}

/*
 * Return the destination support flag for picture format "format", or
 * FALSE if the format's slot in the lookup table holds another format.
 */
static Bool
psbDstSupported(unsigned format)
{
    PsbFormatPointer slot = psbCompFormats + PSB_FMT_HASH(format);

    return (slot->pictFormat == format) ? slot->dstSupported : FALSE;
}

/*
 * Return whether picture format "format" can be read by the blitter: as a
 * pattern when "pat" is set, as a source otherwise. FALSE is returned if
 * the format's slot in the lookup table holds another format.
 */
static Bool
psbSrcSupported(unsigned format, Bool pat)
{
    PsbFormatPointer slot = psbCompFormats + PSB_FMT_HASH(format);

    if (slot->pictFormat != format)
	return FALSE;

    if (pat)
	return slot->patSupported;

    return slot->srcSupported;
}

/*
 * Return the lookup table slot for picture format "format". The slot is
 * returned without checking that it actually describes that format.
 */
static PsbFormatPointer
psbCompFormat(unsigned format)
{
    return psbCompFormats + PSB_FMT_HASH(format);
}

/*
 * Decide whether a composite operation can be handled by the blitter.
 *
 * Accepted are PictOpSrc operations without a mask, to a supported
 * destination format, from a source picture that is backed by a drawable,
 * uses nearest filtering and has a supported format. Non-repeating
 * sources must cover at least PSB_EXA_MIN_COMPOSITE pixels, repeating
 * sources must not exceed 16x16.
 *
 * Pictures without a drawable are refused, since their size cannot be
 * examined. Operations with a mask are always refused.
 */
static Bool
psbExaCheckComposite(int op,
    PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture)
{
    DrawablePtr pDraw = pSrcPicture->pDrawable;
    int w;
    int h;

    /*
     * Only support OpSrc for now.
     */

    if (op != PictOpSrc)
	return FALSE;

    /* No drawable behind the source picture: nothing to blit from. */
    if (!pDraw)
	return FALSE;

    w = pDraw->width;
    h = pDraw->height;

    if (!psbDstSupported(pDstPicture->format))
	return FALSE;

    if (!pSrcPicture->repeat && w * h < PSB_EXA_MIN_COMPOSITE)
	return FALSE;

    if (pSrcPicture->repeat && ((w > 16) || (h > 16)))
	return FALSE;

    if (pSrcPicture->filter != PictFilterNearest)
	return FALSE;

    if (!psbSrcSupported(pSrcPicture->format, pSrcPicture->repeat))
	return FALSE;

    if (!pMaskPicture)
	return TRUE;

    pDraw = pMaskPicture->pDrawable;
    if (!pDraw)
	return FALSE;

    w = pDraw->width;
    h = pDraw->height;

    if (!pMaskPicture->repeat && w * h < PSB_EXA_MIN_COMPOSITE)
	return FALSE;

    if (pMaskPicture->repeat && ((w > 16) || (h > 16)))
	return FALSE;

    if (pMaskPicture->componentAlpha)
	return FALSE;

    if (pMaskPicture->filter != PictFilterNearest)
	return FALSE;

    if (!psbSrcSupported(pMaskPicture->format, pMaskPicture->repeat))
	return FALSE;

    /*
     * Don't support mask for now.
     */

    return FALSE;
}

/*
 * Set up the 2D context for a series of composite blits from pSrc to
 * pDst.
 *
 * The DRI lock is taken first. The source rotation, surface modes,
 * offsets, strides and source size are stored in the context. TRUE is
 * returned with the lock held. If the source transform is not a pure
 * rotation by a multiple of 90 degrees, if a mask has a different
 * rotation than the source, or if a pixmap offset cannot be obtained,
 * the lock is dropped and FALSE is returned.
 */
static Bool
psbExaPrepareComposite(int op, PicturePtr pSrcPicture,
    PicturePtr pMaskPicture, PicturePtr pDstPicture,
    PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
{
    ScreenPtr pScreen = pDst->drawable.pScreen;
    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodContextPtr tdc = &pPsb->td;
    PsbFormatPointer format;

    psbDRILock(pScrn, 0);

    tdc->cmd = 0;

    /*
     * We can only do a single-pass composite operation if we can utilize
     * source and pattern surfaces simultaneously.
     */

    tdc->twoPassComp = (pMaskPicture != NULL) &&
	(pSrcPicture->repeat == pMaskPicture->repeat);

    /*
     * Two-pass composites are not rejected here; operations with a mask
     * are currently refused by psbExaCheckComposite().
     */

    tdc->srcTransform = pSrcPicture->transform;
    tdc->srcRot = psbExaCheckTransform(pSrcPicture->transform);

    /* A repeating picture is read as a pattern, any other as a source. */
    format = psbCompFormat(pSrcPicture->format);
    tdc->sMode =
	(pSrcPicture->repeat) ? format->patFormat : format->srcFormat;
    format = psbCompFormat(pDstPicture->format);
    tdc->dMode = format->dstFormat;

    if (tdc->srcRot == -1)
	goto out_err;

    if (pMaskPicture) {

	/*
	 * Mask picture needs to have the same rotation as source picture.
	 */

	if (psbExaCheckTransform(pMaskPicture->transform) != tdc->srcRot)
	    goto out_err;

	/* The mask's own repeat flag decides how the mask is read. */
	format = psbCompFormat(pMaskPicture->format);
	tdc->mMode =
	    (pMaskPicture->repeat) ? format->patFormat : format->srcFormat;
    }

    tdc->cmd = PSB_2D_BLIT_BH |
	tdc->srcRot |
	PSB_2D_COPYORDER_TL2BR |
	PSB_2D_DSTCK_DISABLE |
	PSB_2D_SRCCK_DISABLE | PSB_2D_USE_FILL | PSB_2D_ROP3_SRCCOPY;

    if (!psbExaGetPixmapOffset(pSrc, &tdc->sOffset))
	goto out_err;

    tdc->sStride = exaGetPixmapPitch(pSrc);
    tdc->sBPP = pSrc->drawable.bitsPerPixel >> 3;
    tdc->srcWidth = pSrc->drawable.width;
    tdc->srcHeight = pSrc->drawable.height;

    if (!psbExaGetPixmapOffset(pDst, &tdc->dOffset))
	goto out_err;

    tdc->dStride = exaGetPixmapPitch(pDst);

    return TRUE;
  out_err:
    psbDRIUnlock(pScrn);
    return FALSE;
}

/*
 * Clip a one-dimensional span against the range [0, limit].
 *
 * sub:    FALSE if the span extends upwards from "coord" (it covers
 *         coord .. coord + *add), TRUE if it extends downwards (it covers
 *         coord - *add .. coord).
 * coord:  start coordinate of the span.
 * add:    in: length of the span; out: clipped length, never negative.
 * limit:  upper bound of the valid range.
 * cDelta: receives the amount by which the start coordinate had to be
 *         moved to get inside the range.
 *
 * A span that starts beyond the far end of the range yields a length of
 * zero rather than a negative length.
 */
static void
psbExaBoundingLine(Bool sub, int coord, int *add, int limit, int *cDelta)
{
    int origCoord = coord;

    if (!sub) {
	if (coord < 0) {
	    *add += coord;
	    coord = 0;
	    if (*add < 0)
		*add = 0;
	}
	if (coord >= limit)
	    *add = 0;
	else if (coord + *add > limit)
	    *add = limit - coord;
    } else {
	if (coord > limit) {
	    *add -= coord - limit;
	    coord = limit;
	    if (*add < 0)
		*add = 0;
	}
	if (coord < 1)
	    *add = 0;
	else if (coord < *add)
	    *add = coord;
    }

    /* Guard against a negative length handed in by the caller. */
    if (*add < 0)
	*add = 0;

    *cDelta = coord - origCoord;
}

/*
 * Clip the transformed source coordinates and the composite size against
 * the source picture (srcWidth x srcHeight), and move the source, mask
 * and destination start coordinates accordingly.
 *
 * For rotated sources the roles of width and height are swapped where
 * needed, and the source and mask start is moved back by one along each
 * axis that is traversed downwards.
 */

static void
psbExaAdjustForTransform(unsigned rot, int srcWidth, int srcHeight,
    int *srcX, int *srcY, int *maskX, int *maskY,
    int *dstX, int *dstY, int *width, int *height)
{
    int dx, dy;

    switch (rot) {
    case PSB_2D_ROT_270DEGS:
	psbExaBoundingLine(TRUE, *srcX, height, srcWidth, &dx);
	psbExaBoundingLine(FALSE, *srcY, width, srcHeight, &dy);
	*dstX += dy;
	*dstY -= dx;
	dx -= 1;
	break;
    case PSB_2D_ROT_90DEGS:
	psbExaBoundingLine(FALSE, *srcX, height, srcWidth, &dx);
	psbExaBoundingLine(TRUE, *srcY, width, srcHeight, &dy);
	*dstX -= dy;
	*dstY += dx;
	dy -= 1;
	break;
    case PSB_2D_ROT_180DEGS:
	psbExaBoundingLine(TRUE, *srcX, width, srcWidth, &dx);
	psbExaBoundingLine(TRUE, *srcY, height, srcHeight, &dy);
	*dstX -= dx;
	*dstY -= dy;
	dx -= 1;
	dy -= 1;
	break;
    default:
	psbExaBoundingLine(FALSE, *srcX, width, srcWidth, &dx);
	psbExaBoundingLine(FALSE, *srcY, height, srcHeight, &dy);
	*dstX -= dx;
	*dstY -= dy;
	break;
    }

    /* Source and mask always move together. */
    *srcX += dx;
    *srcY += dy;
    *maskX += dx;
    *maskY += dy;
}

/*
 * Composite one rectangle using the state stored in the screen's 2D
 * context (pPsb->td).
 *
 * When a source transform is set, the source origin is first run through
 * PictureTransformPoint() and truncated back to integers; source, mask and
 * destination origins and the rectangle size are then clipped against the
 * source picture by psbExaAdjustForTransform().
 *
 * NOTE(review): maskX / maskY are adjusted above but the helper is always
 * called with a mask origin of 0,0 and a mask offset of 0 - confirm that
 * this is intended.
 */
static void
psbExaComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
    int dstX, int dstY, int width, int height)
{
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbTwodContextPtr tdc = &pPsb->td;

    if (tdc->srcTransform) {
	PictVector origin;

	/* Homogeneous point (srcX, srcY, 1) in fixed-point format. */
	origin.vector[0] = IntToxFixed(srcX);
	origin.vector[1] = IntToxFixed(srcY);
	origin.vector[2] = IntToxFixed(1);
	PictureTransformPoint(tdc->srcTransform, &origin);

	srcX = xFixedToInt(origin.vector[0]);
	srcY = xFixedToInt(origin.vector[1]);

	psbExaAdjustForTransform(tdc->srcRot,
	    tdc->srcWidth, tdc->srcHeight,
	    &srcX, &srcY,
	    &maskX, &maskY,
	    &dstX, &dstY,
	    &width, &height);
    }

    psbAccelCompositeHelper(&pPsb->cb,
	srcX, srcY, 0, 0, dstX, dstY,
	1, 1, width, height,
	tdc->sOffset, 0, tdc->dOffset,
	tdc->sMode, tdc->mMode, tdc->dMode,
	tdc->sStride, 8, tdc->dStride,
	tdc->fixPat, tdc->cmd, 0, 0, FALSE);
}

/*
 * Build the composite format hash table psbCompFormats from the static
 * psbFormats table.
 *
 * Each psbFormats row is read as:
 *   [0] picture format (also the hash key)
 *   [1] destination format, [2] pattern format, [3] source format
 *   [4] destination supported, [5] pattern supported, [6] source supported
 *
 * A slot with a zero pictFormat counts as free. Two formats hashing to the
 * same slot is treated as a programming error and aborts the server.
 */
static void
psbInitComposite(void)
{
    int i;

    /* Mark every hash slot as unused. */
    for (i = 0; i < PSB_FMT_HASH_SIZE; ++i)
	psbCompFormats[i].pictFormat = 0;

    for (i = 0; i < PSB_NUM_COMP_FORMATS; ++i) {
	unsigned pictFmt = psbFormats[i][0];
	unsigned slot = PSB_FMT_HASH(pictFmt);
	PsbFormatPointer entry = &psbCompFormats[slot];

	if (entry->pictFormat)
	    FatalError("Bad composite format hash function.\n");

	entry->pictFormat = pictFmt;

	/* Hardware format codes for each role. */
	entry->dstFormat = psbFormats[i][1];
	entry->patFormat = psbFormats[i][2];
	entry->srcFormat = psbFormats[i][3];

	/* Which roles this format may be used in. */
	entry->dstSupported = (psbFormats[i][4] != 0);
	entry->patSupported = (psbFormats[i][5] != 0);
	entry->srcSupported = (psbFormats[i][6] != 0);
    }
}

/*
 * Prepare a pixmap for CPU access.
 *
 * The pixmap's virtual address is computed from its EXA offset and the
 * virtual base of the EXA buffer. If that address belongs to one of the
 * buffers tracked in pPsb->buffers, the buffer is mapped for writing when
 * index is EXA_PREPARE_DEST and for reading otherwise.
 *
 * Returns FALSE if mapping the buffer fails, TRUE otherwise (including
 * when the pixmap is not in a tracked buffer).
 */
Bool
psbExaPrepareAccess(PixmapPtr pPix, int index)
{
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbBufListPtr entry;
    unsigned mapFlags;
    void *addr;

    addr = (void *)(exaGetPixmapOffset(pPix) +
		    (unsigned long)mmBufVirtual(pPsb->pPsbExa->exaBuf.buf));

    entry = psbInBuffer(&pPsb->buffers, addr);
    if (!entry)
	return TRUE;

    if (index == EXA_PREPARE_DEST)
	mapFlags = DRM_BO_FLAG_WRITE;
    else
	mapFlags = DRM_BO_FLAG_READ;

    /*
     * The virtual address of the pixmap is already known, so the map call
     * is used purely for synchronization: it makes sure the hardware has
     * finished rendering to this buffer.
     */
    return entry->buf->man->mapBuf(entry->buf, mapFlags, 0) ? FALSE : TRUE;
}

/*
 * Finish CPU access to a pixmap.
 *
 * Looks up the tracked buffer containing the pixmap's virtual address in
 * the same way as psbExaPrepareAccess() and unmaps it. The result of the
 * unmap call is deliberately ignored. index is unused.
 */
void psbExaFinishAccess(PixmapPtr pPix, int index)
{
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
    PsbPtr pPsb = psbPTR(pScrn);
    PsbBufListPtr entry;
    void *addr;

    addr = (void *)(exaGetPixmapOffset(pPix) +
		    (unsigned long)mmBufVirtual(pPsb->pPsbExa->exaBuf.buf));

    entry = psbInBuffer(&pPsb->buffers, addr);
    if (!entry)
	return;

    (void) entry->buf->man->unMapBuf(entry->buf);
}

/*
 * Allocate and register the EXA acceleration state for a screen.
 *
 * Steps, in order:
 *   1. allocate the driver-private PsbExa record and the EXA driver record,
 *   2. allocate the EXA buffers (psbExaAllocBuffers),
 *   3. fill in the EXA driver record and register it with exaDriverInit(),
 *   4. reserve offscreen space for the sync marker and the bounce buffer,
 *   5. reset the sync marker to 0,
 *   6. build the composite format table unless this is a secondary head.
 *
 * Returns the new PsbExa record, or NULL on failure after handing whatever
 * was set up so far to psbExaClose().
 *
 * NOTE(review): if the very first allocation fails, psbExaClose() is called
 * with a NULL record - presumably it tolerates that; confirm.
 */
PsbExaPtr
psbExaInit(ScrnInfoPtr pScrn)
{
    PsbPtr pPsb = psbPTR(pScrn);
    PsbExaPtr pPsbExa;
    ExaDriverPtr pExa;
    unsigned long markerVirtual;

    pPsbExa = xcalloc(sizeof(*pPsbExa), 1);
    if (!pPsbExa)
	goto out_err;

    pExa = exaDriverAlloc();
    pPsbExa->pExa = pExa;
    if (!pExa)
	goto out_err;

    if (!psbExaAllocBuffers(pScrn, pPsbExa))
	goto out_err;

    memset(pExa, 0, sizeof(*pExa));

    /* Interface version and capabilities. */
    pExa->exa_major = 2;
    pExa->exa_minor = 2;
    pExa->flags = EXA_OFFSCREEN_PIXMAPS;

    /* Memory layout: the whole EXA buffer is available as offscreen. */
    pExa->memoryBase = mmBufVirtual(pPsbExa->exaBuf.buf);
    pExa->memorySize = mmBufSize(pPsbExa->exaBuf.buf);
    pExa->offScreenBase = 0;

    /* Alignment and coordinate limits. */
    pExa->pixmapOffsetAlign = 8;
    pExa->pixmapPitchAlign = 4;
    pExa->maxX = 4095;
    pExa->maxY = 4095;

    /* Synchronization. */
    pExa->MarkSync = psbAccelMarkSync;
    pExa->WaitMarker = psbAccelWaitMarker;

    /* Solid fill. */
    pExa->PrepareSolid = psbExaPrepareSolid;
    pExa->Solid = psbExaSolid;
    pExa->DoneSolid = psbExaDoneSolid;

    /* Copy; shares the "done" hook with solid fill. */
    pExa->PrepareCopy = psbExaPrepareCopy;
    pExa->Copy = psbExaCopy;
    pExa->DoneCopy = psbExaDoneSolid;

    /* Composite; shares the "done" hook with solid fill. */
    pExa->CheckComposite = psbExaCheckComposite;
    pExa->PrepareComposite = psbExaPrepareComposite;
    pExa->Composite = psbExaComposite;
    pExa->DoneComposite = psbExaDoneSolid;

    /* CPU access to pixmaps. */
    pExa->PixmapIsOffscreen = psbExaPixmapIsOffscreen;
    pExa->PrepareAccess = psbExaPrepareAccess;
    pExa->FinishAccess = psbExaFinishAccess;

    if (!exaDriverInit(pScrn->pScreen, pExa))
	goto out_err;

    /* 64 bytes, 64-byte aligned, for the sync marker. */
    pPsbExa->markerSpace =
	exaOffscreenAlloc(pScrn->pScreen, 64, 64, TRUE, NULL, NULL);
    if (!pPsbExa->markerSpace)
	goto out_err;

    pPsbExa->bounce =
	exaOffscreenAlloc(pScrn->pScreen, PSB_EXA_BOUNCE_SIZE, 8, TRUE,
	NULL, NULL);
    if (!pPsbExa->bounce)
	goto out_err;

    /* CPU-visible address of the marker inside the EXA buffer. */
    markerVirtual = (unsigned long)mmBufVirtual(pPsbExa->exaBuf.buf) +
	pPsbExa->markerSpace->offset;
    pPsbExa->markerAddr = (volatile CARD32 *)markerVirtual;
    pPsbExa->curMarker = 0;
    *pPsbExa->markerAddr = pPsbExa->curMarker;

    if (!pPsb->secondary)
	psbInitComposite();

    return pPsbExa;

  out_err:
    psbExaClose(pPsbExa, pScrn->pScreen);

    return NULL;
}
