/*	$NetBSD: lvm.c,v 1.62.2.1 1999/11/05 07:39:39 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * "Concatenated" disk driver.
 *
 * Dynamic configuration and disklabel support by:
 *	Jason R. Thorpe <thorpej@nas.nasa.gov>
 *	Numerical Aerodynamic Simulation Facility
 *	Mail Stop 258-6
 *	NASA Ames Research Center
 *	Moffett Field, CA 94035
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/queue.h>

#include <dev/lvmvar.h>

/*
 * LVMDEBUG is defined unconditionally here, which in turn forces DEBUG on
 * for this file, so the lvmdebug-controlled tracing is always compiled in.
 */
#define LVMDEBUG
#if defined(LVMDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
/*
 * Bit flags tested against lvmdebug to select which traces are printed.
 * LVMDB_FOLLOW gates the function-entry traces and LVMDB_IO the
 * per-buffer I/O details.
 */
#define LVMDB_FOLLOW	0x01
#define LVMDB_INIT	0x02
#define LVMDB_IO		0x04
#define LVMDB_LABEL	0x08
#define LVMDB_VNODE	0x10
/* Trace mask; 0 means all tracing is off. */
int lvmdebug = 0x0;
#endif

/*
 * Clamp a read-ahead value: when `a' lies outside the range
 * [LVM_MIN_READ_AHEAD, LVM_MAX_READ_AHEAD] it is reset to
 * LVM_MAX_READ_AHEAD.
 *
 * `a' must be a modifiable lvalue.  It is evaluated more than once, so
 * it must not have side effects.  The expansion is a bare `if' statement
 * that carries its own trailing semicolon (unchanged from the original,
 * so call sites written with or without a semicolon keep compiling);
 * do not use it as the unbraced body of an if/else.
 */
#define	LVM_CORRECT_READ_AHEAD(a) \
   if      ((a) < LVM_MIN_READ_AHEAD || \
             (a) > LVM_MAX_READ_AHEAD) (a) = LVM_MAX_READ_AHEAD;

static void lvm_init_vars(struct lvm_softc *);

static int do_vg_create(int, void *);
static int do_vg_remove(int);
static int do_lv_create(int, char *, lv_t *);
static int do_lv_remove(int, char *, int);
static int do_lv_extend_reduce(int, char *, lv_t *);
#ifdef LVM_GET_INODE
static struct inode *lvm_get_inode(int);
#endif

/* volume group descriptor area pointers */
static vg_t *vg[ABS_MAX_VG + 1];
static pv_t *pvp = NULL;
static lv_t *lvp = NULL;
static pe_t *pep = NULL;
static pe_t *pep1 = NULL;

/* map from block minor number to VG and LV numbers */
/*
 * One vg_lv_map[] entry.  lvm_init_vars() marks every entry as unmapped
 * by storing ABS_MAX_VG in vg_number and -1 in lv_number.
 */
typedef struct {
	/* volume group number; ABS_MAX_VG while unmapped */
	int vg_number;
	/* logical volume number; -1 while unmapped */
	int lv_number;
} vg_lv_map_t;
static vg_lv_map_t vg_lv_map[ABS_MAX_LV];

/* Request structures (lvm_chr_ioctl()) */
static pv_change_req_t pv_change_req;
static pv_flush_req_t pv_flush_req;
static pv_status_req_t pv_status_req;
static pe_lock_req_t pe_lock_req;
static le_remap_req_t le_remap_req;
static lv_req_t lv_req;

#ifdef LVM_TOTAL_RESET
static int lvm_reset_spindown = 0;
#endif

/*
 * Driver-wide state.
 */
static char pv_name[NAME_LEN];
/* static char rootvg[NAME_LEN] = { 0, }; */
static uint lv_open = 0;
static const char *const lvm_name = LVM_NAME;
/*
 * Pid of the process holding the LVM lock (taken with LVM_LOCK_LVM),
 * or 0 when nobody holds it.  Released in lvmclose() when the holder
 * closes the LVM_CHAR_MAJOR device.
 */
static int lock = 0;
/*  static int loadtime = 0; */
static uint vg_count = 0;
/* Number of outstanding opens of the LVM_CHAR_MAJOR device. */
static long lvm_chr_open_count = 0;
/* Value handed back by the LVM_GET_IOP_VERSION ioctl. */
static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
/* Sleep/wakeup channel for processes waiting on `lock'. */
static int lvm_wait;
/*
 * Sleep/wakeup channel for I/O that hit a physical extent locked via
 * PE_LOCK_UNLOCK; woken when the extent is unlocked.
 */
static int lvm_map_wait;

/* Held around the test-and-set of `lock' in the LVM_LOCK_LVM ioctl. */
static spinlock_t lvm_lock;


static struct disklabel dk_label;

/*  static struct disklabel lvm_disklabel[MAX_LV]; */

/* XXX end */

/*
 * Linux compatibility defines.
 *
 * These let code carried over from the Linux LVM driver build unchanged:
 *  - printk() becomes printf(); the KERN_* log-level prefixes expand to
 *    nothing, so they simply disappear from the argument list.
 *  - vmalloc()/vfree() map to malloc()/free() with type M_DEVBUF;
 *    vmalloc() uses M_WAITOK.
 *  - copy_from_user(dst, src, len) and copy_to_user(dst, src, len) map to
 *    copyin()/copyout(); note the first two arguments are swapped in the
 *    expansion.
 *  - fsync_dev() and invalidate_buffers() expand to nothing (not yet
 *    implemented), so calls to them are no-ops.
 */
#define printk printf
#define KERN_EMERG
#define KERN_CRIT
#define KERN_WARNING
#define KERN_DEBUG
#define vmalloc(a1) malloc(a1, M_DEVBUF, M_WAITOK)
#define vfree(a1) free(a1, M_DEVBUF)
/* #define copy_from_user(a1,a2,a3) memcpy(a1,a2,a3) */
/*  static inline int copy_from_user (void *, void *, int); */
/*  #define copy_to_user(a1,a2,a3) memcpy(a1,a2,a3) */
/*  #define copy_to_user(a1,a2,a3) copy_from_user(a1,a2,a3) */
#define copy_from_user(a1,a2,a3) copyin(a2,a1,a3)
#define copy_to_user(a1,a2,a3) copyout(a2,a1,a3)
#define fsync_dev(a1)  /* XXX todo: fsync_dev(a1) */
#define invalidate_buffers(a1)  /* XXX todo: invalidate_buffers(a1) */
/* Linux BLOCK_SIZE is expressed as the native DEV_BSIZE here. */
#define BLOCK_SIZE DEV_BSIZE

/*  #define	lvmunit(x)	DISKUNIT(x) */

/*
 * Per-component I/O descriptor, one for each piece an original request
 * is split into.  cb_buf must remain the first member: lvmiodone() is
 * handed a pointer to it and casts that pointer back to struct lvmbuf *.
 */
struct lvmbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct buf	*cb_obp;	/* ptr. to original I/O buf */
	struct lvm_softc *cb_sc;	/* pointer to lvm softc */
	int		cb_comp;	/* target component */
	SIMPLEQ_ENTRY(lvmbuf) cb_q;	/* fifo of component buffers */
};

/*
 * Component buffers come from the per-softc pool sc_cbufpool.
 * LVM_GETBUF() allocates with PR_NOWAIT; lvmbuffer() checks its result
 * for NULL.  LVM_PUTBUF() returns a buffer to the same pool.
 */
#define	LVM_GETBUF(cs)		pool_get(&(cs)->sc_cbufpool, PR_NOWAIT)
#define	LVM_PUTBUF(cs, cbp)	pool_put(&(cs)->sc_cbufpool, cbp)

/*  #define LVMLABELDEV(dev)	\ */
/*  	(MAKEDISKDEV(major((dev)), lvmunit((dev)), RAW_PART)) */

/* called by main() at boot time */
void	lvmattach __P((int));

/* called by biodone() at interrupt time */
void	lvmiodone __P((struct buf *));
int	lvmsize __P((dev_t));

static	int lvmboundscheck(struct buf *, lv_t *);
static	void lvmstart __P((struct lvm_softc *, struct buf *));
/*  static	void lvminterleave __P((struct lvm_softc *)); */
static	void lvmintr __P((struct lvm_softc *, struct buf *));
/*  static	int lvminit __P((struct lvm_softc *, char **, struct vnode **, */
/*  	    struct proc *)); */
/*  static	int lvmlookup __P((char *, struct proc *p, struct vnode **)); */
static	struct lvmbuf *lvmbuffer __P((struct lvm_softc *, lv_t *,
		struct buf *, daddr_t, caddr_t, long));
/*  static	void lvmgetdefaultlabel __P((struct lvm_softc *, struct disklabel *)); */
/*  static	void lvmgetdisklabel __P((dev_t)); */
/*  static	void lvmmakedisklabel __P((struct lvm_softc *)); */
static	void lvmfakelabel __P((struct disklabel *, int));
static	pv_t *lvmdevtopv (dev_t, vg_t *);

/* Non-private for the benefit of libkvm. */
struct	lvm_softc *lvm_softc;
int	numlvm = 0;

/*
 * Called by main() during pseudo-device attachment.  All we need
 * to do is allocate enough space for devices to be configured later.
 *
 * num is the requested number of units; it is currently forced to 1
 * (see the FIXME below).  If the softc array cannot be allocated a
 * warning is printed and numlvm stays 0, so the open/close/read/write/
 * ioctl entry points, which test `unit >= numlvm', fail with ENXIO.
 */
void
lvmattach(num)
	int num;
{
	struct lvm_softc *cs;
	int i;

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("lvmattach: count <= 0");
#endif
		return;
	}

	num = 1;	/* FIXME chris set to 1 for now */

	lvm_softc = (struct lvm_softc *)malloc(num * sizeof(struct lvm_softc),
	    M_DEVBUF, M_NOWAIT);
	if (lvm_softc == NULL) {
		/* Nothing was allocated, so there is nothing to free. */
		printf("WARNING: no memory for lvm disks\n");
		return;
	}
	numlvm = num;
	bzero(lvm_softc, num * sizeof(struct lvm_softc));

	/* Initialize per-softc structures. */
	for (i = 0; i < num; i++) {
		cs = &lvm_softc[i];
		sprintf(cs->sc_xname, "lvm%d", i);
		cs->sc_dkdev.dk_name = cs->sc_xname;
		lockinit(&cs->sc_lock, PRIBIO, "lvmlk", 0, 0);
		/* Resets the driver-wide VG/LV tables and the PE lock. */
		lvm_init_vars(cs);

		/* Attach the disk. */
		disk_attach(&cs->sc_dkdev);

		/* Initialize the component buffer pool. */
		pool_init(&cs->sc_cbufpool, sizeof(struct lvmbuf), 0,
		    0, 0, "lvmpl", 0, NULL, NULL, M_DEVBUF);
	}

	/* Lock protecting the LVM_LOCK_LVM owner pid. */
	simple_lock_init (&lvm_lock);

}

static void
lvm_init_vars(cs)
	struct lvm_softc *cs;
{
	int v;

/*  	loadtime = CURRENT_TIME; */

	pe_lock_req.lock = UNLOCK_PE;
	pe_lock_req.data.lv_dev = \
	    pe_lock_req.data.pv_dev = \
	    pe_lock_req.data.pv_offset = 0;

	/* Initialize VG pointers */
	for (v = 0; v <= ABS_MAX_VG; v++)
		vg[v] = NULL;

	/* Initialize LV -> VG association */
	for (v = 0; v < ABS_MAX_LV; v++) {
		/* index ABS_MAX_VG never used for real VG */
		vg_lv_map[v].vg_number = ABS_MAX_VG;
		vg_lv_map[v].lv_number = -1;
	}

	return;
}

/* ARGSUSED */
int
lvmopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = 0; /*  lvmunit(dev); */
	int minor = MINOR(dev);
	struct lvm_softc *cs;
	int error = 0;

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmopen(0x%x, 0x%x)\n", dev, flags);
#endif
	if (unit >= numlvm)
		return (ENXIO);
	cs = &lvm_softc[unit];

/*  	if ((error = lockmgr(&cs->sc_lock, LK_EXCLUSIVE, NULL)) != 0) */
/*  		return (error); */

	if (major(dev) == LVM_CHAR_MAJOR) {
		lvm_chr_open_count++;
	} else {
		if (vg[VG_BLK(minor)] != NULL &&
		    (vg[VG_BLK(minor)]->vg_status & VG_ACTIVE) &&
		    vg[VG_BLK(minor)]->lv[LV_BLK(minor)] != NULL &&
		    LV_BLK(minor) >= 0 &&
		    LV_BLK(minor) < vg[VG_BLK(minor)]->lv_max) {
			
			/* Check parallel LV spindown (LV remove) */
			if (vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_SPINDOWN) {
				error = EPERM;
				goto done;
			}

			/* Check inactive LV and open for read/write */
			if ((flags & FWRITE) != 0) {
				if (!(vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_ACTIVE)) {
					error = EPERM;
					goto done;
				}
				if (!(vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access & LV_WRITE)) {
					error = EACCES;
					goto done;
				}
			}
			if (vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 0)
				vg[VG_BLK(minor)]->lv_open++;
			vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open++;
			
#ifdef MODULE
			MOD_INC_USE_COUNT;
#endif
			
#ifdef DEBUG_LVM_BLK_OPEN
			printk(KERN_DEBUG
			       "%s -- lvm_blk_open MINOR: %d  VG#: %d  LV#: %d  size: %d\n",
			       lvm_name, minor, VG_BLK(minor), LV_BLK(minor),
			       vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size);
#endif
			
		} else {
			error = ENXIO;
			goto done;
		}
	}

 done:
/*  	(void) lockmgr(&cs->sc_lock, LK_RELEASE, NULL); */
	return (error);
}

/* ARGSUSED */
int
lvmclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = 0; /*  lvmunit(dev); */
	int minor = MINOR(dev);
	struct lvm_softc *cs;
/*  	int error = 0; */

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmclose(0x%x, 0x%x)\n", dev, flags);
#endif

	if (unit >= numlvm)
		return (ENXIO);
	cs = &lvm_softc[unit];

/*  	if ((error = lockmgr(&cs->sc_lock, LK_EXCLUSIVE, NULL)) != 0) */
/*  		return (error); */

	if (major(dev) == LVM_CHAR_MAJOR) {
#ifdef LVM_TOTAL_RESET
		if (lvm_reset_spindown > 0) {
			lvm_reset_spindown = 0;
			lvm_chr_open_count = 1;
		}
#endif
		if (lvm_chr_open_count > 0)
			lvm_chr_open_count--;
		if (lock == p->p_pid) {
			lock = 0;
			wakeup (&lvm_wait);
		}
	} else {
		if (fmt == S_IFBLK) {
/*  			sync_dev(inode->i_rdev); */
		}
#if 0
		if (vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 1)
		  vg[VG_BLK(minor)]->lv_open--;
		vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open--;
#endif
		vg[VG_BLK(minor)]->lv_open--;
		vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open = 0;
	}
	
/*  	(void) lockmgr(&cs->sc_lock, LK_RELEASE, NULL); */
	return (0);
}

void
lvmstrategy(bp)
	register struct buf *bp;
{
	register int unit = 0; /*  lvmunit(bp->b_dev); */
	register struct lvm_softc *cs = &lvm_softc[unit];
	register int s;
	int minor = MINOR(bp->b_dev);
	lv_t *lv;

	if (VG_BLK(minor) > ABS_MAX_VG || vg[VG_BLK(minor)] == NULL ||
	    LV_BLK(minor) > ABS_MAX_LV ||
	    vg[VG_BLK(minor)]->lv[LV_BLK(minor)] == NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)];

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmstrategy(%p): unit %d\n", bp, unit);
#endif
#if 0
	if ((cs->sc_flags & LVMF_INITED) == 0) {
#ifdef DEBUG
		if (lvmdebug & LVMDB_FOLLOW)
			printf("lvmstrategy: unit %d: not inited\n", unit);
#endif
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}
#endif

	if (!(lv->lv_status & LV_ACTIVE)) {
/*  		printk(KERN_ALERT */
/*  		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n", */
/*  		       lvm_name, lv->lv_name); */
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	if (!(bp->b_flags & B_READ) && !(lv->lv_access & LV_WRITE)) {
/*  		printk(KERN_CRIT */
/*  		    "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", */
/*  		       lvm_name, lv->lv_name); */
		bp->b_error = EACCES;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

/*  	lp = cs->sc_dkdev.dk_label; */

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */
/*  	wlabel = cs->sc_flags & (LVMF_WLABEL|LVMF_LABELLING); */
/*  	if (DISKPART(bp->b_dev) != RAW_PART) */
/*  		if (bounds_check_with_label(bp, lp, wlabel) <= 0) */
/*  			goto done; */

	if (lvmboundscheck (bp, lv) <= 0)
		goto done;

	bp->b_resid = bp->b_bcount;

	/*
	 * "Start" the unit.
	 */
	s = splbio();
	lvmstart(cs, bp);
	splx(s);
	return;
done:
	biodone(bp);
}

/*
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the logical volume.  Adjust the transfer
 * if needed, and signal errors or early completion.
 *
 * Returns 1 if the (possibly truncated) transfer should proceed.
 * Returns 0 if the caller should complete the buffer right away:
 * either the request starts exactly at the end of the LV (EOF, with
 * b_resid set to b_bcount and no error), or it starts at a negative
 * block or beyond the end (B_ERROR set, b_error = EINVAL).
 */
static int
lvmboundscheck(bp, lv)
	struct buf *bp;
	lv_t *lv;
{
	int sz;

	/*
	 * A negative block number is never valid.  Without this test it
	 * would slip past the size arithmetic below and end up as a
	 * negative extent index when the request is mapped.
	 */
	if (bp->b_blkno < 0) {
		bp->b_error = EINVAL;
		goto bad;
	}

	/* Length of the request in DEV_BSIZE blocks, rounded up. */
	sz = howmany(bp->b_bcount, DEV_BSIZE);

	if (bp->b_blkno + sz > lv->lv_size) {
		sz = lv->lv_size - bp->b_blkno;
		if (sz == 0) {
			/* If exactly at end of disk, return EOF. */
			bp->b_resid = bp->b_bcount;
			goto done;
		}
		if (sz < 0) {
			/* If past end of disk, return EINVAL. */
			bp->b_error = EINVAL;
			goto bad;
		}
		/* Otherwise, truncate request. */
		bp->b_bcount = sz << DEV_BSHIFT;
	}

	/*
	 * Calculate a "cylinder" for disksort to order transfers with.
	 * b_cylin is an alias for b_resid from here to the end of the
	 * file; lvmstrategy() overwrites b_resid after this function
	 * returns.
	 */
#define	b_cylin	b_resid
	bp->b_cylin = bp->b_blkno >> 20;
	return (1);

bad:
	bp->b_flags |= B_ERROR;
done:
	return (0);
}

static void
lvmstart(cs, bp)
	register struct lvm_softc *cs;
	register struct buf *bp;
{
	register long bcount, rcount;
	struct lvmbuf *cbp;
	caddr_t addr;
	daddr_t bn;
/*  	struct partition *pp; */
	SIMPLEQ_HEAD(, lvmbuf) cbufq;
	int minor = MINOR (bp->b_dev);
	lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)];

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmstart(%p, %p)\n", cs, bp);
#endif

	/* Instrumentation. */
	disk_busy(&cs->sc_dkdev);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	bn = bp->b_blkno;
/*  	if (DISKPART(bp->b_dev) != RAW_PART) { */
/*  		pp = &cs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; */
/*  		bn += pp->p_offset; */
/*  	} */

	/*
	 * Allocate the component buffers.
	 */
	SIMPLEQ_INIT(&cbufq);
	addr = bp->b_data;
	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
		cbp = lvmbuffer(cs, lv, bp, bn, addr, bcount);
		if (cbp == NULL) {
			/* Free the already allocated component buffers. */
			while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
				SIMPLEQ_REMOVE_HEAD(&cbufq, cbp, cb_q);
				LVM_PUTBUF(cs, cbp);
			}

			/* Notify the upper layer we are out of memory. */
			bp->b_error = ENOMEM;
			bp->b_flags |= B_ERROR;
			biodone(bp);
			disk_unbusy(&cs->sc_dkdev, 0);
			return;
		}
		SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q);
		rcount = cbp->cb_buf.b_bcount;
		bn += btodb(rcount);
		addr += rcount;
	}

	/* Now fire off the requests. */
	while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&cbufq, cbp, cb_q);
		if ((cbp->cb_buf.b_flags & B_READ) == 0)
			cbp->cb_buf.b_vp->v_numoutput++;
		VOP_STRATEGY(&cbp->cb_buf);
	}
}

/*
 * Build a component buffer header.
 */
static struct lvmbuf *
lvmbuffer(cs, lv, bp, bn, addr, bcount)
	register struct lvm_softc *cs;
	lv_t *lv;
	struct buf *bp;
	daddr_t bn;
	caddr_t addr;
	long bcount;
{
	register struct lvmbuf *cbp;
	register daddr_t rbn, rcount, rboff;
	register pv_t *rpv;
	register u_int64_t cbc;
	int minor = MINOR (bp->b_dev);
	int index;

#ifdef DEBUG
	if (lvmdebug & LVMDB_IO)
		printf("lvmbuffer(%p, %p, %d, %p, %ld)\n",
		       cs, bp, bn, addr, bcount);
#endif

      lvm_second_remap:

	/* linear mapping */
	if (lv->lv_stripes < 2) {
		index = bn / vg[VG_BLK(minor)]->pe_size;	/* get the index */
		rboff = (bn % vg[VG_BLK(minor)]->pe_size);
		rbn = lv->lv_current_pe[index].pe + rboff;
		rpv = lv->lv_current_pe[index].pv;
		rcount = vg[VG_BLK(minor)]->pe_size - rboff;

#ifdef DEBUG_MAP
		printk(KERN_DEBUG
		       "lv_current_pe[%d].pe: %d  rdev: %02d:%02d  rsector:%d rcount:%d\n",
		       index,
		       lv->lv_current_pe[index].pe,
		       MAJOR(rpv->pv_dev),
		       MINOR(rpv->pv_dev),
		       (uint)rbn, rcount);
#endif

		/* striped mapping */
	} else {
		ulong stripe_index;
		ulong stripe_length;

		stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes;
		stripe_index = (bn % stripe_length) / lv->lv_stripesize;
		index = bn / stripe_length +
		    (stripe_index % lv->lv_stripes) *
		    (lv->lv_allocated_le / lv->lv_stripes);
		rbn = lv->lv_current_pe[index].pe +
		    (bn % stripe_length) -
		    (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
		    stripe_index / lv->lv_stripes *
		    (lv->lv_stripes - 1) * lv->lv_stripesize;
		rcount = lv->lv_stripesize - (bn % lv->lv_stripesize);
		rpv = lv->lv_current_pe[index].pv;
	}

	/* handle physical extents on the move */
	if (pe_lock_req.lock == LOCK_PE) {
		if (rpv->pv_dev == pe_lock_req.data.pv_dev &&
		    rbn >= pe_lock_req.data.pv_offset &&
		    rbn < (pe_lock_req.data.pv_offset +
				vg[VG_BLK(minor)]->pe_size)) {
/*  			sleep_on(&lvm_map_wait); */
			sleep(&lvm_map_wait, PRIBIO);
			goto lvm_second_remap;
		}
	}

	/* statistic */
	if (bp->b_flags & B_READ)
		lv->lv_current_pe[index].reads++;
	else
		lv->lv_current_pe[index].writes++;

	/* snapshot volume exception handling on physical address base */
				/* TODO */
#if 0
	if (lv->lv_exception != NULL) {
		simple_lock(&lv->lv_exception->lv_snapshot_lock);
		if (lv->lv_exception->lv_remap_ptr <= lv->lv_exception->lv_remap_end) {
			if (lv->lv_access & LV_SNAPSHOT_ORG) {
				/* for write, check if it is neccessary to
				   create a new remapped block */
				if (!(bp->b_flags & B_READ))
				{
					rdev_sav = *rdev;
					rsector_sav = *rsector;
					if (lvm_snapshot_remap_block(rsector, rdev, minor) == FALSE) {
						/* create a new mapping */
						lvm_snapshot_remap_new_block(rsector, rdev, minor);
					}
					*rdev = rdev_sav;
					*rsector = rsector_sav;
				}
			} else
				lvm_snapshot_remap_block(rsector, rdev, minor);
		}
		simple_unlock(&lv->lv_exception->lv_snapshot_lock);
	}
#endif

	/*
	 * Fill in the component buf structure.
	 */
	cbp = LVM_GETBUF(cs);
	if (cbp == NULL)
		return (NULL);
	cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
	cbp->cb_buf.b_iodone = lvmiodone;
	cbp->cb_buf.b_proc = bp->b_proc;
	cbp->cb_buf.b_dev = rpv->pv_dev;
	cbp->cb_buf.b_blkno = rbn;
	cbp->cb_buf.b_data = addr;
	cbp->cb_buf.b_vp = rpv->vnode;
	cbc = dbtob((u_int64_t)rcount);
	cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount;

	/*
	 * context for lvmiodone
	 */
	cbp->cb_obp = bp;
	cbp->cb_sc = cs;

#ifdef DEBUG2
	if (lvmdebug & LVMDB_IO)
		printf(" dev 0x%x(u%lu): cbp %p bn %d addr %p bcnt %ld\n",
		    ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp,
		    cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
		    cbp->cb_buf.b_bcount);
#endif

	return (cbp);
}

/*
 * Complete an original request once all of its components are done.
 * On error nothing is reported as transferred: b_resid is reset to the
 * full byte count before the disk statistics are updated and the
 * request is handed back through biodone().
 */
static void
lvmintr(cs, bp)
	register struct lvm_softc *cs;
	register struct buf *bp;
{

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmintr(%p, %p)\n", cs, bp);
#endif

	/* A failed request counts as having moved no data at all. */
	if ((bp->b_flags & B_ERROR) != 0)
		bp->b_resid = bp->b_bcount;

	/* Account for the bytes actually transferred. */
	disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid));

	/* Request is done for better or worse, wakeup the top half. */
	biodone(bp);
}

/*
 * Completion handler for a component buffer; called at interrupt time.
 *
 * Propagates a component error to the original request, returns the
 * component buffer to the pool and subtracts its byte count from the
 * original's b_resid.  When b_resid reaches zero every component has
 * finished and lvmintr() completes the original request.
 */
void
lvmiodone(vbp)
	struct buf *vbp;
{
	/* vbp points at the cb_buf member, which is first in lvmbuf. */
	struct lvmbuf *comp = (struct lvmbuf *) vbp;
	struct lvm_softc *sc = comp->cb_sc;
	struct buf *obp = comp->cb_obp;
	int nbytes, spl;

	spl = splbio();
#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmiodone(%p)\n", comp);
	if (lvmdebug & LVMDB_IO) {
		printf("lvmiodone: bp %p bcount %ld resid %ld\n",
		       obp, obp->b_bcount, obp->b_resid);
		printf(" dev 0x%x(u%d), cbp %p bn %d addr %p bcnt %ld\n",
		       comp->cb_buf.b_dev, comp->cb_comp, comp,
		       comp->cb_buf.b_blkno, comp->cb_buf.b_data,
		       comp->cb_buf.b_bcount);
	}
#endif

	if ((comp->cb_buf.b_flags & B_ERROR) != 0) {
		/* Hand the error up; fall back to EIO if none was set. */
		obp->b_flags |= B_ERROR;
		if (comp->cb_buf.b_error != 0)
			obp->b_error = comp->cb_buf.b_error;
		else
			obp->b_error = EIO;

		printf("%s: error %d on component %d\n",
		       sc->sc_xname, obp->b_error, comp->cb_comp);
	}

	/* Grab the size before the component goes back to the pool. */
	nbytes = comp->cb_buf.b_bcount;
	LVM_PUTBUF(sc, comp);

	/*
	 * If all done, "interrupt".
	 */
	obp->b_resid -= nbytes;
	if (obp->b_resid < 0)
		panic("lvmiodone: count");
	if (obp->b_resid == 0)
		lvmintr(sc, obp);
	splx(spl);
}

/* ARGSUSED */
int
lvmread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = 0; /*  lvmunit(dev); */
	struct lvm_softc *cs;
	int minor = MINOR(dev);
	lv_t *lv;

	if (VG_BLK(minor) > ABS_MAX_VG || vg[VG_BLK(minor)] == NULL ||
	    LV_BLK(minor) > ABS_MAX_LV ||
	    vg[VG_BLK(minor)]->lv[LV_BLK(minor)] == NULL)
		return (ENXIO);

	lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)];

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmread(0x%x, %p)\n", dev, uio);
#endif
	if (unit >= numlvm)
		return (ENXIO);
	cs = &lvm_softc[unit];

/*  	if ((cs->sc_flags & LVMF_INITED) == 0) */
/*  		return (ENXIO); */

	if (!(lv->lv_status & LV_ACTIVE)) {
/*  		printk(KERN_ALERT */
/*  		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n", */
/*  		       lvm_name, lv->lv_name); */
		return (ENXIO);
	}

	/*
	 * XXX: It's not clear that using minphys() is completely safe,
	 * in particular, for raw I/O.  Underlying devices might have some
	 * non-obvious limits, because of the copy to user-space.
	 */
	return (physio(lvmstrategy, NULL, dev, B_READ, minphys, uio));
}

/*
 * Raw write entry point.
 *
 * Checks that the addressed logical volume exists, is active and has
 * LV_WRITE access, then performs the transfer through physio() using
 * lvmstrategy().
 *
 * Returns ENXIO if the unit, VG or LV does not exist or the LV is not
 * active, EACCES if the LV lacks LV_WRITE; otherwise whatever physio()
 * returns.
 */
/* ARGSUSED */
int
lvmwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	struct lvm_softc *cs;
	lv_t *lv;
	int minor = MINOR(dev);
	int unit = 0; /*  lvmunit(dev); */

	/* Walk the VG and LV tables one step at a time. */
	if (VG_BLK(minor) >= ABS_MAX_VG)
		return (ENXIO);
	if (vg[VG_BLK(minor)] == NULL)
		return (ENXIO);
	if (LV_BLK(minor) >= ABS_MAX_LV)
		return (ENXIO);
	lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)];
	if (lv == NULL)
		return (ENXIO);

#ifdef DEBUG
	if (lvmdebug & LVMDB_FOLLOW)
		printf("lvmwrite(0x%x, %p)\n", dev, uio);
#endif
	if (unit >= numlvm)
		return (ENXIO);
	cs = &lvm_softc[unit];

	/* The LV must be active ... */
	if ((lv->lv_status & LV_ACTIVE) == 0)
		return (ENXIO);

	/* ... and must permit writing. */
	if ((lv->lv_access & LV_WRITE) == 0)
		return (EACCES);

	/*
	 * XXX: It's not clear that using minphys() is completely safe,
	 * in particular, for raw I/O.  Underlying devices might have some
	 * non-obvious limits, because of the copy to user-space.
	 */
	return (physio(lvmstrategy, NULL, dev, B_WRITE, minphys, uio));
}

int
lvmioctl(dev, cmd, data, flag, proc)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *proc;
{
	int unit = 0;		/* XXX fix? */
	int minor = minor (dev);
	int error = 0;
	struct lvm_softc *cs;
/*  	struct lvm_ioctl *lvmio = (struct lvm_ioctl *)data; */
	int extendable;
	ulong l, le, p, v;
	ulong size;
	void *arg = (void *)data;
#ifdef LVM_GET_INODE
	struct inode *inode_sav;
#endif
#ifdef LVM_GET_VNODE
	struct vnode *vnode_sav;
#endif
	lv_status_byname_req_t lv_status_byname_req;
	lv_status_byindex_req_t lv_status_byindex_req;
	lv_t lv;

	if (unit >= numlvm)
		return (ENXIO);
	cs = &lvm_softc[unit];

#ifdef LVM_TOTAL_RESET
	if (lvm_reset_spindown > 0)
		return (EACCES);
#endif

/*  	if ((error = lockmgr(&cs->sc_lock, LK_EXCLUSIVE, NULL)) != 0) */
/*  		return (error); */

#if 0
	switch (cmd) {
	case VG_CREATE:
		printf ("ioctl: VG_CREATE\n");
		break;
	case VG_REMOVE:
		printf ("ioctl: VG_REMOVE\n");
		break;
	case VG_EXTEND:
		printf ("ioctl: VG_EXTEND\n");
		break;
	case VG_REDUCE:
		printf ("ioctl: VG_REDUCE\n");
		break;
	case VG_STATUS:
		printf ("ioctl: VG_STATUS\n");
		break;
	case VG_STATUS_GET_COUNT:
		printf ("ioctl: VG_STATUS_GET_COUNT\n");
		break;
	case VG_STATUS_GET_NAMELIST:
		printf ("ioctl: VG_STATUS_GET_NAMELIST\n");
		break;
	case VG_SET_EXTENDABLE:
		printf ("ioctl: VG_SET_EXTENDABLE\n");
		break;
	case LV_CREATE:
		printf ("ioctl: LV_CREATE\n");
		break;
	case LV_REMOVE:
		printf ("ioctl: LV_REMOVE\n");
		break;
	case LV_ACTIVATE:
		printf ("ioctl: LV_ACTIVATE\n");
		break;
	case LV_DEACTIVATE:
		printf ("ioctl: LV_DEACTIVATE\n");
		break;
	case LV_EXTEND:
		printf ("ioctl: LV_EXTEND\n");
		break;
	case LV_REDUCE:
		printf ("ioctl: LV_REDUCE\n");
		break;
	case LV_STATUS_BYNAME:
		printf ("ioctl: LV_STATUS_BYNAME\n");
		break;
	case LV_STATUS_BYINDEX:
		printf ("ioctl: LV_STATUS_BYINDEX\n");
		break;
	case LV_SET_ACCESS:
		printf ("ioctl: LV_SET_ACCESS\n");
		break;
	case LV_SET_ALLOCATION:
		printf ("ioctl: LV_SET_ALLOCATION\n");
		break;
	case LV_SET_STATUS:
		printf ("ioctl: LV_SET_STATUS\n");
		break;
	case LE_REMAP:
		printf ("ioctl: LE_REMAP\n");
		break;
	case PV_STATUS:
		printf ("ioctl: PV_STATUS\n");
		break;
	case PV_CHANGE:
		printf ("ioctl: PV_CHANGE\n");
		break;
	case PV_FLUSH:
		printf ("ioctl: PV_FLUSH\n");
		break;
	case PE_LOCK_UNLOCK:
		printf ("ioctl: PE_LOCK_UNLOCK\n");
		break;
	case LVM_GET_IOP_VERSION:
		printf ("ioctl: LVM_GET_IOP_VERSION\n");
		break;
	case LVM_RESET:
		printf ("ioctl: LVM_RESET\n");
		break;
	case LVM_LOCK_LVM:
		printf ("ioctl: LVM_LOCK_LVM\n");
		break;
	case DIOCGDINFO:
		printf ("ioctl: DIOCGDINFO\n");
		break;
	case DIOCSDINFO:
		printf ("ioctl: DIOCSDINFO\n");
		break;
	case DIOCWDINFO:
		printf ("ioctl: DIOCWDINFO\n");
		break;
	case DIOCGPINFO:
		printf ("ioctl: DIOCGPINFO\n");
		break;
	case DIOCGPART:
		printf ("ioctl: DIOCGPART\n");
		break;
	case DIOCWLABEL:
		printf ("ioctl: DIOCWLABEL\n");
		break;
	case DIOCGDEFLABEL:
		printf ("ioctl: DIOCGDEFLABEL\n");
		break;
	default:
		printf ("ioctl: unknown %X\n", (uint)cmd);
		break;
	}
#endif

	switch (cmd) {
		
				/* these need an existing vg */
	case PE_LOCK_UNLOCK:
	case LE_REMAP:
	case VG_REMOVE:
	case VG_EXTEND:
	case VG_REDUCE:
	case VG_SET_EXTENDABLE:
	case VG_STATUS:
	case LV_CREATE:
	case LV_REMOVE:
	case LV_EXTEND:
	case LV_REDUCE:
	case LV_STATUS_BYNAME:
	case LV_STATUS_BYINDEX:
	case PV_CHANGE:
	case PV_STATUS:
		if (vg[VG_CHR(minor)] == NULL)
		{
			error = ENXIO;
			goto out;
		}
		break;

				/* these need an existing vg and lv */
	case LV_SET_ACCESS:	/* LV block ioctl */
	case LV_SET_STATUS:	/* LV block ioctl */
	case LV_SET_ALLOCATION:	/* LV block ioctl */
	case DIOCGDEFLABEL:	/* LV block ioctl */
	case DIOCGDINFO:	/* LV block ioctl */
	case DIOCGPART:
	case DIOCGPINFO:
	case DIOCWDINFO:
	case DIOCSDINFO:
	case DIOCWLABEL:
		if (VG_BLK(minor) >= ABS_MAX_VG ||
		    vg[VG_BLK(minor)] == NULL ||
		    LV_BLK(minor) >= ABS_MAX_LV ||
		    vg[VG_BLK(minor)]->lv[LV_BLK(minor)] == NULL)
		{
			error = ENXIO;
			goto out;
		}
		

				/* these need nothing */
	case VG_CREATE:
	case VG_STATUS_GET_COUNT:
	case VG_STATUS_GET_NAMELIST:
	case PV_FLUSH:
	default:
	}

	switch (cmd) {

	case LVM_LOCK_LVM:
	      lock_try_again:
	        simple_lock(&lvm_lock);
		if (lock != 0 && lock != proc->p_pid) {
#ifdef DEBUG_IOCTL
			printk(KERN_INFO "lvm_chr_ioctl: %s is locked by pid %d ...\n",
			       lvm_name, lock);
#endif
			simple_unlock(&lvm_lock);
/*  			interruptible_sleep_on(&lvm_wait); */
			if (tsleep (&lvm_wait, PRIBIO | PCATCH, "lvmlck", 0) == EINTR) {
				error = EINTR;
				goto out;
			}
#ifdef LVM_TOTAL_RESET
			if (lvm_reset_spindown > 0)
			{
			        error = EACCES;
				goto out;
			}
#endif
			goto lock_try_again;
		}
		lock = proc->p_pid;
		simple_unlock(&lvm_lock);
		error = 0;
		goto out;

		/* check lvm version to ensure driver/tools+lib interoperability */
	case LVM_GET_IOP_VERSION:
		*((ushort *)arg) = lvm_iop_version;
#if 0				/* copy going away */
		if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) != 0)
			return EFAULT;
#endif
		return 0;

#ifdef LVM_TOTAL_RESET
		/* lock reset function */
	case LVM_RESET:
		lvm_reset_spindown = 1;
		for (v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL) {
				do_vg_remove(v);
			}
		}

#ifdef MODULE
		while (GET_USE_COUNT(&__this_module) < 1)
			MOD_INC_USE_COUNT;
		while (GET_USE_COUNT(&__this_module) > 1)
			MOD_DEC_USE_COUNT;
#endif				/* MODULE */
		lock = 0;	/* release lock */
/*  		wake_up_interruptible(&lvm_wait); */
		wakeup (&lvm_wait);
		break;
#endif				/* LVM_TOTAL_RESET */


		/* lock/unlock i/o to a physical extent to move it to another
		   physical volume (move's done in user space's pvmove) */
	case PE_LOCK_UNLOCK:
		pe_lock_req = *((pe_lock_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&pe_lock_req, arg, sizeof(pe_lock_req_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		switch (pe_lock_req.lock) {
		case LOCK_PE:
			for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
				if (vg[VG_CHR(minor)]->pv[p] != NULL &&
				    pe_lock_req.data.pv_dev ==
				    vg[VG_CHR(minor)]->pv[p]->pv_dev)
					break;
			}

			if (p == vg[VG_CHR(minor)]->pv_max)
			{
				error = ENXIO;
				goto out;
			}

			pe_lock_req.lock = UNLOCK_PE;
			fsync_dev(pe_lock_req.data.lv_dev);
			pe_lock_req.lock = LOCK_PE;
			break;

		case UNLOCK_PE:
			pe_lock_req.lock = UNLOCK_PE;
			pe_lock_req.data.lv_dev = \
			    pe_lock_req.data.pv_dev = \
			    pe_lock_req.data.pv_offset = 0;
			wakeup(&lvm_map_wait);
			break;

		default:
			error = EINVAL;
		}

		break;


		/* remap a logical extent (after moving the physical extent) */
	case LE_REMAP:
		le_remap_req = *((le_remap_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&le_remap_req, arg,
				   sizeof(le_remap_req_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
			if (vg[VG_CHR(minor)]->lv[l] != NULL &&
			    strcmp(vg[VG_CHR(minor)]->lv[l]->lv_name,
				   le_remap_req.lv_name) == 0) {
				for (le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le;
				     le++) {
					if (vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev ==
					    le_remap_req.old_dev &&
					    vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe ==
					    le_remap_req.old_pe) {
						vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev =
						    le_remap_req.new_dev;
						vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pv =
							lvmdevtopv (le_remap_req.new_dev, vg[VG_CHR(minor)]);
						vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe =
						    le_remap_req.new_pe;
						error = 0;
						goto out;
					}
				}
				error = EINVAL;
				goto out;
			}
		}

		error = ENXIO;
		break;


		/* create a VGDA */
	case VG_CREATE:
		error = do_vg_create(minor, arg);
		break;

		/* remove an inactive VGDA */
	case VG_REMOVE:
		error = do_vg_remove(minor);
		break;

		/* extend a volume group */
	case VG_EXTEND:
		if (vg[VG_CHR(minor)]->pv_cur < vg[VG_CHR(minor)]->pv_max) {
			for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
				if (vg[VG_CHR(minor)]->pv[p] == NULL) {
					if ((vg[VG_CHR(minor)]->pv[p] =
					vmalloc(sizeof(pv_t))) == NULL) {
						printk(KERN_CRIT
						       "%s -- VG_EXTEND: vmalloc error PV\n", lvm_name);
						error = ENOMEM;
						goto out;
					}
					*vg[VG_CHR(minor)]->pv[p] =
						*((pv_t *)arg);
#if 0				/* copy going away */
					if (copy_from_user(vg[VG_CHR(minor)]->pv[p], arg,
						      sizeof(pv_t)) != 0)
					{
						error = EFAULT;
						goto out;
					}
#endif

					vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE;
					/* We don't need the PE list
					   in kernel space like LVs pe_t list */
					vg[VG_CHR(minor)]->pv[p]->pe = NULL;
					vg[VG_CHR(minor)]->pv_cur++;
					vg[VG_CHR(minor)]->pv_act++;
					vg[VG_CHR(minor)]->pe_total +=
					    vg[VG_CHR(minor)]->pv[p]->pe_total;
#ifdef LVM_GET_INODE
					/* insert a dummy inode for fs_may_mount */
					vg[VG_CHR(minor)]->pv[p]->inode =
					    lvm_get_inode(vg[VG_CHR(minor)]->pv[p]->pv_dev);
#endif
#ifdef LVM_GET_VNODE
					/* get a vnode for the pv */
					bdevvp(vg[VG_CHR(minor)]->pv[p]->pv_dev,
					       &vg[VG_CHR(minor)]->pv[p]->vnode);
#endif
					error = 0;
					goto out;
				}
			}
		}
		error = EPERM;
		goto out;


		/* reduce a volume group */
	case VG_REDUCE:
		memcpy (pv_name, arg, sizeof (pv_name));
#if 0				/* copy going away */
		if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
			if (vg[VG_CHR(minor)]->pv[p] != NULL &&
			    strcmp(vg[VG_CHR(minor)]->pv[p]->pv_name,
				   pv_name) == 0) {
				if (vg[VG_CHR(minor)]->pv[p]->lv_cur > 0)
				{
					error = EPERM;
					goto out;
				}
				vg[VG_CHR(minor)]->pe_total -=
				    vg[VG_CHR(minor)]->pv[p]->pe_total;
				vg[VG_CHR(minor)]->pv_cur--;
				vg[VG_CHR(minor)]->pv_act--;
#ifdef DEBUG_VFREE
				printk(KERN_DEBUG
				 "%s -- vfree %d\n", lvm_name, __LINE__);
#endif
#ifdef LVM_GET_INODE
				clear_inode(vg[VG_CHR(minor)]->pv[p]->inode);
#endif
#ifdef LVM_GET_VNODE
				vput(vg[VG_CHR(minor)]->pv[p]->vnode);
#endif
				vfree(vg[VG_CHR(minor)]->pv[p]);
				/* Make PV pointer array contiguous */
				for (; p < vg[VG_CHR(minor)]->pv_max - 1; p++)
					vg[VG_CHR(minor)]->pv[p] = vg[VG_CHR(minor)]->pv[p + 1];
				vg[VG_CHR(minor)]->pv[p + 1] = NULL;
				error = 0;
				goto out;
			}
		}
		error = ENXIO;
		goto out;


		/* set/clear extendability flag of volume group */
	case VG_SET_EXTENDABLE:
		extendable = *((int *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&extendable, arg, sizeof(extendable)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		if (extendable == VG_EXTENDABLE ||
		    extendable == ~VG_EXTENDABLE) {
			if (extendable == VG_EXTENDABLE)
				vg[VG_CHR(minor)]->vg_status |= VG_EXTENDABLE;
			else
				vg[VG_CHR(minor)]->vg_status &= ~VG_EXTENDABLE;
		} else {
			error = EINVAL;
			goto out;
		}
		error = 0;
		goto out;


		/* get volume group data (only the vg_t struct) */
	case VG_STATUS:
		*((vg_t *)arg) = *vg[VG_CHR(minor)];
#if 0				/* copy going away */
		if (copy_to_user(arg, vg[VG_CHR(minor)], sizeof(vg_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		error = 0;
		goto out;


		/* get volume group count */
	case VG_STATUS_GET_COUNT:
		*((uint *)arg) = vg_count;
#if 0				/* copy going away */
		if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		error = 0;
		goto out;


		/* get volume group names */
	case VG_STATUS_GET_NAMELIST:
		for (l = v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL) {
				if (copy_to_user(*((char **)arg) + l++ * NAME_LEN,
						 vg[v]->vg_name,
						 NAME_LEN) != 0)
				{
					error = EFAULT;
					goto out;
				}
			}
		}
		break;

		/* create, remove, extend or reduce a logical volume */
	case LV_CREATE:
	case LV_REMOVE:
	case LV_EXTEND:
	case LV_REDUCE:
		lv_req = *((lv_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		if (cmd != LV_REMOVE) {
			if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) != 0)
			{
				error = EFAULT;
				goto out;
			}
		}
		switch (cmd) {
		case LV_CREATE:
			error = do_lv_create(minor, lv_req.lv_name, &lv);
			goto out;

		case LV_REMOVE:
			error = do_lv_remove(minor, lv_req.lv_name, -1);
			goto out;

		case LV_EXTEND:
		case LV_REDUCE:
			error = do_lv_extend_reduce(minor, lv_req.lv_name, &lv);
			goto out;
		}


		/* get status of a logical volume by name */
	case LV_STATUS_BYNAME:
		lv_status_byname_req = *((lv_status_byname_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&lv_status_byname_req, arg,
				   sizeof(lv_status_byname_req_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		if (lv_status_byname_req.lv == NULL)
		{
			error = EINVAL;
			goto out;
		}
		if (copy_from_user(&lv, lv_status_byname_req.lv,
				   sizeof(lv_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}

		for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
			if (vg[VG_CHR(minor)]->lv[l] != NULL &&
			    strcmp(vg[VG_CHR(minor)]->lv[l]->lv_name,
				   lv_status_byname_req.lv_name) == 0) {
				if (copy_to_user(lv_status_byname_req.lv,
						 vg[VG_CHR(minor)]->lv[l],
						 sizeof(lv_t)) != 0)
				{
					error = EFAULT;
					goto out;
				}

				if (lv.lv_current_pe != NULL) {
					size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le *
					    sizeof(pe_t);
					if (copy_to_user(lv.lv_current_pe,
							 vg[VG_CHR(minor)]->lv[l]->lv_current_pe,
							 size) != 0)
					{
						error = EFAULT;
						goto out;
					}
				}
				if (lv.lv_exception != NULL &&
				    vg[VG_CHR(minor)]->lv[l]->lv_exception != NULL) {
					if (copy_to_user(lv.lv_exception,
							 vg[VG_CHR(minor)]->lv[l]->lv_exception,
					    sizeof(lv_exception_t)) != 0)
					{
						error = EFAULT;
						goto out;
					}
				}
				error = 0;
				goto out;
			}
		}
		error = ENXIO;
		goto out;


		/* get status of a logical volume by index */
	case LV_STATUS_BYINDEX:
		lv_status_byindex_req = *((lv_status_byindex_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&lv_status_byindex_req, arg,
				   sizeof(lv_status_byindex_req)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		if ((lvp = lv_status_byindex_req.lv) == NULL)
		{
			error = EINVAL;
			goto out;
		}
		l = lv_status_byindex_req.lv_index;
		if (vg[VG_CHR(minor)]->lv[l] == NULL)
		{
			error = ENXIO;
			goto out;
		}

		if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}

		if (copy_to_user(lvp, vg[VG_CHR(minor)]->lv[l],
				 sizeof(lv_t)) != 0)
		{
			error = EFAULT;
			goto out;
		}

		if (lv.lv_current_pe != NULL) {
			size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * sizeof(pe_t);
			if (copy_to_user(lv.lv_current_pe,
				 vg[VG_CHR(minor)]->lv[l]->lv_current_pe,
					 size) != 0)
			{
				error = EFAULT;
				goto out;
			}
		}
		if (lv.lv_exception != NULL &&
		    vg[VG_CHR(minor)]->lv[l]->lv_exception != NULL) {
			if (copy_to_user(lv.lv_exception,
				  vg[VG_CHR(minor)]->lv[l]->lv_exception,
					 sizeof(lv_exception_t)) != 0)
			{
				error = EFAULT;
				goto out;
			}
		}
		error = 0;
		goto out;


		/* change a physical volume */
	case PV_CHANGE:
		pv_change_req = *((pv_change_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&pv_change_req, arg,
				   sizeof(pv_change_req)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
			if (vg[VG_CHR(minor)]->pv[p] != NULL &&
			    strcmp(vg[VG_CHR(minor)]->pv[p]->pv_name,
				   pv_change_req.pv_name) == 0) {
#ifdef LVM_GET_INODE
				inode_sav = vg[VG_CHR(minor)]->pv[p]->inode;
#endif
#ifdef LVM_GET_VNODE
				vnode_sav = vg[VG_CHR(minor)]->pv[p]->vnode;
#endif
				if (copy_from_user(vg[VG_CHR(minor)]->pv[p],
						   pv_change_req.pv,
						   sizeof(pv_t)) != 0)
				{
					error = EFAULT;
					goto out;
				}

				/* We don't need the PE list
				   in kernel space as with LVs pe_t list */
				vg[VG_CHR(minor)]->pv[p]->pe = NULL;
#ifdef LVM_GET_INODE
				vg[VG_CHR(minor)]->pv[p]->inode = inode_sav;
#endif
#ifdef LVM_GET_VNODE
				vg[VG_CHR(minor)]->pv[p]->vnode = vnode_sav;
#endif
				error = 0;
				goto out;
			}
		}
		error = ENXIO;
		goto out;


		/* get physical volume data (pv_t structure only) */
	case PV_STATUS:
		pv_status_req = *((pv_status_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&pv_status_req, arg,
				   sizeof(pv_status_req)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
			if (vg[VG_CHR(minor)]->pv[p] != NULL) {
				if (strcmp(vg[VG_CHR(minor)]->pv[p]->pv_name,
					   pv_status_req.pv_name) == 0) {
					if (copy_to_user(pv_status_req.pv,
						vg[VG_CHR(minor)]->pv[p],
						      sizeof(pv_t)) != 0)
					{
						error = EFAULT;
						goto out;
					}
					error = 0;
					goto out;
				}
			}
		}
		error = ENXIO;
		goto out;


		/* physical volume buffer flush/invalidate */
	case PV_FLUSH:
		pv_flush_req = *((pv_flush_req_t *)arg);
#if 0				/* copy going away */
		if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
		{
			error = EFAULT;
			goto out;
		}
#endif

		for (v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] == NULL)
				continue;
			for (p = 0; p < vg[v]->pv_max; p++) {
				if (vg[v]->pv[p] != NULL &&
				    strcmp(vg[v]->pv[p]->pv_name,
					   pv_flush_req.pv_name) == 0) {
					fsync_dev(vg[v]->pv[p]->pv_dev);
					invalidate_buffers(vg[v]->pv[p]->pv_dev);
					error = 0;
					goto out;
				}
			}
		}
		error = 0;
		goto out;

				/* XXXXXXXXXXX logical volume major ioctls */

		/* set access flags of a logical volume */
	case LV_SET_ACCESS:
		/* super user validation */
/*  		if (!suser()) */
/*  			return EACCES; */
		vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access =
			*((ulong *) arg);
		break;


		/* set status flags of a logical volume */
	case LV_SET_STATUS:
		/* super user validation */
/*  		if (!suser()) */
/*  			return EACCES; */
		if (!((ulong) arg & LV_ACTIVE) &&
		    vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open > 1)
		{
			error = EPERM;
			goto out;
		}
		vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status =
			*((ulong *) arg);
		break;


		/* set allocation flags of a logical volume */
	case LV_SET_ALLOCATION:
		/* super user validation */
/*  		if (!suser()) */
/*  			return EACCES; */
		vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_allocation =
			*((ulong *) arg);
		break;

	case DIOCGDEFLABEL:
	case DIOCGDINFO:
		lvmfakelabel ((struct disklabel *)arg, minor);
		break;
	
	case DIOCGPART:
				/* FIXME chris if we have a per-LV static
                                   disklabel */
		lvmfakelabel (&dk_label, minor);
		((struct partinfo *)data)->disklab = &dk_label;
		((struct partinfo *)data)->part =
		    &dk_label.d_partitions[0];
		break;

	case DIOCGPINFO:
		bzero (arg, sizeof (struct partition));
		((struct partition *)arg)->p_offset = 0;
		((struct partition *)arg)->p_size =
			vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size;
		((struct partition *)arg)->p_fstype = FS_BSDFFS;
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
	case DIOCWLABEL:
		break;

	default:
		error = ENOTTY;
	}

 out:
/*  	(void) lockmgr(&cs->sc_lock, LK_RELEASE, NULL); */
	return (error);
}

/*
 * lvmsize: stub, not implemented.
 * The device argument is ignored and -1 is always returned.
 */
int
lvmsize(dev)
	dev_t dev;
{

	return -1;
}

/*
 * lvmdump: stub, not implemented.
 * All four arguments are ignored; the call always fails with ENXIO.
 */
int
lvmdump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	return (ENXIO);
}

/*
 * internal support functions
 */

/*
 * lvmfakelabel: fill *lp with a synthetic disklabel describing the
 * logical volume vg[VG_BLK(minor)]->lv[LV_BLK(minor)].
 *
 *	lp	label to fill in; it is zeroed first
 *	minor	minor number used to look up the volume group and
 *		logical volume; both must exist (not checked here)
 *
 * The geometry is made up: the sector size is DEV_BSIZE, the sectors
 * per track are derived from the volume group's pe_size, and the track
 * count is chosen so that secsize * nsectors * ntracks is at most 1 MB.
 * Partition 0 and RAW_PART both span the whole logical volume.
 */
static void
lvmfakelabel (lp, minor)
	struct disklabel *lp;
	int minor;
{
	lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)];
	char *lv_name = strrchr (lv->lv_name, '/');

	/* the pack name is the last path component of the LV name */
	if (lv_name == NULL)
		lv_name = lv->lv_name;
	else
		lv_name++;

	bzero(lp, sizeof(*lp));

	lp->d_secperunit = lv->lv_size;
	lp->d_secsize = DEV_BSIZE;
	lp->d_nsectors = vg[VG_BLK(minor)]->pe_size / DEV_BSIZE;
	/*
	 * Both d_nsectors and d_ntracks end up as divisors below.
	 * d_nsectors is 0 when pe_size < DEV_BSIZE and d_ntracks is 0
	 * when a single track is larger than 1 MB; clamp them to 1 so
	 * that neither division can trap.
	 */
	if (lp->d_nsectors == 0)
		lp->d_nsectors = 1;
	lp->d_ntracks = (1024 * 1024) / (lp->d_secsize * lp->d_nsectors);
	if (lp->d_ntracks == 0)
		lp->d_ntracks = 1;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
	lp->d_ncylinders = lp->d_secperunit / lp->d_secpercyl;

#if NAME_LEN < 16
#warning NAME_LEN < 16
#endif
	/* fixed-size label fields; strncpy pads short names with NULs */
	strncpy(lp->d_typename, lv->vg_name, sizeof(lp->d_typename));
	lp->d_type = DTYPE_LVM;
	strncpy(lp->d_packname, lv_name, sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[0].p_offset = 0;
	lp->d_partitions[0].p_size = lv->lv_size;
	lp->d_partitions[0].p_fstype = FS_BSDFFS;
	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = lv->lv_size;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	/* the checksum must be computed last, over the finished label */
	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * lvmdevtopv: find the physical volume of volume group `avg' whose
 * pv_dev equals `dev'.
 *
 * Only the first avg->pv_cur slots of avg->pv[] are examined.  A slot
 * may be empty (the rest of the driver tests pv[] entries for NULL
 * before using them), so empty slots are skipped instead of being
 * dereferenced.
 *
 * Returns the matching pv_t, or NULL if there is none.
 */
static pv_t *
lvmdevtopv (dev, avg)
     dev_t dev;
     vg_t *avg;
{
	int p;

	for (p = 0; p < avg->pv_cur; p++) {
		if (avg->pv[p] == NULL)
			continue;
		if (avg->pv[p]->pv_dev == dev)
			return (avg->pv[p]);
	}
	return (NULL);
}

/*
 * character device support function VGDA create
 */
int do_vg_create(int minor, void *arg)
{
	int snaporg_minor;
	ulong l, p;
	lv_t lv;

	if (vg[VG_CHR(minor)] != NULL)
		return EPERM;

	if ((vg[VG_CHR(minor)] = vmalloc(sizeof(vg_t))) == NULL) {
		printk(KERN_CRIT
		       "%s -- VG_CREATE: vmalloc error VG\n", lvm_name);
		return ENOMEM;
	}
	/* get the volume group structure */
	*vg[VG_CHR(minor)] = *((vg_t *)arg);
#if 0				/* copy going away */
	if (copy_from_user(vg[VG_CHR(minor)], arg, sizeof(vg_t)) != 0) {
		vfree(vg[VG_CHR(minor)]);
		vg[VG_CHR(minor)] = NULL;
		return EFAULT;
	}
#endif
	/* we are not that active so far... */
	vg[VG_CHR(minor)]->vg_status &= ~VG_ACTIVE;

	vg[VG_CHR(minor)]->pe_allocated = 0;
	if (vg[VG_CHR(minor)]->pv_max > ABS_MAX_PV) {
		printk(KERN_WARNING
		       "%s -- Can't activate VG: ABS_MAX_PV too small\n",
		       lvm_name);
		vfree(vg[VG_CHR(minor)]);
		vg[VG_CHR(minor)] = NULL;
		return EPERM;
	}
	if (vg[VG_CHR(minor)]->lv_max > ABS_MAX_LV) {
		printk(KERN_WARNING
		"%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
		       lvm_name, vg[VG_CHR(minor)]->lv_max);
		vfree(vg[VG_CHR(minor)]);
		vg[VG_CHR(minor)] = NULL;
		return EPERM;
	}
	/* get the physical volume structures */
	vg[VG_CHR(minor)]->pv_act = vg[VG_CHR(minor)]->pv_cur = 0;
	for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
		/* user space address */
		if ((pvp = vg[VG_CHR(minor)]->pv[p]) != NULL) {
			vg[VG_CHR(minor)]->pv[p] = vmalloc(sizeof(pv_t));
			if (vg[VG_CHR(minor)]->pv[p] == NULL) {
				printk(KERN_CRIT
				       "%s -- VG_CREATE: vmalloc error PV\n", lvm_name);
				do_vg_remove(minor);
				return ENOMEM;
			}
			if (copy_from_user(vg[VG_CHR(minor)]->pv[p], pvp,
					   sizeof(pv_t)) != 0) {
				do_vg_remove(minor);
				return EFAULT;
			}
			/* We don't need the PE list
			   in kernel space as with LVs pe_t list (see below) */
			vg[VG_CHR(minor)]->pv[p]->pe = NULL;
			vg[VG_CHR(minor)]->pv[p]->pe_allocated = 0;
			vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE;
			vg[VG_CHR(minor)]->pv_act++;
			vg[VG_CHR(minor)]->pv_cur++;

#ifdef LVM_GET_INODE
			/* insert a dummy inode for fs_may_mount */
			vg[VG_CHR(minor)]->pv[p]->inode =
			    lvm_get_inode(vg[VG_CHR(minor)]->pv[p]->pv_dev);
#endif
#ifdef LVM_GET_VNODE
			/* get a vnode for the pv */
			bdevvp(vg[VG_CHR(minor)]->pv[p]->pv_dev,
			       &vg[VG_CHR(minor)]->pv[p]->vnode);
#endif
		}
	}

	/* get the logical volume structures */
	vg[VG_CHR(minor)]->lv_cur = 0;
	for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
		/* user space address */
		if ((lvp = vg[VG_CHR(minor)]->lv[l]) != NULL) {
			if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
				do_vg_remove(minor);
				return EFAULT;
			}
			vg[VG_CHR(minor)]->lv[l] = NULL;
			if (do_lv_create(minor, lv.lv_name, &lv) != 0) {
				do_vg_remove(minor);
				return EFAULT;
			}
		}
	}

	/* Second path to correct snapshot logical volumes which are not
	   in place during first path above */
	for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
		if (vg[VG_CHR(minor)]->lv[l] != NULL &&
		    vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) {
			snaporg_minor = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_minor;
			if (vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)] != NULL) {
				vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_access |=
				    LV_SNAPSHOT_ORG;
				vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_exception =
				    vg[VG_CHR(minor)]->lv[l]->lv_exception;
				vg[VG_CHR(minor)]->lv[l]->lv_current_pe =
				    vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_pe;
				vg[VG_CHR(minor)]->lv[l]->lv_allocated_le =
				    vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_allocated_le;
				vg[VG_CHR(minor)]->lv[l]->lv_current_le =
				    vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_le;
			}
		}
	}

	vg_count++;

	/* let's go active */
	vg[VG_CHR(minor)]->vg_status |= VG_ACTIVE;

#if 0
	printf ("vg activated: %s (%d) count: %d\n", 
		vg[VG_CHR(minor)]->vg_name, VG_CHR(minor), vg_count);
#endif
#ifdef MODULE
	MOD_INC_USE_COUNT;
#endif
	return 0;
}				/* do_vg_create () */


/*
 * character device support function VGDA remove
 */
static int do_vg_remove(int minor)
{
	int l, p;

#ifdef LVM_TOTAL_RESET
	if (vg[VG_CHR(minor)]->lv_open > 0 && lvm_reset_spindown == 0)
#else
	if (vg[VG_CHR(minor)]->lv_open > 0)
#endif
		return EPERM;

	/* let's go inactive */
	vg[VG_CHR(minor)]->vg_status &= ~VG_ACTIVE;

	/* free LVs */
	/* first free snapshot logical volumes */
	for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
		if (vg[VG_CHR(minor)]->lv[l] != NULL &&
		    vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) {
			do_lv_remove(minor, NULL, l);
/* XXX TODO
			current->state = TASK_INTERRUPTIBLE;
			schedule_timeout(1);
*/
		}
	}
	/* then free the rest */
	for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
		if (vg[VG_CHR(minor)]->lv[l] != NULL) {
			do_lv_remove(minor, NULL, l);
/* XXX TODO
			current->state = TASK_INTERRUPTIBLE;
			schedule_timeout(1);
*/
		}
	}

	/* free PVs */
	for (p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) {
		if (vg[VG_CHR(minor)]->pv[p] != NULL) {
#ifdef DEBUG_VFREE
			printk(KERN_DEBUG
			       "%s -- vfree %d\n", lvm_name, __LINE__);
#endif
#ifdef LVM_GET_INODE
			clear_inode(vg[VG_CHR(minor)]->pv[p]->inode);
#endif
#ifdef LVM_GET_VNODE
			vput(vg[VG_CHR(minor)]->pv[p]->vnode);
#endif
			vfree(vg[VG_CHR(minor)]->pv[p]);
			vg[VG_CHR(minor)]->pv[p] = NULL;
		}
	}

#ifdef DEBUG_VFREE
	printk(KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__);
#endif
	vfree(vg[VG_CHR(minor)]);
	vg[VG_CHR(minor)] = NULL;
	VG_CHR(minor) = ABS_MAX_VG;

	vg_count--;

#ifdef MODULE
	MOD_DEC_USE_COUNT;
#endif
	return 0;
}				/* do_vg_remove () */


/*
 * character device support function logical volume create
 *
 *	minor	selects the volume group vg[VG_CHR(minor)]
 *	lv_name	name of the new volume; compared against the existing
 *		volumes when the preferred slot is already taken
 *	lv	precopied lv_t; its lv_current_pe and lv_exception
 *		members are still user space addresses and are fetched
 *		with copy_from_user()
 *
 * The volume goes into slot lv->lv_number when that slot is free,
 * otherwise into the first free slot.
 *
 * Returns 0 on success, or
 *	EINVAL	lv_current_pe is NULL, lv_number is not below the volume
 *		group's lv_max, or a snapshot has no exception data
 *	EEXIST	the preferred slot is taken and a volume named lv_name
 *		already exists
 *	EPERM	no free slot
 *	ENOMEM	a vmalloc() failed
 *	EFAULT	a copy from user space failed, or the original volume
 *		of a snapshot does not exist
 * On failure everything allocated here is released again and the slot
 * is left empty.
 */
static int do_lv_create(int minor, char *lv_name, lv_t * lv)
{
	int l, le, l_new, p, size, snaporg_minor;
	int error;
	ulong lv_status_save;
	lv_exception_t *lve = lv->lv_exception;
	lv_block_exception_t *lvbe = NULL;
	vg_t *vgp = vg[VG_CHR(minor)];
	lv_t *nlv;		/* the in-kernel copy being built */
	lv_t *org;		/* original volume of a snapshot */

	/* use precopied logical volume */
	if ((pep = lv->lv_current_pe) == NULL)
		return EINVAL;

	/* lv_number comes from the caller and is used as an index */
	if ((ulong) lv->lv_number >= (ulong) vgp->lv_max)
		return EINVAL;

	/* in case of lv_remove(), lv_create() pair; for eg. lvrename does this */
	l_new = -1;
	if (vgp->lv[lv->lv_number] == NULL)
		l_new = lv->lv_number;
	else {
		for (l = 0; l < vgp->lv_max; l++) {
			/* test for an empty slot before looking at its name */
			if (vgp->lv[l] == NULL) {
				if (l_new == -1)
					l_new = l;
				continue;
			}
			if (strcmp(vgp->lv[l]->lv_name, lv_name) == 0)
				return EEXIST;
		}
	}
	if (l_new == -1)
		return EPERM;
	l = l_new;

	if ((nlv = vmalloc(sizeof(lv_t))) == NULL) {
		printk(KERN_CRIT "%s -- LV_CREATE: vmalloc error LV\n", lvm_name);
		return ENOMEM;
	}
	vgp->lv[l] = nlv;

	/* copy preloaded LV; keep it inactive until it is complete */
	memcpy((char *) nlv, (char *) lv, sizeof(lv_t));
	lv_status_save = nlv->lv_status;
	nlv->lv_status &= ~LV_ACTIVE;

	if (!(nlv->lv_access & LV_SNAPSHOT)) {
		/* get the PE structures from user space if this
		   is no snapshot logical volume */
		size = nlv->lv_allocated_le * sizeof(pe_t);
		if ((nlv->lv_current_pe = vmalloc(size)) == NULL) {
			printk(KERN_CRIT
			       "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte\n",
			       lvm_name, size);
#ifdef DEBUG_VFREE
			printk(KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__);
#endif
			error = ENOMEM;
			goto fail_lv;
		}
		if (copy_from_user(nlv->lv_current_pe, pep, size)) {
			vfree(nlv->lv_current_pe);
			error = EFAULT;
			goto fail_lv;
		}
		/* resolve each extent's device to its pv_t */
		for (p = 0; p < nlv->lv_allocated_le; p++) {
			nlv->lv_current_pe[p].pv =
				lvmdevtopv (nlv->lv_current_pe[p].dev, vgp);
		}
	} else {
		/* Get snapshot exception data and block list */
		if (lve == NULL) {
			error = EINVAL;
			goto fail_lv;
		}
		if ((nlv->lv_exception =
		     vmalloc(sizeof(lv_exception_t))) == NULL) {
			printk(KERN_CRIT
			       "%s -- LV_CREATE: vmalloc error LV_EXCEPTION at line %d "
			       "of %d Byte\n",
			       lvm_name, __LINE__, (int) sizeof(lv_exception_t));
			error = ENOMEM;
			goto fail_lv;
		}
		if (copy_from_user(nlv->lv_exception, lve,
				   sizeof(lv_exception_t))) {
			error = EFAULT;
			goto fail_exception;
		}
		/* the copied struct still points at the user's block list */
		lvbe = nlv->lv_exception->lv_block_exception;
		size = nlv->lv_exception->lv_remap_end *
		    sizeof(lv_block_exception_t);
		if ((nlv->lv_exception->lv_block_exception =
		     vmalloc(size)) == NULL) {
			printk(KERN_CRIT
			       "%s -- LV_CREATE: vmalloc error LV_BLOCK_EXCEPTION "
			       "at line %d of %d Byte\n",
			       lvm_name, __LINE__, size);
#ifdef DEBUG_VFREE
			printk(KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__);
#endif
			error = ENOMEM;
			goto fail_exception;
		}
		if (copy_from_user(nlv->lv_exception->lv_block_exception,
				   lvbe, size)) {
			error = EFAULT;
			goto fail_block_exception;
		}
		simple_lock_init (&nlv->lv_exception->lv_snapshot_lock);

		snaporg_minor = nlv->lv_snapshot_minor;
		org = vgp->lv[LV_BLK(snaporg_minor)];
		if (org == NULL) {
			error = EFAULT;
			goto fail_block_exception;
		}

		/* tie snapshot and original together; the snapshot
		   shares the PE map and size of its original */
		org->lv_access |= LV_SNAPSHOT_ORG;
		org->lv_snapshot_minor = MINOR(nlv->lv_dev);
		org->lv_exception = nlv->lv_exception;
		nlv->lv_current_pe = org->lv_current_pe;
		nlv->lv_allocated_le = org->lv_allocated_le;
		nlv->lv_current_le = org->lv_current_le;
		nlv->lv_size = org->lv_size;
		/* sync the original logical volume */
		fsync_dev(org->lv_dev);
	}

	/* correct the PE count in PVs if this is no snapshot logical volume */
	if (!(nlv->lv_access & LV_SNAPSHOT)) {
		for (le = 0; le < nlv->lv_allocated_le; le++) {
			vgp->pe_allocated++;
			for (p = 0; p < vgp->pv_cur; p++) {
				/* pv[] slots may be empty */
				if (vgp->pv[p] != NULL &&
				    vgp->pv[p]->pv_dev ==
				    nlv->lv_current_pe[le].dev)
					vgp->pv[p]->pe_allocated++;
			}
		}
	}

	/* make the volume reachable through its block device minor */
	vg_lv_map[MINOR(nlv->lv_dev)].vg_number = vgp->vg_number;
	vg_lv_map[MINOR(nlv->lv_dev)].lv_number = nlv->lv_number;

	LVM_CORRECT_READ_AHEAD(nlv->lv_read_ahead);
	vgp->lv_cur++;

	/* everything is in place: restore the caller's status bits */
	nlv->lv_status = lv_status_save;

	return 0;

fail_block_exception:
	vfree(nlv->lv_exception->lv_block_exception);
fail_exception:
	vfree(nlv->lv_exception);
fail_lv:
	vfree(nlv);
	vgp->lv[l] = NULL;
	return error;
}				/* do_lv_create () */


/*
 * character device support function logical volume remove
 */
static int do_lv_remove(int minor, char *lv_name, int l)
{
	uint le, p;
	int snaporg_minor;


	if (l == -1) {
		for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
			if (vg[VG_CHR(minor)]->lv[l] != NULL &&
			    strcmp(vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) {
				break;
			}
		}
	}
	if (l < vg[VG_CHR(minor)]->lv_max) {
#ifdef LVM_TOTAL_RESET
		if (vg[VG_CHR(minor)]->lv[l]->lv_open > 0 && lvm_reset_spindown == 0)
#else
		if (vg[VG_CHR(minor)]->lv[l]->lv_open > 0)
#endif
			return EBUSY;

#if 0
#ifdef DEBUG
		printk(KERN_DEBUG
		       "%s -- fsync_dev and "
		       "invalidate_buffers for %s [%s] in %s\n",
		       lvm_name, vg[VG_CHR(minor)]->lv[l]->lv_name,
		       kdevname(vg[VG_CHR(minor)]->lv[l]->lv_dev),
		       vg[VG_CHR(minor)]->vg_name);
#endif
#endif

		/* check for deletion of snapshot source while
		   snapshot volume still exists */
		if ((vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT_ORG) &&
		    vg[VG_CHR(minor)]->lv[l]->lv_snapshot_minor != 0)
			return EPERM;

		vg[VG_CHR(minor)]->lv[l]->lv_status |= LV_SPINDOWN;

		/* sync the buffers if this is no snapshot logical volume */
		if (!(vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT))
			fsync_dev(vg[VG_CHR(minor)]->lv[l]->lv_dev);

		vg[VG_CHR(minor)]->lv[l]->lv_status &= ~LV_ACTIVE;

		/* invalidate the buffers */
		invalidate_buffers(vg[VG_CHR(minor)]->lv[l]->lv_dev);

		/* reset VG/LV mapping */
		vg_lv_map[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].vg_number = ABS_MAX_VG;
		vg_lv_map[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].lv_number = -1;

		/* correct the PE count in PVs if this is no snapshot logical volume */
		if (!(vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT)) {
			for (le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) {
				vg[VG_CHR(minor)]->pe_allocated--;
				for (p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) {
					if (vg[VG_CHR(minor)]->pv[p]->pv_dev ==
					    vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev)
						vg[VG_CHR(minor)]->pv[p]->pe_allocated--;
				}
			}
			/* only if this is no snapshot logical volume because we share
			   the lv_current_pe[] structs with the original logical volume */
			vfree(vg[VG_CHR(minor)]->lv[l]->lv_current_pe);
		} else {
			snaporg_minor = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_minor;

			/* if we deleted snapshot original logical volume before
			   the snapshot volume (in case og VG_REMOVE for eg.) */
			if (vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)] != NULL) {
				vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_access &=
				    ~LV_SNAPSHOT_ORG;
				vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_exception = NULL;
				vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_snapshot_minor = 0;
			}
			if (vg[VG_CHR(minor)]->lv[l]->lv_exception != NULL) {
				if (vg[VG_CHR(minor)]->lv[l]->lv_exception->lv_block_exception
				    != NULL)
					vfree(vg[VG_CHR(minor)]->lv[l]->lv_exception \
					      ->lv_block_exception);
				vfree(vg[VG_CHR(minor)]->lv[l]->lv_exception);
			}
		}

#ifdef DEBUG_VFREE
		printk(KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__);
#endif
		vfree(vg[VG_CHR(minor)]->lv[l]);
		vg[VG_CHR(minor)]->lv[l] = NULL;
		vg[VG_CHR(minor)]->lv_cur--;
		return 0;
	}
	return ENXIO;
}				/* do_lv_remove () */


/*
 * character device support function logical volume extend / reduce
 *
 * Replaces the in-core descriptor of the logical volume named lv_name in
 * the volume group selected by minor with the preloaded descriptor lv.
 * The new PE table is fetched from the user-space address stored in
 * lv->lv_current_pe.  The per-PE read/write counters of the old table are
 * carried over into the new one, and the PE allocation counters of the VG
 * and of its PVs are first decremented for the old table and then
 * incremented again for the new one.
 *
 * Returns 0 on success, otherwise a positive errno value:
 *   EINVAL  lv->lv_current_pe is NULL
 *   ENXIO   no logical volume named lv_name exists in this volume group
 *   EPERM   lv has an exception table with lv_remap_ptr < lv_remap_end
 *   ENOMEM  the new PE table could not be allocated
 *   EFAULT  the PE table could not be copied from user space
 *
 * NOTE: pep, pep1 and lv_open are not declared in this function; they are
 * scratch variables declared outside of it.
 */
static int do_lv_extend_reduce(int minor, char *lv_name, lv_t * lv)
{
	int l, le, p, size, old_allocated_le;
	uint32_t end, lv_status_save;
	pe_t *pe;

	if ((pep = lv->lv_current_pe) == NULL)
		return EINVAL;

	/* find the slot of the LV to be changed by its name */
	for (l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) {
		if (vg[VG_CHR(minor)]->lv[l] != NULL &&
		    strcmp(vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0)
			break;
	}
	if (l == vg[VG_CHR(minor)]->lv_max)
		return ENXIO;

	/* check for active snapshot */
	if (lv->lv_exception != NULL &&
	 lv->lv_exception->lv_remap_ptr < lv->lv_exception->lv_remap_end)
		return EPERM;

	/*
	 * Allocate the new PE table exactly once.  Allocating and filling
	 * it a second time (as was done here before) overwrote the only
	 * pointer to the first buffer and leaked it on every call.
	 */
	if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) {
		printk(KERN_CRIT
		"%s -- do_lv_extend_reduce: vmalloc error LV_CURRENT_PE "
		       "of %d Byte\n", lvm_name, size);
		return ENOMEM;
	}
	/* get the PE structures from user space */
	if (copy_from_user(pe, pep, size)) {
		vfree(pe);
		return EFAULT;
	}
#if 0
#ifdef DEBUG
	printk(KERN_DEBUG
	       "%s -- fsync_dev and "
	       "invalidate_buffers for %s [%s] in %s\n",
	       lvm_name, vg[VG_CHR(minor)]->lv[l]->lv_name,
	       kdevname(vg[VG_CHR(minor)]->lv[l]->lv_dev),
	       vg[VG_CHR(minor)]->vg_name);
#endif
#endif

	/* flush the device, then mark it inactive and drop its buffers */
	vg[VG_CHR(minor)]->lv[l]->lv_status |= LV_SPINDOWN;
	fsync_dev(vg[VG_CHR(minor)]->lv[l]->lv_dev);
	vg[VG_CHR(minor)]->lv[l]->lv_status &= ~LV_ACTIVE;
	invalidate_buffers(vg[VG_CHR(minor)]->lv[l]->lv_dev);

	/* reduce allocation counters on PV(s) */
	for (le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) {
		vg[VG_CHR(minor)]->pe_allocated--;
		for (p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) {
			if (vg[VG_CHR(minor)]->pv[p]->pv_dev ==
			vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) {
				vg[VG_CHR(minor)]->pv[p]->pe_allocated--;
				break;
			}
		}
	}

#ifdef DEBUG_VFREE
	printk(KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__);
#endif

	/* save pointer to "old" lv/pe pointer array */
	pep1 = vg[VG_CHR(minor)]->lv[l]->lv_current_pe;
	end = vg[VG_CHR(minor)]->lv[l]->lv_current_le;

	/* save open counter */
	lv_open = vg[VG_CHR(minor)]->lv[l]->lv_open;

	/* save # of old allocated logical extents */
	old_allocated_le = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le;

	/* copy preloaded LV */
	memcpy((char *) vg[VG_CHR(minor)]->lv[l], (char *) lv, sizeof(lv_t));
	/* remember the status delivered with the new descriptor; it is
	   restored at the end, the LV stays inactive while we work on it */
	lv_status_save = vg[VG_CHR(minor)]->lv[l]->lv_status;
	vg[VG_CHR(minor)]->lv[l]->lv_status |= LV_SPINDOWN;
	vg[VG_CHR(minor)]->lv[l]->lv_status &= ~LV_ACTIVE;
	/* the copied descriptor still holds the user-space PE pointer and
	   the caller's open count; put the kernel copies in place */
	vg[VG_CHR(minor)]->lv[l]->lv_current_pe = pe;
	vg[VG_CHR(minor)]->lv[l]->lv_open = lv_open;

	/* save available i/o statistic data */
	/* linear logical volume */
	if (vg[VG_CHR(minor)]->lv[l]->lv_stripes < 2) {
		/* Check what last LE shall be used */
		if (end > vg[VG_CHR(minor)]->lv[l]->lv_current_le)
			end = vg[VG_CHR(minor)]->lv[l]->lv_current_le;
		for (le = 0; le < end; le++) {
			vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].reads = pep1[le].reads;
			vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].writes = pep1[le].writes;
		}
		/* striped logical volume */
	} else {
		uint i, j, source, dest, stripe_end;
		uint old_stripe_size, new_stripe_size;

		old_stripe_size = old_allocated_le / vg[VG_CHR(minor)]->lv[l]->lv_stripes;
		new_stripe_size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le /
		    vg[VG_CHR(minor)]->lv[l]->lv_stripes;
		/* per stripe, copy only as many entries as both layouts have */
		stripe_end = old_stripe_size;
		if (stripe_end > new_stripe_size)
			stripe_end = new_stripe_size;
		for (i = source = dest = 0;
		     i < vg[VG_CHR(minor)]->lv[l]->lv_stripes; i++) {
			for (j = 0; j < stripe_end; j++) {
				vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest + j].reads =
				    pep1[source + j].reads;
				vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest + j].writes =
				    pep1[source + j].writes;
			}
			source += old_stripe_size;
			dest += new_stripe_size;
		}
	}
	/* the old PE table is no longer referenced */
	vfree(pep1);
	pep1 = NULL;


	/* extend the PE count in PVs */
	for (le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) {
		vg[VG_CHR(minor)]->pe_allocated++;
		for (p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) {
			if (vg[VG_CHR(minor)]->pv[p]->pv_dev ==
			vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) {
				vg[VG_CHR(minor)]->pv[p]->pe_allocated++;
				break;
			}
		}
	}

	/* vg_lv_map array doesn't have to be changed here */

	vg[VG_CHR(minor)]->lv[l]->lv_status = lv_status_save;
	LVM_CORRECT_READ_AHEAD(vg[VG_CHR(minor)]->lv[l]->lv_read_ahead);

	return 0;
}				/* do_lv_extend_reduce () */
