/*
 *   (C) Copyright IBM Corp. 2001, 2003
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Local Disk Manager plugin.
 */

#define _GNU_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <mntent.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <glob.h>
#include <malloc.h>
#include <wait.h>

#include <plugin.h>
#include <ldm_funcs.h>
#include "localdskmgr.h"
#include "cache.h"
#include "info.h"

/* Engine service table; stored by LD_setup(). */
engine_functions_t * EngFncs = NULL;

/* Directory whose entries are scanned for disks. Allocated with strdup()
 * in get_legacy_devs()/get_sysfs_devs() and freed in LD_cleanup().
 */
char * base_directory = NULL;
int    base_directory_len;
/* sysfs mount point found by where_is_sysfs(). NULL selects the legacy
 * (device-node) code paths throughout this file.
 */
char * sysfs_mount_point;
/* Legacy scan directory; defaults to "/dev/" in get_legacy_config(). */
const char * scan;
/* Sub-directories (appended to the base directory) searched recursively. */
const char * const * directories;
int directories_count;
/* Glob patterns naming the devices to include. */
const char * const * includes;
int include_count;
/* Include patterns used when the config file supplies none. */
const char * const default_legacy_includes[] = {
	"sd?",
	"hd?",
	"dasd?"
};
const char * const default_sysfs_includes[] = {
	"*?",
};
/* Glob patterns naming the devices to drop again after inclusion. */
const char * const * excludes;
int exclude_count;
/* Accumulated result of all include globs for the current discovery. */
glob_t dev_names_glob;
/* Flags handed to glob(); GLOB_APPEND is OR-ed in after the first match. */
int glob_flags;
/* Scratch buffer in which get_dev_names() builds "<dir>/<pattern>". */
static char pattern[PATH_MAX];

/* DM device list cached by get_dm_device_list(). */
static dm_device_list_t * dm_devices = NULL;
/* dm_device_t entries collected by update_multipath_child_list(). */
static list_anchor_t multipath_children = NULL;

/* Global list for keeping track of open file-handles. On systems with
 * a large number of disks, we don't want to keep open file-handles to
 * every disk, or the process may run out of available file-handles.
 */
static list_anchor_t file_handles = NULL;
/* Size of the pool. Read from the "max_open_disks" config entry and
 * normalised by file_handle_setup().
 */
u_int32_t num_file_handles = 0;
/* Pool size used when the configured value is 0. */
#define DEFAULT_FILE_HANDLES 64
/* Upper limit applied to the configured pool size. */
#define MAX_FILE_HANDLES 1024

static int LD_read(storage_object_t * disk,
		   lsn_t offset,
		   sector_count_t count,
		   void * buffer);


static void close_dev(storage_object_t * disk);

/**
 * round_down
 * @value:	Sector count to align.
 * @boundary:	Alignment granularity, in bytes.
 *
 * Truncate @value to a multiple of @boundary. A boundary that is not larger
 * than one sector leaves @value unchanged. The masking only yields a true
 * multiple when the boundary (in sectors) is a power of two.
 **/
static inline sector_count_t round_down(sector_count_t value,
					u_int32_t boundary)
{
	sector_count_t sectors_per_boundary;

	if (boundary <= EVMS_VSECTOR_SIZE) {
		return value;
	}

	sectors_per_boundary = ((sector_count_t)boundary) >>
			       EVMS_VSECTOR_SIZE_SHIFT;
	return value & ~(sectors_per_boundary - 1);
}

/**
 * round_up
 * @value:	Sector count to align.
 * @boundary:	Alignment granularity, in bytes.
 *
 * Raise @value to the next multiple of @boundary. A boundary that is not
 * larger than one sector leaves @value unchanged. The masking only yields a
 * true multiple when the boundary (in sectors) is a power of two.
 **/
static inline sector_count_t round_up(sector_count_t value,
				      u_int32_t boundary)
{
	sector_count_t sectors_per_boundary;
	sector_count_t mask;

	if (boundary <= EVMS_VSECTOR_SIZE) {
		return value;
	}

	sectors_per_boundary = ((sector_count_t)boundary) >>
			       EVMS_VSECTOR_SIZE_SHIFT;
	mask = sectors_per_boundary - 1;
	return (value + mask) & ~mask;
}

/**
 * is_drbd_active
 *
 * Report whether the drbd driver is running, judged by the presence of
 * /proc/drbd. If it is, the nbd driver cannot also be running, since they use
 * the same major-number. The answer is computed on the first call and cached
 * in drbd_active (-1 means "not probed yet").
 **/
static int drbd_active = -1;

static boolean is_drbd_active(void)
{
	LOG_ENTRY();

	if (drbd_active == -1) {
		struct stat info;

		drbd_active = (stat("/proc/drbd", &info) == 0) ? TRUE : FALSE;
	}

	LOG_EXIT_BOOL(drbd_active);
	return drbd_active;
}

/**
 * search_mount_records
 * @records:	Open mount table (from setmntent()).
 * @fs_name:	Filesystem type to look for.
 * @mount_name:	Optional. Receives a strdup()'d copy of the mount directory
 *		of the first matching entry.
 *
 * Walk the mount table until the first entry of type @fs_name is seen.
 * Returns TRUE if such an entry exists.
 **/
static boolean search_mount_records(FILE * records,
				    char * fs_name,
				    char ** mount_name)
{
	struct mntent * entry;
	boolean found = FALSE;

	LOG_ENTRY();

	while ((entry = getmntent(records)) != NULL) {
		if (strcmp(entry->mnt_type, fs_name) != 0) {
			continue;
		}

		if (mount_name) {
			*mount_name = strdup(entry->mnt_dir);
		}
		found = TRUE;
		break;
	}

	LOG_EXIT_BOOL(found);
	return found;
}

/**
 * where_is_sysfs
 * @mount_name:	Optional. Receives the sysfs mount point (NULL if not found).
 *
 * Look for a mounted sysfs, first in MOUNTED and then in /proc/mounts.
 * Returns TRUE if one was found. The caller must free the returned string.
 **/
static boolean where_is_sysfs(char ** mount_name)
{
	FILE * table;
	boolean found = FALSE;

	LOG_ENTRY();

	if (mount_name != NULL) {
		*mount_name = NULL;
	}

	table = setmntent(MOUNTED, "r");
	if (table != NULL) {
		LOG_DEBUG("Searching for sysfs in %s.\n", MOUNTED);
		found = search_mount_records(table, "sysfs", mount_name);
		endmntent(table);
	}

	if (found) {
		LOG_EXIT_BOOL(found);
		return found;
	}

	/* Not listed in MOUNTED. Try the kernel's own table. */
	table = setmntent("/proc/mounts", "r");
	if (table != NULL) {
		LOG_DEBUG("Searching for sysfs in /proc/mounts.\n");
		found = search_mount_records(table, "sysfs", mount_name);
		endmntent(table);
	}

	LOG_EXIT_BOOL(found);
	return found;
}

/**
 * get_legacy_config
 *
 * Load the "legacy_devices" section of the config file into the scan
 * globals. Each global is given its default first, then the engine is asked
 * for the configured value.
 **/
static void get_legacy_config(void)
{
	LOG_ENTRY();

	/* Base directory to scan. */
	scan = "/dev/";
	EngFncs->get_config_string("legacy_devices.scan", &scan);

	/* Sub-directories to search recursively. */
	directories = NULL;
	directories_count = 0;
	EngFncs->get_config_string_array("legacy_devices.directories",
					 &directories_count, &directories);

	/* Include patterns, with a built-in fallback. */
	includes = NULL;
	include_count = 0;
	EngFncs->get_config_string_array("legacy_devices.include",
					 &include_count, &includes);
	if (!includes) {
		include_count = sizeof default_legacy_includes /
				sizeof *default_legacy_includes;
		includes = default_legacy_includes;
	}

	/* Exclude patterns. */
	excludes = NULL;
	exclude_count = 0;
	EngFncs->get_config_string_array("legacy_devices.exclude",
					 &exclude_count, &excludes);

	/* Size of the file-handle pool. */
	EngFncs->get_config_uint32("legacy_devices.max_open_disks",
				   &num_file_handles);

	LOG_EXIT_VOID();
}

/**
 * get_sysfs_config
 *
 * Load the "sysfs_devices" section of the config file into the scan globals.
 * If that section says to ignore sysfs, forget the sysfs mount point and load
 * the "legacy_devices" section instead.
 **/
static void get_sysfs_config(void)
{
	boolean skip_sysfs = FALSE;

	LOG_ENTRY();

	EngFncs->get_config_bool("sysfs_devices.ignore_sysfs", &skip_sysfs);

	if (!skip_sysfs) {
		/* Include patterns, with a built-in fallback. */
		includes = NULL;
		include_count = 0;
		EngFncs->get_config_string_array("sysfs_devices.include",
						 &include_count, &includes);
		if (!includes) {
			include_count = sizeof default_sysfs_includes /
					sizeof *default_sysfs_includes;
			includes = default_sysfs_includes;
		}

		/* Exclude patterns. */
		excludes = NULL;
		exclude_count = 0;
		EngFncs->get_config_string_array("sysfs_devices.exclude",
						 &exclude_count, &excludes);

		/* Size of the file-handle pool. */
		EngFncs->get_config_uint32("sysfs_devices.max_open_disks",
					   &num_file_handles);
	} else {
		/* The config file says to ignore sysfs. Clearing the mount
		 * point switches the rest of the plugin to the legacy paths.
		 */
		free(sysfs_mount_point);
		sysfs_mount_point = NULL;
		get_legacy_config();
	}

	LOG_EXIT_VOID();
}

/**
 * file_handle_cleanup
 *
 * Free every tracking structure on the file_handles list, then destroy the
 * list itself and reset the global anchor.
 **/
static void file_handle_cleanup(void)
{
	list_element_t cursor;
	file_handle_t *entry;

	LOG_ENTRY();

	LIST_FOR_EACH(file_handles, cursor, entry) {
		EngFncs->engine_free(entry);
	}

	EngFncs->destroy_list(file_handles);
	file_handles = NULL;

	LOG_EXIT_VOID();
}

/**
 * file_handle_setup
 *
 * Create the file_handles list and fill it with num_file_handles tracking
 * entries. A configured value of 0 selects DEFAULT_FILE_HANDLES; values above
 * MAX_FILE_HANDLES are clamped.
 *
 * Returns 0 on success or ENOMEM. On failure the partially built list is torn
 * down again.
 **/
static int file_handle_setup(void)
{
	file_handle_t *handle;
	u_int32_t i;
	int rc = 0;

	LOG_ENTRY();

	/* Make sure the number of entries in the
	 * file-handles list will be reasonable.
	 */
	if (num_file_handles == 0) {
		num_file_handles = DEFAULT_FILE_HANDLES;
	} else if (num_file_handles > MAX_FILE_HANDLES) {
		num_file_handles = MAX_FILE_HANDLES;
	}

	LOG_DEBUG("Allocating %u entries in the file-handles list.\n",
		  num_file_handles);

	file_handles = EngFncs->allocate_list();
	if (!file_handles) {
		rc = ENOMEM;
		goto out;
	}

	for (i = 0; i < num_file_handles; i++) {
		handle = EngFncs->engine_alloc(sizeof(*handle));
		if (!handle) {
			rc = ENOMEM;
			break;
		}

		handle->elem = EngFncs->insert_thing(file_handles, handle,
						     INSERT_AFTER, NULL);
		if (!handle->elem) {
			/* The handle never made it onto the list, so the
			 * cleanup below cannot see it. Free it here.
			 */
			EngFncs->engine_free(handle);
			rc = ENOMEM;
			break;
		}
	}

	if (rc) {
		file_handle_cleanup();
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * file_handle_release
 *
 * Hand this file-handle back to the pool: re-queue it at the front of the
 * list, where the search for a free handle looks first, and clear its owner.
 **/
static void file_handle_release(file_handle_t *handle)
{
	list_anchor_t pool = file_handles;

	LOG_ENTRY();

	EngFncs->remove_element(handle->elem);
	EngFncs->insert_element(pool, handle->elem, INSERT_BEFORE, NULL);
	handle->disk = NULL;

	LOG_EXIT_VOID();
}

/**
 * file_handle_make_last
 *
 * Re-queue this file-handle at the tail of the list. Handles are stolen from
 * the head, so the tail entry is the last one to be taken away.
 **/
static void file_handle_make_last(file_handle_t *handle)
{
	list_anchor_t pool = file_handles;

	LOG_ENTRY();

	EngFncs->remove_element(handle->elem);
	EngFncs->insert_element(pool, handle->elem, INSERT_AFTER, NULL);

	LOG_EXIT_VOID();
}

/**
 * file_handle_find_free
 *
 * Search the file-handles list for the first entry that has no owning disk
 * and return it. The entry is not marked as owned here; the caller does that.
 *
 * NOTE(review): when every entry is in use, the return value is whatever
 * LIST_FOR_EACH leaves in "handle" after the final iteration. The caller
 * treats NULL as "none free", so this presumably is NULL - confirm against
 * the macro definition.
 **/
static file_handle_t *file_handle_find_free(void)
{
	file_handle_t *handle;
	list_element_t iter;

	LOG_ENTRY();

	LIST_FOR_EACH(file_handles, iter, handle) {
		if (!handle->disk) {
			/* Unowned entry. Stop with "handle" pointing at it. */
			break;
		}
	}

	LOG_EXIT_PTR(handle);
	return handle;
}

/**
 * file_handle_steal_first
 *
 * "Steal" the first file-handle on the list. If a disk currently owns it,
 * that disk is forcibly closed, which releases the handle back to the pool.
 *
 * Returns the handle, or NULL if the list has no entries.
 **/
static file_handle_t *file_handle_steal_first(void)
{
	file_handle_t *handle;

	LOG_ENTRY();

	handle = EngFncs->first_thing(file_handles, NULL);
	if (!handle) {
		/* Empty or missing pool. Nothing to steal. */
		LOG_ERROR("The file-handles list has no entries.\n");
	} else if (handle->disk) {
		LOG_DEBUG("Stealing file-handle from disk %s.\n",
			  handle->disk->name);
		close_dev(handle->disk);
	}

	LOG_EXIT_PTR(handle);
	return handle;
}

/**
 * file_handle_get
 *
 * Get a file-handle for a disk. Prefer an unused one; if the search comes
 * back empty-handed, "steal" the first handle on the list instead.
 **/
static file_handle_t *file_handle_get(void)
{
	file_handle_t *result;

	LOG_ENTRY();

	result = file_handle_find_free();
	if (result == NULL) {
		result = file_handle_steal_first();
	}

	LOG_EXIT_PTR(result);
	return result;
}

/**
 * LD_setup
 * @engine_function_table:	Engine services to use from now on.
 *
 * Remember the engine's function table, load the sysfs or legacy scan
 * configuration depending on whether sysfs is mounted, and build the
 * file-handle pool. Returns the result of file_handle_setup().
 **/
static int LD_setup(engine_functions_t * engine_function_table)
{
	boolean have_sysfs;
	int rc;

	/* Must be stored first: the logging macros are used right after. */
	EngFncs = engine_function_table;

	LOG_ENTRY();

	have_sysfs = where_is_sysfs(&sysfs_mount_point);
	if (!have_sysfs) {
		get_legacy_config();
	} else {
		get_sysfs_config();
	}

	rc = file_handle_setup();

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * open_dev
 *
 * Open the specified disk, unless it is already open (ld->fd > 0). Use
 * O_DIRECT to avoid caching. Use O_SYNC in case the kernel does not honor
 * O_DIRECT. Use the Engine's service so we automatically get a dev-node in
 * the /dev/evms/.nodes/ tree. Record the file handle in the disk's private
 * data.
 *
 * On success the disk's file-handle is moved to the end of the pool so it is
 * the last candidate for being stolen. Returns 0, EMFILE if the pool could
 * not supply a handle, or the positive error code derived from open_object().
 **/
static int open_dev(storage_object_t * disk)
{
	local_disk_t * ld = disk->private_data;
	int rc = 0;

	LOG_ENTRY();

	if (ld->fd <= 0) {
		ld->file_handle = file_handle_get();
		if (!ld->file_handle) {
			LOG_ERROR("No file-handle available for disk %s.\n",
				  disk->name);
			rc = EMFILE;
			goto out;
		}
		ld->file_handle->disk = disk;

		ld->fd = EngFncs->open_object(disk, O_RDWR | O_DIRECT | O_SYNC);
		if (ld->fd < 0) {
			/* open_object() reports failure as a negative code. */
			rc = - ld->fd;
			file_handle_release(ld->file_handle);
			ld->file_handle = NULL;
			LOG_DEBUG("Error opening disk %s: %d: %s\n",
				  disk->name, rc, strerror(rc));
		}
	}

	if (!rc) {
		file_handle_make_last(ld->file_handle);
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * close_dev
 *
 * Close the disk if it is open, return its file-handle to the pool and mark
 * the disk as closed (fd = -1).
 *
 * "Open" uses the same convention as open_dev(): only fd > 0 counts. A disk
 * that was never opened and still carries fd 0 must not be closed, because it
 * owns no file-handle and descriptor 0 is not ours.
 **/
static void close_dev(storage_object_t * disk)
{
	local_disk_t * ld = disk->private_data;
	int rc;

	LOG_ENTRY();

	if (ld->fd > 0) {
		rc = EngFncs->close_object(disk, ld->fd);
		if (rc) {
			LOG_DEBUG("Error closing disk %s: %d\n",
				  disk->name, rc);
		}

		if (ld->file_handle) {
			file_handle_release(ld->file_handle);
			ld->file_handle = NULL;
		}
		ld->fd = -1;
	}

	LOG_EXIT_VOID();
}

/**
 * LD_cleanup
 *
 * Close every disk owned by this plug-in that was opened during discovery
 * and free its private data. Then drop the cache, the file-handle pool and
 * the directory strings.
 **/
static void LD_cleanup(void)
{
	list_anchor_t my_disks;
	list_element_t cursor;
	storage_object_t * disk;

	LOG_ENTRY();

	/* Ask the engine for the disks this plug-in manages. */
	if (EngFncs->get_object_list(DISK, 0, my_plugin_record,
				     NULL, 0, &my_disks) == 0) {
		LIST_FOR_EACH(my_disks, cursor, disk) {
			/* Close first; close_dev() needs the private data. */
			close_dev(disk);
			EngFncs->engine_free(disk->private_data);
		}
		EngFncs->destroy_list(my_disks);
	}

	destroy_cache();
	file_handle_cleanup();

	/* free(NULL) is a no-op, so no guards are needed. */
	free(base_directory);
	base_directory = NULL;

	free(sysfs_mount_point);
	sysfs_mount_point = NULL;

	LOG_EXIT_VOID();
}

static void filter_out_excludes(char * pattern, int path_len, int new_globs_index)
{
	int rc;
	int i;
	glob_t exclude_glob = {0};

	LOG_ENTRY();

	for (i = 0; i < exclude_count; i++) {

		strcpy(pattern + path_len, excludes[i]);

		rc = glob(pattern, glob_flags, NULL, &exclude_glob);

		if (rc == 0) {
			glob_flags |= GLOB_APPEND;

		} else {
			if (rc != GLOB_NOMATCH) {
				LOG_WARNING("glob() of pattern %s failed with error %s\n", pattern,
					    (rc == GLOB_NOSPACE) ? "GLOB_NOSPACE" :
					    (rc == GLOB_ABEND) ? "GLOB_ABEND" :
					    "(unknown)");
			}
		}
	}

	for (i = 0; i < exclude_glob.gl_pathc; i++) {
		int j;

		for (j = new_globs_index; j < dev_names_glob.gl_pathc; j++) {
			if (strcmp(exclude_glob.gl_pathv[i], dev_names_glob.gl_pathv[j]) == 0) {
				int k;

				LOG_DEBUG("Removing %s.\n", dev_names_glob.gl_pathv[j]);
				free(dev_names_glob.gl_pathv[j]);

				/* Scoot up all following entries. */
				for (k = j+1; k < dev_names_glob.gl_pathc; k++) {
					dev_names_glob.gl_pathv[k-1] = dev_names_glob.gl_pathv[k];
				}
				dev_names_glob.gl_pathc--;

				break;
			}
		}
	}

	if (exclude_glob.gl_pathc >= 0) {
		globfree(&exclude_glob);
	}

	LOG_EXIT_VOID();
}

static void filter_out_non_block_devices(int new_globs_index)
{
	int i;
	struct stat statbuf;
	int status;

	LOG_ENTRY();

	i = new_globs_index;
	while (i < dev_names_glob.gl_pathc) {

		status = stat(dev_names_glob.gl_pathv[i], &statbuf);

		if (status == 0) {
			if (!S_ISBLK(statbuf.st_mode)) {
				int j;

				LOG_DEBUG("Removing %s.\n", dev_names_glob.gl_pathv[i]);
				free(dev_names_glob.gl_pathv[i]);

				/* Scoot up all following entries. */
				for (j = i+1; j < dev_names_glob.gl_pathc; j++) {
					dev_names_glob.gl_pathv[j-1] = dev_names_glob.gl_pathv[j];
				}
				dev_names_glob.gl_pathc--;
				dev_names_glob.gl_pathv[dev_names_glob.gl_pathc] = NULL;

				/* Leave "i" as it is so we check the new
				 * entry at the current index.
				 */
				continue;
			}

		} else {
			LOG_WARNING("stat(%s) failed with error code %d: %s\n", dev_names_glob.gl_pathv[i], errno, strerror(errno));
		}

		i++;
	}

	LOG_EXIT_VOID();
}

/**
 * get_dev_names
 * @dir:	Directory to scan, with or without a trailing '/'.
 *
 * Expand every include pattern inside @dir, appending the matches to
 * dev_names_glob. Then drop the matches of the exclude patterns and, when
 * sysfs is not in use, everything that is not a block device.
 *
 * An empty directory name, or one that does not fit the pattern buffer, is
 * skipped with a warning.
 **/
static void get_dev_names(const char * dir)
{
	int rc;
	int i;
	size_t path_len;
	int new_globs_index;

	LOG_ENTRY();
	LOG_DEBUG("Get device names in directory %s\n", dir);

	/* Need room for the name, a possible '/', and the terminator. */
	path_len = strlen(dir);
	if (path_len == 0 || path_len + 2 > sizeof(pattern)) {
		LOG_WARNING("Directory name \"%s\" is empty or too long. Skipping.\n",
			    dir);
		LOG_EXIT_VOID();
		return;
	}

	memcpy(pattern, dir, path_len + 1);
	if (pattern[path_len-1] != '/') {
		pattern[path_len] = '/';
		pattern[path_len+1] = '\0';
		path_len++;
	}

	/* Entries before this index belong to previously scanned directories. */
	new_globs_index = dev_names_glob.gl_pathc;

	for (i = 0; i < include_count; i++) {

		if (path_len + strlen(includes[i]) >= sizeof(pattern)) {
			LOG_WARNING("Include pattern %s is too long. Skipping.\n",
				    includes[i]);
			continue;
		}
		strcpy(pattern + path_len, includes[i]);

		rc = glob(pattern, glob_flags, NULL, &dev_names_glob);

		if (rc == 0) {
			/* From now on, add to the results instead of
			 * replacing them.
			 */
			glob_flags |= GLOB_APPEND;

		} else {
			if (rc != GLOB_NOMATCH) {
				LOG_WARNING("glob() of pattern %s failed with error %s\n", pattern,
					    (rc == GLOB_NOSPACE) ? "GLOB_NOSPACE" :
					    (rc == GLOB_ABEND) ? "GLOB_ABEND" :
					    "(unknown)");
			}
		}
	}

	filter_out_excludes(pattern, (int)path_len, new_globs_index);

	if (sysfs_mount_point == NULL) {
		filter_out_non_block_devices(new_globs_index);
	}

	LOG_EXIT_VOID();
}

/* Scratch buffer for the "<dir>*" "/" sub-directory pattern. It is only
 * needed up to the glob() call, so the recursion below may overwrite it.
 */
static char dir_pattern[PATH_MAX];

/**
 * process_dir
 * @name:	Directory to process. Must end with a '/'.
 *
 * Collect the device names in @name, then do the same recursively for each
 * of its sub-directories. Sub-directories are skipped, with a warning, if the
 * glob pattern for them does not fit the buffer.
 **/
static void process_dir(char * name)
{
	int len;
	size_t i;
	glob_t dirs_glob = {0};

	LOG_ENTRY();

	/* Process entries in this directory. */
	get_dev_names(name);

	/* Get a list of this directory's subdirectories. */
	len = snprintf(dir_pattern, sizeof(dir_pattern), "%s*/", name);
	if (len < 0 || (size_t)len >= sizeof(dir_pattern)) {
		LOG_WARNING("Directory name %s is too long. "
			    "Not searching its subdirectories.\n", name);
		LOG_EXIT_VOID();
		return;
	}

	if (glob(dir_pattern, 0, NULL, &dirs_glob) == 0) {

		/* Process the subdirectories. */
		for (i = 0; i < dirs_glob.gl_pathc; i++) {
			struct stat statbuf;

			if (stat(dirs_glob.gl_pathv[i], &statbuf) == 0 &&
			    S_ISDIR(statbuf.st_mode)) {
				process_dir(dirs_glob.gl_pathv[i]);
			}
		}

		globfree(&dirs_glob);
	}

	LOG_EXIT_VOID();
}

static char dir_path[PATH_MAX];

static void get_legacy_devs()
{
	int base_len, i;
	char * pch;

	LOG_ENTRY();

	memset(&dev_names_glob, 0, sizeof(dev_names_glob));
	glob_flags = 0;

	/* Make sure the user-specified directory ends with a '/'. */
	base_len = strlen(scan);
	if (scan[base_len-1] != '/') {
		pch = malloc(base_len + 2);
		if (pch) {
			strcpy(pch, scan);
			strcpy(pch + base_len, "/");
			scan = pch;
			base_len += 2;
		}
	}

	base_directory = strdup(scan);
	base_directory_len = strlen(base_directory);

	/* Always find devices in the base directory. */
	get_dev_names(base_directory);

	/* Recursively search any subdirectories the user specified. */
	strcpy(dir_path, base_directory);
	for (i = 0; i < directories_count; i++) {
		int len;

		strcpy(dir_path + base_directory_len, directories[i]);

		len = strlen(dir_path);
		if (dir_path[len-1] != '/') {
			strcpy(dir_path + len, "/");
		}

		process_dir(dir_path);
	}

	LOG_EXIT_VOID();
}

/**
 * get_sysfs_devs
 *
 * Build the list of candidate disks (dev_names_glob) from the entries of
 * "<sysfs_mount_point>/block/", which also becomes base_directory. A
 * base_directory left over from an earlier call is freed first.
 **/
static void get_sysfs_devs()
{
	int len;

	LOG_ENTRY();

	memset(&dev_names_glob, 0, sizeof(dev_names_glob));
	glob_flags = 0;

	if (!sysfs_mount_point) {
		LOG_ERROR("No sysfs mount point is known.\n");
		LOG_EXIT_VOID();
		return;
	}

	len = snprintf(dir_path, sizeof(dir_path), "%s/block/",
		       sysfs_mount_point);
	if (len < 0 || (size_t)len >= sizeof(dir_path)) {
		LOG_ERROR("sysfs mount point %s is too long.\n",
			  sysfs_mount_point);
		LOG_EXIT_VOID();
		return;
	}

	free(base_directory);
	base_directory = strdup(dir_path);
	if (!base_directory) {
		base_directory_len = 0;
		LOG_ERROR("Error allocating memory for the base directory name.\n");
		LOG_EXIT_VOID();
		return;
	}
	base_directory_len = strlen(base_directory);

	LOG_DEBUG("Scanning %s\n", dir_path);
	get_dev_names(dir_path);

	LOG_EXIT_VOID();
}

/**
 * get_sysfs_size
 * @full_name:	Full path-name to the disk's sysfs directory.
 * @p_size:	Return pointer to the disk's size (in sectors).
 *
 * Read "<full_name>/size" and parse it as a decimal sector count.
 *
 * Returns 0 on success, ENOMEM if the file name cannot be allocated, ENODATA
 * if the file is empty, or the errno of the failing open()/read(). @p_size
 * is only written on success.
 **/
static int get_sysfs_size(char * full_name, u_int64_t * p_size)
{
	int rc = 0;
	int fd;
	ssize_t bytes_read;
	char * size_file;
	char size_str[24];

	LOG_ENTRY();

	/* sizeof("/size") includes room for the terminator. */
	size_file = malloc(strlen(full_name) + sizeof("/size"));
	if (size_file == NULL) {
		LOG_ERROR("Error allocating memory for the name of the size "
			  "file of %s.\n", full_name);
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}
	strcpy(size_file, full_name);
	strcat(size_file, "/size");

	/* 0 is a valid descriptor. Only a negative value means failure. */
	fd = open(size_file, O_RDONLY);
	if (fd < 0) {
		rc = errno;
		LOG_ERROR("open(%s) returned error %d: %s\n", size_file, rc, strerror(rc));
		goto out;
	}

	/* read() does not terminate the buffer. Keep one byte spare and
	 * terminate it before handing it to strtoull().
	 */
	bytes_read = read(fd, size_str, sizeof(size_str) - 1);
	if (bytes_read > 0) {
		size_str[bytes_read] = '\0';
		/* Size is already in sectors. */
		*p_size = strtoull(size_str, NULL, 10);

	} else if (bytes_read == 0) {
		/* errno is not set for an empty file. */
		rc = ENODATA;
		LOG_ERROR("No bytes read from %s.\n", size_file);

	} else {
		rc = errno;
		LOG_ERROR("read() returned error %d: %s\n", rc, strerror(rc));
	}

	close(fd);

out:
	free(size_file);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_legacy_size
 * @full_name:	Full path-name to the disk device-node.
 * @p_size:	Return pointer to the disk's size (in sectors).
 *
 * Use the BLKGETSIZE64 ioctl to get the size (in sectors) of the
 * specified disk.
 **/
static int get_legacy_size(char * full_name, u_int64_t * p_size)
{
	int rc = 0;
	int fd;

	LOG_ENTRY();

	fd = open(full_name, O_RDONLY);

	if (fd > 0) {

		/* Ioctl to get size. (returns bytes) */
		rc = ioctl(fd, BLKGETSIZE64, p_size);
		if (rc == 0) {
			*p_size >>= EVMS_VSECTOR_SIZE_SHIFT;
			*p_size &= ~1;
		} else {
			rc = errno;
			LOG_DETAILS("ioctl to get the size returned error code "
				    "%d: %s.\n", rc, strerror(rc));
		}

		close(fd);

	} else {
		rc = errno;
		LOG_DETAILS("open(%s) returned error %d: %s\n",
			    full_name, rc, strerror(rc));
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_disk_size
 * @full_name:	Full path-name to the device node (or sysfs directory).
 * @disk:	Pointer to the disk object. Its dev_major must already be set.
 *
 * Fill in disk->size from sysfs or from the device-node, then reject disks
 * that cannot be real: those with size zero, and NBD devices that still
 * report the driver's default size.
 **/
static int get_disk_size(char * full_name, storage_object_t * disk)
{
	int rc;

	LOG_ENTRY();

	rc = sysfs_mount_point ? get_sysfs_size(full_name, &disk->size)
			       : get_legacy_size(full_name, &disk->size);

	if (disk->size == 0) {
		LOG_DEBUG("Disk %s has zero-size. Not a valid disk.\n",
			  disk->name);
		rc = EINVAL;
	} else if (disk->dev_major == NBD_MAJOR &&
		   ! is_drbd_active()) {
		boolean looks_uninitialized;

		/* YUCK!!! Uninitialized NBD devices report a size anyway.
		 * DRBD (which shares the same major) behaves correctly.
		 * The default size depends on the kernel series.
		 */
		if (EngFncs->is_2_4_kernel()) {
			looks_uninitialized = (disk->size == NBD_DEF_SIZE_2_4);
		} else {
			looks_uninitialized = (disk->size == NBD_DEF_SIZE_2_6);
		}

		if (looks_uninitialized) {
			LOG_DEBUG("Disk %s appears to be an uninitialized NBD "
				  "device.\n", disk->name);
			rc = EINVAL;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_sysfs_major_minor
 * @full_name:	Full path-name to the disk's sysfs directory.
 * @p_major:	Return pointer to the disk's major-number.
 * @p_minor:	Return pointer to the disk's minor-number.
 *
 * Read "<full_name>/dev" to get the device-number for the specified disk.
 * The content is parsed as "major:minor". If that does not match, it is
 * parsed as a single hexadecimal device-number.
 *
 * Returns 0 on success, ENOMEM if the file name cannot be allocated, ENODATA
 * if the file is empty, or the errno of the failing open()/read().
 **/
static int get_sysfs_major_minor(char * full_name,
				 u_int32_t * p_major,
				 u_int32_t * p_minor)
{
	int rc = 0;
	int fd;
	ssize_t bytes_read;
	char * dev_file;
	char dev_str[16];
	dev_t dev;

	LOG_ENTRY();

	/* sizeof("/dev") includes room for the terminator. */
	dev_file = malloc(strlen(full_name) + sizeof("/dev"));
	if (dev_file == NULL) {
		LOG_ERROR("Error allocating memory for the name of the dev "
			  "file of %s.\n", full_name);
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}
	strcpy(dev_file, full_name);
	strcat(dev_file, "/dev");

	/* 0 is a valid descriptor. Only a negative value means failure. */
	fd = open(dev_file, O_RDONLY);
	if (fd < 0) {
		rc = errno;
		LOG_ERROR("open(%s) returned error %d: %s\n", dev_file, rc, strerror(rc));
		goto out;
	}

	/* read() does not terminate the buffer. Keep one byte spare and
	 * terminate it before parsing.
	 */
	bytes_read = read(fd, dev_str, sizeof(dev_str) - 1);
	if (bytes_read > 0) {
		dev_str[bytes_read] = '\0';
		if (sscanf(dev_str, "%u:%u", p_major, p_minor) != 2) {
			dev = strtoul(dev_str, NULL, 16);
			*p_major = major(dev);
			*p_minor = minor(dev);
		}

	} else if (bytes_read == 0) {
		/* errno is not set for an empty file. */
		rc = ENODATA;
		LOG_ERROR("No bytes read from %s.\n", dev_file);

	} else {
		rc = errno;
		LOG_ERROR("read() returned error %d: %s\n", rc, strerror(rc));
	}

	close(fd);

out:
	/* The name buffer is ours on every path. */
	free(dev_file);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_legacy_major_minor
 * @full_name:	Full path-name to the disk device-node.
 * @p_major:	Return pointer to the disk's major-number.
 * @p_minor:	Return pointer to the disk's minor-number.
 *
 * stat() the device-node and split its st_rdev into major and minor.
 * Returns 0 on success or the errno from stat().
 **/
static int get_legacy_major_minor(char * full_name,
				  u_int32_t * p_major,
				  u_int32_t * p_minor)
{
	struct stat node;
	int rc = 0;

	LOG_ENTRY();

	if (stat(full_name, &node) != 0) {
		rc = errno;
		LOG_ERROR("stat(%s) returned error code %d: %s\n",
			  full_name, rc, strerror(rc));
	} else {
		*p_major = major(node.st_rdev);
		*p_minor = minor(node.st_rdev);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * check_for_duplicate_dev
 * @new_disk:		Disk being discovered.
 * @output_list:	Disks discovered so far.
 *
 * Search the current output list for the device-number of the specified disk.
 * Each device-number should only be discovered once.
 *
 * Returns 0 if the device-number is new, EINVAL if a disk on the list
 * already has it.
 **/
static int check_for_duplicate_dev(storage_object_t * new_disk,
				   list_anchor_t output_list)
{
	storage_object_t * disk;
	list_element_t itr;

	LOG_ENTRY();

	LIST_FOR_EACH(output_list, itr, disk) {
		if (disk->dev_major == new_disk->dev_major &&
		    disk->dev_minor == new_disk->dev_minor) {
			LOG_WARNING("Current disk %s has device-number %x:%x, which "
				    "is a duplicate of disk %s. Ignoring %s.\n",
				    new_disk->name, new_disk->dev_major,
				    new_disk->dev_minor, disk->name, new_disk->name);
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
	}

	LOG_EXIT_INT(0);
	return 0;
}

/**
 * get_disk_devnum
 * @full_name:	Full path-name to the disk device-node (or sysfs directory).
 * @disk:	Pointer to the disk object.
 * @output_list:Current list of discovered disks.
 *
 * Fill in the disk's major and minor numbers. Then make sure the major is
 * one this plug-in handles and that no already-discovered disk has the same
 * device-number.
 **/
static int get_disk_devnum(char * full_name,
			   storage_object_t * disk,
			   list_anchor_t output_list)
{
	int rc;

	LOG_ENTRY();

	rc = sysfs_mount_point
		? get_sysfs_major_minor(full_name, &disk->dev_major,
					&disk->dev_minor)
		: get_legacy_major_minor(full_name, &disk->dev_major,
					 &disk->dev_minor);

	if (!rc) {
		if (disk->dev_major == FLOPPY_MAJOR ||
		    disk->dev_major == MD_MAJOR ||
		    disk->dev_major == LVM_MAJOR) {
			/* Floppy, md, and lvm1 devices are not ours. */
			LOG_DEBUG("Disk %s has a disallowed major number: %d.\n",
				  disk->name, disk->dev_major);
			rc = EINVAL;
		} else {
			/* Only discover a given device-number once. */
			rc = check_for_duplicate_dev(disk, output_list);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_dm_device_list
 *
 * Return the list of current DM devices. The engine is queried whenever no
 * list is cached yet; the result is kept in dm_devices. If the query fails,
 * the error is logged and whatever dm_devices then holds is returned.
 **/
static dm_device_list_t * get_dm_device_list(void)
{
	LOG_ENTRY();

	if (dm_devices == NULL &&
	    EngFncs->dm_get_devices(&dm_devices) != 0) {
		LOG_ERROR("Error calling dm_get_devices.\n");
	}

	LOG_EXIT_PTR(dm_devices);
	return dm_devices;
}

/**
 * find_disk_in_dm_devices
 * @disk:	Disk to look up.
 * @dm_list:	Head of the DM device list.
 *
 * Walk the DM devices list and return the first entry whose major:minor
 * equals the disk's, or NULL if there is none.
 **/
static dm_device_list_t * find_disk_in_dm_devices(storage_object_t * disk,
						  dm_device_list_t * dm_list)
{
	dm_device_list_t * match = dm_list;

	LOG_ENTRY();

	while (match) {
		if (match->dev_major == disk->dev_major &&
		    match->dev_minor == disk->dev_minor) {
			break;
		}
		match = match->next;
	}

	LOG_EXIT_PTR(match);
	return match;
}

/**
 * check_multipath_name
 *
 * Other EVMS plugins can create multipath devices. We *don't* want to
 * recognize those devices as disks. Their names start with "mp/"
 * (multipath-segment-manager) or "md/" (MD-multipath).
 *
 * Returns EINVAL if the disk's name carries one of these prefixes,
 * 0 otherwise.
 **/
static int check_multipath_name(storage_object_t *disk)
{
	int rc = 0;

	LOG_ENTRY();

	if (strncmp(disk->name, "mp/", 3) == 0 ||
	    strncmp(disk->name, "md/", 3) == 0) {
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * update_multipath_child_list
 * @targets:	DM target describing the multipath device.
 *
 * Search the multipath target info for all the child devices. Add these
 * devices to the global list so we can filter these out at the end of
 * discovery. That way we won't discover both the multipath devices and
 * their component disks, which would lead to duplicate discoveries in the
 * higher levels.
 *
 * Use a temporary list to build up the list of children for this multipath.
 * Then append this list to the global one once we've collected all the
 * children. This will prevent hitting an error part way through processing
 * the list of child devices, which could leave the global list in an
 * inconsistent state.
 *
 * Returns 0 on success, ENOMEM on an allocation or insertion failure, or the
 * error from merge_lists().
 **/
static int update_multipath_child_list(dm_target_t * targets)
{
	dm_target_multipath_t * mp = targets->data.multipath;
	dm_priority_group_t * pg;
	dm_path_t * path;
	dm_device_t * device;
	list_anchor_t children = NULL;
	list_element_t itr1, itr2;
	int i, j, rc = 0;

	LOG_ENTRY();

	/* Allocate the global child list if it doesn't exist yet. */
	if (!multipath_children) {
		multipath_children = EngFncs->allocate_list();
		if (!multipath_children) {
			LOG_ERROR("Error allocating multipath_children list.\n");
			rc = ENOMEM;
			goto out;
		}
	}

	/* Allocate a temporary list. */
	children = EngFncs->allocate_list();
	if (!children) {
		LOG_ERROR("Error allocating temporary child list.\n");
		rc = ENOMEM;
		goto out;
	}

	/* For each priority group in the multipath. */
	for (i = 0; i < mp->num_groups; i++) {
		pg = mp->group + i;
		/* For each path in the priority group. */
		for (j = 0; j < pg->num_paths; j++) {
			path = pg->path + j;
			device = EngFncs->engine_alloc(sizeof(*device));
			if (!device) {
				LOG_ERROR("Error allocating device structure "
					  "for path %d:%d.\n",
					  path->device.major, path->device.minor);
				rc = ENOMEM;
				goto out;
			}
			device->major = path->device.major;
			device->minor = path->device.minor;

			/* Add this path's device to the temporary list. */
			itr1 = EngFncs->insert_thing(children, device,
						     INSERT_AFTER, NULL);
			if (!itr1) {
				LOG_ERROR("Error adding device %d:%d to the "
					  "temporary child list.\n",
					  device->major, device->minor);
				/* Not on the list, so the cleanup below
				 * cannot see it. Free it here.
				 */
				EngFncs->engine_free(device);
				rc = ENOMEM;
				goto out;
			}
		}
	}

out:
	if (!rc) {
		/* Success. Append the temporary list to the global list. */
		rc = EngFncs->merge_lists(multipath_children, children, NULL, NULL);
		if (rc) {
			LOG_ERROR("Error merging temporary list with "
				  "multipath_children list.\n");
		}
	}
	if (rc) {
		/* Some error occurred. Delete the temporary
		 * list and all of its devices.
		 */
		if (children) {
			LIST_FOR_EACH_SAFE(children, itr1, itr2, device) {
				EngFncs->delete_element(itr1);
				EngFncs->engine_free(device);
			}
		}
	}
	if (children) {
		EngFncs->destroy_list(children);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * check_multipath
 * @disk:	Disk whose device-number has already been determined.
 *
 * Check if this disk is a DM multipath device.
 *
 * Returns 0 if the disk is not a DM device at all (or the DM device list is
 * unavailable), and 0 if it is a multipath device this plug-in accepts, in
 * which case LD_FLAG_MULTIPATH is set and its children are recorded. Returns
 * non-zero if the DM mapping cannot be read, if the device is a DM device of
 * any other type, if its name marks it as owned by another EVMS plugin, or if
 * recording its children fails.
 *
 * Side effect: once the disk is found in the DM list, disk->name is replaced
 * with the DM name, even if the disk is subsequently rejected.
 **/
static int check_multipath(storage_object_t * disk)
{
	dm_device_list_t * dm_list, * dm_entry;
	dm_target_t * targets = NULL;
	local_disk_t * ld = disk->private_data;
	int rc = 0;

	LOG_ENTRY();

	/* Get the list of active DM devices. */
	dm_list = get_dm_device_list();
	if (!dm_list) {
		LOG_WARNING("Cannot get list of DM devices.\n");
		goto out;
	}

	/* Search the DM list for an entry that matches this disk. */
	dm_entry = find_disk_in_dm_devices(disk, dm_list);
	if (!dm_entry) {
		LOG_DEBUG("Disk %s is not a DM device.\n", disk->name);
		goto out;
	}

	/* Copy the DM name to this disk.
	 * NOTE(review): strncpy() does not terminate the string when the
	 * source is EVMS_NAME_SIZE bytes or longer; this presumably relies on
	 * disk->name being larger than EVMS_NAME_SIZE and zero-filled -
	 * confirm against the storage_object_t definition.
	 */
	LOG_DEBUG("Changing disk name from %s to %s.\n",
		  disk->name, dm_entry->name);
	strncpy(disk->name, dm_entry->name, EVMS_NAME_SIZE);

	/* Get the DM mapping for this disk. This is done after the rename
	 * above. NOTE(review): the lookup presumably uses the new name -
	 * confirm before reordering.
	 */
	rc = EngFncs->dm_get_targets(disk, &targets);
	if (rc) {
		LOG_ERROR("Error getting DM mapping for disk %s.\n", disk->name);
		goto out;
	}

	/* Reject all non-multipath devices. */
	if (targets->type != DM_TARGET_MULTIPATH) {
		LOG_DEBUG("Disk %s is not a multipath device.\n", disk->name);
		rc = EINVAL;
		goto out;
	}

	/* Reject all multipath devices that
	 * were created by other EVMS plugins.
	 */
	rc = check_multipath_name(disk);
	if (rc) {
		LOG_DEBUG("Multipath disk %s belongs to another EVMS plugin.\n",
			  disk->name);
		goto out;
	}

	/* Remember the component devices so they can be
	 * filtered out of the discovery results later.
	 */
	rc = update_multipath_child_list(targets);
	if (rc) {
		LOG_DEBUG("Error building list of children of "
			  "multipath disk %s.\n", disk->name);
		goto out;
	}

	ld->flags |= LD_FLAG_MULTIPATH;

out:
	/* Reached on every path; "targets" is still NULL on the early exits. */
	EngFncs->dm_deallocate_targets(targets);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * remove_multipath_children
 * @multipath_children:	List of dm_device_t entries naming multipath paths.
 * @output_list:	Discovery output list of disks.
 *
 * Every disk on the output list whose device-number appears on the
 * multipath_children list is taken off the output list, closed, and freed.
 **/
static void remove_multipath_children(list_anchor_t multipath_children,
				      list_anchor_t output_list)
{
	list_element_t child_itr, disk_itr, next_disk_itr;
	dm_device_t * child;
	storage_object_t * disk;

	LOG_ENTRY();

	LIST_FOR_EACH(multipath_children, child_itr, child) {
		LIST_FOR_EACH_SAFE(output_list, disk_itr, next_disk_itr, disk) {
			if (child->major != disk->dev_major ||
			    child->minor != disk->dev_minor) {
				continue;
			}

			/* This disk is a path of a multipath device. */
			EngFncs->delete_element(disk_itr);
			close_dev(disk);
			EngFncs->engine_free(disk->private_data);
			disk->flags &= ~SOFLAG_ACTIVE;
			EngFncs->free_logical_disk(disk);
		}
	}

	LOG_EXIT_VOID();
}

/**
 * valid_part_table_flag
 * @b:	Sector buffer. Bytes 510 and 511 are examined.
 *
 * Does this sector end with the DOS MBR signature (0x55, 0xaa)?
 **/
inline boolean valid_part_table_flag(unsigned char *b)
{
	if (b[510] != 0x55) {
		return 0;
	}
	return (b[511] == 0xaa);
}

/* Buffer that get_partition_table_geometry() reads the first sector into. */
static unsigned char MBR_buff[EVMS_VSECTOR_SIZE];

/* Address of the n-th partition record in sector buffer b. The records
 * start at byte offset 0x1be.
 */
#define pt_offset(b, n) ((struct partition *)((b) + 0x1be + \
					      (n) * sizeof(struct partition)))

/**
 * get_partition_table_geometry
 * @disk:	Disk to examine. Must be readable through LD_read().
 *
 * See if the disk has a DOS partition table in the first sector. If so, see
 * if a valid geometry is recorded there that we can use for the disk.
 *
 * The geometry is taken from the end head and end sector of the four primary
 * partition records. It is accepted only if at least one record is in use,
 * all records in use agree, and the sectors-per-track value is non-zero.
 *
 * Returns 0 and fills in disk->geometry.heads and
 * disk->geometry.sectors_per_track on success. Returns the LD_read() error,
 * or ENODATA if there is no partition table or no usable geometry.
 **/
static int get_partition_table_geometry(storage_object_t * disk)
{
	int rc;
	struct partition * p;
	int i;
	int heads = 0;
	int sectors = 0;
	boolean first = TRUE;
	boolean bad = FALSE;

	LOG_ENTRY();

	/* Use LD_read() because the disk was opened with O_DIRECT. LD_read()
	 * knows how to handle setting up buffer alignment for O_DIRECT.
	 */
	rc = LD_read(disk, 0, 1, MBR_buff);
	if (rc) {
		LOG_DETAILS("Unable to read MBR sector from disk %s. "
			    "Error code is %d:%s\n",
			    disk->name, rc, EngFncs->strerror(rc));
		LOG_EXIT_INT(rc);
		return rc;
	}

	if (!(valid_part_table_flag(MBR_buff))) {
		LOG_DETAILS("Disk %s does not have a valid partition table "
			    "flag.\n", disk->name);
		LOG_EXIT_INT(ENODATA);
		return ENODATA;
	}

	for (i = 0; (i < 4) && !bad; i++) {
		p = pt_offset(MBR_buff, i);
		if (p->sys_ind != 0) {
			int h, s;

			/* Heads are numbered from 0. The sector number
			 * occupies the low six bits of the field.
			 */
			h = p->end_head + 1;
			s = (p->end_sector & 077);
			if (first) {
				heads = h;
				sectors = s;
				first = FALSE;
			} else if (heads != h || sectors != s) {
				bad = TRUE;
			}
		}
	}

	/* Zero sectors per track (e.g. records whose CHS fields are
	 * blank) is not a geometry anybody can use.
	 */
	if (first || bad || sectors == 0) {
		LOG_DETAILS("Could not determine geometry from the partition "
			    "records on disk %s.\n", disk->name);
		LOG_EXIT_INT(ENODATA);
		return ENODATA;
	}

	disk->geometry.heads = heads;
	disk->geometry.sectors_per_track = sectors;

	LOG_EXIT_INT(0);
	return 0;
}

/**
 * get_kernel_geometry
 * @disk:	Disk to query; its private data supplies the file descriptor.
 *
 * Ask the kernel for the disk's geometry. The HDIO_GETGEO ioctl is tried
 * first and HDIO_GETGEO_BIG is used only if that fails. A geometry with a
 * non-zero starting offset is rejected with EINVAL. On success the heads and
 * sectors-per-track values are stored in disk->geometry; the cylinder count
 * reported by the kernel is deliberately ignored.
 *
 * Returns the result of the successful ioctl, the errno value if both
 * ioctls fail, or EINVAL for a non-zero starting offset.
 **/
static int get_kernel_geometry(storage_object_t * disk)
{
	local_disk_t * ld = disk->private_data;
	struct hd_big_geometry big_geometry;
	struct hd_geometry geometry;
	unsigned long start;
	unsigned int heads, sectors;
	int rc;

	LOG_ENTRY();

	rc = ioctl(ld->fd, HDIO_GETGEO, &geometry);
	if (rc != -1) {
		start = geometry.start;
		heads = geometry.heads;
		sectors = geometry.sectors;
	} else {
		rc = ioctl(ld->fd, HDIO_GETGEO_BIG, &big_geometry);
		if (rc == -1) {
			rc = errno;
			LOG_DEBUG("Error getting geometry for disk %s: %d: "
				  "%s.\n", disk->name, rc, EngFncs->strerror(rc));
			goto out;
		}
		start = big_geometry.start;
		heads = big_geometry.heads;
		sectors = big_geometry.sectors;
	}

	/* A disk's geometry must start at offset 0. */
	if (start != 0) {
		LOG_DEBUG("Geometry for disk %s reports a non-zero "
			  "starting offset. Not a valid disk.\n",
			  disk->name);
		rc = EINVAL;
		goto out;
	}

	/* Ala fdisk: the kernel's cylinder count is never used,
	 * because it's truncated.
	 */
	disk->geometry.heads = heads;
	disk->geometry.sectors_per_track = sectors;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_geometry
 * @disk:	Disk whose geometry fields are filled in. disk->size and
 *		disk->geometry.bytes_per_sector must already be set.
 *
 * Start from a default of 255 heads and 63 sectors per track. First try to
 * get the geometry from the partition table (if it exists). Otherwise ask the
 * kernel what the geometry is. If neither source succeeds, the defaults
 * remain in place. The cylinder count is then calculated from the disk size.
 *
 * Returns the result of the partition-table lookup only; the result of the
 * kernel query is not reported to the caller.
 **/
static int get_geometry(storage_object_t * disk)
{
	u_int32_t vsectors_per_sector;
	int rc;

	LOG_ENTRY();

	/* Set defaults */
	disk->geometry.heads = 255;
	disk->geometry.sectors_per_track = 63;

	rc = get_partition_table_geometry(disk);

	if (rc != 0) {
		get_kernel_geometry(disk);
	}

	/* Every term below is part of a divisor. Fall back to the defaults
	 * if a source reported zero, rather than dividing by zero.
	 */
	if (disk->geometry.heads == 0) {
		disk->geometry.heads = 255;
	}
	if (disk->geometry.sectors_per_track == 0) {
		disk->geometry.sectors_per_track = 63;
	}

	/* Number of EVMS virtual sectors per hard sector. A hard-sector
	 * size below EVMS_VSECTOR_SIZE would yield zero; use one instead.
	 */
	vsectors_per_sector = disk->geometry.bytes_per_sector /
			      EVMS_VSECTOR_SIZE;
	if (vsectors_per_sector == 0) {
		vsectors_per_sector = 1;
	}

	disk->geometry.cylinders = disk->size /
				   (disk->geometry.heads *
				    disk->geometry.sectors_per_track *
				    vsectors_per_sector);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_block_size
 * @disk:	Disk to query; its private data supplies the file descriptor.
 *
 * Query the disk's block-size with the BLKBSZGET ioctl and record it in
 * disk->geometry.block_size. Returns 0 on success, or the errno value from
 * the failed ioctl (the recorded block-size is then left unchanged).
 **/
static int get_block_size(storage_object_t * disk)
{
	local_disk_t * ld = disk->private_data;
	int block_size;
	int rc;

	LOG_ENTRY();

	rc = ioctl(ld->fd, BLKBSZGET, &block_size);
	if (!rc) {
		LOG_DEBUG("Disk %s has block-size %d.\n",
			  disk->name, block_size);
		disk->geometry.block_size = block_size;
	} else {
		rc = errno;
		LOG_ERROR("Error getting block size for disk %s: %d: %s.\n",
			  disk->name, rc, strerror(rc));
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * set_block_size
 * @disk:	Disk to change; its private data supplies the file descriptor.
 * @block_size:	New block-size, passed to the ioctl as given.
 *
 * Change the disk's block-size with the BLKBSZSET ioctl. On success the new
 * value is also recorded in disk->geometry.block_size and 0 is returned. On
 * failure the errno value is returned and the recorded value is unchanged.
 **/
static int set_block_size(storage_object_t * disk, int block_size)
{
	local_disk_t * ld = disk->private_data;
	int rc;

	LOG_ENTRY();

	rc = ioctl(ld->fd, BLKBSZSET, &block_size);
	if (!rc) {
		LOG_DEBUG("Setting disk %s block-size to %d.\n",
			  disk->name, block_size);
		disk->geometry.block_size = block_size;
	} else {
		rc = errno;
		LOG_ERROR("Error setting block size (%d) for disk %s: %d: "
			  "%s.\n", block_size, disk->name, rc, strerror(rc));
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_hardsector_size
 * @disk:	Disk to query; its private data supplies the file descriptor.
 *
 * Record the disk's hard-sector-size in disk->geometry.bytes_per_sector.
 * The value comes from the BLKSSZGET ioctl. Some drivers don't support that
 * ioctl yet, so a failure is only logged and EVMS_VSECTOR_SIZE is used
 * instead (the same default the kernel uses - see
 * include/linux/blkdev.h::get_hardsect_size).
 **/
static void get_hardsector_size(storage_object_t * disk)
{
	local_disk_t * ld = disk->private_data;
	u_int32_t hardsector_size;

	LOG_ENTRY();

	if (ioctl(ld->fd, BLKSSZGET, &hardsector_size) != 0) {
		int err = errno;

		LOG_DEBUG("Error getting hardsector size for disk %s: %d: "
			  "%s.\n", disk->name, err, strerror(err));
		hardsector_size = EVMS_VSECTOR_SIZE;
	}

	disk->geometry.bytes_per_sector = hardsector_size;

	LOG_EXIT_VOID();
}

/**
 * find_disk_type
 * @disk:	Disk to classify; the result is stored in its private data.
 *
 * Classify the disk purely by its name. A name that begins with "hd" or
 * contains "ide" sets LD_FLAG_IDE. Failing that, a name that begins with
 * "sd" or contains "scsi" sets LD_FLAG_SCSI. Any other name sets neither
 * flag.
 **/
static void find_disk_type(storage_object_t * disk)
{
	local_disk_t * ld = disk->private_data;
	const char * type_name;

	LOG_ENTRY();

	if (strncmp(disk->name, "hd", 2) == 0 ||
	    strstr(disk->name, "ide") != NULL) {
		ld->flags |= LD_FLAG_IDE;
	} else if (strncmp(disk->name, "sd", 2) == 0 ||
		   strstr(disk->name, "scsi") != NULL) {
		ld->flags |= LD_FLAG_SCSI;
	}

	/* Report whatever the flags now say, IDE taking precedence. */
	if (ld->flags & LD_FLAG_IDE) {
		type_name = "IDE";
	} else if (ld->flags & LD_FLAG_SCSI) {
		type_name = "SCSI";
	} else {
		type_name = "Unknown";
	}

	LOG_DEBUG("Type of disk %s is %s\n", disk->name, type_name);

	LOG_EXIT_VOID();
}

/**
 * create_logical_disk
 * @working_disk:	Temporary disk object (with local_disk_t private data)
 *			describing the device. Its name is modified in place:
 *			every '!' is replaced by '/'.
 *
 * Allocate a new engine disk object plus private data, and populate both
 * from the working disk. The private data is a copy of the working disk's
 * private data, and the copied file-handle is re-pointed at the new disk.
 *
 * Returns the new disk, or NULL if the private data could not be allocated.
 * If the engine fails to allocate the disk object, the pointer is returned
 * as the engine left it (it is initialised to NULL beforehand).
 **/
static storage_object_t * create_logical_disk(storage_object_t * working_disk)
{
	local_disk_t * working_ld = working_disk->private_data;
	storage_object_t * disk = NULL;
	local_disk_t * ld;
	char * bang;
	int rc;

	LOG_ENTRY();

	/* Replace exclamation marks with slashes in the disk name. */
	for (bang = strchr(working_disk->name, '!');
	     bang != NULL;
	     bang = strchr(bang + 1, '!')) {
		*bang = '/';
	}

	rc = EngFncs->allocate_logical_disk(working_disk->name, &disk);
	if (rc) {
		LOG_SERIOUS("Error allocating new disk object for disk %s: %d: "
			    "%s.\n", working_disk->name, rc, EngFncs->strerror(rc));
		goto out;
	}

	ld = EngFncs->engine_alloc(sizeof(local_disk_t));
	disk->private_data = ld;
	if (!ld) {
		LOG_SERIOUS("Error allocating private data for disk %s.\n",
			    disk->name);
		EngFncs->free_logical_disk(disk);
		disk = NULL;
		goto out;
	}

	/* Identity and size of the new disk. */
	disk->data_type	= DATA_TYPE;
	disk->dev_major	= working_disk->dev_major;
	disk->dev_minor	= working_disk->dev_minor;
	disk->plugin	= my_plugin_record;
	disk->flags	= SOFLAG_ACTIVE;
	disk->size	= working_disk->size;

	/* Geometry gathered during discovery. */
	disk->geometry.cylinders = working_disk->geometry.cylinders;
	disk->geometry.heads = working_disk->geometry.heads;
	disk->geometry.sectors_per_track = working_disk->geometry.sectors_per_track;
	disk->geometry.bytes_per_sector = working_disk->geometry.bytes_per_sector;
	disk->geometry.block_size = working_disk->geometry.block_size;

	/* Boot cylinder limit: LBA of the 1st sector above the boot
	 * cylinder. A drive with fewer than 1024 cylinders is limited only
	 * by its own size; otherwise the limit is 1023 cylinders' worth of
	 * sectors.
	 */
	if (disk->geometry.cylinders < 1024) {
		disk->geometry.boot_cylinder_limit = disk->size;
	} else {
		disk->geometry.boot_cylinder_limit =
			(disk->geometry.heads *
			 disk->geometry.sectors_per_track * 1023);
	}

	/* Take over the working private data, then make the
	 * file-handle refer to the new disk object.
	 */
	*ld = *working_ld;
	ld->file_handle->disk = disk;

	find_disk_type(disk);

	LOG_DETAILS("New Logical Disk:\n");
	LOG_DETAILS("  name:            %s\n", disk->name);
	LOG_DETAILS("  size:            %"PRIu64"\n", disk->size);
	LOG_DETAILS("  device-number:   %x:%x\n", disk->dev_major, disk->dev_minor);
	LOG_DETAILS("  file-descriptor: %d\n", ld->fd);
	LOG_DETAILS("  geometry:\n");
	LOG_DETAILS("    cylinders:     %"PRIu64"\n", disk->geometry.cylinders);
	LOG_DETAILS("    heads:         %d\n", disk->geometry.heads);
	LOG_DETAILS("    sectors:       %d\n", disk->geometry.sectors_per_track);
	LOG_DETAILS("    sector size:   %d (bytes)\n", disk->geometry.bytes_per_sector);
	LOG_DETAILS("    block size:    %"PRIu64" (bytes)\n", disk->geometry.block_size);

out:
	LOG_EXIT_PTR(disk);
	return disk;
}

/**
 * LD_discover
 * @input_list:	Not used by this plugin.
 * @output_list:	List that receives the discovered disks.
 * @final_call:	Not used by this plugin.
 *
 * Build the list of candidate device nodes (from sysfs if a sysfs mount
 * point is known, otherwise from the legacy device directories) and examine
 * each one. A candidate that passes every check becomes a new logical disk
 * on the output list; a candidate that fails any check is silently skipped.
 * Afterwards, disks that are children of multipath devices are removed from
 * the output list and the per-discovery multipath bookkeeping is released.
 *
 * Always returns 0.
 **/
static int LD_discover(list_anchor_t input_list,
		       list_anchor_t output_list,
		       boolean final_call)
{
	storage_object_t working_disk;
	storage_object_t * disk;
	local_disk_t working_ld;
	list_element_t itr;
	char * full_node_path;
	uint count;
	size_t i;
	int rc;

	LOG_ENTRY();

	/* Get the list of devices to examine. */
	if (sysfs_mount_point) {
		get_sysfs_devs();
	} else {
		get_legacy_devs();
	}

	for (i = 0; i < dev_names_glob.gl_pathc; i++) {
		full_node_path = dev_names_glob.gl_pathv[i];
		LOG_DEBUG("Examining disk %s\n", full_node_path);

		/* Initialize the working disk object. The memset also
		 * guarantees that the name stays NUL-terminated as long as
		 * the name array is larger than EVMS_NAME_SIZE.
		 */
		memset(&working_disk, 0, sizeof(working_disk));
		memset(&working_ld, 0, sizeof(working_ld));
		working_disk.private_data = &working_ld;
		working_ld.fd = -1;
		strncpy(working_disk.name, full_node_path + base_directory_len,
			EVMS_NAME_SIZE);

		/* Get the device-number of the disk. */
		rc = get_disk_devnum(full_node_path, &working_disk, output_list);
		if (rc) {
			continue;
		}

		/* Get the size of the disk. */
		rc = get_disk_size(full_node_path, &working_disk);
		if (rc) {
			continue;
		}

		/* Open the disk. */
		rc = open_dev(&working_disk);
		if (rc) {
			continue;
		}

		/* Check for DM-multipath devices. */
		rc = check_multipath(&working_disk);
		if (rc) {
			close_dev(&working_disk);
			continue;
		}

		/* Get the disk's block-size. */
		rc = get_block_size(&working_disk);
		if (rc) {
			close_dev(&working_disk);
			continue;
		}

		/* Get the disk's hard-sector-size. */
		get_hardsector_size(&working_disk);

		/* Get the disk's geometry. Failure is not fatal:
		 * get_geometry() leaves default values in place.
		 */
		get_geometry(&working_disk);

		/* Passed all checks. Create a new disk. */
		disk = create_logical_disk(&working_disk);
		if (!disk) {
			close_dev(&working_disk);
			continue;
		}

		/* Insert the new disk into output list. */
		itr = EngFncs->insert_thing(output_list, disk,
					    INSERT_AFTER, NULL);
		if (!itr) {
			LOG_SERIOUS("Error adding new disk %s to output list. "
				    "Deleting the disk.\n", disk->name);
			EngFncs->engine_free(disk->private_data);
			EngFncs->free_logical_disk(disk);
			close_dev(&working_disk);
			continue;
		}
	}

	remove_multipath_children(multipath_children, output_list);

	/* These are file-scope variables that outlive this call. Clear
	 * them after releasing what they point to, so that a later
	 * discovery pass never sees pointers to freed memory.
	 */
	EngFncs->dm_deallocate_device_list(dm_devices);
	dm_devices = NULL;
	EngFncs->destroy_list(multipath_children);
	multipath_children = NULL;

	count = EngFncs->list_count(output_list);
	LOG_DEBUG("Discovered %u disks.\n", count);
	LOG_EXIT_INT(0);
	return 0;
}

/**
 * get_alignment_size
 * @disk:	Disk that is about to be accessed with O_DIRECT.
 *
 * Return the size (in bytes) that O_DIRECT I/O to this disk must be aligned
 * on. On 2.4 kernels that is the disk's block-size; on other kernels it is
 * the disk's hard-sector-size.
 *
 * The block-size can change at run-time, so on 2.4 kernels it is re-read on
 * every call. To keep as much of the disk as possible accessible, a
 * block-size larger than max(hard-sector-size, 1024) is lowered to that
 * value. Errors from reading or setting the block-size are not checked; the
 * value recorded in disk->geometry.block_size is returned either way.
 **/
static int get_alignment_size(storage_object_t * disk)
{
	int min_block_size = max(disk->geometry.bytes_per_sector, 1024);
	int size;

	LOG_ENTRY();

	if (!EngFncs->is_2_4_kernel()) {
		size = disk->geometry.bytes_per_sector;
		goto out;
	}

	get_block_size(disk);
	size = disk->geometry.block_size;
	if (size > min_block_size) {
		set_block_size(disk, min_block_size);
		size = disk->geometry.block_size;
	}

out:
	LOG_EXIT_INT(size);
	return size;
}

/**
 * check_alignment
 * @align_size:	Required alignment, in bytes.
 * @offset:	Starting offset of the I/O request, in sectors.
 * @count:	Size of the I/O request, in sectors.
 * @buffer:	Caller's data buffer.
 *
 * Decide whether an I/O request can be issued as-is under the given
 * alignment restriction. The buffer address must be a multiple of
 * @align_size, and both @offset and @count must be multiples of the
 * alignment expressed in sectors.
 *
 * Returns 0 if all three are aligned, EINVAL otherwise.
 **/
static int check_alignment(int align_size,
			   lsn_t offset,
			   sector_count_t count,
			   void * buffer)
{
	int align_sectors = align_size >> EVMS_VSECTOR_SIZE_SHIFT;
	int align_mask = align_size - 1;
	int rc;

	LOG_ENTRY();
	LOG_EXTRA("Checking alignment.\n");
	LOG_EXTRA("\tAlignment Size: %d bytes\n", align_size);
	LOG_EXTRA("\tBuffer:         0x%p\n", buffer);
	LOG_EXTRA("\tSector Offset:  %"PRIu64"\n", offset);
	LOG_EXTRA("\tSector Count:   %"PRIu64"\n", count);

	/* The checks run in this order and stop at the first failure. */
	if (((unsigned long)buffer & align_mask) ||
	    (offset % align_sectors) ||
	    (count % align_sectors)) {
		rc = EINVAL;
	} else {
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_aligned_buffer
 * @offset:	Starting offset (in sectors) of engine I/O request.
 * @count:	Size (in sectors) of engine I/O request.
 * @align_size:	Size (in bytes) that the I/O must be aligned on.
 * @local_offset:	Aligned starting offset (in sectors).
 * @local_count:	Aligned I/O size (in sectors).
 * @buffer:	Aligned data buffer.
 *
 * To use O_DIRECT, the buffer passed to read() or write() must be aligned on
 * the device's block/sector size. The size and starting offset of the I/O must
 * also be a multiple of the block/sector size.
 *
 * The request is widened to the smallest aligned range that covers it, and a
 * buffer of that size is allocated with memalign(). The caller releases the
 * buffer with free().
 *
 * Returns 0 on success, or ENOMEM if the buffer cannot be allocated.
 **/
static int get_aligned_buffer(lsn_t offset,
			      sector_count_t count,
			      int align_size,
			      lsn_t * local_offset,
			      sector_count_t * local_count,
			      void ** buffer)
{
	/* offset and count are in sectors while align_size is in bytes, so
	 * convert the alignment to sectors before rounding. Rounding to the
	 * byte value would inflate the I/O to at least align_size sectors
	 * and could push it past the end of the disk.
	 */
	int align_sectors = align_size >> EVMS_VSECTOR_SIZE_SHIFT;
	u_int32_t offset_diff;
	int rc = 0;

	LOG_ENTRY();

	/* An alignment smaller than one sector imposes
	 * no restriction on sector offsets or counts.
	 */
	if (align_sectors < 1) {
		align_sectors = 1;
	}

	/* Round down starting offset to the alignment size. */
	*local_offset = round_down(offset, align_sectors);

	/* Difference between real offset and local offset. */
	offset_diff = offset - *local_offset;

	/* Round up total count of sectors to alignment size. */
	*local_count = round_up(count + offset_diff, align_sectors);

	/* Allocate the buffer that will actually perform the I/O. The
	 * memalign call guarantees that the allocated buffer is
	 * aligned on the desired alignment-size.
	 */
	*buffer = memalign(align_size, *local_count << EVMS_VSECTOR_SIZE_SHIFT);
	if (!*buffer) {
		rc = ENOMEM;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_read
 * @disk:	Disk to read from.
 * @offset:	Starting offset of the read, in sectors.
 * @count:	Number of sectors to read.
 * @buffer:	Destination for count sectors of data.
 *
 * Read sectors from the disk. The request is first looked up in the cache.
 * On a miss the disk is opened if necessary and the data is read from the
 * device. If the caller's buffer, offset, or count do not satisfy the
 * O_DIRECT alignment restriction, the read goes through a temporary aligned
 * buffer and the requested part is copied back. Data that was read from the
 * device is then recorded in the cache.
 *
 * Returns 0 on success, EINVAL if the request extends past the end of the
 * disk, or the error from opening the disk, allocating the aligned buffer,
 * or performing the read.
 **/
static int LD_read(storage_object_t * disk,
		   lsn_t offset,
		   sector_count_t count,
		   void * buffer)
{
	void * local_buffer = NULL;
	lsn_t local_offset;
	sector_count_t local_count;
	local_disk_t * ld = disk->private_data;
	int rc, align_size, aligned = FALSE;

	LOG_ENTRY();
	LOG_DEBUG("Read disk:%s offset:%"PRIu64" count:%"PRIu64"\n",
		  disk->name, offset, count);

	/* Written so that offset + count is never computed: that sum
	 * could wrap around and slip past a simple comparison.
	 */
	if (count > disk->size || offset > disk->size - count) {
		LOG_ERROR("Read request past end of disk.\n");
		rc = EINVAL;
		goto out;
	}

	rc = read_from_cache(disk, offset, count, buffer);
	if (!rc) {
		/* Found in the cache. */
		goto out;
	}

	/* Make sure the disk is open. */
	rc = open_dev(disk);
	if (rc) {
		goto out;
	}

	/* Get the alignment restriction for O_DIRECT. */
	align_size = get_alignment_size(disk);

	/* Check if the supplied buffer, offset, and count
	 * are valid for the alignment restrictions.
	 */
	rc = check_alignment(align_size, offset, count, buffer);
	if (rc) {
		/* Get a data buffer aligned with this restriction. */
		rc = get_aligned_buffer(offset, count, align_size, &local_offset,
					&local_count, &local_buffer);
		if (rc) {
			goto out;
		}
	} else {
		aligned = TRUE;
		local_offset = offset;
		local_count = count;
		local_buffer = buffer;
	}

	/* Send the read to the engine. */
	rc = EngFncs->read_object(disk, ld->fd, local_buffer,
				  local_count << EVMS_VSECTOR_SIZE_SHIFT,
				  local_offset << EVMS_VSECTOR_SIZE_SHIFT);
	if (rc < 0) {
		rc = -rc;
		goto out;
	}

	/* Copy the data back to the caller's buffer. */
	if (!aligned) {
		memcpy(buffer,
		       (char *)local_buffer +
		       ((offset - local_offset) << EVMS_VSECTOR_SIZE_SHIFT),
		       count << EVMS_VSECTOR_SIZE_SHIFT);
	}

	/* Record this I/O in the cache. */
	write_to_cache(disk, offset, count, buffer);
	rc = 0;

out:
	/* local_buffer is only ours to free when it is not the caller's
	 * buffer. It is still NULL on the early-exit paths.
	 */
	if (!aligned) {
		free(local_buffer);
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_write
 * @disk:	Disk to write to.
 * @offset:	Starting offset of the write, in sectors.
 * @count:	Number of sectors to write.
 * @buffer:	Source of count sectors of data.
 *
 * Write sectors to the disk, opening it first if necessary. If the caller's
 * buffer, offset, or count do not satisfy the O_DIRECT alignment
 * restriction, the write goes through a temporary aligned buffer. When that
 * buffer covers more sectors than the request, the surrounding sectors are
 * read from the disk first so that they are written back unchanged. After a
 * successful write the read cache is purged.
 *
 * Returns 0 on success, EINVAL if the request extends past the end of the
 * disk, or the error from opening the disk, allocating the aligned buffer,
 * or performing the I/O.
 **/
static int LD_write(storage_object_t * disk,
		    lsn_t offset,
		    sector_count_t count,
		    void * buffer)
{
	void * local_buffer = NULL;
	lsn_t local_offset;
	sector_count_t local_count;
	local_disk_t * ld = disk->private_data;
	int rc, align_size, aligned = FALSE;

	LOG_ENTRY();
	LOG_DEBUG("Write disk:%s offset:%"PRIu64" count:%"PRIu64"\n",
		  disk->name, offset, count);

	/* Written so that offset + count is never computed: that sum
	 * could wrap around and slip past a simple comparison.
	 */
	if (count > disk->size || offset > disk->size - count) {
		LOG_ERROR("Write request past end of disk.\n");
		rc = EINVAL;
		goto out;
	}

	/* Make sure the disk is open. */
	rc = open_dev(disk);
	if (rc) {
		goto out;
	}

	/* Get the alignment restriction for O_DIRECT. */
	align_size = get_alignment_size(disk);

	/* Check if the supplied buffer, offset, and count
	 * are valid for the alignment restrictions.
	 */
	rc = check_alignment(align_size, offset, count, buffer);
	if (rc) {
		/* Get a data buffer aligned with this restriction. */
		rc = get_aligned_buffer(offset, count, align_size, &local_offset,
					&local_count, &local_buffer);
		if (rc) {
			goto out;
		}
	} else {
		aligned = TRUE;
		local_offset = offset;
		local_count = count;
		local_buffer = buffer;
	}

	/* The aligned range is wider than the request: fetch the current
	 * contents so the extra sectors are rewritten with their own data.
	 */
	if (local_count != count) {
		rc = EngFncs->read_object(disk, ld->fd, local_buffer,
					  local_count << EVMS_VSECTOR_SIZE_SHIFT,
					  local_offset << EVMS_VSECTOR_SIZE_SHIFT);
		if (rc < 0) {
			rc = -rc;
			goto out;
		}
	}

	/* Put user data at the right place in the buffer */
	if (!aligned) {
		memcpy((char *)local_buffer +
		       ((offset - local_offset) << EVMS_VSECTOR_SIZE_SHIFT),
		       buffer, count << EVMS_VSECTOR_SIZE_SHIFT);
	}

	/* Send the write to the engine. */
	rc = EngFncs->write_object(disk, ld->fd, local_buffer,
				   local_count << EVMS_VSECTOR_SIZE_SHIFT,
				   local_offset << EVMS_VSECTOR_SIZE_SHIFT);
	if (rc < 0) {
		rc = -rc;
		goto out;
	}

	/* The cache is too simple to do real caching.  It's really only a read
	 * cache.  A write, which should not happen during discovery, means the
	 * contents of the cache may not be up to date.  Purge the cache and
	 * start caching all over again.
	 */
	purge_cache();
	rc = 0;

out:
	/* local_buffer is only ours to free when it is not the caller's
	 * buffer. It is still NULL on the early-exit paths.
	 */
	if (!aligned) {
		free(local_buffer);
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_discard
 * @disks:	Ignored.
 *
 * Like commit, this API is not expected to be called for this plugin.
 * Nothing is done and success is reported.
 **/
static int LD_discard(list_anchor_t disks)
{
	int rc = 0;

	LOG_ENTRY();

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_add_sectors_to_kill_list
 * @disk:	Disk that owns the sectors.
 * @lsn:	First sector of the range.
 * @count:	Number of sectors in the range.
 *
 * Validate that the range lies within the disk, then hand the request to
 * the engine's kill-list service.
 *
 * Returns EINVAL if the range extends past the end of the disk, otherwise
 * the engine's return code.
 **/
static int LD_add_sectors_to_kill_list(storage_object_t * disk,
				       lsn_t lsn,
				       sector_count_t count)
{
	int rc;

	LOG_ENTRY();

	/* Written so that lsn + count is never computed: that sum
	 * could wrap around and slip past a simple comparison.
	 */
	if (count > disk->size || lsn > disk->size - count) {
		LOG_ERROR("Kill-sectors request past end of disk %s.\n",
			  disk->name);
		rc = EINVAL;
	} else {
		rc = EngFncs->add_sectors_to_kill_list(disk, lsn, count);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_commit_changes
 * @disk:	Ignored.
 * @phase:	Ignored.
 *
 * The disk manager has nothing to do at commit time, so every call simply
 * reports success.
 **/
static int LD_commit_changes(storage_object_t * disk, commit_phase_t phase)
{
	int rc = 0;

	LOG_ENTRY();

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_get_info
 * @disk:	Disk to describe.
 * @name:	NULL for the basic information, or the name of an extended
 *		information set. Only names beginning with "Type"
 *		(case-insensitive) are recognised.
 * @info:	Receives the information array; set to NULL on failure.
 *
 * Return information about this disk to display to the user. For the "Type"
 * set, IDE or SCSI specific information is returned depending on how the
 * disk was classified.
 *
 * Returns the result of the info helper that was called. Returns EINVAL if
 * @info is NULL, if @name is not recognised, or if "Type" information is
 * requested for a disk that is neither IDE nor SCSI.
 **/
static int LD_get_info(storage_object_t * disk,
		       char * name,
		       extended_info_array_t ** info)
{
	local_disk_t * ld = disk->private_data;
	int rc = EINVAL;

	LOG_ENTRY();

	/* Same guard as LD_get_plugin_info(): there is nowhere
	 * to put the result without an output pointer.
	 */
	if (!info) {
		goto out;
	}
	*info = NULL;

	if (!name) {
		rc = get_basic_info(disk, info);
	} else if (!strncasecmp(name, "Type", 4)) {
		if (ld->flags & LD_FLAG_IDE) {
			rc = get_ide_info(disk, info);
		} else if (ld->flags & LD_FLAG_SCSI) {
			rc = get_scsi_info(disk, info);
		}
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_get_plugin_info
 * @descriptor_name:	Must be NULL; this plugin has no named info sets.
 * @info:		Receives a newly allocated array of six entries.
 *
 * Describe the plug-in itself: short name, long name, plug-in type, plug-in
 * version, and the Engine services and plug-in API versions it requires.
 *
 * Returns 0 on success, EINVAL if @info is NULL or @descriptor_name is not
 * NULL, or ENOMEM if the array cannot be allocated.
 **/
static int LD_get_plugin_info(char * descriptor_name,
			      extended_info_array_t ** info)
{
	char plugin_ver[64];
	char engine_api_ver[64];
	char plugin_api_ver[64];
	extended_info_array_t * Info;
	extended_info_t * entry;
	int rc = EINVAL;

	LOG_ENTRY();

	if (!info) {
		goto out;
	}
	*info = NULL;

	if (descriptor_name) {
		goto out;
	}

	Info = EngFncs->engine_alloc(sizeof(extended_info_array_t) +
				     6 * sizeof(extended_info_t));
	if (!Info) {
		rc = ENOMEM;
		goto out;
	}

	Info->count = 6;

	/* Render the three version numbers as "major.minor.patchlevel". */
	sprintf(plugin_ver, "%d.%d.%d",
		MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL);

	sprintf(engine_api_ver, "%d.%d.%d",
		my_plugin_record->required_engine_api_version.major,
		my_plugin_record->required_engine_api_version.minor,
		my_plugin_record->required_engine_api_version.patchlevel);

	sprintf(plugin_api_ver, "%d.%d.%d",
		my_plugin_record->required_plugin_api_version.plugin.major,
		my_plugin_record->required_plugin_api_version.plugin.minor,
		my_plugin_record->required_plugin_api_version.plugin.patchlevel);

	/* Entry 0: short name. */
	entry = &Info->info[0];
	entry->name = EngFncs->engine_strdup("Short Name");
	entry->title = EngFncs->engine_strdup(_("Short Name"));
	entry->desc = EngFncs->engine_strdup(_("A short name given to this plug-in"));
	entry->type = EVMS_Type_String;
	entry->value.s = EngFncs->engine_strdup(my_plugin_record->short_name);

	/* Entry 1: long name. */
	entry = &Info->info[1];
	entry->name = EngFncs->engine_strdup("Long Name");
	entry->title = EngFncs->engine_strdup(_("Long Name"));
	entry->desc = EngFncs->engine_strdup(_("A longer, more descriptive name for this plug-in"));
	entry->type = EVMS_Type_String;
	entry->value.s = EngFncs->engine_strdup(my_plugin_record->long_name);

	/* Entry 2: plug-in type. */
	entry = &Info->info[2];
	entry->name = EngFncs->engine_strdup("Type");
	entry->title = EngFncs->engine_strdup(_("Plug-in Type"));
	entry->desc = EngFncs->engine_strdup(_("There are various types of plug-ins, each responsible for some kind of storage object or logical volume."));
	entry->type = EVMS_Type_String;
	entry->value.s = EngFncs->engine_strdup(_("Device Manager"));

	/* Entry 3: plug-in version. */
	entry = &Info->info[3];
	entry->name = EngFncs->engine_strdup("Version");
	entry->title = EngFncs->engine_strdup(_("Plug-in Version"));
	entry->desc = EngFncs->engine_strdup(_("Version number of this plug-in"));
	entry->type = EVMS_Type_String;
	entry->value.s = EngFncs->engine_strdup(plugin_ver);

	/* Entry 4: required Engine services version. */
	entry = &Info->info[4];
	entry->name = EngFncs->engine_strdup("Required Engine Services Version");
	entry->title = EngFncs->engine_strdup(_("Required Engine Services Version"));
	entry->desc = EngFncs->engine_strdup(_("Version of the Engine services that this plug-in requires.  "
					       "It will not run on older versions of the Engine services."));
	entry->type = EVMS_Type_String;
	entry->value.s = EngFncs->engine_strdup(engine_api_ver);

	/* Entry 5: required plug-in API version. */
	entry = &Info->info[5];
	entry->name = EngFncs->engine_strdup("Required Plug-in API Version");
	entry->title = EngFncs->engine_strdup(_("Required Plug-in API Version"));
	entry->desc = EngFncs->engine_strdup(_("Version of the Engine plug-in API that this plug-in requires.  "
					       "It will not run on older versions of the Engine plug-in API."));
	entry->type = EVMS_Type_String;
	entry->value.s = EngFncs->engine_strdup(plugin_api_ver);

	*info = Info;
	rc = 0;

out:
	LOG_EXIT_INT(rc);
	return rc;
}


/**
 * LD_get_plugin_functions
 * @object:	Ignored.
 * @actions:	Ignored; never written to.
 *
 * The Local Disk Manager does have plug-in functions, but it does not
 * advertise them. Callers that want them must already know the function
 * codes and invoke them directly; those calls are handled by
 * LD_plugin_function(). This query therefore always returns ENOSYS.
 **/
static int LD_get_plugin_functions(storage_object_t        * object,
				   function_info_array_t * * actions)
{
	int rc = ENOSYS;

	LOG_ENTRY();

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * LD_plugin_function
 * @object:	Disk to act on; only used by the open and close functions.
 * @action:	One of the LDM_* function codes.
 * @objects:	Ignored.
 * @options:	Ignored.
 *
 * Carry out one of the plug-in's private functions: start or stop caching,
 * or open or close a disk. Opening and closing are refused with EINVAL for
 * objects that do not belong to this plug-in.
 *
 * Returns 0 on success, the result of open_dev() for LDM_Open_Disk, or
 * EINVAL for a foreign object or an unknown function code.
 **/
static int LD_plugin_function(storage_object_t * object,
			      task_action_t      action,
			      list_anchor_t      objects,
			      option_array_t   * options)
{
	int rc = 0;

	LOG_ENTRY();

	switch (action) {
	case LDM_Start_Caching:
		LOG_DEBUG("Start caching\n");
		initialize_cache();
		break;

	case LDM_Stop_Caching:
		LOG_DEBUG("Stop caching\n");
		destroy_cache();
		break;

	case LDM_Open_Disk:
	case LDM_Close_Disk:
		/* Both operations share the ownership check. */
		if (object->plugin != my_plugin_record) {
			LOG_ERROR("%s is not managed by %s.\n",
				  object->name, my_plugin_record->short_name);
			rc = EINVAL;
			break;
		}

		if (action == LDM_Open_Disk) {
			LOG_DEBUG("Open disk %s\n", object->name);
			rc = open_dev(object);
		} else {
			LOG_DEBUG("Close disk %s\n", object->name);
			close_dev(object);
		}
		break;

	default:
		LOG_ERROR("%d is not a valid function code.\n", action);
		rc = EINVAL;
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/**
 * LD_backup_metadata
 * @disk:	Disk whose metadata should be saved.
 *
 * Ask the engine to save a metadata record for the disk. Only the disk's
 * name is passed; the remaining arguments are NULL/0.
 *
 * Returns EINVAL if the disk's plug-in ID is not EVMS_DISK_PLUGIN_ID,
 * otherwise the engine's return code.
 **/
static int LD_backup_metadata(storage_object_t * disk)
{
	int rc = EINVAL;

	LOG_ENTRY();

	if (disk->plugin->id == EVMS_DISK_PLUGIN_ID) {
		rc = EngFncs->save_metadata(disk->name, NULL, 0, 0, NULL);
	} else {
		LOG_ERROR("I don't own object %s.\n", disk->name);
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/* Function table for this plug-in, referenced from LD_Plugin below. Each
 * entry points at the LD_* routine in this file that implements that
 * plug-in API call. Members of plugin_functions_t that are not listed here
 * are left zero-initialized.
 */
static plugin_functions_t ft_sysfs = {
	.setup_evms_plugin		= LD_setup,
	.cleanup_evms_plugin		= LD_cleanup,
	.discover			= LD_discover,
	.discard			= LD_discard,
	.add_sectors_to_kill_list	= LD_add_sectors_to_kill_list,
	.commit_changes			= LD_commit_changes,
	.get_info			= LD_get_info,
	.get_plugin_info		= LD_get_plugin_info,
	.read				= LD_read,
	.write				= LD_write,
	.get_plugin_functions		= LD_get_plugin_functions,
	.plugin_function		= LD_plugin_function,
	.backup_metadata		= LD_backup_metadata
};

/* Plug-in record for the Local Disk Manager: its ID, its own version, the
 * Engine services version (15.0.0) and plug-in API version (13.1.0) it
 * requires, its names, and its function table. No container functions are
 * provided.
 */
plugin_record_t LD_Plugin = {
	.id = EVMS_DISK_PLUGIN_ID,
	.version = {
		.major		= MAJOR_VERSION,
		.minor		= MINOR_VERSION,
		.patchlevel	= PATCH_LEVEL
	},
	.required_engine_api_version = {
		.major		= 15,
		.minor		= 0,
		.patchlevel	= 0
	},
	.required_plugin_api_version = {
		.plugin = {
			.major		= 13,
			.minor		= 1,
			.patchlevel	= 0
		}
	},
	.short_name = EVMS_DISK_PLUGIN_SHORT_NAME,
	.long_name = EVMS_DISK_PLUGIN_LONG_NAME,
	.oem_name = EVMS_IBM_OEM_NAME,
	.functions = {
		.plugin = &ft_sysfs
	},
	.container_functions = NULL
};

/* Pointer to this plug-in's own record. Code in this file uses it to
 * recognise objects that belong to this plug-in.
 */
plugin_record_t * my_plugin_record = &LD_Plugin;

/* NULL-terminated list of the plug-in records defined in this file.
 * NOTE(review): presumably the symbol the engine looks up when it loads the
 * plug-in - confirm against the engine's plug-in loader.
 */
plugin_record_t * evms_plugin_records[] = { &LD_Plugin,
					    NULL };

