#ifdef __linux__
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/malloc.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fcntl.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/devfs_fs_kernel.h>

#include <asm/uaccess.h>
#include <sys/syscall.h>

/*
 * devfs handle of the "trace" directory: assigned from devfs_mk_dir() in
 * init_module() and passed to devfs_unregister() in cleanup_module().
 */
static devfs_handle_t devfs_handle = NULL;

/*
 * Debug output helpers.
 *
 * PDEBUG() produces output only when SYSTRACE_DEBUG is defined: through
 * printk() with a "systrace: " prefix when __KERNEL__ is defined, through
 * fprintf(stderr, ...) otherwise.  Without SYSTRACE_DEBUG it expands to
 * nothing, so its arguments are never evaluated.
 */
#undef PDEBUG             /* undef it, just in case */
#ifdef SYSTRACE_DEBUG
#  ifdef __KERNEL__
/* This one if debugging is on, and kernel space */
#    define PDEBUG(fmt, args...) printk( KERN_DEBUG "systrace: " fmt, ## args)
#  else
/* This one for user space */
#    define PDEBUG(fmt, args...) fprintf(stderr, fmt, ## args)
#  endif
#else
#  define PDEBUG(fmt, args...) /* not debugging: nothing */
#endif

/*
 * PDEBUGG() always expands to nothing.  It marks debug statements that are
 * switched off; because the arguments are discarded by the preprocessor they
 * are never compiled or type-checked.
 */
#undef PDEBUGG
#define PDEBUGG(fmt, args...) /* nothing: it's a placeholder */

/* Character-device major number handed to devfs_register_chrdev(). */
#define SYSTRACE_MAJOR		127
/* Default number of slots in the record ring. */
#define SYSTRACE_RINGSIZE	128
/* Number of slots (one page each) allocated by init_pg_vec(). */
int systrace_ringsize =  SYSTRACE_RINGSIZE;

/* NOTE(review): no definition of systrace_init() is visible in this file. */
int systrace_init(void);
void systrace_cleanup(void);

/*
 * Compatibility shim, currently disabled by the "#if 1": the #else branch
 * defines wait_queue_head_t and init_waitqueue_head() in terms of
 * struct wait_queue -- presumably for kernels that do not provide them.
 */
#if 1
/* nothing */
#else
  typedef  struct wait_queue *wait_queue_head_t;
# define init_waitqueue_head(head) (*(head)) = NULL
#endif

#include <systrace.h>

/*
 * State of the trace device: a ring of page-sized record slots filled by
 * systrace_put() and drained by systrace_read().  All fields are accessed
 * with `sem` held, except that systrace_put() calls syscall_task_fill() and
 * the filter while holding it as well.
 */
struct systrace_opt {
	wait_queue_head_t	wait;		/* readers sleep here; woken by systrace_put() */
	unsigned long 		*pg_vec;	/* page addresses, one per ring slot */
	struct syscall_hdr 	**buffer;	/* buffer[i] is pg_vec[i] viewed as a record header */
	int	 		rindex, windex;	/* next slot to read / to write; equal means empty */
	ssize_t			off;		/* bytes of the record at rindex already read */
	int 			ringsize;	/* number of slots in pg_vec and buffer */
	int 			nreaders;	/* opens with FMODE_READ that are still active */
	struct systrace_stats	stats;		/* total/drops/errors counters, see IOC_SCSTATS */
	struct fasync_struct 	*async_queue;	/* SIGIO subscribers, see systrace_fasync() */
	struct semaphore 	sem;		/* serialises access to this structure */
	struct syscall_filter	filter;		/* record filter set through IOC_FILTER */
	char			*pagebuf;	/* scratch page given to d_path() for the cwd */
};

/* The only device instance; minor 0 maps to it in systrace_open(). */
static struct systrace_opt systrace_device;

/*
 * Fill @task with the identity of the calling process (`current`).
 *
 * @task: record to fill; pid, uid and the 16-byte comm are always written.
 * @page: scratch buffer of PAGE_SIZE bytes handed to d_path() to build the
 *        current working directory, or NULL when no buffer is available.
 *
 * task->cwd always ends up NUL-terminated.  It receives the working
 * directory when that fits in fewer than 256 bytes, and the empty string
 * otherwise -- including when @page is NULL, so that a recycled ring slot
 * never exposes the cwd stored by an earlier record.
 */
static inline void syscall_task_fill(struct syscall_task *task, char *page)
{
	unsigned long len;
	char *cwd;

	task->pid = current->pid;
	task->uid = current->uid;
	memcpy(task->comm, current->comm, 16);

	/* Start from "unknown" so every early exit leaves a valid string. */
	*(task->cwd) = '\0';
	if (!page)
		return;

	cwd =
#if 1
		d_path(current->fs->pwd, current->fs->pwdmnt,
		       page, PAGE_SIZE);
#else
		d_path(current->fs->pwd, page, PAGE_SIZE);
#endif
	/*
	 * Distance from the returned pointer to the end of the page; this
	 * assumes d_path() builds the NUL-terminated path at the tail of the
	 * buffer, so the distance counts the terminator -- TODO confirm.
	 */
	len = PAGE_SIZE + page - cwd;
	if (len < 256)
		strncpy(task->cwd, cwd, len);
}

static ssize_t syscall_args_flatten(char *, size_t, struct syscall_arg *, int);

#if 0
/*
 * Disabled alternative to the sleep in systrace_read(): queue the caller on
 * dev->wait, release the device semaphore, sleep until woken, then take the
 * semaphore again.  The task is put on the wait queue before the semaphore
 * is dropped.
 * NOTE(review): the parameter type lacks the `struct` keyword, so this would
 * presumably not compile if the block were enabled.
 */
void systrace_wait(systrace_opt * dev)
{
	DECLARE_WAITQUEUE(wait, current);
	current->state = TASK_INTERRUPTIBLE;
	add_wait_queue(&dev->wait, &wait);
	up(&dev->sem);
	schedule();
	remove_wait_queue(&dev->wait, &wait);
	current->state = TASK_RUNNING;
	down(&dev->sem);
}
#endif

/*
 * Open and close
 */

/*
 * Release a page vector and every page it references.
 *
 * @pg_vec: array of page addresses, or NULL (in which case nothing is done).
 * @len:    number of entries in @pg_vec.
 *
 * Entries equal to 0 are skipped, so a partially populated vector can be
 * passed.  The vector itself is freed too; the caller must drop its pointer.
 */
static void free_pg_vec(unsigned long *pg_vec, int len)
{
	int i;

	/* The release path can reach here after the ring was already freed. */
	if (!pg_vec)
		return;

	for (i = 0; i < len; ++i) {
		if (pg_vec[i])
			free_page(pg_vec[i]);
	}
	kfree(pg_vec);
}

/*
 * Allocate the record ring of @dev if it does not exist yet.
 *
 * One page is allocated per slot (systrace_ringsize slots), dev->buffer is
 * set up to view each page as a struct syscall_hdr, and the read/write
 * indices are reset.  When dev->pg_vec is already set the function does
 * nothing.
 *
 * Returns 0 on success or -ENOMEM.  On failure everything allocated here is
 * released again and dev->pg_vec is left NULL, so a later call retries from
 * scratch instead of finding a half-built ring.
 */
static int init_pg_vec(struct systrace_opt *dev)
{
	int i;

	if (dev->pg_vec)	/* ring already set up by an earlier open */
		return 0;

	dev->pg_vec = (unsigned long *)
		kmalloc(systrace_ringsize * sizeof(unsigned long), GFP_KERNEL);
	if (!dev->pg_vec) {
		PDEBUG("cannot allocate pg_vec\n");
		return -ENOMEM;
	}

	/* Zero first: free_pg_vec() skips slots that were never filled. */
	memset(dev->pg_vec, 0, systrace_ringsize * sizeof(unsigned long));
	for (i = 0; i < systrace_ringsize; ++i) {
		dev->pg_vec[i] = __get_free_page(GFP_KERNEL);
		if (!dev->pg_vec[i]) {
			PDEBUG("cannot allocate page #%d\n", i);
			goto fail;
		}
	}
	PDEBUGG("page vector allocated\n");

	if (dev->buffer) /* should not happen */
		kfree(dev->buffer);
	dev->buffer = kmalloc(systrace_ringsize * sizeof(struct syscall_hdr *),
			      GFP_KERNEL);
	if (!dev->buffer)
		goto fail;

	for (i = 0; i < systrace_ringsize; ++i) {
		dev->buffer[i] = (struct syscall_hdr *) dev->pg_vec[i];
		memset(dev->buffer[i], 0, sizeof(struct syscall_hdr));
	}

	dev->ringsize = systrace_ringsize;
	dev->rindex = dev->windex = 0;
	dev->off = 0;
	return 0;

 fail:
	/* Undo the page allocations so the device is back to "no ring". */
	free_pg_vec(dev->pg_vec, systrace_ringsize);
	dev->pg_vec = NULL;
	return -ENOMEM;
}

/*
 * open() handler.  Only minor 0 exists.  Under the device semaphore it makes
 * sure the ring and the scratch page are allocated, clears the filter mask,
 * counts the opener as a reader when the file is readable, and bumps the
 * module use count.
 *
 * Returns 0, -ENODEV for any other minor, -ERESTARTSYS when interrupted
 * while waiting for the semaphore, or the error from init_pg_vec().
 */
static int systrace_open (struct inode *inode, struct file *filp)
{
	struct systrace_opt *trace = &systrace_device;
	int err;

	if (MINOR(inode->i_rdev) != 0)
		return -ENODEV;
	if (down_interruptible(&trace->sem))
		return -ERESTARTSYS;

	err = init_pg_vec(trace);
	if (err >= 0) {
		if (!trace->pagebuf)
			trace->pagebuf = (char *) __get_free_page(GFP_USER);

		trace->filter.mask = 0;
		/* f_mode rather than f_flags: it's cleaner (see fs/open.c) */
		if (filp->f_mode & FMODE_READ)
			trace->nreaders++;

		filp->private_data = trace;
		MOD_INC_USE_COUNT;
		err = 0;
		PDEBUGG("systrace_open suceeded\n");
	}

	up(&trace->sem);
	return err;
}

static int systrace_release (struct inode *inode, struct file *filp)
{
	struct systrace_opt *dev = filp->private_data;
	static int systrace_fasync (int fd, struct file *filp, int mode);

	/* remove this filp from the asynchronously notified filp's */
	systrace_fasync(-1, filp, 0);

	down (&dev->sem);
	if (filp->f_mode & FMODE_READ)
		dev->nreaders--;
	if (dev->nreaders == 0) {
		free_pg_vec(dev->pg_vec, dev->ringsize);
		kfree(dev->buffer);
		dev->pg_vec = 0; /* other fields are not checked on open */
		dev->buffer = 0;
		dev->filter.mask = 0;
		if (dev->pagebuf)
			free_page((unsigned long) dev->pagebuf);
	}
	up (&dev->sem);
	MOD_DEC_USE_COUNT;
	return 0;
}

/*
 * Data management: read and write
 */

/*
 * read() handler: copy queued records to user space.
 *
 * @filp:  open file; private_data is the device.
 * @buf:   user-space destination.
 * @count: capacity of @buf in bytes.
 * @f_pos: unused.
 *
 * Blocks until at least one record is queued unless O_NONBLOCK is set.
 * Records are delivered back to back as header followed by payload
 * (sizeof(struct syscall_hdr) + hdr->len bytes).  A record that does not fit
 * is delivered partially and dev->off remembers where the next read resumes.
 *
 * Returns the number of bytes copied, -EAGAIN (non-blocking, ring empty),
 * -ERESTARTSYS (interrupted by a signal), or -EFAULT (bad user buffer with
 * nothing copied yet, or an inconsistent record length).
 */
static ssize_t systrace_read (struct file *filp, char *buf, 
			     size_t count, loff_t *f_pos)
{
	struct systrace_opt *dev = filp->private_data;
	ssize_t ret;
	size_t copied = 0;
	int pending, i;

	if (down_interruptible(&dev->sem))
		return -ERESTARTSYS;

	while (dev->rindex == dev->windex) { /* nothing to read */
		ret = -EAGAIN;
		if (filp->f_flags & O_NONBLOCK)
			goto out;
		PDEBUGG("\"%s\" reading: going to sleep\n", current->comm);
		up(&dev->sem);
		/*
		 * The condition is re-tested after the task is queued, so a
		 * record written between up() and the sleep is not missed.
		 */
		if (wait_event_interruptible(dev->wait,
					     dev->rindex != dev->windex))
			return -ERESTARTSYS; /* let the fs layer handle it */
		if (down_interruptible(&dev->sem))
			return -ERESTARTSYS;
		PDEBUGG("woke up!\n");
		/* loop: another reader may have drained the ring meanwhile */
	}

	pending = dev->windex - dev->rindex;
	if (pending < 0)
		pending += dev->ringsize;

	for (i = 0; i < pending && copied < count; ++i) {
		struct syscall_hdr *hdr = dev->buffer[dev->rindex];
		ssize_t rec_len = sizeof(struct syscall_hdr) + hdr->len
				  - dev->off;
		size_t bytes;

		if (rec_len < 0) {
			ret = -EFAULT;
			PDEBUG("arithmetic error, exiting\n");
			goto out;
		}

		bytes = count - copied;
		if ((size_t) rec_len < bytes)
			bytes = rec_len;
		PDEBUGG("reading at %d bytes pos #%d\n", bytes, dev->rindex);

		if (copy_to_user(buf + copied, ((char *) hdr) + dev->off,
				 bytes)) {
			/* Report what was delivered; the record stays queued. */
			ret = copied ? (ssize_t) copied : -EFAULT;
			goto out;
		}
		copied += bytes;

		if (bytes < (size_t) rec_len) { /* couldn't fill a complete record */
			dev->off += bytes;
			break;
		}

		dev->off = 0;
		if (++dev->rindex == dev->ringsize)
			dev->rindex = 0;
	}

	PDEBUGG("\"%s\" did read %li bytes\n", current->comm, (long)copied);
	ret = copied;
 out:
	up (&dev->sem);
	return ret;
}


/*
 * poll()/select() handler.
 *
 * Registers the caller on the device wait queue and reports
 * POLLIN | POLLRDNORM when at least one record is queued, or 0 when the ring
 * is empty so that the caller actually waits.  POLLERR is returned only when
 * the device semaphore could not be taken because a signal arrived.
 */
static unsigned int systrace_poll (struct file *filp, poll_table *wait)
{
	struct systrace_opt *dev = filp->private_data;
	unsigned int mask = POLLERR;

	poll_wait(filp, &dev->wait,  wait);

	if (!down_interruptible(&dev->sem)) {
		mask = 0;			/* empty ring is not an error */
		if (dev->rindex != dev->windex)
			mask |= POLLIN | POLLRDNORM;  /* readable */
		up(&dev->sem);
	}
	return mask;
}

static int systrace_ioctl(struct inode *inode, struct file *filp,
		      unsigned int cmd, unsigned long arg)
{
	int ret;
	struct systrace_opt *dev = filp->private_data;
	
	ret = -ERESTARTSYS;
	if (!down_interruptible(&dev->sem)) {
		ret = 0;

		switch (cmd) {
		case IOC_FILTER:
			ret = copy_from_user(&(dev->filter),
					     (struct syscall_filter *) arg,
					     sizeof(struct syscall_filter));
		case IOC_SCSTATS:
			ret = copy_to_user((struct systrace_stats *) arg, 
					   &dev->stats,
					   sizeof(struct systrace_stats));
			break;
		case IOC_HDRST:
			MOD_DEC_USE_COUNT;
			break;
		default:
			ret =  -EINVAL;
			break;
		}
		up(&dev->sem);
	}
	
	return ret;
}

/*
 * fasync() handler: hands the request to fasync_helper() together with the
 * device's asynchronous-notification list and returns its result.
 */
static int systrace_fasync (int fd, struct file *filp, int mode)
{
	struct fasync_struct **queue;

	queue = &((struct systrace_opt *) filp->private_data)->async_queue;
	return fasync_helper(fd, filp, mode, queue);
}



/* llseek() handler: seeking is never supported, every call gets -ESPIPE. */
static loff_t systrace_llseek (struct file *filp,  loff_t off, int whence)
{
	const loff_t unseekable = -ESPIPE;

	return unseekable;
}

/*
 * Decide whether a record produced by @task passes @filter.
 *
 * An empty mask accepts everything.  Otherwise each criterion selected in
 * the mask (uid, pid, comm, cwd -- tested in that order) compares the task
 * with filter->task, and the outcome of the comparison must equal
 * filter->truth.  The first criterion that disagrees rejects the record.
 *
 * Returns 1 to keep the record, 0 to drop it.
 */
static int systrace_run_filter(const struct syscall_task *task, 
			       const struct syscall_filter *filter)
{
	const struct syscall_task *want = &(filter->task);
	u32 selected = filter->mask;

	if (!selected)
		return 1;

	if ((selected & SYSCALL_FILTER_UID)
	    && ((task->uid == want->uid) != filter->truth))
		return 0;

	if ((selected & SYSCALL_FILTER_PID)
	    && ((task->pid == want->pid) != filter->truth))
		return 0;

	if ((selected & SYSCALL_FILTER_COMM)
	    && ((!strncmp(task->comm, want->comm, 15)) != filter->truth))
		return 0;

	if ((selected & SYSCALL_FILTER_CWD)
	    && ((!strncmp(task->cwd, want->cwd, 255)) != filter->truth))
		return 0;

	return 1;
}


/*
 * Append one record to the ring of @dev.
 *
 * @type:   value stored in hdr->type (the callers pass a SYS_* number).
 * @retval: value stored in hdr->ret.
 * @args:   argument descriptors, serialised right after the header.
 * @nargs:  number of entries in @args.
 *
 * Returns 1 when a record was queued, 0 when there is no reader or the
 * filter rejected the record, -ENOSPC when the ring is full, the negative
 * result of syscall_args_flatten() when the arguments could not be
 * serialised, or -ERESTARTSYS when interrupted while taking the semaphore.
 */
static ssize_t systrace_put(struct systrace_opt *dev, u32 type, int retval,
			    struct syscall_arg *args, int nargs)
{
	ssize_t ret;
	int old_windex;
	struct syscall_hdr *hdr;
  
	ret = -ERESTARTSYS;
	if (down_interruptible(&dev->sem))
		goto out_nolock;

	/* Nobody is reading: skip the work (the ring may not even exist). */
	ret = 0;
	if (dev->nreaders <= 0)
		goto out;

	/*
	 * Claim the slot by advancing windex first; every failure path below
	 * goes through out_rollback, which restores the old index.
	 */
	++dev->stats.total;
	old_windex = dev->windex;
	++dev->windex;
	if (dev->windex == dev->ringsize)
		dev->windex = 0;

	/*
	 * rindex == windex means "empty", so the ring is treated as full when
	 * advancing would make them equal; one slot always stays unused.
	 */
	ret = -ENOSPC;
	if (dev->windex == dev->rindex) { /* the ring is full */
		++dev->stats.drops;
		goto out_rollback;
	}

	/* Each slot is one page: the header first, the arguments after it. */
	hdr = dev->buffer[old_windex];
	get_fast_time(&(hdr->stamp));
	ret = syscall_args_flatten((char *)(hdr + 1), 
				   PAGE_SIZE - sizeof(struct syscall_hdr), 
				   args, nargs);
	if (ret < 0) {
		++dev->stats.errors;
		goto out_rollback;
	} 
	hdr->type = type;
	hdr->ret = retval;
	hdr->nargs = nargs;
	hdr->len = ret;		/* payload length, header excluded */
	syscall_task_fill(&(hdr->task), dev->pagebuf);

	/* The filter needs the task data, so it runs after the slot is filled. */
	ret = 0;
	if (!systrace_run_filter(&(hdr->task), &(dev->filter)))
		goto out_rollback;

	PDEBUGG("writing %d bytes at pos #%d\n", sizeof(syscall_hdr)+hdr->len, 
	       old_windex);
	ret = 1;

	/* Wake blocked readers and pollers, then notify SIGIO subscribers. */
	wake_up_interruptible(&dev->wait);

	if (dev->async_queue)
#if 1
                kill_fasync (&dev->async_queue, SIGIO, POLL_IN);
#else
	
	        kill_fasync (dev->async_queue, SIGIO);	   
#endif


 out:
	up (&dev->sem);
 out_nolock:
	return ret;

	/* Give the claimed slot back; ret already holds the result. */
 out_rollback:
	dev->windex = old_windex;
	goto out;	
}

/*
 * Entry point used by the syscall hooks: queue one record on the global
 * trace device.  The result of systrace_put() is ignored.
 */
static void systrace_record(u32 type, int ret, struct syscall_arg *args,
			    int nargs)
{
	PDEBUGG("recording something...\n");
	/* that's where filtering should take place */
	systrace_put(&systrace_device, type, ret, args, nargs);
}

/*
 * write() handler.  The data written by the caller is ignored; each call
 * queues one fixed record (SYS_open, path "toto", integer arguments 23 and
 * 24, result 23) and reports the whole buffer as consumed.
 */
static ssize_t systrace_write(struct file *filp, const char *buf, 
			     size_t count, loff_t *ppos)
{
	struct syscall_arg fake[3] = {
		{ SYSCALL_ARG_STR, 0, 0 }, 
		{ SYSCALL_ARG_INT, 0, 0 },
		{ SYSCALL_ARG_INT, 0, 0 }
	};
	char name[1024];

	strcpy(name, "toto");

	fake[2].i = 24;
	fake[1].i = 23;
	fake[0].p = name;

	systrace_record(SYS_open, 23, fake, 3);
	return count;
}


/*
 * The file operations for the systrace device.
 *
 * The active branch uses labelled initialisers; the disabled #else branch
 * lists the handlers positionally instead (presumably for kernels whose
 * struct file_operations has that field order).
 */
static struct file_operations systrace_fops = {
#if 1
	llseek:		systrace_llseek,
	read:		systrace_read,
	write:		systrace_write,
	poll:		systrace_poll,
	ioctl:		systrace_ioctl,
	open:		systrace_open,
	release:	systrace_release,
	fasync:		systrace_fasync,
#else
        systrace_llseek,
        systrace_read,
        systrace_write,
        NULL,		/* systrace_readdir */
        systrace_poll,
        systrace_ioctl,
        NULL,		/* systrace_mmap */
        systrace_open,
        NULL,           /* systrace_flush */
        systrace_release,
# if 0
	NULL,		/* systrace_fsync */
        systrace_fasync,
# endif
#endif
};


void systrace_cleanup(void)
{
	struct systrace_opt *dev = &systrace_device;
	
	if (dev->pg_vec)
	        free_pg_vec(dev->pg_vec, dev->ringsize);
	if (dev->buffer)
		 kfree(dev->buffer);
	if (dev->pagebuf)
	         free_page((unsigned long) dev->pagebuf);
}


/*
 * System call hooks.
 *
 * sys_call_table is defined outside this file.  For each traced call,
 * orig_<name> holds the table entry that was in place before the module
 * was loaded and new_<name> is the wrapper installed instead: it invokes
 * orig_<name>, then records the call and its result.
 */
extern void *sys_call_table[];         /* system call table, defined elsewhere */

int (*orig_open)(const char *path, int flags, mode_t mode);
int new_open(const char *path, int flags, mode_t mode);

int (*orig_close)(int flags);
int new_close(int flags);

/* The read/write hooks exist but are not installed (see init_module()). */
int (*orig_write)(int fd, void *buf, size_t size);
int new_write(int fd, void *buf, size_t size);

int (*orig_read)(int fd, void *buf, size_t size);
int new_read(int fd, void *buf, size_t size);

int (*orig_unlink)(const char *path);
int new_unlink(const char *path);

int (*orig_mkdir)(const char *path, int mode);
int new_mkdir(const char *path, int mode);

int (*orig_rmdir)(const char *path);
int new_rmdir(const char *path);

int (*orig_rename)(const char *oldname, const char *newname);
int new_rename(const char *oldname, const char *newname);

/*
 * replace_syscall(name): save the current sys_call_table entry for
 * SYS_<name> in orig_<name>, then install new_<name> in its place.
 *
 * Wrapped in do { } while (0) so the two assignments behave as a single
 * statement, e.g. as the body of an unbraced if/else.
 */
#define replace_syscall(name)                                           \
do {                                                                    \
	orig_##name = sys_call_table[SYS_##name];                       \
	sys_call_table[SYS_##name] = &new_##name;                       \
} while (0)

/* restore_syscall(name): put orig_<name> back into sys_call_table. */
#define restore_syscall(name)                                           \
do {                                                                    \
	sys_call_table[SYS_##name] = orig_##name;                       \
} while (0)

/*
 * Hook for open(2): run the original system call, then record the path,
 * flags, mode and result.
 *
 * The path is copied from user space with getname(); when that fails no
 * record is produced.  flags and mode are ordinary kernel-side parameters,
 * so they are read directly rather than through the user-access helpers.
 *
 * Returns whatever the original system call returned.
 */
int new_open(const char *path, int flags, mode_t mode)
{
	int ret;
	char *p;

	struct syscall_arg args[3] = {
		{ SYSCALL_ARG_STR, 0 }, 
		{ SYSCALL_ARG_INT, 0 },
		{ SYSCALL_ARG_INT, 0 }
	};
	ret = (*orig_open)(path, flags, mode);
	
	p = getname(path);
	if (!IS_ERR(p)) {
		args[0].p = p;
		args[1].i = flags;
		args[2].i = mode;
		
		systrace_record(SYS_open, ret, args, 3);
		
		putname(p);
	}

	return ret;
}


/*
 * Hook for close(2): run the original system call, then record the file
 * descriptor and the result.  fd is a kernel-side parameter and is read
 * directly rather than through the user-access helpers.
 *
 * Returns whatever the original system call returned.
 */
int new_close(int fd)
{
	int ret;
	struct syscall_arg args[1] = {
		{ SYSCALL_ARG_INT, 0, 0 }
	};
	
	ret = (*orig_close)(fd);

	args[0].i = fd;
	systrace_record(SYS_close, ret, args, 1);
	
	return ret;
}


/*
 * Hook for write(2): run the original system call, then record the file
 * descriptor, a placeholder 0 for the buffer, the size and the result.
 * fd and size are kernel-side parameters and are read directly rather than
 * through the user-access helpers.
 *
 * Returns whatever the original system call returned.
 */
int new_write(int fd, void *buf, size_t size)
{
	int ret;
	struct syscall_arg args[3] = {
		{ SYSCALL_ARG_INT, 0, 0 }, 
		{ SYSCALL_ARG_INT, 0, 0 },
		{ SYSCALL_ARG_INT, 0, 0 }
	};
	
	ret = (*orig_write)(fd, buf, size);
	
	args[0].i = fd;
	/* args[1].i stays 0: the buffer itself is not recorded */
	args[2].i = size;

	systrace_record(SYS_write, ret, args, 3);

	return ret;
}


/*
 * Hook for read(2): run the original system call, then record the file
 * descriptor, a placeholder 0 for the buffer, the size and the result.
 * fd and size are kernel-side parameters and are read directly rather than
 * through the user-access helpers.
 *
 * Returns whatever the original system call returned.
 */
int new_read(int fd, void *buf, size_t size)
{
	int ret;
	struct syscall_arg args[3] = {
		{ SYSCALL_ARG_INT, 0, 0 }, 
		{ SYSCALL_ARG_INT, 0, 0 },
		{ SYSCALL_ARG_INT, 0, 0 }
	};
	
	ret = (*orig_read)(fd, buf, size);
	
	args[0].i = fd;
	/* args[1].i stays 0: the buffer itself is not recorded */
	args[2].i = size;

	systrace_record(SYS_read, ret, args, 3);

	return ret;
}


/*
 * Hook for unlink(2): call the original handler first, then record the path
 * together with the result.  No record is produced when the path cannot be
 * copied from user space.  The original result is returned unchanged.
 */
int new_unlink(const char *path)
{
	struct syscall_arg args[1] = {
		{ SYSCALL_ARG_STR, 0, 0 }
	};
	char *name;
	int ret = (*orig_unlink)(path);

	name = getname(path);
	if (IS_ERR(name))
		return ret;

	args[0].p = name;
	systrace_record(SYS_unlink, ret, args, 1);
	putname(name);

	return ret;
}

/*
 * Hook for mkdir(2): run the original system call, then record the path,
 * mode and result.
 *
 * The path is copied from user space with getname(); when that fails no
 * record is produced.  mode is a kernel-side parameter and is read directly
 * rather than through the user-access helpers.
 *
 * Returns whatever the original system call returned.
 */
int new_mkdir(const char *path, int mode)
{
	int ret;
	char *p;

	struct syscall_arg args[2] = {
		{ SYSCALL_ARG_STR, 0, 0 },
		{ SYSCALL_ARG_INT, 0, 0 }
	};

	ret = (*orig_mkdir)(path, mode);

	p = getname(path);

	if (!IS_ERR(p)) {
		args[0].p = p;
		args[1].i = mode;
		
		systrace_record(SYS_mkdir, ret, args, 2);
		
		putname(p);
	}

	return ret;
}

/*
 * Hook for rmdir(2): call the original handler first, then record the path
 * together with the result.  No record is produced when the path cannot be
 * copied from user space.  The original result is returned unchanged.
 */
int new_rmdir(const char *path)
{
	struct syscall_arg args[1] = {
		{ SYSCALL_ARG_STR, 0, 0 }
	};
	char *name;
	int ret = (*orig_rmdir)(path);

	name = getname(path);
	if (IS_ERR(name))
		return ret;

	args[0].p = name;
	systrace_record(SYS_rmdir, ret, args, 1);
	putname(name);

	return ret;
}

/*
 * Hook for rename(2): call the original handler first, then record both
 * paths together with the result.  A record is produced only when both
 * names could be copied from user space.  The original result is returned
 * unchanged.
 */
int new_rename(const char *oldname, const char *newname)
{
	struct syscall_arg args[2] = {
		{ SYSCALL_ARG_STR, 0, 0 },
		{ SYSCALL_ARG_STR, 0, 0 }
	};
	char *src, *dst;
	int ret = (*orig_rename)(oldname, newname);

	src = getname(oldname);
	if (IS_ERR(src))
		return ret;

	dst = getname(newname);
	if (!IS_ERR(dst)) {
		args[0].p = src;
		args[1].p = dst;
		systrace_record(SYS_rename, ret, args, 2);
		putname(dst);
	}
	putname(src);

	return ret;
}


/*
 * Serialise @nargs argument descriptors into @buf.
 *
 * Every argument is written as a raw copy of its struct syscall_arg; a
 * string argument (SYSCALL_ARG_STR) is followed by its characters without
 * the terminating NUL.  As a side effect the `i` field of each string
 * argument is set to the string length before it is copied.
 *
 * @buf:   destination buffer.
 * @len:   capacity of @buf in bytes.
 * @args:  arguments to serialise (string arguments are modified, see above).
 * @nargs: number of entries in @args.
 *
 * Returns the number of bytes written, or -E2BIG when an argument does not
 * fit; arguments written before the one that did not fit stay in @buf.
 */
static ssize_t syscall_args_flatten(char *buf, size_t len,
				    struct syscall_arg *args, int nargs)
{
	const size_t rec = sizeof(struct syscall_arg);
	ssize_t used = 0;
	int n;

	for (n = 0; n < nargs; ++n) {
		struct syscall_arg *cur = args + n;
		int is_str = (cur->type == SYSCALL_ARG_STR);

		/* room for the descriptor itself? */
		if (rec + used > len)
			return -E2BIG;

		/* ...and for the string that trails it? */
		if (is_str) {
			cur->i = strlen(cur->p);
			if (rec + cur->i + used > len)
				return -E2BIG;
		}

		memcpy(buf + used, (char *)cur, rec);
		used += rec;

		if (is_str) {
			memcpy(buf + used, cur->p, cur->i);
			used += cur->i;
		}
	}

	return used;
}



/* Non-zero: install the system call hooks when the module is loaded. */
#define TRACE_SYS_CALLS	1

/*
 * Module entry point.
 *
 * The device state (wait queue, semaphore) is initialised before the
 * character device is registered, so an open() arriving right after
 * registration never sees an uninitialised semaphore.  Then the devfs
 * entries "trace" and "syscall" are created and, when TRACE_SYS_CALLS is
 * set, the open/close/unlink/mkdir/rmdir/rename hooks are installed (the
 * read/write hooks stay disabled).
 *
 * Returns 0 on success, -EIO when the major number cannot be registered.
 * The results of the devfs_register() call are not checked.
 */
int init_module(void)
{
	struct systrace_opt *dev = &systrace_device;

	memset((char *)dev, 0, sizeof(struct systrace_opt));
	init_waitqueue_head (&(dev->wait));
	sema_init (&(dev->sem), 1);

	if (devfs_register_chrdev(SYSTRACE_MAJOR, "syscall", 
				  &systrace_fops)) {
		printk(KERN_WARNING "systrace: can't get major %d\n",
		       SYSTRACE_MAJOR);
		return -EIO;
	}
	devfs_handle = devfs_mk_dir (NULL, "trace", NULL);
	devfs_register (devfs_handle, "syscall", DEVFS_FL_DEFAULT,
			SYSTRACE_MAJOR, 0,
			S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
			&systrace_fops, NULL);

#if TRACE_SYS_CALLS
	replace_syscall(open);
	replace_syscall(close);
#if 0
	replace_syscall(write);
	replace_syscall(read);
#endif
	replace_syscall(unlink);
	replace_syscall(mkdir);
	replace_syscall(rmdir);
	replace_syscall(rename);
#endif
	
	PDEBUGG("systrace init succeded\n");

	return 0;
}

/*
 * Module exit point: undo init_module() in reverse order.
 *
 * The system call hooks are removed first (in the opposite order of their
 * installation), then the devfs entry and the character device are
 * unregistered, and only after the device can no longer be opened is the
 * ring memory released.
 */
void cleanup_module(void)
{
#if TRACE_SYS_CALLS
	restore_syscall(rename);
	restore_syscall(rmdir);
	restore_syscall(mkdir);
	restore_syscall(unlink);
#if 0
	restore_syscall(read);
	restore_syscall(write);
#endif
	restore_syscall(close);
	restore_syscall(open);
#endif

	devfs_unregister (devfs_handle);
	devfs_unregister_chrdev(SYSTRACE_MAJOR, "syscall");

	/* No new opener can appear any more: safe to free the buffers. */
	systrace_cleanup();

	PDEBUGG("systrace cleanup succeded\n");
}

#undef TRACE_SYS_CALLS

#endif /* __linux__ */
