/* Copyright (C) 2009 Keith Crane

This file is part of DFILE Tools.

DFILE Tools is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

DFILE Tools is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with DFILE Tools; see the file COPYING.  If not, see
<http://www.gnu.org/licenses/>. */

/*
** $Id: dfile.h,v 1.3 2009/10/16 07:33:05 keith Exp $
**
** $Log: dfile.h,v $
** Revision 1.3  2009/10/16 07:33:05  keith
** Add GPL to source code.
**
** Revision 1.2  2009/05/21 21:42:13  keith
** Made changes to use DFILE_THREAD as compile time switch for threading.
**
** Revision 1.1  2009/02/28 21:59:50  keith
** Initial revision
**
*/

#include <stddef.h>

#ifdef DFILE_THREAD
#ifndef _PTHREAD_H_
#include <pthread.h>
#endif
#endif

/*
** dfile_error_t enumerates the codes used to communicate errors.
** Dfile_ok is zero; the remaining codes are numbered consecutively in
** declaration order.
*/
typedef enum {
	Dfile_ok = 0,
	Dfile_open_failed = 1,
	Dfile_io_failed = 2,
	Dfile_partial_record = 3,
	Dfile_end_of_file = 4,
	Dfile_all_data_processed = 5,
	Dfile_invalid_gzip_format = 6,
	Dfile_invalid_record_layout = 7
} dfile_error_t;

/*
** dfile_record_type_t identifies the record layout of a data file:
** delimited or variable length.
*/
typedef enum {
	Dfile_delimited,
	Dfile_variable_length
} dfile_record_type_t;

/*
** dfile_parse_t is the status a record parsing function reports back to
** its caller. Note that the zero value is Dfile_fatal_parse_error.
*/
typedef enum {
	Dfile_fatal_parse_error = 0,
	Dfile_parsed_field = 1,
	Dfile_parsed_record = 2,
	Dfile_parse_failed = 3
} dfile_parse_t;

/*
** dfile_buffer_state_t names the state a rotating ASCII buffer is
** currently in: I/O, parsing or formatting.
*/
typedef enum {
	Dfile_io_state,
	Dfile_parse_state,
	Dfile_format_state
} dfile_buffer_state_t;

/*
** dfile_format_t selects the kind of I/O required for a file: plain ASCII
** or GZIP.
*/
typedef enum {
	Dfile_ascii,
	Dfile_zipped
} dfile_format_t;

/*
** dfile_open_mode_t is the open mode passed to dfile_write_open().
** NOTE(review): presumably selects truncating versus appending to an
** existing output file -- confirm against the implementation.
*/
typedef enum {
	Dfile_trunc,
	Dfile_append
} dfile_open_mode_t;

/*
** dfile_rec_t configures the record format. It is a small structure of
** three characters and is passed by value to the parse/format callbacks.
*/
typedef struct {
	/* character that separates fields */
	char field_separator;

	/* character that separates records */
	char record_separator;

	/*
	** NOTE(review): presumably the character that escapes a separator
	** occurring inside field data -- confirm against the parser.
	*/
	char separator_escape;
} dfile_rec_t;

/*
** dfile_bind_t binds C language variables to a data file field. One entry
** ties a field name to the addresses of the variables that reference the
** field's data and length.
*/
typedef struct {
	/* name of the data file field */
	const char *field_name;

	/* address of the (char *) variable that references the field data */
	char **field_buffer;

	/* address of the size_t variable that holds the field length */
	size_t *field_length;
} dfile_bind_t;

/*
** dfile_cfg_t holds the configuration of a dfile. A pointer to it is
** passed to dfile_cfg(), dfile_read_open() and dfile_write_open().
** NOTE(review): which members the caller fills in and which dfile_cfg()
** fills in is not visible in this header -- confirm.
*/
typedef struct {
	/* name of the dfile */
	const char *dfile_name;

	/* record format (separators and escape character) */
	dfile_rec_t rec_attribute;

	/* NOTE(review): presumably path of the record layout file -- confirm */
	const char *record_layout_path;

	/* NOTE(review): presumably array of field names -- confirm */
	const char **field;

	/* array of field bindings and the number of entries in it */
	dfile_bind_t *bind;
	unsigned short bind_cnt;

	/* opaque hash table handle associated with the bind entries */
	void *bind_hash_table;

	/* NOTE(review): presumably path of the data file -- confirm */
	const char *data_file_path;
} dfile_cfg_t;

/*
** dfile_tag_t is a tag/value pair of strings. Arrays of these are passed
** to dfile_read_open() and dfile_write_open().
*/
typedef struct {
	/* tag name */
	const char *tag;

	/* value associated with the tag */
	const char *tag_value;
} dfile_tag_t;

/*
** dfile_state_t identifies the state of a rotating ASCII buffer. Each
** entry holds a state and a pointer to another (read-only) state entry.
*/
typedef struct dfile_state_s {
	/* state represented by this entry */
	dfile_buffer_state_t buffer_state;

	/* NOTE(review): presumably the state that follows this one -- confirm */
	const struct dfile_state_s *next;
} dfile_state_t;

/*
** dfile_read_var_rec_t is the bookkeeping structure embedded in dfile_t
** for reading variable length records.
** NOTE(review): the member descriptions below are inferred from the member
** names only -- confirm against the variable length record reader.
*/
typedef struct {
	/* presumably length of the data appended to the overflow area */
	size_t append_overflow_len;

	/* presumably bind entry of the field currently being read */
	dfile_bind_t *current_bind;

	/* presumably start of the record currently being read */
	char *start_of_record;

	/* presumably number of fields processed so far in the record */
	unsigned short processed_field_cnt;

	/* presumably length of the next field; an unsigned char */
	unsigned char next_field_len;

	/* presumably set when next_field_len holds a usable value */
	char use_next_field_len_flag;
} dfile_read_var_rec_t;

/*
** dfile_buffer_t describes one individual buffer of the rotating buffer
** system. The buffers are linked through next into a ring.
*/
typedef struct dfile_buffer_s {
	/* ID of this buffer; used for debugging. */
	unsigned short buffer_id;

	/*
	** Flag used between threads to signal that end of input file has
	** been reached or that the output file needs to be closed.
	*/
	char processing_complete_flag;

	/*
	** Number of records processed in this buffer. Reset each time the
	** buffer is filled.
	*/
	unsigned long buffer_rec_cnt;

	/* Pointer to the buffer memory. */
	char *buffer_area;

	/* Pointer to the end of the buffer. */
	char *buffer_end;

	/*
	** Tracks progress through the buffer while records are being parsed
	** or formatted.
	*/
	char *buf_ptr;

	/*
	** Identifies the end of data within the buffer. It is used to discard
	** record fragments at the end of a buffer; incomplete records are not
	** field parsed.
	*/
	char *buf_ptr_end;

	/* Current processing state of this buffer. */
	const dfile_state_t *state;

#ifdef DFILE_THREAD
	/*
	** Present only when compiled with DFILE_THREAD.
	** NOTE(review): presumably the thread working on this buffer and the
	** mutex guarding it -- confirm against the threading code.
	*/
	pthread_t thread_id;
	pthread_mutex_t mutex;
#endif

	/*
	** Next buffer to be processed. The last buffer points back to the
	** first one, which creates the desired circular rotation.
	*/
	struct dfile_buffer_s *next;
} dfile_buffer_t;

/*
** dfile_t is the handle returned by dfile_read_open() and
** dfile_write_open(). It carries the record format, field bindings,
** rotating buffers, GZIP state and the function pointers used to process
** the current file.
**
** Note that the members guarded by DFILE_THREAD change the layout of this
** structure, so every translation unit using it must be compiled with the
** same DFILE_THREAD setting.
*/
typedef struct dfile_s	dfile_t;

struct dfile_s {
	/*
	** dfile_name is name of dfile used to call dfile_cfg().
	*/
	const char	*dfile_name;

	/*
	** file_name points to file name when dfile_read_open() or
	** dfile_write_open() is called.
	*/
	const char	*file_name;

	/*
	** file_rec_cnt is an accumulated count of records being processed
	** for the current file.
	*/
	unsigned long	file_rec_cnt;

	/*
	** file_char_cnt is an accumulated count of characters being processed
	** for the current file. It is used as trailer information in GZIP
	** files.
	*/
	unsigned long	file_char_cnt;

	/*
	** parse_fail_rec_cnt is an accumulated count of records that failed to
	** be correctly parsed.
	*/
	unsigned long	parse_fail_rec_cnt;

	/*
	** rec_attribute contains the record format.
	*/
	dfile_rec_t	rec_attribute;

	/*
	** bind is an array that contains C language variables to reference
	** data.
	*/
	dfile_bind_t	*bind;

	/*
	** bind_cnt contains the number of entries in bind array.
	*/
	unsigned short	bind_cnt;

	/*
	** Hash table to find bind entries using field_name.
	*/
	void	*bind_hash_table;

#ifdef DFILE_THREAD
	/*
	** buffer_cnt is the number of rotating buffers being used.
	*/
	unsigned short	buffer_cnt;
#endif

	/*
	** buffer_base is the base address of buffer memory. It is used
	** to later free buffer memory.
	*/
	dfile_buffer_t	*buffer_base;

	/*
	** io_buffer contains address of the rotating buffer currently being
	** used for I/O.
	*/
	dfile_buffer_t	*io_buffer;

	/*
	** application_buffer contains address of the rotating buffer
	** currently being used for record formatting or parsing.
	*/
	dfile_buffer_t	*application_buffer;

	/*
	** overflow contains partial record found at end of a rotating buffer.
	*/
	char		*overflow;

	/*
	** overflow_length is the length of the current record fragment.
	*/
	size_t		overflow_length;

	/*
	** overflow_size is the number of bytes that has been allocated to
	** overflow.
	*/
	size_t		overflow_size;

	/*
	** Structure needed when reading variable length records.
	*/
	dfile_read_var_rec_t	read_var_rec;

	/*
	** zipped_buffer is an I/O buffer for GZIP formatted data.
	*/
	unsigned char	*zipped_buffer;

	/*
	** zstream is actually a pointer of type (z_stream *). It is used to
	** communicate with ZLIB's deflate/inflate compression routines.
	** It is declared as (void *) so this header does not need zlib.h.
	*/
	void		*zstream;

	/*
	** crc is an accumulated checksum of characters being processed
	** for the current file. It is used as trailer information in GZIP
	** files.
	*/
	unsigned long	crc;

	/*
	** buffer_length is the number of bytes allocated for each buffer.
	*/
	size_t		buffer_length;

	/*
	** block_length is the number of bytes in a file system block.
	*/
	size_t		block_length;

	/*
	** buffer_block_cnt is the number of blocks to use in each I/O
	** operation.
	*/
	unsigned short	buffer_block_cnt;

	/*
	** open_descriptor is the open file descriptor.
	*/
	int		open_descriptor;

	/*
	** format identifies whether to do ASCII or GZIP I/O.
	*/
	dfile_format_t	format;

	/*
	** record_type identifies whether records are delimited or variable
	** length.
	*/
	dfile_record_type_t	record_type;

	/*
	** error contains fatal errors.
	*/
	dfile_error_t	error;

	/*
	** parse_func is pointer to a function for parsing input records.
	** The record format (dfile_rec_t) is passed by value.
	*/
	dfile_parse_t	( *parse_func )( char **, size_t *, dfile_rec_t );

	/*
	** record_failure_func is a pointer to a function for reporting
	** errors that occur during parsing or formatting records.
	*/
	int	( *record_failure_func )( dfile_t *, unsigned short );

	/*
	** format_str_func is a pointer to a function for formatting strings
	** into output records.
	*/
	int	( *format_str_func )( char **, const char *, const char **, dfile_rec_t );

	/*
	** format_func is a pointer to a function for formatting fields with a
	** known length into output records.
	*/
	int	( *format_func )( char **, const char *, const char **, size_t *, dfile_rec_t );

	/*
	** overflow_func is a pointer to a function for finding the beginning
	** of the last record fragment in a rotating buffer.
	*/
	int	( *overflow_func )( const char **, const char *, size_t, dfile_rec_t );

	/*
	** io_buffer_func is a pointer to a function for doing rotating buffer
	** I/O.
	*/
	int	( *io_buffer_func )( dfile_t * );

#ifdef DFILE_THREAD
	/*
	** thread is used to push compression and I/O to a parallel CPU.
	*/
	pthread_t	thread;
#endif

	/*
	** open_mode is the dfile_open_mode_t (Dfile_trunc or Dfile_append).
	** NOTE(review): presumably the mode given to dfile_write_open() --
	** confirm against the implementation.
	*/
	dfile_open_mode_t	open_mode;
};

/*
** API functions
**
** NOTE(review): the prototypes carry no parameter names. In the open
** functions each array pointer is followed by an unsigned short that is
** presumably its element count, and the remaining unsigned short
** arguments presumably size the rotating buffers -- confirm against the
** implementation before relying on this.
*/

/*
** The (const char *) argument is the dfile name.
*/
int dfile_cfg(
	dfile_cfg_t *,
	const char * );

dfile_t *dfile_read_open(
	const dfile_cfg_t *,
	dfile_bind_t *,
	unsigned short,
	const dfile_tag_t *,
	unsigned short,
	unsigned short,
	unsigned short );

int dfile_read(
	dfile_t * );

int dfile_read_close(
	dfile_t * );

/*
** Same argument list as dfile_read_open() except that the bind array is
** const and an open mode (Dfile_trunc or Dfile_append) is added.
*/
dfile_t *dfile_write_open(
	const dfile_cfg_t *,
	const dfile_bind_t *,
	unsigned short,
	const dfile_tag_t *,
	unsigned short,
	unsigned short,
	unsigned short,
	dfile_open_mode_t );

int dfile_write(
	dfile_t * );

int dfile_write_close(
	dfile_t * );

/*
** The signature matches a qsort()/bsearch() comparison function.
** NOTE(review): presumably compares two dfile_bind_t entries by
** field_name -- confirm.
*/
int dfile_bind_field_name_cmp(
	const void *,
	const void * );
