/* Copyright (C) 2009  Wen-Yen Chuang <caleb AT calno DOT com>
 * Copyright (C) 2009  letoh <letoh DOT tw AT gmail DOT com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PROGRAM_NAME "allutf8"
#define PROGRAM_VERSION "0.0.3"

#define DEFAULT_DELIMITER  ' '

#define INVALID_CODEPOINT  0xFFFFFFFF

#define F_INVALID    (0x0)
#define F_NONCHAR    (0x1)
#define F_PRIVATE    (0x1 << 1)
#define F_CONTROL    (0x1 << 2)
#define F_SURROGATE  (0x1 << 3)
#define F_SYMBOL     (0x1 << 4)
#define F_DEFAULT    (0xFFFFFFFF)

#define F_IGNORE     ((F_NONCHAR) | (F_PRIVATE) | (F_CONTROL) | (F_SURROGATE))

/* for test */
#define CHKMASK(val,mask)  ((val) & (mask))


#define ERR(fmt, ...)      fprintf(stderr, "ERR: " fmt "\n\n", ##__VA_ARGS__)


typedef unsigned long cp_t;

/* Settings collected from the command line by parse_arg(). */
struct opt_t
{
	/* First codepoint given to -r; INVALID_CODEPOINT when -r was not used. */
	cp_t start;
	/* Second codepoint given to -r; INVALID_CODEPOINT when it was omitted. */
	cp_t end;
	/* Bit mask tested against code_range.flag to select table ranges. */
	int mask;
	/* Number of glyphs printed per output row. */
	short width;
	/* Character printed between glyphs of a row; '\0' prints none. */
	char delimiter;
	/* Not read or written by the code in this file. */
	char reserved;
};

/* One entry of code_table: a codepoint range plus its category bits. */
struct code_range
{
	/* First codepoint of the range; INVALID_CODEPOINT ends the table. */
	cp_t head;
	/* Other end of the range, handed to print_range() as its end value. */
	cp_t tail;
	/* Bits tested against opt_t.mask (presumably F_* values -- confirm in table.inc). */
	int flag;
};

#include "table.inc"


/**
 * Parse a hexadecimal string into a number.
 *
 * @param hex  hexadecimal digits, with or without a leading "0x"; may be NULL
 * @return the parsed value, or 0 when hex is NULL or does not start with
 *         a hexadecimal number
 */
unsigned int hex_to_number(const char *hex)
{
	/* "%x" stores through an unsigned int pointer, so the destination has to
	 * be exactly that type. Using a wider type is undefined behaviour and
	 * yields wrong values on big-endian 64-bit targets. */
	unsigned int value = 0;

	if(hex == NULL) return 0;
	if(sscanf(hex, "%x", &value) != 1) return 0;
	return value;
}


/**
 * Encode one codepoint as a NUL-terminated UTF-8 byte sequence.
 *
 * @param cp   codepoint, U+0000 to U+10FFFF
 * @param out  destination with room for at least 5 bytes (up to 4 encoded
 *             bytes plus the terminating NUL); may be NULL
 * @return number of encoded bytes (1 to 4), or 0 when out is NULL or cp is
 *         above U+10FFFF (in the latter case only the NUL is written)
 * @note surrogate values (U+D800-U+DFFF) are not rejected; they are encoded
 *       as three bytes like any other value below U+10000
 *       http://en.wikipedia.org/wiki/UTF-8
 */
int to_utf8(const cp_t cp, char *out)
{
	int len = 0;

	/* An int function must return a value on every path. */
	if(out == NULL) return 0;

	if(cp < 0x80) {
		*out++ = (char)cp;
		len = 1;
	} else if(cp < 0x800) {
		*out++ = (char)(0xC0 | ((cp >> 0x06) & 0x1F));
		*out++ = (char)(0x80 | (cp & 0x3F));
		len = 2;
	} else if(cp < 0x10000) {
		*out++ = (char)(0xE0 | ((cp >> 0x0C) & 0x0F));
		*out++ = (char)(0x80 | ((cp >> 0x06) & 0x3F));
		*out++ = (char)(0x80 | (cp & 0x3F));
		len = 3;
	} else if(cp < 0x110000) {
		*out++ = (char)(0xF0 | ((cp >> 0x12) & 0x07));
		*out++ = (char)(0x80 | ((cp >> 0x0C) & 0x3F));
		*out++ = (char)(0x80 | ((cp >> 0x06) & 0x3F));
		*out++ = (char)(0x80 | (cp & 0x3F));
		len = 4;
	}
	*out = '\0';
	return len;
}


/**
 * Write the UTF-8 encoding of one codepoint to stdout.
 *
 * @param cp  codepoint to print
 * @note to_utf8() has to produce between 1 and 4 bytes for cp; any other
 *       length trips the assertion when assertions are enabled
 */
void print_glyph(cp_t cp)
{
	char encoded[8] = {0};
	const int nbytes = to_utf8(cp, encoded);

	assert(nbytes >= 1 && nbytes <= 4);
	fwrite(encoded, nbytes, 1, stdout);
}


/* Exchange two cp_t lvalues through a temporary. Expands to a braced block
 * and evaluates each argument twice, so pass plain variables only. */
#define SWAP(n, t) { cp_t z = (n); (n) = (t); (t) = z; }

/**
 * Print every codepoint of an inclusive range, laid out in rows.
 *
 * @param start  one end of the range
 * @param end    other end of the range; INVALID_CODEPOINT means "same as
 *               start". The two ends may be given in either order.
 * @param opt    layout options: width glyphs per row, delimiter between the
 *               glyphs of a row ('\0' prints none)
 * @note when width is greater than 1 a newline is always printed after the
 *       last glyph, even if the last row was complete
 */
void print_range(cp_t start, cp_t end, const struct opt_t *opt)
{
	cp_t first = start;
	cp_t last = (end == INVALID_CODEPOINT) ? start : end;
	cp_t cp;
	int column = 0; /* glyphs already printed on the current row */

	if(first > last)
	{
		cp_t tmp = first;
		first = last;
		last = tmp;
	}

	for(cp = first; cp <= last; cp++)
	{
		if(column != 0 && opt->delimiter)
		{
			printf("%c", opt->delimiter);
		}
		print_glyph(cp);

		column++;
		if(column >= opt->width)
		{
			printf("\n");
			column = 0;
		}
	}

	if(opt->width > 1)
	{
		printf("\n");
	}
}


/**
 * Print all ranges of code_table that pass the option mask.
 *
 * @param opt  options; mask selects the ranges, the rest is forwarded to
 *             print_range()
 * @note the table is walked until an entry whose head is INVALID_CODEPOINT
 */
void show_hand(const struct opt_t *opt)
{
	struct code_range *entry;

	for(entry = code_table; entry->head != INVALID_CODEPOINT; entry++)
	{
		if( CHKMASK(entry->flag, opt->mask) )
		{
			print_range(entry->head, entry->tail, opt);
		}
	}
}


/**
 * Print the program banner and the list of options to stderr.
 */
void help()
{
	static const char options[] =
		"Options:\n"
		"\t-h                 show this help\n"
		"\t-r <hex> [<hex>]   print character(s)\n"
		"\t-w <num>           set width for flat output\n"
		"\t-d <delim>         set delimiter, default is space\n"
		"\t-f <filter>        print specific charset\n"
		"\t   help            show all available filters\n";

	fprintf(stderr, "%s, version %s\n\n", PROGRAM_NAME, PROGRAM_VERSION);
	fprintf(stderr, "%s\n", options);
}


/**
 * Print the usage of -f and the list of known filters to stderr.
 */
void help_filter()
{
	static const char filters[] =
		"available filters:\n"
		"\tutf8       valid UTF-8 characters       [default]\n"
		"\tprint      printable characters\n"
		"\t           ignore control,private-use,noncharacter,surrogate\n";

	fprintf(stderr, "%s -f <filter>\n\n", PROGRAM_NAME);
	fprintf(stderr, "%s\n", filters);
}


/* Build one test case from a hex literal: its value and its spelling. */
#define ADD_CASE(hex) {hex, # hex}

/**
 * Self test for hex_to_number(): every case is parsed twice, once with its
 * "0x" prefix and once without, and a mismatch is reported through ERR().
 */
void test_hex_to_number()
{
	static const struct {
		int dec;
		const char *hex;
	} cases[] = {
		ADD_CASE(0x000a),
		ADD_CASE(0x000A),
		ADD_CASE(0xd10a),
		ADD_CASE(0x0bca),
		ADD_CASE(0x0bfa),
		ADD_CASE(0x0abc)
	};
	size_t i;

	for(i = 0; i < sizeof cases / sizeof cases[0]; ++i)
	{
		const char *prefixed = cases[i].hex;
		const char *bare = prefixed + 2; /* skip the leading "0x" */

		if(cases[i].dec != hex_to_number(prefixed))
		{
			ERR("hex_to_number failed: %s", prefixed);
		}
		if(cases[i].dec != hex_to_number(bare))
		{
			ERR("hex_to_number failed: %s", bare);
		}
	}
}


/* Run the built-in self tests; currently that is test_hex_to_number() only. */
void test_self()
{
	test_hex_to_number();
}


/**
 * Handle the value of -f: select which character classes are printed.
 *
 * @param filters  filter name; NULL when -f was the last argument
 * @param opt      options to update (only mask is changed)
 * @return 1, meaning the value was consumed
 * @note does not return for "help", a missing filter or an unknown filter:
 *       the filter help is printed and the program exits with status 1
 * @note names are compared by prefix, so "printable" also selects "print"
 */
int parse_arg_filter(char *filters, struct opt_t *opt)
{
	if(filters == NULL)
	{
		/* Passing NULL to "%s" is undefined behaviour; print a literal. */
		ERR("Unknown filter: %s", "(null)"); help_filter(); exit(1);
	}
	if( !strncmp(filters, "help", 4) )
	{
		help_filter(); exit(1);
	}
	else if( !strncmp(filters, "print", 5) )
	{
		/* Keep the current mask but drop the classes that do not print. */
		opt->mask &= ~F_IGNORE;
	}
	else if( !strncmp(filters, "utf8", 4) )
	{
		opt->mask = F_DEFAULT;
	}
	else
	{
		ERR("Invalid filter: %s", filters); help_filter(); exit(1);
	}
	return 1;
}


/**
 * Handle the value of -w: set the number of glyphs per output row.
 *
 * @param width  decimal number; NULL when -w was the last argument
 * @param opt    options to update (width, and delimiter when still unset)
 * @return 1, meaning the value was consumed
 * @note a missing value prints the help and exits with status 1
 * @note values below 1 (including non-numeric text) become 1; values above
 *       SHRT_MAX are clamped to SHRT_MAX instead of wrapping around
 */
int parse_arg_width(char *width, struct opt_t *opt)
{
	long parsed;

	if(width == NULL)
	{
		/* Passing NULL to "%s" is undefined behaviour; print a literal. */
		ERR("Invalid width: %s", "(null)"); help(); exit(1);
	}

	/* strtol is defined for every input, and the explicit clamp keeps the
	 * value inside the range of the short it is stored in. */
	parsed = strtol(width, NULL, 10);
	if(parsed < 1) parsed = 1;
	else if(parsed > SHRT_MAX) parsed = SHRT_MAX;
	opt->width = (short)parsed;

	/* Multi-column output needs a separator unless -d already chose one. */
	if(opt->delimiter == '\0') opt->delimiter = DEFAULT_DELIMITER;
	return 1;
}


/**
 * Handle the value of -d: set the character printed between glyphs.
 *
 * @param delim  string whose first character becomes the delimiter; NULL
 *               when -d was the last argument
 * @param opt    options to update (only delimiter is changed)
 * @return 1, meaning the value was consumed
 * @note a missing value, an empty string or a non-printable first character
 *       prints the help and exits with status 1
 */
int parse_arg_delim(char *delim, struct opt_t *opt)
{
	if(delim == NULL)
	{
		/* Passing NULL to "%s" is undefined behaviour; print a literal. */
		ERR("Invalid delimiter: %s", "(null)"); help(); exit(1);
	}
	/* <ctype.h> functions need an unsigned char value; a negative plain
	 * char would be undefined behaviour. */
	if( !isprint((unsigned char)*delim) )
	{
		ERR("Invalid delimiter: %s", delim); help(); exit(1);
	}
	opt->delimiter = *delim;
	return 1;
}


/**
 * Handle the first value of -r: the codepoint (or range start) to print.
 *
 * @param start  hexadecimal codepoint; NULL when -r was the last argument
 * @param opt    options to update (only start is changed)
 * @return 1, meaning the value was consumed
 * @note a missing value prints the help and exits with status 1
 */
int parse_arg_range_start(char *start, struct opt_t *opt)
{
	if(start != NULL)
	{
		opt->start = hex_to_number(start);
		return 1;
	}

	ERR("Unknown codepoint or range, "
			"it needs at least one parameter");
	help();
	exit(1);
}


/**
 * Handle the optional second value of -r: the other end of the range.
 *
 * @param end  hexadecimal codepoint, the next command-line argument, or
 *             NULL when there is none
 * @param opt  options to update (only end is changed)
 * @return 1 when end was taken as the range end, 0 when it was left alone
 * @note an argument starting with '-' is the next option, not a codepoint;
 *       it is left for the caller so that "-r 41 -w 5" works
 */
int parse_arg_range_end(char *end, struct opt_t *opt)
{
	if(end == NULL || end[0] == '-') return 0;
	opt->end = hex_to_number(end);
	return 1;
}


/* Drop the current argument: one fewer left, argv moves to the next one. */
#define EAT_ARG do { --argc; ++argv; } while(0)
/* The argument after the current one, or NULL when the current one is last. */
#define GET_VAL ((argc <= 1) ? NULL : *(argv + 1))
/* Call parse_arg_<optfunc>(optval, opt); when it reports that it used
 * optval, drop that argument as well. */
#define SET_OPT(optfunc) do { if( parse_arg_ ## optfunc(optval, opt) ) { EAT_ARG; } } while(0)

/**
 * Parse the command line into opt.
 *
 * @param argc  argument count as passed to main()
 * @param argv  argument vector as passed to main()
 * @param opt   options to fill in; fields of options that do not appear
 *              keep their current values
 * @note recognised options: -r <hex> [<hex>], -f <filter>, -d <delim>,
 *       -w <num>, -h / -?
 * @note -h, -?, an unknown option and any argument starting with 't'
 *       (self test) end the program with status 1
 */
void parse_arg(int argc, char *argv[], struct opt_t *opt)
{
	char *optname = NULL;
	char *optval  = NULL;

	/* With an empty argument vector there is not even a program name to
	 * skip; stepping over it would make argc negative and walk past argv. */
	if(argc <= 0) return;

	EAT_ARG; /* ignore program name (argv[0]) */
	while(argc > 0)
	{
		optname = *argv;
		optval  = GET_VAL;
		switch(*optname)
		{
		case '-':
			switch(optname[1]) /* check 2nd char */
			{
			case 'r':
				SET_OPT(range_start);
				/* argv moved on, so look again for the optional range end */
				optval = GET_VAL;
				SET_OPT(range_end);
				break;
			case 'f': SET_OPT(filter); break;
			case 'd': SET_OPT(delim);  break;
			case 'w': SET_OPT(width);  break;
			case 'h': case '?': help(); exit(1);
			default: ERR("Unknown option: %s", optname); help(); exit(1);
			}
			break;
		/* a secret option, don't tell anyone! */
		case 't': test_self(); exit(1);
		default: ERR("Unknown option: %s", optname); help(); exit(1);
		}
		EAT_ARG; /* done with the option itself (or its last value) */
	}
}


/**
 * Entry point: parse the options, then print either the range requested
 * with -r or every range of the built-in table that passes the filter.
 *
 * @return 0 (option errors exit inside parse_arg())
 */
int main(int argc, char *argv[])
{
	struct opt_t opts = {0};

	/* Defaults: no range requested, every class enabled, one glyph per
	 * row, no delimiter. */
	opts.start     = INVALID_CODEPOINT;
	opts.end       = INVALID_CODEPOINT;
	opts.mask      = 0xFFFFFFFF;
	opts.width     = 1;
	opts.delimiter = '\0';

	parse_arg(argc, argv, &opts);

	if(opts.start == INVALID_CODEPOINT)
	{
		show_hand(&opts);
		return 0;
	}

	print_range(opts.start, opts.end, &opts);
	return 0;
}
