/* $Id: output_man.c,v 1.25 2003/04/28 00:39:28 jtalkington Exp $ */

#include "common.h"
#include "output_man.h"

/* Formatting state shared by mask_special() and demask_special().  It is a
 * set of bit flags (BLANK, BOLD, UNDERLINE and IN_SPAN are the ones used in
 * this file) and, being a file-scope static, it keeps its value from one
 * converted line to the next.
 */
static unsigned int format_state = 0;

/* man_shell_quote()
 * returns a newly allocated copy of arg wrapped in single quotes so that
 * /bin/sh treats it as one literal word; embedded single quotes are written
 * as '\''.  returns NULL if arg is NULL or memory runs out.  the caller
 * frees the result.
 */
static char *
man_shell_quote(const char *arg) {
	const char *src = NULL;
	char *quoted = NULL;
	char *dst = NULL;
	size_t len = 2; /* opening and closing quote */

	if(arg == NULL) {
		return(NULL);
	}

	for(src = arg; *src != '\0'; src++) {
		len += (*src == '\'') ? 4 : 1;
	}

	quoted = malloc(len + 1);
	if(quoted == NULL) {
		return(NULL);
	}

	dst = quoted;
	*dst++ = '\'';
	for(src = arg; *src != '\0'; src++) {
		if(*src == '\'') {
			/* close the quote, emit an escaped quote, reopen */
			memcpy(dst, "'\\''", 4);
			dst += 4;
		} else {
			*dst++ = *src;
		}
	}
	*dst++ = '\'';
	*dst = '\0';

	return(quoted);
}

/* man_emit_line()
 * converts one line of man output to html and prints it, followed by
 * <br /> for MSIE or a newline for everything else.  lines for which
 * do_convert() returns NULL are skipped.
 */
static void
man_emit_line(char *line, int ua_type) {
	char *output = do_convert(line);

	if(output == NULL) {
		return;
	}

	if(ua_type & UA_MSIE) {
		/* use <BR> instead of \n because IE5 is stupid and 
		 * doesn't respect line breaks in <PRE>
		 */
		printf("%s<br />", output);
	} else {
		printf("%s\n", output);
	}
	/* fflush because this could take a while */
	fflush(stdout);
	free(output);
}

/* output_man_page()
 * converts the output from man to html and outputs it
 *
 * program is the page name and section the optional section, both as
 * requested.  nothing is printed when program is NULL.  a "No manual
 * entry" page is printed when the name contains a '/', begins with '-'
 * (man would parse it as an option), or when man produces no output.
 *
 * the name is single-quoted before it is handed to popen() so that shell
 * metacharacters in the request cannot start other commands.
 */
void 
output_man_page(char *program, char *section) {
	FILE *man_fh = NULL;
	char line[BUFSIZ];
	char *man_command = NULL;
	char *quoted = NULL;
	char *r_section = NULL;
	int ua_type = 0;
	char *raw = get_query_value(RAW);
	char *user_agent = get_query_value(USER_AGENT);

	/* bail out before anything is allocated */
	if(program == NULL) {
		return;
	}

	if(strchr(program, '/') != NULL || program[0] == '-') {
		program = do_convert(program);

		output_header(NULL, NULL);
		printf("<h2>No manual entry for %s</h2>\n",
			(program != NULL) ? program : "");
		output_footer();

		free(program);

		return;
	}

	if(user_agent != NULL && strstr(user_agent, "MSIE") != NULL) {
		ua_type |= UA_MSIE;
	}

	man_command = str_app_s(man_command, Config.man_command);
	
	if(Config.manpath_sw != NULL && Config.manpath != NULL) {
		man_command = str_app_s(man_command, " ");
		man_command = str_app_s(man_command, Config.manpath_sw);
		man_command = str_app_s(man_command, " ");
		man_command = str_app_s(man_command, Config.manpath);
	}
	
	if(section != NULL) {
		r_section = get_real_section(section);
		if(Config.section_sw != NULL) {
			man_command = str_app_s(man_command, " ");
			man_command = str_app_s(man_command, Config.section_sw);
		}
		man_command = str_app_s(man_command," ");
		man_command = str_app_s(man_command, r_section);
		r_section = NULL;
	}

	quoted = man_shell_quote(program);
	if(quoted != NULL) {
		man_command = str_app_s(man_command, " ");
		man_command = str_app_s(man_command, quoted);
		man_command = str_app_s(man_command, " 2>/dev/null");
		free(quoted);
		quoted = NULL;
	} else {
		/* never run a command that is missing its page name */
		free(man_command);
		man_command = NULL;
	}

	program = do_convert(program);

	output_header(program, section);

	if(man_command != NULL) {
		man_fh = popen(man_command, "r");
		free(man_command);
		man_command = NULL;
	}

	/* yes, we do need to see if the read is null here */
	if(man_fh == NULL || fgets(line, sizeof(line), man_fh) == NULL) {
		printf("<h2>No manual entry for %s</h2>\n",
			(program != NULL) ? program : "");
		output_footer();
		free(program);
		if(man_fh != NULL) {
			pclose(man_fh);
		}

		return;
	}

	free(program);
#if ENABLE_LYNX
	if(Config.use_lynx == 0) {
#endif
		output_search_form();
#if  ENABLE_LYNX
	}
#endif
	printf("<pre>\n");

	/* the line read above is always converted, even in raw mode */
	man_emit_line(line, ua_type);

	while(fgets(line, sizeof(line), man_fh) != NULL) {
		if(raw != NULL) {
			printf("%s", line);
			fflush(stdout);
		} else {
			man_emit_line(line, ua_type);
		}
	}

	pclose(man_fh);

	printf("</pre>\n");
	output_footer();
}

/* do_convert()
 * runs one line of man output through the conversion stages in order:
 * mask_special(), add_ext_links(), add_man_links(), demask_special().
 * the pipeline stops early, returning whatever the last stage produced,
 * as soon as a stage yields NULL or an empty string.
 */
char *
do_convert(char *line) {
	char *html = NULL;

	if(line == NULL) {
		return(NULL);
	}

	html = mask_special(line);
	if(html == NULL || *html == '\0') {
		return(html);
	}

	html = add_ext_links(html);
	if(html == NULL || *html == '\0') {
		return(html);
	}

	html = add_man_links(html);
	if(html == NULL || *html == '\0') {
		return(html);
	}

	return(demask_special(html));
}

/* mask_put()
 * appends the internal control code for probe when probe is one of the
 * HTML reserved characters (&<>), otherwise appends literal
 */
static char *
mask_put(char *output, char probe, char literal) {
	if(probe == '<') {
		return(str_app_c(output, LT_MASK));
	}
	if(probe == '>') {
		return(str_app_c(output, GT_MASK));
	}
	if(probe == '&') {
		return(str_app_c(output, AMP_MASK));
	}
	return(str_app_c(output, literal));
}

/* mask_begin_underline()
 * switches format_state to UNDERLINE unless it is already set, closing an
 * open bold run first, and appends the matching control codes
 */
static char *
mask_begin_underline(char *output) {
	if(format_state & UNDERLINE) {
		return(output);
	}
	if(format_state & BOLD) {
		format_state &= ~BOLD;
		output = str_app_c(output, SPAN_MASK);
	}
	format_state |= UNDERLINE;
	return(str_app_c(output, UNDERLINE_MASK));
}

/* mask_begin_bold()
 * switches format_state to BOLD unless it is already set, closing an open
 * underline run first, and appends the matching control codes
 */
static char *
mask_begin_bold(char *output) {
	if(format_state & BOLD) {
		return(output);
	}
	if(format_state & UNDERLINE) {
		format_state &= ~UNDERLINE;
		output = str_app_c(output, SPAN_MASK);
	}
	format_state |= BOLD;
	return(str_app_c(output, BOLD_MASK));
}

/* mask_special()
 * strips the control codes out of the man page and wraps underline/italic and
 * bold words in internal control codes
 * also converts HTML reserved characters (&<>) into control codes
 *
 * returns a newly allocated string that the caller frees.  a line that is
 * just "\n" yields "" the first time and NULL while the BLANK flag is
 * still set, which compresses runs of blank lines.  NULL is also returned
 * for a NULL line or when the initial allocation fails.
 *
 * bold/underline state lives in format_state and is closed at a newline,
 * so a line handed over in pieces keeps its state between calls.  the
 * scan never moves past the terminating NUL, even if the input ends in
 * the middle of a backspace sequence.
 */
char *
mask_special(char *line) {
	char *output = NULL;
	char *next = NULL;
	char *b_char = NULL;
	char *cur = line;

	if(line == NULL) {
		return(NULL);
	}

	/* compress blank lines */
	if(strcmp(line, "\n") == 0) {
		if(format_state & BLANK) {
			return(NULL);
		}
		format_state |= BLANK;
		return(strdup(""));
	}
	format_state &= ~BLANK;

	output = calloc(1, 1);
	if(output == NULL) {
		return(NULL);
	}

	while(*cur != '\0') {
		next = cur + 1;

		if(*cur == '\n') {
			/* clear bold/underline */
			if((format_state & UNDERLINE) || (format_state & BOLD)) {
				format_state &= ~UNDERLINE;
				format_state &= ~BOLD;
				output = str_app_c(output, SPAN_MASK);
			}
			/* else do nothing */
		} else if(*cur == 0x08) {
			/* this shouldn't happen, so just strip it */
		} else if(*cur == '_') {
			if(*next != 0x08) {
				/* plain underscore: copied as is, an open
				 * bold/underline run is left open
				 */
				output = str_app_c(output, *cur);
			} else if(*(next + 1) != '_' || *(next + 2) == 0x08) {
				/* underline: either _<C-h>x, or _<C-h>_<C-h>
				 * which is taken as an underlined _
				 */
				cur = next + 1;
				next = cur + 1;

				output = mask_begin_underline(output);

				/* the input may stop right after the <C-h> */
				if(*cur != '\0') {
					output = mask_put(output, *cur, *cur);
				}
			} else {
				/* bold _ */
				output = mask_begin_bold(output);

				b_char = cur;

				/* skip until we hit the end of the <C-h>s */
				while(*cur == *b_char && *next == 0x08) {
					cur = next + 1;
					next = cur + 1;
				}

				output = mask_put(output, *cur, *b_char);
			}
		} else if(*next == 0x08) {
			/* bold */
			output = mask_begin_bold(output);

			b_char = cur;

			/* skip until we hit the end of the <C-h>s */
			while(*cur == *b_char && *next == 0x08) {
				cur = next + 1;
				next = cur + 1;
			}

			if(*b_char == '+' && *cur != '+' && *cur != '<' &&
				*cur != '>' && *cur != '&' && *cur != '\0') {
				/* it's a bold + followed by a \b, assume it's a bullet */
				output = str_app_c(output, BULLET_MASK);

				if(*next == '\0') {
					/* nothing follows the overstruck glyph */
					cur = next;
				} else {
					/* NOTE(review): scanning resumes at next + 1,
					 * which is what the bold form +<C-h>+<C-h>o<C-h>o
					 * needs; for a plain +<C-h>o it skips the two
					 * characters after the o.  kept as it was.
					 */
					b_char = next + 1;
					cur = b_char;
					while(*cur != '\0' && *cur == *b_char && *next == 0x08) {
						cur = next + 1;
						next = cur + 1;
					}
				}
			} else {
				output = mask_put(output, *cur, *b_char);
			}
		} else {
			/* normal character */
			if((format_state & BOLD) || (format_state & UNDERLINE)) {
				format_state &= ~UNDERLINE;
				format_state &= ~BOLD;
				output = str_app_c(output, SPAN_MASK);
			}

			output = mask_put(output, *cur, *cur);
		}

		/* a skip above may have landed on the terminator */
		if(*cur == '\0') {
			break;
		}
		cur++;
	}

	return(output);
}

/* demask_special()
 * converts internal control codes to HTML tags
 *
 * line is consumed (freed) and a newly allocated string is returned.
 * NULL is returned for a NULL line, and also when nothing at all was
 * appended.  the IN_SPAN bit of format_state records whether a bold or
 * underline span is open, so that SPAN_END is only written for a span
 * that was actually started.  bytes of 160 and above are replaced by
 * whatever make_iso_string() returns for them.
 */
char *
demask_special(char *line) {
	char *output = NULL;
	char *cur = line;
	char *tmp = NULL;

	if(line == NULL) {
		return(NULL);
	}

	while(*cur != '\0') {
		if(*cur == LT_MASK) {
			output = str_app_s(output, "&lt;");
		} else if(*cur == GT_MASK) {
			output = str_app_s(output, "&gt;");
		} else if(*cur == AMP_MASK) {
			output = str_app_s(output, "&amp;");
		} else if(*cur == BOLD_MASK) {
			output = str_app_s(output, BOLD_START);
			format_state |= IN_SPAN;
		} else if(*cur == UNDERLINE_MASK) {
			output = str_app_s(output, UNDERLINE_START);
			format_state |= IN_SPAN;
		} else if(*cur == SPAN_MASK) {
			/* test the bit only; assigning here would wipe the
			 * BLANK/BOLD/UNDERLINE bits that mask_special() relies on
			 */
			if(format_state & IN_SPAN) {
				output = str_app_s(output, SPAN_END);
				format_state &= ~IN_SPAN;
			}
		} else if(*cur == BULLET_MASK) {
			output = str_app_s(output, "&#8226;");
		} else if((unsigned char)(*cur) >= (unsigned char )(160)) {
			tmp = make_iso_string(*cur);
			output = str_app_s(output, tmp);
			free(tmp);
			tmp = NULL;
		} else {
			output = str_app_c(output, *cur);
		}

		cur++;
	}

	free(line);
	return(output);
}


/* add_ext_links()
 * wrapper to convert inline URLS to links
 *
 * runs ext_linkify() for HTTP_START and then for FTP_START.  line is
 * consumed: the return value is either line itself (nothing was linked,
 * or line is empty) or a newly allocated string, in which case line has
 * been freed.  the string produced by the first pass is freed as well
 * when the second pass replaces it.
 */

char *
add_ext_links(char *line) {
	char *http_pass = NULL;
	char *output = NULL;
	
	if(line == NULL) {
		return(NULL);
	} else if(strlen(line) == 0) {
		return(line);
	}

	http_pass = ext_linkify(line, HTTP_START);
	output = ext_linkify(http_pass, FTP_START);

	/* ext_linkify() hands back its input when it changed nothing, so a
	 * buffer may only be freed once it is known not to be the result
	 */
	if(http_pass != line && http_pass != output) {
		free(http_pass);
	}

	if(output != line) {
		free(line);
	}

	return(output);

}

/* ext_linkify()
 * adds external links for inline URLS
 *
 * every occurrence of prefix in line is taken as the start of a URL that
 * runs up to find_url_end(), and is replaced by the markup gen_ext_url()
 * builds for it.  if no markup can be built the URL text is copied
 * unchanged.
 *
 * returns NULL if line or prefix is NULL, line itself if nothing was
 * appended, otherwise a newly allocated string.  line is never freed here.
 */
char *
ext_linkify(char *line, char *prefix) {
	char *output = NULL;
	char *tmp = NULL;
	char *link_start = NULL;
	char *link_end = NULL;
	char *copy_ptr = line;

	if(line == NULL || prefix == NULL) {
		return(NULL);
	}

	while((link_start = strstr(copy_ptr, prefix)) != NULL) {
		link_end = find_url_end(link_start);
		
		/* the text between the previous URL and this one */
		tmp = strndup(copy_ptr, link_start - copy_ptr);
		if(tmp != NULL) {
			output = str_app_s(output, tmp);
			free(tmp);
		}
		
		tmp = gen_ext_url(link_start, link_end);
		if(tmp == NULL) {
			/* no markup available: keep the URL itself, from its
			 * first to its last character
			 */
			tmp = strndup(link_start, link_end - link_start + 1);
		}

		if(tmp != NULL) {
			output = str_app_s(output, tmp);
			free(tmp);
			tmp = NULL;
		}

		copy_ptr = link_end + 1;
	}

	if(output == NULL) {
		output = line;
	} else if(*copy_ptr != '\0') {
		/* whatever follows the last URL */
		output = str_app_s(output, copy_ptr);
	}

	return(output);
}

/* gen_script_url()
 * generates a URL for references to the program (man pages)
 *
 * formats MAN_HREF with script_name, program, section, program, section
 * (in that order) into a newly allocated string that the caller frees.
 * returns NULL if any argument is NULL or the allocation fails.
 */
char *
gen_script_url(char *script_name, char *program, char *section) {
	char *output = NULL;
	size_t size = 0;

	if(script_name == NULL || program == NULL || section == NULL) {
		return(NULL);
	}
	
	size = strlen(MAN_HREF) + (2 * (strlen(program) + 1)) + (2 * (strlen(section) + 1)) + strlen(script_name) + 1;

	output = calloc(1, size);
	if(output == NULL) {
		return(NULL);
	}

	snprintf(output, size, MAN_HREF, script_name, program, section, program, section);
	return(output);
}

/* gen_ext_url()
 * generates the URL for references to generic objects
 *
 * url_start and url_end point at the first and last character of the URL
 * (url_end is inclusive).  the URL is inserted twice into EXT_HREF and the
 * result is returned in a newly allocated string that the caller frees.
 * returns NULL if either pointer is NULL or an allocation fails.
 */
char *
gen_ext_url(char *url_start, char *url_end) {
	char *tmp = NULL;
	char *output = NULL;
	size_t size = 0;

	if(url_start == NULL || url_end == NULL) {
		return(NULL);
	}

	tmp = strndup(url_start, url_end - url_start + 1);
	if(tmp == NULL) {
		return(NULL);
	}

	size = strlen(EXT_HREF) + (2 * strlen(tmp)) + 1;

	output = calloc(1, size);
	if(output != NULL) {
		snprintf(output, size, EXT_HREF, tmp, tmp);
	}
	
	free(tmp);
	return(output);
}

/* find_url_end()
 * finds the last character of a URL
 *
 * walks forward from url_start and stops at the first character that is
 * below 0x1F, at or above 0x7F, or listed in invalid_url.  the returned
 * pointer is the character just before that one, or the last character
 * of the string if no such character is found.  returns NULL for NULL.
 */
char *
find_url_end(char *url_start) {
	char *scan = NULL;
	int idx = 0;

	if(url_start == NULL) {
		return(NULL);
	}

	scan = url_start;
	while(*scan != '\0') {
		/* control and non-ASCII bytes terminate the URL */
		if(*scan < 0x1F || *scan >= 0x7F) {
			return(scan - 1);
		}

		/* so does anything in the invalid_url table */
		idx = 0;
		while(idx < sizeof(invalid_url)) {
			if(*scan == invalid_url[idx]) {
				return(scan - 1);
			}
			idx++;
		}

		scan++;
	}

	/* ran off the end of the string without meeting a terminating
	 * character, so the URL extends to the end of the line
	 */
	return(scan - 1);
}

/* extract_section()
 * extracts the section text from an apparent man page reference
 * (something in parens)
 *
 * leading '(' , BOLD_MASK and UNDERLINE_MASK characters are skipped, then
 * everything up to the first SPAN_MASK, ')' or end of string is copied
 * into a newly allocated string that the caller frees.  returns NULL if
 * input is NULL or nothing was copied.  the result is not checked against
 * the configured sections here; callers do that.
 */
char *
extract_section(char *input) {
	char *output = NULL;

	if(input == NULL) {
		return(NULL);
	}

	while(*input == BOLD_MASK || *input == UNDERLINE_MASK || *input == '(') {
		input++;
	}

	/* stop at the terminator too, so an unclosed paren cannot run the
	 * scan off the end of the string
	 */
	while(*input != '\0' && *input != SPAN_MASK && *input != ')') {
		output = str_app_c(output, *input);
		input++;
	}
	return(output);
}

/* man_links_append()
 * appends the len bytes starting at start to output and returns the
 * (possibly moved) output; output is returned untouched if the temporary
 * copy cannot be allocated
 */
static char *
man_links_append(char *output, const char *start, size_t len) {
	char *piece = strndup(start, len);

	if(piece != NULL) {
		output = str_app_s(output, piece);
		free(piece);
	}

	return(output);
}

/* add_man_links()
 * turns page(section) references into links to this script
 *
 * the line is scanned for "(...)" groups.  the word directly before the
 * '(' (ignoring bold/underline/span control codes) is taken as the page
 * name and the group's content as the section; when get_real_section()
 * accepts the section, the whole reference is replaced by the markup from
 * gen_script_url().  everything else is copied through unchanged.
 *
 * line is consumed (freed) and a newly allocated string is returned,
 * except that line itself is returned, untouched, when it is NULL or the
 * SCRIPT_NAME query value is missing.
 */
char *
add_man_links(char *line) {
	char *output = NULL;
	char *program = NULL;
	char *section = NULL;
	char *sec_op = NULL;
	char *sec_cp = NULL;
	char *prog_start = NULL;
	char *prog_end = NULL;
	char *tmp = NULL;
	char *line_end = NULL;
	char *copy_ptr = line;
	char *script_name = get_query_value(SCRIPT_NAME);

	if(script_name == NULL || line == NULL) {
		return(line);
	}

	line_end = line + strlen(line);

	while(1) {
		sec_op = strchr(copy_ptr, '(');
		if(sec_op == NULL) {
			/* no more candidates, copy the rest */
			output = str_app_s(output, copy_ptr);
			break;
		}

		sec_cp = strchr(sec_op, ')');
		if(sec_cp == NULL) {
			/* unmatched '(' : copy up to and including it */
			output = man_links_append(output, copy_ptr, sec_op - copy_ptr + 1);
			copy_ptr = sec_op + 1;

			if(copy_ptr >= line_end) {
				break;
			}
			continue;
		}

		/* find the last character of the page name, never looking
		 * at anything before copy_ptr
		 */
		prog_end = NULL;
		if(sec_op > copy_ptr) {
			prog_end = sec_op - 1;

			while(prog_end > copy_ptr &&
					(*prog_end == BOLD_MASK || 
					*prog_end == UNDERLINE_MASK || 
					*prog_end == SPAN_MASK)) {
				prog_end--;
			}
		}

		if(prog_end == NULL ||
				*prog_end == BOLD_MASK ||
				*prog_end == UNDERLINE_MASK ||
				*prog_end == SPAN_MASK ||
				*prog_end == ' ' || *prog_end == '\t' ||
				is_invalid_url_char(*prog_end)) {
			/* nothing usable as a page name precedes the '(' */
			output = man_links_append(output, copy_ptr, sec_cp - copy_ptr + 1);
			copy_ptr = sec_cp + 1;
			continue;
		}

		/* walk back to the start of the word */
		prog_start = prog_end;
		
		while(  (prog_start > copy_ptr) && 
			(*prog_start != ' ') && 
			(*(prog_start - 1) != ' ') && 
			(*(prog_start - 1) != '\t') ){

			prog_start--;
		}

		/* text in front of the page name goes out unchanged */
		if(copy_ptr != prog_start) {
			output = man_links_append(output, copy_ptr, prog_start - copy_ptr);
			copy_ptr = prog_start;
		}

		while(*prog_start == BOLD_MASK || *prog_start == UNDERLINE_MASK) {
			prog_start++;
		}

		section = extract_section(sec_op);
		tmp = NULL;

		if(section != NULL && get_real_section(section) != NULL) {
			program = strndup(prog_start, prog_end + 1 - prog_start);
			tmp = gen_script_url(script_name, program, section);

			free(program);
			program = NULL;
		}

		if(tmp != NULL) {
			output = str_app_s(output, tmp);

			free(tmp);
			tmp = NULL;
		} else {
			/* not a known section (or no link could be built):
			 * keep the reference as plain text
			 */
			output = man_links_append(output, copy_ptr, sec_cp + 1 - copy_ptr);
		}

		free(section);
		section = NULL;

		copy_ptr = sec_cp + 1;
		
		if(copy_ptr >= line_end) {
			break;
		}
	}

	free(line);
	return(output);
}

/* is_invalid_url_char()
 * returns 1 if ch has a value of 32 or less, is DEL (127), or is one of
 * the characters " # % < >, and 0 otherwise
 */
int
is_invalid_url_char(char ch) {
	if(ch <= 32 || ch == 127) {
		return(1);
	}

	switch(ch) {
	case '"':
	case '#':
	case '%':
	case '<':
	case '>':
		return(1);
	default:
		return(0);
	}
}

/* make_iso_string()
 * returns the HTML numeric character reference ("&#NNN;") for the byte c
 * in a newly allocated string that the caller frees.  the byte 0xAD is
 * returned as a plain "-" instead.  returns NULL if the allocation fails.
 */
char *
make_iso_string(char c) {
	/* "&#255;" is the longest string produced; sizeof counts its NUL */
	const size_t size = sizeof("&#255;");
	unsigned int i = (unsigned char)c;
	char *output = calloc(1, size);

	if(output == NULL) {
		return(NULL);
	}

	if(i == 0xAD) {
		/* 
		 * It looks like Safari is the only browser that leaves this
		 * alone in <pre> mode
		 */
		snprintf(output, size, "-");
	} else {
		snprintf(output, size, "&#%u;", i);
	}

	return(output);
}

