/* text2html.c -- convert a formatted text file to html
 *	In the source:
 *		The first line is the document title
 *		Newlines start paragraphs
 *
 * History
 * 10Jul00 wb initial edit
 * 11Jul00 wb add <br> on white space and some graphic characters
 * 12Jul00 wb add update time
 * 13Jul00 wb pull the name from the passwd entry
 * 21Jul00 wb add <br> before lines starting with a word and colon
 * 01Mar01 wb add scans for urls, make link color stand out more
 * 14Mar01 wb fix assignment to set first_ch
 * 15Mar01 wb convert [*.jpg] file names to URLs
 * 16Mar01 wb convert accented characters with letter + \ + accent, \! comments
 * 20Mar01 wb treat .gif like .jpg
 * 09Jun01 wb recognize ftp:, allow http and ftp inside [], -f fixed format.
 *		[pre] for <pre>, [fixed] for -f fixed format,
 *		[/copy] for no copyright
 * 11Jun01 wb if preformatted. do not convert short dash lines to rules
 * 13Jun01 wb add the time to the update date
 * 22Jun01 wb add [i] for <i>, [b] for <b>, [u] for <u>
 *		treat .html as local file ref
 * 25Jun01 wb add prototypes
 * 27Jun01 wb add s\s for German double s
 * 30Jun01 wb add [code] for <code>, mailto:
 * 03Jul01 wb add [link,image]
 * 21Jul01 wb handle .jpeg
 * 22Jul01 wb compile under sco, handle .tiff, .png
 * 21Aug01 wb add [link;link] for a link to an image
 * 27Aug01 wb for [link1,link2] do not complain if link1 is a URL
 * 14Sep01 wb added align mode
 * 17Sep01 wb add space between consecutive images in align mode
 * 29Sep01 wb add break before lines starting with digits and a dot
 * 01Oct01 wb add [background,image]
 * 04Oct01 wb added [/index]
 * 24Oct01 wb added \. middot, \0 deg, \o ordm, \- soft hyphen, \1/2
 * 03Jan02 wb set image height and width
 * 08Jan02 wb allow commas in semi-colons in urls that look like queries
 * 09Jan02 wb treat .doc, .c and .sh as a local file ref
 *		allow commas in stand-alone urls
 * 10Jan02 wb convert \\- to \- instead of \&shy;
 * 13Jan02 wb treat .1 as a local file ref
 * 23Mar02 wb check image usage counts
 * 09Apr02 wb handle identify lines for multiple gif images, check for favicon.ico
 * 19Apr02 wb check to increment image usage count for links
 * 04Jun02 wb change color of headings from C5B7AE (gray) to FFFFFF (white)
 * 15Jun02 wb better comma skipping in url paths
 * 02Jul02 wb added [dotimage,image]
 * 08Jul02 wb do better job of pairing <p> and </p>
 * 04Oct02 wb add [/unusedcount] to skip the count of unused images
 * 22Oct02 wb add [isin], [sub], [sup], [infin]
 * 31Oct02 wb skip close parens in url paths
 * 26Nov02 wb change (C) to copyright
 * 03Dec02 wb stop reading URLs at [] and ()
 * 12Dec02 wb skip trailing colon or semicolon in url paths, [comment]
 * 19Dec02 wb added [asis]
 * 05Feb03 wb add support for pngs
 * 15Feb03 wb added [name,label] [/name] for generating tag for "label"
 * 19Feb03 wb add support for #name in [url#name,image]
 * 07Apr03 wb add italics to [comment]
 * 14Apr03 wb add support for local references with [#name]
 * 23May03 wb start a new line on lines starting * and then space or tab
 * 19Jul03 wb do not complain on links starting with #
 * 07Aug03 wb added [/footer] to remove identifying information from the footer
 * 16Nov03 wb added [polish] to use a Polish font, must be at the top of the file
 * 17Nov03 wb added conversion sequences for Polish characters
 * 18Mov03 wb added content-language meta tag for Polish
 * 20Nov03 wb place the iso font meta tag first.
 * 24Nov03 wb write Polish characters as Unicode HTML &#<number>; codes instead of single bytes
 * 01Dec03 wb add Polish Unicode for l-bar
 * 05Dec03 wb added [br] for <br>
 * 02Jan04 wb added hard and soft characters
 * 22Jan04 wb allow URL with , followed by %
 * 26Jan04 wb add c\,
 * 07Aug04 wb add checks for https:
 * 05Sep04 wb add a\o
 * 31Aug05 wb add -i to set noindex
 * 01Sep05 wb add [black] for black background, use Helvetica font
 * 04Feb06 wb treat .pdf, .eps and .ps as local references
 * 08Mar06 wb add [p]
 * 28Mar06 wb allow ,+ in links
 * 10Jun06 wb add \+ for lines to show only when indexing allowed, add -a to allow index
 * 11Jun06 wb search for files in ../ to find unhidden versions
 * 03Jul06 wb allow double commas in links
 * 28Sep06 wb always accept polish characters
 * 12Oct06 wb allow :t suffix for files to specify a non-image file
 * 28Oct06 wb add ae and o/
 * 01Feb07 wb added -c
 * 05Feb07 wb allow ,_ in URLs for wikipedia
 * 07Jun07 wb added [/anyfooter] to prevent all footers
 * 01Jul07 wb added [table], [tr], [th], [td], [nbsp] increase MAXLINE from 1000 to 5000
 * 01Dec07 wb added support for mpgs
 * 05Dec07 wb added \= for unneeded public items, currently like \+
 * 13Jun08 wb added Cyrillic letters
 * 17Jun08 wb added support for thms (jpg thumbnails for mpgs)
 * 22Jul08 bc re-added changes so it compiles under Windows (disappeared in last update)
 * 01Nov08 wb do not break urls at accented characters
 * 27Nov08 wb added [big] and [small]
 * 21Mar10 wb treat .txt and .scm as a local files
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "port.h"

#if SYSTEM_UNIX
#include <unistd.h>
#endif
#include <time.h>
#if SYSTEM_UNIX
#include <pwd.h>
#endif
#include <sys/types.h>
#include <errno.h>

#if SYSTEM_WIN
#define access _access
#ifndef R_OK
#define R_OK 2
#endif
#endif

/* global variables for command line options */

/* parameters */

enum param_type_enum {
	NL_STRING = 0,
	URL_STRING,
	NAME_STRING,
	DATE_STRING,
	YEAR_STRING,
	TITLE_STRING,
	LANG_STRING,
	BGCOLOR_STRING,
	NUM_LINE_TYPES
};

static
#ifndef M_UNIX	/* work-around for bug in sco cc */
 const
#endif
 char *param[ NUM_LINE_TYPES ];

/* global variables for parsing lines */

#define	MAXLINE	5000

static char line[ MAXLINE ];
static int line_len = 0;

static int fixed_format = 0;
static int preformatted = 0;
static int do_align = 1;
static int in_align = 0;
static int in_asis = 0;
static int copyright = 1;
static int do_footer = 1;
static int do_anyfooter = 1;
static int allow_index = 1;
static int comment_plus_lines = 0;
static int has_identify_program = 1;
static int do_unused_count = 1;
static int sub_count = 0;
static int sup_count = 0;
static int big_count = 0;
static int small_count = 0;
static int in_name = 0;
static int do_black = 0;
static int in_table = 0;
static int in_tablerow = 0;
static int in_tablehead = 0;
static int in_tabledata = 0;

static enum lang_type_enum {
	LANG_ENGLISH = 0,
	LANG_POLISH
} lang_type = LANG_ENGLISH;

struct image_size_tag {
	char *image_name;
	int image_width;
	int image_height;
	int image_use_count;
	struct image_size_tag *next_image;
};

static struct image_size_tag *image_cache_ptr = NULL;

/*
** Parse an image magick size line
**	adds the image to the list
**	returns a pointer the the image
*/

static struct image_size_tag *parse_image_line(const char *line);

static struct image_size_tag *parse_image_line(const char *line)
{
	int width, height;
	int name_len;
	const char *name;
	struct image_size_tag *image_ptr;

	width = height = 0;

	/* optional leading whitespace */

	while (*line == ' ') line++;

	/* image name */

	name = line;
	name_len = 0;
	while (*line != '\0' && *line != ' ') { line++; name_len++; }

	/* whitespace */

	while (*line == ' ') line++;

	/* optional image type, present in ImageMagick 5.4.3 */

	if (*line >= 'A' && *line <= 'Z') {
		const char *image_type;
		int image_len;
		image_type = line;
		image_len = 0;
		while (*line != '\0' && *line != ' ') { line++; image_len++; }
		while (*line == ' ') line++;
		if ((image_len == 4 && memcmp(image_type, "JPEG", image_len) == 0) ||
		    (image_len == 3 && memcmp(image_type, "GIF", image_len) == 0) ||
		    (image_len == 3 && memcmp(image_type, "PNG", image_len) == 0)) {
			/* ok, nothing to do */
		} else {
			fprintf(stderr,
				"Warning: image %.*s has unknown type: %.*s\n",
				name_len, name, image_len, image_type);
		}
	}

	/* width */

	while (*line >= '0' && *line <= '9') {
		width = width * 10 + *line++ - '0';
	}

	/* x */

	if (*line == 'x') line++;

	/* height */

	while (*line >= '0' && *line <= '9') {
		height = height * 10 + *line++ - '0';
	}

	name_len = 0;

	while (name[name_len] != ' ' && name[name_len] != '[') {
		name_len++;
	}

	if (name_len > 0) {

		/* check for if the image already exists */
		/* this can happen if identify returns a line for each */
		/* image in a multiple image gif */

		for (image_ptr = image_cache_ptr;
		     image_ptr != NULL;
		     image_ptr = image_ptr->next_image) {
			if (strncmp(image_ptr->image_name, name, name_len) == 0 &&
			    image_ptr->image_name[ name_len ] == '\0') {
				if (image_ptr->image_width != width ||
				    image_ptr->image_height != height) {
					fprintf(stderr,
						"found duplicates of %s, %d X %d and %d X %d\n",
						image_ptr->image_name,
						image_ptr->image_width,
						image_ptr->image_height,
						width,
						height);
				}
				return image_ptr;
			}
		}
	}

	image_ptr = NULL;

	if (name_len > 0 && width > 0 && height > 0) {
		image_ptr = malloc(sizeof(struct image_size_tag));
		if (image_ptr != NULL) {
			image_ptr->image_name = malloc(name_len + 1);
			if (!image_ptr->image_name) {
				free(image_ptr);
				image_ptr = NULL;
			} else {
				strncpy(image_ptr->image_name, name, name_len);
				image_ptr->image_name[ name_len ] = '\0';
				image_ptr->image_width = width;
				image_ptr->image_height = height;
				image_ptr->image_use_count = 0;
				image_ptr->next_image = image_cache_ptr;
				image_cache_ptr = image_ptr;
			}
		}
	}

	if (image_ptr) {
#if 0
		fprintf(stderr, "parsed '%s' to\n", name);
		fprintf(stderr, "  %s %dx%d\n",
			image_ptr->image_name,
			image_ptr->image_width,
			image_ptr->image_height);
#endif
	} else {
		fprintf(stderr, "could not parse '%s'\n", name);
	}

	return image_ptr;
}

/*
** Check if the line continues with just alphanumeric text and a right bracket
*/

static int is_only_text(const char *line);

static int is_only_text(line)
const char *line;
{
	while ((*line >= 'a' && *line <= 'z') ||
	       (*line >= 'A' && *line <= 'Z') ||
	       (*line >= '0' && *line <= '9')) {
		line++;
	}
	return *line == ']';
}

/*
** Check if the line continues with file name parts then a colon, a letter, and a right bracket
*/

static int is_text_and_option(const char *line);

static int is_text_and_option(line)
const char *line;
{
	while ((*line >= 'a' && *line <= 'z') ||
	       (*line >= 'A' && *line <= 'Z') ||
	       (*line >= '0' && *line <= '9') ||
	       *line == '.' ||
	       *line == '-' ||
	       *line == '_') {	
		line++;
	}
	return (*line == ':' && *(line+1) >= 'a' && *(line+1) <= 'z' && *(line+2) == ']');
}

/*
** Find the size of an image using the image magick identify program
*/

static void get_image_size(const char *name, int *image_width, int *image_height);

static void get_image_size(const char *name, int *image_width, int *image_height)
{
	FILE *f;
	char ident_cmd[ MAXLINE + 80 ];
	char line[ MAXLINE ];
	struct image_size_tag *image_ptr;
	static int loaded_image_cache = 0;

	*image_width = *image_height = 0;

	if (!loaded_image_cache) {
		loaded_image_cache = 1;
		f = popen("identify *.jpeg *.jpg *.gif *.tiff *.png 2>/dev/null", "r");
		if (f != NULL) {
			while (fgets(line, MAXLINE, f) != NULL) {
				parse_image_line(line);
			}
			fclose(f);
		}
	}

	for (image_ptr = image_cache_ptr;
	     image_ptr != NULL;
	     image_ptr = image_ptr->next_image) {
		if (strcmp(image_ptr->image_name, name) == 0) {
			*image_width = image_ptr->image_width;
			*image_height = image_ptr->image_height;
			image_ptr->image_use_count++;
#if 0
			fprintf(stderr, "found '%s' in cache\n", name);
#endif
			return;
		}
	}

	if (!has_identify_program) {
		return;
	}

	sprintf(ident_cmd, "identify '%.*s'", MAXLINE, name);

	f = popen(ident_cmd, "r");

	if (!f) {
		has_identify_program = 0;
		fprintf(stderr, "Command '%s' returned error %d.\n", ident_cmd, errno);
		return;
	}

	if (fgets(line, MAXLINE, f) != NULL) {
		image_ptr = parse_image_line(line);
		if (image_ptr != NULL) {
			*image_width = image_ptr->image_width;
			*image_height = image_ptr->image_height;
			image_ptr->image_use_count++;
		}
	}

	fclose(f);

	if (*image_width <= 0 && *image_height <= 0) {
		fprintf(stderr, "Unable to find the image size of '%s'.\n", name);
	}
}

/*
** Mark that an image is used
*/

static void mark_image_used(const char *name);

static void mark_image_used(const char *name)
{
	struct image_size_tag *image_ptr;

	for (image_ptr = image_cache_ptr;
	     image_ptr != NULL;
	     image_ptr = image_ptr->next_image) {
		if (strcmp(image_ptr->image_name, name) == 0) {
			image_ptr->image_use_count++;
			return;
		}
	}
}

/* check if a character is in a bracket sequence */

static int in_brackets(const char *line, int pos, int len);

static int in_brackets(const char *line, int pos, int len)
{
  int i;

  i = pos;
  if (i <= 0) return 0;
  if (line[i-1] == '[') return 1;
  if (line[i-1] != ',') return 0;
  while (i >= 0 && line[i] != '[') {
    if (line[i] == ' ') return 0;
    i--;
  }
  if (i < 0 || line[i] != '[') return 0;

  i = pos;
  while (i < len) {
    if (line[i] == ']') return 1;
    i++;
  }
  return 0;
}

/* convert a polish escape sequence into a character string or NULL */

static const char *convert_polish_chars(int ch1, int ch2);

static const char *convert_polish_chars(ch1, ch2)
int ch1, ch2;
{
  int i;
  static char buf[ 10 ];	/* enough for & # digit digit digit ; null */
  static struct {
    int ch1;	/* letter */
    int ch2;	/* accent */
    int result;	/* <256 -> literal byte, >=256 -> &# result ; */
  } polish_chars[] = {
    { 'A', ',', /* 161 */ 260 },
    { 'L', '/', /* 163 */ 321 },
    { 'L', '\'', 165 },
    { 'S', '\'', /* 166 */ 346 },
    { 'S', 'v', /* 169 */ 352 },
    { 'S', ',', 170 },
    { 'T', 'v', 171 },
    { 'Z', '\'', /* 172 */ 377 },
    { 'Z', 'v', /* 174 */ 381 },
    { 'Z', '.', /* 175 */ 379 },
    { 'a', ',', /* 177 */ 261 },
    { 'l', '/', /* 179 */ 322 },
    { 'l', ',', 181 },
    { 's', '\'', /* 182 */ 347 },
    { 's', 'v', /* 185 */ 353 },
    { 's', ',', 186 },
    { 't', '\'', 187 },
    { 'z', '\'', /* 188 */ 378 },
    { '\'', '\'', 189 },
    { 'z', 'v', /* 190 */ 382 },
    { 'z', '.', /* 191 */ 380 },
    { 'R', '\'', 192 },
    { 'A', '\'', 193 },
    { 'A', 'v', 194 },
    { 'A', 'u', 195 },
    { 'A', '"', 196 },
    { 'L', '\'', /* 197 */ 321 },
    { 'C', '\'', /* 198 */ 262 },
    { 'C', ',', 199 },
    { 'C', 'v', 200 /* 268 */ },
    { 'E', '\'', 201 },
    { 'E', ',', /* 202 */ 280 },
    { 'E', '"', 203 },
    { 'E', 'v', 204 },
    { 'I', '\'', 205 },
    { 'I', 'v', 206 },
    { 'D', 'v', 207 },
    { 'D', '-', 209 },
    { 'N', '\'', /* 209 */ 323 },
    { 'N', 'v', 210 },
    { 'O', '\'', /* 211 */ 211 },
    { 'O', 'v', 212 },
    { 'O', '#', 213 },
    { 'O', '"', 214 },
    { 'x', 'x', 215 },
    { 'R', 'v', 216 },
    { 'U', 'o', 217 },
    { 'U', '\'', 218 },
    { 'U', '#', 219 },
    { 'U', '"', 220 },
    { 'Y', '\'', 221 },
    { 'T', ',', 222 },
    { 's', 's', 223 },
    { 'r', '\'', 224 },
    { 'a', '\'', 225 },
    { 'a', 'v', 226 },
    { 'a', 'u', 227 },
    { 'a', '"', 228 },
    { 'l', '\'', /* 229 */ 322 },
    { 'c', '\'', /* 230 */ 263 },
    { 'c', ',', 231 },
    { 'c', 'v', /* 232 */ 269 },
    { 'e', '\'', 233 },
    { 'e', ',', /* 234 */ 281 },
    { 'e', '"', 235 },
    { 'e', 'v', 236 },
    { 'i', '\'', 237 },
    { 'i', 'v', 238 },
    { 'd', '\'', 239 },
    { 'd', '-', 240 },
    { 'n', '\'', /* 241 */ 324 },
    { 'n', 'v', 242 },
    { 'o', '\'', /* 243 */ 243 },
    { 'o', 'v', 244 },
    { 'o', '#', 245 },
    { 'o', '"', 246 },
    { '-', ':', 247 },
    { 'r', 'v', 248 },
    { 'u', 'o', 249 },
    { 'u', '\'', 250 },
    { 'u', '#', 251 },
    { 'u', '"', 252 },
    { 'y', '\'', 253 },
    { 't', ',', 254 },
    { 'a', '-', 257 },
    { 'a', 'u', 259 },
    { 'c', '\'', 263 },
    { 'e', '-', 275 },
    { 'e', 'u', 277 },
    { 'E', '.', 278 },
    { 'e', '.', 279 },
    { 'i', '-', 299 },
    { 'i', 'u', 301 },
    { 'I', ',', 302 },
    { 'i', ',', 303 },
    { 'n', '\'', 324 },
    { 'n', ',', 326 },
    { 'o', '-', 333 },
    { 'o', 'u', 335 },
    { 's', '\'', 347 },
    { 'U', '-', 362 },
    { 'u', '-', 363 },
    { 'U', ',', 370 },
    { 'u', ',', 371 },
    { 'u', 'u', 365 },
    { 'o', ',', 491 },
    { 'b', '-', 1098 }, /* hard sign / back jer */
    { 'b', '_', 1100 },	/* soft sign / front jer */

    { 'A', 'c', 1040 },
    { 'B', 'c', 1041 },
    { 'V', 'c', 1042 },
    { 'G', 'c', 1043 },
    { 'D', 'c', 1044 },
    { 'E', 'c', 1045 },
    { 'J', 'c', 1046 },
    { 'Z', 'c', 1047 },
    { 'I', 'c', 1048 },
    { 'I', 'u', 1049 },
    { 'K', 'c', 1050 },
    { 'L', 'c', 1051 },
    { 'M', 'c', 1052 },
    { 'N', 'c', 1053 },
    { 'O', 'c', 1054 },
    { 'P', 'c', 1055 },
    { 'R', 'c', 1056 },
    { 'S', 'c', 1057 },
    { 'T', 'c', 1058 },
    { 'Y', 'c', 1059 },
    { 'F', 'c', 1060 },
    { 'H', 'c', 1061 },
    { 'T', 's', 1062 },
    { 'C', 'h', 1063 },
    { 'S', 'h', 1064 },
    { 'S', 't', 1065 },
    { 'U', 'c', 1066 },
    { 'b', 'I', 1067 },
    { 'b', 'c', 1068 },
    { '3', 'c', 1069 },
    { 'I', 'o', 1070 },
    { 'Y', 'a', 1071 },

    { 'a', 'c', 1072 },
    { 'b', 'c', 1073 },
    { 'v', 'c', 1074 },
    { 'g', 'c', 424 },
    { 'd', 'c', 'g' },
    { 'e', 'c', 1077 },
    { 'j', 'c', 1078 },
    { 'z', 'c', 1079 },
    { 'i', 'c', 'u' },
    { 'i', 'C', 363 },
    { 'k', 'c', 1082 },
    { 'l', 'c', 652 },
    { 'm', 'c', 1084 },
    { 'n', 'c', 1085 },
    { 'o', 'c', 1086 },
    { 'p', 'c', 1087 },
    { 'r', 'c', 1088 },
    { 's', 'c', 1089 },
    { 't', 'c', 'm' },
    { 'y', 'c', 1091 },
    { 'f', 'c', 1092 },
    { 'h', 'c', 1093 },
    { 't', 's', 1094 },
    { 'c', 'h', 1095 },
    { 's', 'h', 1096 },
    { 's', 't', 1097 },
    { 'u', 'c', 1098 },
    { 'b', 'I', 1099 },
    { 'b', 'c', 1100 },
    { '3', 'c', 1101 },
    { 'i', 'o', 1102 },
    { 'y', 'a', 1103 },

    { 0, 0, 0 }
  };

  for (i = 0; polish_chars[i].ch1 != 0; i++) {
    if (polish_chars[i].ch1 == ch1 && polish_chars[i].ch2 == ch2) {
      if (polish_chars[i].result < 256) {
	buf[0] = (char) polish_chars[i].result;
	buf[1] = '\0';
      } else {
	sprintf(buf, "&#%d;", polish_chars[i].result);
      }
      return buf;
    }
  }

  return NULL;
}

/* check directories of paths */

static void check_dir(char *name, int *len_ptr, int max_len);

static void check_dir(name, len_ptr, max_len)
char *name;
int *len_ptr;
int max_len;
{
  int len, newlen;
  char newname[ MAXLINE ];

  len = *len_ptr;

  if (len > 4 &&
      len + 3 < max_len &&
      len + 3 < MAXLINE &&
      access(name, R_OK) != 0) {
    memcpy(newname, "../", 3);
    memcpy(&newname[3], name, len);
    newlen = len + 3;
    newname[ newlen ] = '\0';
    if (access(newname, R_OK) == 0) {
      strcpy(name, newname);
      *len_ptr = newlen;
    }
  }
}

/* read the next line into line and line_len */
/*	returns the length or -1 for EOF */

static int get_line(FILE *f);

static int get_line(FILE *f)
{
	int ch;
	int i;
	int new_len;
	int force_copy;
	char new_line[ MAXLINE ];
	const char *polish_str;

	line_len = 0;
	ch = getc(f);
	while (ch != '\r' && ch != '\n' && ch != EOF && line_len < MAXLINE-10) {
		if (in_asis) {
			if (ch == '\0') ch = ' ';
			line[ line_len++ ] = (char) ch;
		} else if (ch == '<') {
			strcpy(&line[ line_len ], "&lt;");
			line_len += 4;
		} else if (ch == '>') {
			strcpy(&line[ line_len ], "&gt;");
			line_len += 4;
		} else if (ch == '&') {
			strcpy(&line[ line_len ], "&amp;");
			line_len += 5;
		} else if (ch == '~' && line_len > 0 &&
			(line[ line_len-1 ] == 'N' ||
			 line[ line_len-1 ] == 'n')) {
			sprintf(&line[ line_len - 1 ],
				"&%ctilde;", line[ line_len - 1 ]);
			line_len += 7;
		} else if (ch == '~' && line_len > 1 &&
			line[ line_len-1 ] == '\\' &&
			(line[ line_len-2 ] == 'N' ||
			 line[ line_len-2 ] == 'n')) {
			sprintf(&line[ line_len - 2 ],
				"&%ctilde;", line[ line_len - 2 ]);
			line_len += 6;
		} else if ((ch == '\'' || ch == '`' || ch == '^' ||
				ch == '~' || ch == '"' || ch == '/' ||
				ch == 'e' || ch == 'o') &&
			line_len > 1 &&
			line[ line_len-1 ] == '\\' &&
			strchr("aeiouAEIOU", line[ line_len-2 ]) != NULL) {
			const char *accent;
			switch (ch) {
			case '\'': accent = "acute"; break;
			case '`': accent = "grave"; break;
			case '^': accent = "circ"; break;
			case '~': accent = "tilde"; break;
			case '"': accent = "uml"; break;
			case '/': accent = "slash"; break;
			case 'e': accent = "elig"; break;
			case 'o': accent = "ring"; break;
			default: accent = ""; break;
			}
			sprintf(&line[ line_len - 2 ],
				"&%c%s;", line[ line_len - 2 ], accent);
			line_len += 1 + strlen(accent);
		} else if (line_len > 1 && line[line_len-1] == '\\' &&
			   (polish_str = convert_polish_chars(line[line_len-2], ch)) != NULL) {
			line_len -= 2;
			while (*polish_str != '\0' && line_len < MAXLINE) {
			  line[ line_len++ ] = *polish_str++;
			}
		} else if (ch == ')' &&
			   line_len > 3 &&
			   (line[ line_len-1 ] == 'c' || line[ line_len-1 ] == 'C') &&
			   line[ line_len-2 ] == '(') {
			strcpy(&line[ line_len-2 ], "&copy;");
			line_len += 6 - 2;
		} else if (ch == 's' &&
			   line_len > 2 &&
			   line[ line_len-1 ] == '\\' &&
			   line[ line_len-2 ] == 's') {
		  sprintf(&line[ line_len-2 ], "&szlig;");
		  line_len += 7 - 2;
		} else if (ch == ',' &&
			   line_len > 2 &&
			   line[ line_len-1 ] == '\\' &&
			   line[ line_len-2 ] == 'c') {
		  sprintf(&line[ line_len-2 ], "&ccedil;");
		  line_len += 8 - 2;
		} else if (ch == '.' && line_len > 1 &&
				line[ line_len-1 ] == '\\') {
		  sprintf(&line[ line_len-1 ], "&middot;");
		  line_len += 8 - 1;
		} else if (ch == '0' && line_len > 1 &&
				line[ line_len - 1 ] == '\\') {
		  sprintf(&line[ line_len-1 ], "&deg;");
		  line_len += 5 - 1;
		} else if (ch == 'o' && line_len > 1 &&
				line[ line_len - 1 ] == '\\') {
		  sprintf(&line[ line_len-1 ], "&ordm;");
		  line_len += 6 - 1;
		} else if (ch == '-' && line_len > 1 &&
				line[ line_len - 1 ] == '\\') {
		  if (line_len - 2 >= 0 && line[ line_len - 2 ] == '\\') {
		    line[ line_len - 2 ] = '\\';
		    line[ line_len - 1 ] = '-';
		  } else {
		    sprintf(&line[ line_len-1 ], "&shy;");
		    line_len += 5 - 1;
		  }
		} else if (ch == '2' && line_len > 3 &&
				line[ line_len - 1 ] == '/' &&
				line[ line_len - 2 ] == '1' &&
				line[ line_len - 3 ] == '\\') {
		  sprintf(&line[ line_len-3 ], "&frac12;");
		  line_len += 8 - 3;
		} else if (ch == '"') {
			strcpy(&line[ line_len ], "&quot;");
			line_len += 6;
		} else {
			if (ch == '\0') ch = ' ';
			line[ line_len++ ] = (char) ch;
		}
		ch = getc(f);
	}
	if (ch == '\r') ch = getc(f);

	/* check for comments, backslash + exclamation at start */

	if (line_len >= 2 && line[ 0 ] == '\\' && line[ 1 ] == '!') {
		line_len = 0;
	}

	/* check for index-only text */
	/*	look for + or = */
	/*	currently they are the same, but maybe later they will be different */
	/*	use + for hidden items, = for public but unneeded items */

	if (line_len >= 2 && line[ 0 ] == '\\' && (line[ 1 ] == '+' || line[ 1 ] == '=')) {
		if (allow_index && !comment_plus_lines) {
			int strip_len;
			/* strip the code */
			strip_len = 2;
			if (line_len > strip_len && line[ strip_len ] == ' ') {
				strip_len++;
			}
			line_len -= strip_len;
			for (i = 0; i < line_len; i++) {
				line[ i ] = line[ i + strip_len ];
			}
		} else {
			/* remove the line */
#if 1
			line_len = 0;
#else
			line[ 0 ] = '['; line[ 1 ] = 'b'; line[ 2 ] = 'r'; line[ 3 ] = ']';
			line_len = 4;
#endif
		}
	}

	line[ line_len ] = '\0';

	if (line_len == 0 && ch == EOF) {
		return -1;
	}

	/* convert urls into links */

	i = 0;
	new_len = 0;
	force_copy = 0;

	while (i < line_len) {
		if (in_asis) {
			if (strncmp(&line[i], "[/asis]", 7) == 0) {
				i += 7;
				in_asis--;
				force_copy = 1;
			} else {
				if (new_len >= MAXLINE-10) break;
				new_line[ new_len++ ] = line[ i++ ];
			}
		} else if (((line[i] == 'h' && strncmp(&line[i], "http:/", 6) == 0) ||
			    (line[i] == 'h' && strncmp(&line[i], "https:/", 7) == 0) ||
			    (line[i] == 'f' && strncmp(&line[i], "ftp:/", 5) == 0) ||
			    (line[i] == 'm' && strncmp(&line[i], "mailto:", 7) == 0)) &&
			   (i <= 0 || line[i-1] != '[') &&
			   !in_brackets(line, i, line_len)) {
			int pos, url_len;
			int found_lparen, found_rparen;
			/* Gather the URL from line[i] to line[pos-1] */
			found_lparen = found_rparen = 0;
			pos = i+1;
			while (line[ pos ] != '\0' && (line[ pos ] & 0xFF) > ' ') {
				if (line[ pos ] == '[' || line[ pos ] == ']' ||
				    (line[ pos ] == '(' && line[ pos-1 ] != '_') ||
				    (line[ pos ] == ')' && found_lparen <= found_rparen)) {
					break;
				}
				if (line[ pos ] == '(') {
					found_lparen++;
				}
				if (line[ pos ] == ')' && found_lparen > found_rparen) {
					found_rparen++;
				}
				if (line[ pos ] == ',' &&
				    !((line[ pos+1 ] >= 'a' && line[ pos+1 ] <= 'z') ||
				      (line[ pos+1 ] >= 'A' && line[ pos+1 ] <= 'Z') ||
				      (line[ pos+1 ] >= '0' && line[ pos+1 ] <= '9') ||
				      (line[ pos+1 ] == '%') ||
				      (line[ pos+1 ] == '+') ||
				      (line[ pos+1 ] == '_') ||
				      (line[ pos+1 ] == ',' && line[ pos+2 ] >= '0' && line[ pos+2 ] <= '9'))) {
					break;
				}
				pos++;
			}
			if (line[ pos-1 ] == '.' || line[ pos-1 ] == ',' ||
			    line[ pos-1 ] == ':' || line[ pos-1 ] == ';' ||
			    (line[ pos-1 ] == ')' && found_rparen == 0)) {
				pos--;
			}
			url_len = pos - i;
			if (new_len + 2 * url_len + 20 >= MAXLINE - 10) break;
			/* Generate <a href=" + URL + "> + URL + </a> */
			strcpy(&new_line[ new_len ], "<a href=\"");
			new_len += 9;
			memcpy(&new_line[ new_len ], &line[ i ], url_len);
			new_len += url_len;
			new_line[ new_len++ ] = '"';
			new_line[ new_len++ ] = '>';
			if (line[i] == 'm') {
				memcpy(&new_line[ new_len ], &line[ i+7 ], url_len-7);
				new_len += url_len-7;
			} else {
				memcpy(&new_line[ new_len ], &line[ i ], url_len);
				new_len += url_len;
			}
			strcpy(&new_line[ new_len ], "</a>");
			new_len += 4;
			i = pos;
		} else if (line[i] == '[' && new_len < MAXLINE - 10) {
			force_copy = 1;
			if (strncmp(&line[i], "[pre]", 5) == 0) {
				i += 5;
				strcpy(&new_line[ new_len ], "<pre>");
				new_len += 5;
				preformatted = 1;
			} else if (strncmp(&line[i], "[/pre]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "</pre>");
				new_len += 6;
				preformatted = 0;
			} else if (strncmp(&line[i], "[fixed]", 7) == 0) {
				i += 7;
				fixed_format = 1;
			} else if (strncmp(&line[i], "[/fixed]", 8) == 0) {
				i += 8;
				fixed_format = 0;
			} else if (strncmp(&line[i], "[align]", 7) == 0) {
				i += 7;
				do_align = 1;
			} else if (strncmp(&line[i], "[/align]", 8) == 0) {
				i += 8;
				do_align = 0;
			} else if (strncmp(&line[i], "[code]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "<code>");
				new_len += 6;
			} else if (strncmp(&line[i], "[/code]", 7) == 0) {
				i += 7;
				strcpy(&new_line[ new_len ], "</code>");
				new_len += 7;
			} else if (strncmp(&line[i], "[comment]", 9) == 0) {
				i += 9;
				if (do_black) {
				  strcpy(&new_line[ new_len ], "<font color=\"#FFFF40\"><i>");	/* yellow */
				} else {
				  strcpy(&new_line[ new_len ], "<font color=\"#FF0000\"><i>");	/* red */
				}
				new_len += 22 + 3;
			} else if (strncmp(&line[i], "[/comment]", 10) == 0) {
				i += 10;
				strcpy(&new_line[ new_len ], "</i></font>");
				new_len += 4 + 7;
			} else if (strncmp(&line[i], "[i]", 3) == 0) {
				i += 3;
				strcpy(&new_line[ new_len ], "<i>");
				new_len += 3;
			} else if (strncmp(&line[i], "[/i]", 4) == 0) {
				i += 4;
				strcpy(&new_line[ new_len ], "</i>");
				new_len += 4;
			} else if (strncmp(&line[i], "[b]", 3) == 0) {
				i += 3;
				strcpy(&new_line[ new_len ], "<b>");
				new_len += 3;
			} else if (strncmp(&line[i], "[/b]", 4) == 0) {
				i += 4;
				strcpy(&new_line[ new_len ], "</b>");
				new_len += 4;
			} else if (strncmp(&line[i], "[u]", 3) == 0) {
				i += 3;
				strcpy(&new_line[ new_len ], "<u>");
				new_len += 3;
			} else if (strncmp(&line[i], "[/u]", 4) == 0) {
				i += 4;
				strcpy(&new_line[ new_len ], "</u>");
				new_len += 4;
			} else if (strncmp(&line[i], "[nbsp]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "&nbsp;");
				new_len += 6;

			} else if (strncmp(&line[i], "[table]", 7) == 0) {
				in_table++;
				i += 7;
				strcpy(&new_line[ new_len ], "<table border cellpadding=1>");
				new_len += 28;
			} else if (strncmp(&line[i], "[/table]", 8) == 0) {
				if (in_table > 0) {
					in_table--;
					if (in_tablerow > in_table) {
						in_tablerow--;
						if (in_tabledata + in_tablehead > in_tablerow) {
							if (in_tabledata > 0) {
								in_tabledata--;
								strcpy(&new_line[ new_len ], "</td>");
								new_len += 5;
							} else {
								in_tablehead--;
								strcpy(&new_line[ new_len ], "</th>");
								new_len += 5;
							}
						}
						strcpy(&new_line[ new_len ], "</tr>");
						new_len += 5;
					}
				} else {
					fprintf(stderr, "Warning: too many table ends\n");
				}
				i += 8;
				strcpy(&new_line[ new_len ], "</table>");
				new_len += 8;

			} else if (strncmp(&line[i], "[tr]", 4) == 0) {
				if (in_table == 0) {
					in_table++;
					strcpy(&new_line[ new_len ], "<table>");
					new_len += 7;		
				}
				if (in_table > 0 && in_tablerow >= in_table && in_tabledata + in_tablehead >= in_tablerow) {
					if (in_tabledata > 0) {
						in_tabledata--;
						strcpy(&new_line[ new_len ], "</td>");
						new_len += 5;
					} else {
						in_tablehead--;
						strcpy(&new_line[ new_len ], "</th>");
						new_len += 5;
					}
				}
				if (in_table > 0 && in_tablerow >= in_table) {
					in_tablerow--;
					strcpy(&new_line[ new_len ], "</tr>");
					new_len += 5;
				}
				in_tablerow++;
				i += 4;
				strcpy(&new_line[ new_len ], "<tr>");
				new_len += 4;
			} else if (strncmp(&line[i], "[/tr]", 5) == 0) {
				if (in_tablerow > 0) {
					in_tablerow--;
					if (in_tabledata > in_tablerow) {
						in_tabledata--;
						strcpy(&new_line[ new_len ], "</td>");
						new_len += 5;
					}
				} else {
					fprintf(stderr, "Warning: too many table row ends\n");
				}
				i += 5;
				strcpy(&new_line[ new_len ], "</tr>");
				new_len += 5;

			} else if (strncmp(&line[i], "[th]", 4) == 0) {
				if (in_table == 0) {
					in_table++;
					strcpy(&new_line[ new_len ], "<table>");
					new_len += 7;		
				}
				if (in_tablerow < in_table) {
					in_tablerow++;
					strcpy(&new_line[ new_len ], "<tr>");
					new_len += 4;
				}
				if (in_table > 0 && in_tablerow >= in_table && in_tabledata + in_tablehead >= in_tablerow) {
					if (in_tabledata > 0) {
						in_tabledata--;
						strcpy(&new_line[ new_len ], "</td>");
						new_len += 5;
					} else {
						in_tablehead--;
						strcpy(&new_line[ new_len ], "</th>");
						new_len += 5;
					}
				}
				in_tablehead++;
				i += 4;
				strcpy(&new_line[ new_len ], "<th>");
				new_len += 4;
			} else if (strncmp(&line[i], "[/th]", 5) == 0) {
				if (in_tablehead > 0) {
					in_tablehead--;
				} else {
					fprintf(stderr, "Warning: too many table head ends\n");
				}
				i += 5;
				strcpy(&new_line[ new_len ], "</th>");
				new_len += 5;

			} else if (strncmp(&line[i], "[td]", 4) == 0) {
				if (in_table == 0) {
					in_table++;
					strcpy(&new_line[ new_len ], "<table>");
					new_len += 7;		
				}
				if (in_tablerow < in_table) {
					in_tablerow++;
					strcpy(&new_line[ new_len ], "<tr>");
					new_len += 4;
				}
				if (in_table > 0 && in_tablerow >= in_table && in_tabledata + in_tablehead >= in_tablerow) {
					if (in_tabledata > 0) {
						in_tabledata--;
						strcpy(&new_line[ new_len ], "</td>");
						new_len += 5;
					} else {
						in_tablehead--;
						strcpy(&new_line[ new_len ], "</th>");
						new_len += 5;
					}
				}
				in_tabledata++;
				i += 4;
				strcpy(&new_line[ new_len ], "<td>");
				new_len += 4;
			} else if (strncmp(&line[i], "[/td]", 5) == 0) {
				if (in_tabledata > 0) {
					in_tabledata--;
				} else {
					fprintf(stderr, "Warning: too many table data ends\n");
				}
				i += 5;
				strcpy(&new_line[ new_len ], "</td>");
				new_len += 5;

			} else if (strncmp(&line[i], "[br]", 4) == 0) {
				i += 4;
				strcpy(&new_line[ new_len ], "<br>");
				new_len += 4;
			} else if (strncmp(&line[i], "[p]", 3) == 0) {
				i += 3;
				strcpy(&new_line[ new_len ], "<br clear=\"all\">&nbsp;</p><p>");
				new_len += 29;
			} else if (strncmp(&line[i], "[/copy]", 7) == 0) {
				i += 7;
				copyright = 0;
			} else if (strncmp(&line[i], "[/footer]", 9) == 0) {
				i += 9;
				do_footer = 0;
			} else if (strncmp(&line[i], "[/anyfooter]", 12) == 0) {
				i += 12;
				do_anyfooter = 0;
			} else if (strncmp(&line[i], "[/index]", 8) == 0) {
				i += 8;
				allow_index = 0;
			} else if (strncmp(&line[i], "[/unusedcount]", 14) == 0) {
				i += 14;
				do_unused_count = 0;
			} else if (strncmp(&line[i], "[isin]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "&#8712;");
				new_len += 7;
			} else if (strncmp(&line[i], "[infin]", 7) == 0) {
				i += 7;
				strcpy(&new_line[ new_len ], "&#8734;");
				new_len += 7;
			} else if (strncmp(&line[i], "[phi]", 5) == 0) {
				i += 5;
				strcpy(&new_line[ new_len ], "&#966;");
				new_len += 6;
			} else if (strncmp(&line[i], "[sub]", 5) == 0) {
				i += 5;
				strcpy(&new_line[ new_len ], "<sub><font size=\"-1\">");
				new_len += 21;
				sub_count++;
			} else if (strncmp(&line[i], "[/sub]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "</font></sub>");
				new_len += 13;
				sub_count--;
			} else if (strncmp(&line[i], "[sup]", 5) == 0) {
				i += 5;
				strcpy(&new_line[ new_len ], "<sup><font size=\"-1\">");
				new_len += 21;
				sup_count++;
			} else if (strncmp(&line[i], "[/sup]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "</font></sup>");
				new_len += 13;
				sup_count--;
			} else if (strncmp(&line[i], "[big]", 5) == 0) {
				i += 5;
				strcpy(&new_line[ new_len ], "<font size=\"+1\">");
				new_len += 16;
				big_count++;
			} else if (strncmp(&line[i], "[/big]", 6) == 0) {
				i += 6;
				strcpy(&new_line[ new_len ], "</font></sub>");
				new_len += 13;
				big_count--;
			} else if (strncmp(&line[i], "[small]", 7) == 0) {
				i += 7;
				strcpy(&new_line[ new_len ], "<font size=\"-1\">");
				new_len += 16;
				small_count++;
			} else if (strncmp(&line[i], "[/small]", 8) == 0) {
				i += 8;
				strcpy(&new_line[ new_len ], "</font></sup>");
				new_len += 13;
				small_count--;
			} else if (strncmp(&line[i], "[asis]", 6) == 0) {
				i += 6;
				in_asis++;
			} else if (strncmp(&line[i], "[/asis]", 7) == 0) {
				i += 7;
				if (in_asis > 0) {
					in_asis--;
				} else {
					fprintf(stderr, "Warning: extra end asis\n");
				}
			} else if (strncmp(&line[i], "[polish]", 8) == 0) {
				i += 8;
				lang_type = LANG_POLISH;
			} else if (strncmp(&line[i], "[name,", 6) == 0) {
				int label_start, pos;
				label_start = pos = i + 6;
				while (pos < line_len &&
					((line[pos] >= 'a' && line[pos] <= 'z') ||
					 (line[pos] >= 'A' && line[pos] <= 'Z') ||
					 (line[pos] >= '0' && line[pos] <= '9'))) {
					pos++;
				}
				if (pos < line_len && pos > label_start && line[pos] == ']') {
					sprintf(&new_line[ new_len ],
						"<a name=\"%.*s\">",
						pos - label_start, &line[ label_start ]);
					new_len += 11 + pos - label_start;
					i = pos+1;
					in_name++;
				}
			} else if (strncmp(&line[i], "[/name]", 7) == 0) {
				i += 7;
				if (in_name > 0) {
					in_name--;
					strcpy(&new_line[ new_len ], "</a>");
					new_len += 4;
				} else {
					fprintf(stderr, "Warning: extra end name\n");
				}
			} else if (strncmp(&line[i], "[black]", 7) == 0) {
				i += 7;
				do_black = 1;
			} else if (i+1 < line_len && line[i+1] == '#') {
				int pos;
				pos = i+2;
				while (pos < line_len &&
				       ((line[pos] >= 'a' && line[pos] <= 'z') ||
				        (line[pos] >= 'A' && line[pos] <= 'Z') ||
				        (line[pos] >= '0' && line[pos] <= '9'))) {
					pos++;
				}
				if (pos < line_len && line[pos] == ']') {
					int url_len;
					url_len = pos - i - 1;
					sprintf(&new_line[ new_len ],
						"<a href=\"%.*s\">%.*s</a>",
						url_len, &line[i+1],
						url_len, &line[i+1]);
					new_len += 15 + 2 * url_len;
					i = pos+1;
				} else {
					new_line[ new_len++ ] = line[ i++ ];
				}
			} else {
				new_line[ new_len++ ] = line[ i++ ];
			}
		} else if (line[i] == '.' && new_len < MAXLINE - 100 &&
				(strncmp(&line[i], ".jpg]", 5) == 0 ||
				 strncmp(&line[i], ".jpeg]", 6) == 0 ||
				 strncmp(&line[i], ".gif]", 5) == 0 ||
				 strncmp(&line[i], ".tiff]", 6) == 0 ||
				 strncmp(&line[i], ".png]", 5) == 0 ||
				 strncmp(&line[i], ".html]", 6) == 0 ||
				 (strncmp(&line[i], ".html#", 6) == 0 && is_only_text(&line[i+6])) ||
				 strncmp(&line[i], ".doc]", 5) == 0 ||
				 strncmp(&line[i], ".pdf]", 5) == 0 ||
				 strncmp(&line[i], ".eps]", 5) == 0 ||
				 strncmp(&line[i], ".ps]", 4) == 0 ||
				 strncmp(&line[i], ".c]", 3) == 0 ||
				 strncmp(&line[i], ".sh]", 4) == 0 ||
				 strncmp(&line[i], ".1]", 3) == 0 ||
				 strncmp(&line[i], ".mpg]", 5) == 0 ||
				 strncmp(&line[i], ".thm]", 5) == 0 ||
				 strncmp(&line[i], ".txt]", 5) == 0 ||
				 strncmp(&line[i], ".scm]", 5) == 0 ||
				 (line[i+1] != ' ' && is_text_and_option(&line[i])))) {

			/* gather the file name backwards */

			int pos, name_len, ext_len, i_save;
			int comma_pos, link_pos, link_len, image_pos;
			int is_background;
			int is_dot_image;
			int is_special_file;
			int semi_ok;
			int comma_ok;
			char sep_char;
			char name[ MAXLINE ], ext[ MAXLINE ], link[ MAXLINE ];

			/* read the extension */

			is_special_file = 0;

			ext_len = 0;
			i_save = i;
			ext[ ext_len++ ] = line[ i++ ];
			while ((line[i] >= 'a' && line[i] <= 'z') ||
				(line[i] >= 'A' && line[i] <= 'Z') ||
				(line[i] >= '0' && line[i] <= '9') ||
				line[i] == '#' ||
				line[i] == '-' ||
				line[i] == '_' ||
				line[i] == '.') {
				ext[ ext_len++ ] = line[ i++ ];
			}
			ext[ ext_len ] = '\0';
			if (line[i] == ':') {
				i++;
				is_special_file = line[i];
				if (line[i] >= 'a' && line[i] <= 'z') i++;
			}
			if (line[i] == ']') i++;

			/* locate the file name */

			pos = new_len;
			comma_pos = 0;
			semi_ok = 0;
			comma_ok = 0;
			sep_char = ' ';
			while (pos > 0 &&
				new_line[ pos-1 ] > ' ' &&
				new_line[ pos-1 ] != '>' &&
				new_line[ pos-1 ] != '"' &&
				new_line[ pos-1 ] != '[') {
				if (new_line[ pos-1 ] == '=' ||
				    new_line[ pos-1 ] == '&') {
					semi_ok = 1;
				}
				if (new_line[ pos-1 ] == '.' &&
				    comma_pos > 0 &&
				    !comma_ok &&
				    pos + 2 < new_len &&
				    new_line[ pos ] == 'h' &&
				    new_line[ pos+1 ] == 't' &&
				    new_line[ pos+2 ] == 'm') {
					/* allow commas in url path names */
					comma_ok = 1;
				}
				if ((new_line[ pos-1 ] == ',' && !comma_ok && !semi_ok &&
				     (pos >= new_len || new_line[ pos ] != '_')) ||
				    (new_line[ pos-1 ] == ';' && !semi_ok)) {
					if (comma_pos != 0 ||
					    pos <= 2 ||
					    new_line[ pos-2 ] == '[') {
						break;
					}
					comma_pos = pos-1;
					sep_char = new_line[ comma_pos ];
				}
				pos--;
			}

			/* check if we found something */

			if (pos <= 0 || new_line[pos-1] != '[') {
				i = i_save;
				new_line[ new_len++ ] = line[ i++ ];
				continue;
			}

			/* extract the link */

			link_pos = link_len = 0;
			image_pos = pos;
			is_background = 0;
			is_dot_image = 0;

			if (comma_pos > 0) {
				image_pos = comma_pos + 1;
				link_pos = pos;
				link_len = comma_pos - pos;
				memcpy(link, &new_line[ link_pos ], link_len);
				link[ link_len ] = '\0';
				check_dir(link, &link_len, MAXLINE);
				if (strcmp(link, "background") == 0) {
					is_background = 1;
				} else if (strcmp(link, "dotimage") == 0) {
					is_dot_image = 1;
				} else if (strncmp(link, "http:", 5) != 0 &&
					    strncmp(link, "https:", 6) != 0 &&
					    strncmp(link, "ftp:", 4) != 0 &&
					    access(link, R_OK) != 0) {
					int trim_pos;
					int access_result;
					access_result = 1;
					for (trim_pos = 0; link[ trim_pos ] != '\0'; trim_pos++) {
						if (link[ trim_pos ] == '#') {
							if (trim_pos == 0) {
								access_result = 0;
							} else {
								char newlink[ MAXLINE ];
								int newlen;
								link[ trim_pos ] = '\0';
								newlen = trim_pos;
								strcpy(newlink, link);
								check_dir(newlink, &newlen, MAXLINE);
								access_result = access(newlink, R_OK);
								link[ trim_pos ] = '#';
								if (access_result == 0 &&
								    newlen + link_len - trim_pos < MAXLINE) {
									memcpy(&newlink[newlen], &link[trim_pos],
										link_len - trim_pos);
									link_len += (newlen - trim_pos);
									memcpy(link, newlink, link_len);
									link[ link_len ] = '\0';
									break;
								}
							}
							if (access_result == 0) break;
						}
					}
					if (access_result != 0) {
						fprintf(stderr,
							"Warning: can not access link %s.\n",
							link);
					}
				}
			}

			/* build the name in a string */

			name_len = new_len - image_pos;
			sprintf(name, "%.*s%s",
				name_len, &new_line[ image_pos ], ext);
			name_len += ext_len;
			check_dir(name, &name_len, MAXLINE);
			if (strncmp(name, "http:", 5) != 0 &&
			    strncmp(name, "https:", 6) != 0 &&
			    strncmp(name, "ftp:", 4) != 0 &&
			    access(name, R_OK) != 0) {
				int trim_pos;
				int access_result;
				access_result = 1;
				for (trim_pos = 0; name[ trim_pos ] != '\0'; trim_pos++) {
					if (name[ trim_pos ] == '#') {
						char newname[ MAXLINE ];
						int newlen;
						name[ trim_pos ] = '\0';
						newlen = trim_pos;
						strcpy(newname, name);
						check_dir(newname, &newlen, MAXLINE);
						access_result = access(newname, R_OK);
						name[ trim_pos ] = '#';
						if (access_result == 0) {
							if (newlen + name_len - trim_pos < MAXLINE) {
								memcpy(&newname[newlen], &name[trim_pos], name_len - trim_pos);
								name_len += (newlen - trim_pos);
								memcpy(name, newname, name_len);
								name[ name_len ] = '\0';
							}
							break;
						}
					}
				}
				if (access_result != 0) {
					fprintf(stderr,
						"Warning: can not access image %s.\n", name);
				}
			}

			/* Generate <img src=" + name + "> */

			new_len = pos - 1;

			if (is_special_file != '\0' && is_special_file != 't') {
				fprintf(stderr, "File '%s' has unknown type '%c'\n", name, is_special_file);
			}

			if (is_background) {
				sprintf(&new_line[ new_len ],
					"<body background=\"%s\">", name);
				new_len += name_len + 20;
			} else if (is_dot_image) {
				sprintf(&new_line[ new_len ],
					"<img align=\"left\" src=\"%s\" width=\"1\" height=\"1\">",
					name);
				new_len += name_len + 46;
			} else if (ext[1] == 'h' || ext[1] == 'd' ||
				   ext[1] == 'c' || ext[1] == 's' ||
				   ext[1] == '1' ||
				   ext[1] == 'm' ||
				   (ext[1] == 't' && ext[2] == 'x' && ext[3] == 't') ||
				   (ext[1] == 's' && ext[2] == 'c' && ext[3] == 'm') ||
				   sep_char == ';' ||
				   is_special_file == 't') {
				int desc_pos;
				desc_pos = 0;
				if (name_len > 5 && memcmp(name, "high/", 5) == 0) {
					desc_pos += 5;
				}
				sprintf(&new_line[ new_len ],
					"<a href=\"%s\">%s</a>", name, &name[ desc_pos ]);
				new_len += name_len + 9 + 2 +
						name_len - desc_pos + 4;
			} else {
				int image_width, image_height;
				char image_size_buf[ 80 ];
				if (in_align) {
					sprintf(&new_line[ new_len ],
						"<br clear=\"all\">&nbsp;</p><p>");
					new_len += 29;
					in_align = 0;
				}
				if (link_len > 0) {
					sprintf(&new_line[ new_len ],
						"<a href=\"%.*s\">",
						link_len, link);
					mark_image_used(link);
					new_len += link_len + 9 + 2;
				}
				image_width = image_height = 0;
				get_image_size(name, &image_width, &image_height);
				image_size_buf[0] = '\0';
				if (image_height > 0 && image_width > 0) {
					sprintf(image_size_buf,
						" width=\"%d\" height=\"%d\"",
						image_width, image_height);
				}
				sprintf(&new_line[ new_len ],
					"<img src=\"%s\"%s%s>",
					name,
					image_size_buf,
					(do_align? " align=\"left\"": ""));
				if (do_align) { in_align = 2; new_len += 13; }
				new_len += name_len + 10 + 2 + strlen(image_size_buf);
				if (link_len > 0) {
					sprintf(&new_line[ new_len ], "</a>");
					new_len += 4;
				}
			}
		} else {
			if (new_len >= MAXLINE-100) break;
			new_line[ new_len++ ] = line[ i++ ];
		}
	}

	new_line[ new_len ] = '\0';

	if (force_copy || new_len > line_len) {
		strcpy(line, new_line);
		line_len = new_len;
	}

	return line_len;
}

/* test if a line is all dashes */

static int all_dashes(void)
{
	int i;
	char ch;

	for (i = 0; i < line_len; i++) {
		ch = line[ i ];
		if (ch != ' ' && ch != '-' && ch != '=' && ch != '+') return 0;
	}

	return 1;
}

/* HTML heading */

const struct {
	enum param_type_enum param;
	const char *text;
} header[] = {
	{ NL_STRING, "<html>" },
	{ NL_STRING, "<head>" },
	{ LANG_STRING, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-" },
	{ NL_STRING, "\">" },
	{ TITLE_STRING, "<title>" },
	{ NL_STRING, "</title>" },
	{ NL_STRING, "</head>" },
	{ BGCOLOR_STRING, "<body bgcolor=\"#" },
	{ NL_STRING, "\" text=\"#FFFFFF\" link=\"#9090FF\" vlink=\"#FF80FF\">" },
	{ NL_STRING, "<font face=\"Helvetica,Arial,sans-serif\">" },
	{ NL_STRING, "<h2><font color=\"#FFFFFF\">" },
	{ NL_STRING, NULL }
};

const char *header2[] =
{
"</font></h2>",
NULL
};

/* HTML footing line data */

const struct {
	enum param_type_enum param;
	int is_footer;
	int is_copyright;
	const char *text;
} footer[] = {
{ NL_STRING,	0, 0, "<br clear=\"all\">" },
{ NL_STRING,	0, 0, "<hr>" },
{ NL_STRING,	0, 0, "<center>" },
{ NL_STRING,	0, 0, "  <p><font size=\"-1\">" },
{ URL_STRING,	1, 0, "  " },
{ NL_STRING,	1, 0, " - " },
{ DATE_STRING,	0, 0, "Revised " },
{ NL_STRING,	0, 0, ".<br>" },
{ YEAR_STRING,	0, 1, "    Copyright &copy; " },
{ NAME_STRING,	0, 1, " " },
{ NL_STRING,	0, 1, "." },
{ NL_STRING,	0, 0, "  </font></p>" },
{ NL_STRING,	0, 0, "</center>" },
{ NL_STRING,	0, 0, "</font>" },	/* close font opened at top of body */
{ NL_STRING,	0, 0, "</body>" },
{ NL_STRING,	0, 0, "</html>" },
{ NL_STRING,	0, 0, NULL }
};

/* Clear the left alignment */

static void clear_align(void)
{
	if (in_align) {
		printf("<br clear=\"all\">");
		in_align = 0;
	}
}


/* Process one file */

static void process_file(FILE *f)
{
	int i;
	int need_p;
	int in_p;
	int last_line_len;
	char last_ch;
	char first_ch;

	/* initialize counts */

	sub_count = sup_count = 0;
	big_count = small_count = 0;

	/* skip lines to the heading */

	get_line(f);
	while (line_len == 0) get_line(f);

	/* set the heading parameters */

	param[ TITLE_STRING ] = line;
	param[ BGCOLOR_STRING ] = (do_black? "000000": "000066");

	switch (lang_type) {
	case LANG_POLISH: param[ LANG_STRING ] = "2"; break;
	default: param[ LANG_STRING ] = "1"; break;
	}

	/* write the heading */

	for (i = 0; header[i].text != NULL; i++) {
		printf("%s%s", header[i].text, param[ header[i].param ]);
		if (i == 1 && !allow_index) {
			printf("<meta name=\"robots\" content=\"noindex,nofollow\">\n");
		}
		if (i == 5) {
			const char *favicon_dir;
#if 0
			/* commented out because the majority is in English */
			if (lang_type == LANG_POLISH) {
				printf("<meta http-equiv=\"Content-Language\" content=\"pl\">\n");
			}
#endif
			favicon_dir = NULL;
			if (access("favicon.ico", R_OK) == 0) {
				favicon_dir = "";
			} else if (access("../favicon.ico", R_OK) == 0) {
				favicon_dir = "../";
			} else if (access("../../favicon.ico", R_OK) == 0) {
				favicon_dir = "../../";
			} else if (access("../../../favicon.ico", R_OK) == 0) {
				favicon_dir = "../../../";
			}
			if (favicon_dir != NULL) {
				printf("<link rel=\"icon\" href=\"%sfavicon.ico\">\n",
					favicon_dir);
			}
		}
	}

	while (line_len > 0) {
		if (!all_dashes()) {
			printf("%s<br>\n", line);
		}
		get_line(f);
	}

	for (i = 0; header2[i] != NULL; i++) {
		printf("%s\n", header2[i]);
	}

	/* process the body */

	need_p = 0;
	in_p = 0;
	last_line_len = -1;
	last_ch = '\0';

	while (get_line(f) >= 0) {

		/* end the previous line */

		if (last_line_len > 0) {
			if (preformatted) {
				/* nothing to do */
			} else if (line_len <= 0) {
				if (in_align > 1) {
					in_align--;
				} else {
					clear_align();
				}
				if (in_p) {
					printf("</p>");
					in_p = 0;
				}
			} else {
				first_ch = line[ 0 ];
				i = 0;
				while (i < line_len && (i < 10 || line[i] != ' ')) {
					if (line[i] == ' ' && i > 5) {
						break;
					}
					i++;
					if (line[i] == ':') {
						if (i+1 < line_len &&
						    (line[i+1] == ' ' ||
						     line[i+1] == '\t')) {
							last_ch = ':';
						}
						break;
					}
				}
				if (line_len > 2 &&
				    line[0] == '*' &&
				    (line[1] == ' ' || line[1] == '\t')) {
					last_ch = ':';
				}
				i = 0;
				while (i < line_len && i < 5 &&
						(line[i] == ' ' || line[i] == '\t')) {
					i++;
				}
				if (i < line_len && line[i] >= '0' && line[i] <= '9') {
					while (i < line_len && i < 5 &&
							line[i] >= '0' && line[i] <= '9') {
						i++;
					}
					if (i+1 < line_len &&
						(line[i] == '.' || line[i] == ':' ||
						 line[i] == ')') &&
						(line[i+1] == ' ' ||
						 (line[i+1] >= 'A' && line[i+1] <= 'Z'))) {
						last_ch = ':';
					}
				}
				if (in_table == 0 &&
				    (fixed_format != 0 ||
				     last_ch == ':' ||
				     /* last_ch == '.' || */
				     first_ch == ' ' ||
				     first_ch == '\t' ||
				     first_ch == '+' ||
				     first_ch == '-' ||
				     first_ch == '_' ||
				     first_ch == '.' ||
				     first_ch == '!' ||
				     first_ch == '<' ||
				     first_ch == '>' ||
				     first_ch == ',' ||
				     first_ch == '=' ||
				     first_ch == '/')) {
					printf("<br>");
					clear_align();
				}
			}

			printf("\n");
		}

		/* start a new line */

		if (line_len == 0) {
			need_p = !preformatted;
		} else {
			if (all_dashes() &&
			    (!preformatted ||
			     (line_len > 40 &&
			      (line[0] == '-' || line[0] == '=')))) {
				clear_align();
				printf("<hr>");
			} else {
				if (need_p) {
					if (in_p) printf("</p>");
					printf("<p>");
					in_p = 1;
				}
				printf("%s", line);
			}
			need_p = 0;
		}

		/* save information about this line */

		last_line_len = line_len;

		last_ch = '\0';

		for (i = line_len - 1; i >= 0; i++) {
			if (line[i] != ' ') {
				last_ch = line[i];
				break;
			}
		}
	}

	clear_align();

	/* write the footer */

	if (preformatted) {
		preformatted = 0;
		printf("</pre>");
	}

	if (in_p) {
		in_p = 0;
		printf("</p>");
	}


	if (do_anyfooter) {
		for (i = 0; footer[i].text != NULL; i++) {
			if ((do_footer || !footer[i].is_footer) &&
			    (copyright || !footer[i].is_copyright)) {
				printf("%s%s",
					footer[i].text, param[ footer[i].param ]);
			}
		}
	}

	/* check counts */

	if (sub_count != 0) {
		fprintf(stderr, "Warning: subscript count is %d\n", sub_count);
	}

	if (sup_count != 0) {
		fprintf(stderr, "Warning: superscript count is %d\n", sup_count);
	}

	if (big_count != 0) {
		fprintf(stderr, "Warning: big count is %d\n", big_count);
	}

	if (small_count != 0) {
		fprintf(stderr, "Warning: small count is %d\n", small_count);
	}

	if (in_asis != 0) {
		fprintf(stderr, "Warning: asis count is %d\n", in_asis);
	}

	if (in_name != 0) {
		fprintf(stderr, "Warning: name count is %d\n", in_name);
	}

	if (in_table != 0) {
		fprintf(stderr, "Warning: table count is %d\n", in_table);
	}
	if (in_tablerow != 0) {
		fprintf(stderr, "Warning: table row count is %d\n", in_tablerow);
	}
	if (in_tablehead != 0) {
		fprintf(stderr, "Warning: table head count is %d\n", in_tablehead);
	}
	if (in_tabledata != 0) {
		fprintf(stderr, "Warning: table data count is %d\n", in_tabledata);
	}
}

/* set the year and date */

static void set_date(void)
{
	enum param_len_enum {
		YEAR_LEN = 10,
		DATE_LEN = 250
	} ;

	time_t t;
	struct tm *time_ptr;
	int year;
	char *s;

	time(&t);

	time_ptr = localtime(&t);

	if (!time_ptr) {
		fprintf(stderr, "error getting the time\n");
		return;
	}

	year = time_ptr->tm_year;
	if (year < 1900) year += 1900;

	s = malloc(YEAR_LEN);

	if (!s) {
		fprintf(stderr, "malloc for year failed\n");
		return;
	}

	sprintf(s, "%d", year);

	param[ YEAR_STRING ] = s;

	s = malloc(DATE_LEN);

	if (!s) {
		fprintf(stderr, "malloc for date failed\n");
		return;
	}

	strftime(s, DATE_LEN, "%B %d, %Y %r", time_ptr);

	param[ DATE_STRING ] = s;
}

/* set the user information */

static void set_user(void);

static void set_user(void)
{
#if SYSTEM_UNIX
	struct passwd *pw;
	char *u, *n, *g, *url;
	int len;

	pw = getpwuid( getuid() );

	if (!pw) return;

	g = pw->pw_gecos;

	if (!g) return;

	len = strlen(g);

	if (len <= 0) return;

	param[ NAME_STRING ] = n = malloc(len + 1);
	strcpy(n, g);

	url = u = malloc(len + 20);
	strcpy(u, "http://");
	u += strlen(u);
	while (*n != '\0') {
		if ((*n >= 'a' && *n <= 'z') || (*n >= '0' && *n <= '9')) {
			*u++ = *n;
		} else if (*n >= 'A' && *n <= 'Z') {
			*u++ = (char) ((*n - 'A') + 'a');
		}
		n++;
	}
	strcpy(u, ".com");

	param[ URL_STRING ] = u = malloc(strlen(url) * 2 + 30);
	sprintf(u, "<a href=\"%s\">%s</a>", url, url);

	free(url);
#endif
}

/* check the usage counts of images */

static void check_image_counts(void);

static void check_image_counts()
{
	static struct image_size_tag *image_ptr;

	image_ptr = image_cache_ptr;

	while (image_ptr != NULL) {
		if (image_ptr->image_use_count == 0) {
			if (do_unused_count) {
				fprintf(stderr, "Warning: image %s not used.\n",
					image_ptr->image_name);
			}
		} else if (image_ptr->image_use_count > 1) {
			fprintf(stderr, "Warning: image %s used %d times.\n",
				image_ptr->image_name, image_ptr->image_use_count);
		}
		image_ptr = image_ptr->next_image;
	}
}

/* main program */

int main(argc, argv)
int argc;
char *argv[];
{
	int i;
	int file_count;
	int did_help;
	char *s;
	FILE *f;

	for (i = 0; i < NUM_LINE_TYPES; i++) {
		param[ i ] = "";
	}

	param[ NL_STRING ] = "\n";

	set_date();

	set_user();

	file_count = 0;
	did_help = 0;

	for (i = 1; i < argc; i++) {
		s = argv[ i ];
		if (*s == '-') {
			s++;
			if (*s == '\0') {
				process_file(stdin);
				file_count++;
			} else if (*s == 'n') {
				param[ NAME_STRING ] = s+1;
			} else if (*s == 'u') {
				param[ URL_STRING ] = s+1;
			} else if (*s == 'f') {
				fixed_format = 1;
			} else if (*s == 'i') {
				allow_index = 0;
			} else if (*s == 'a') {
				allow_index = 1;
			} else if (*s == 'c') {
				comment_plus_lines = 1;
			} else {
				if (*s != 'h' && *s != '?') {
					fprintf(stderr,
						"Unknown option '%c'\n", *s);
				}
				if (!did_help) {
					did_help = 1;
					fprintf(stderr,
						"text2html: convert text to html\n");
					fprintf(stderr,
						"	-f      = fixed format\n");
					fprintf(stderr,
						"	-i      = set noindex\n");
					fprintf(stderr,
						"	-c      = comment \\+ lines, implied by -i\n");
					fprintf(stderr,
						"	-a      = allow index\n");
					fprintf(stderr,
						"	-n text = set name\n");
					fprintf(stderr,
						"	-u text = set url\n");
				}
			}
		} else {
			f = fopen(s, "r");
			if (f == NULL) {
				perror(s);
			} else {
				process_file(f);
				file_count++;
				fclose(f);
			}
		}
	}

	if (file_count == 0 && !did_help) {
		process_file(stdin);
		file_count++;
	}

	check_image_counts();

	return EXIT_SUCCESS;
}
