#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>


// Number of lines read so far from the current input file. Incremented by
// readln() on every call, reset by main() before the second table is read,
// and printed in the "invalid line" error messages.
int iLineNum = 0;

typedef wchar_t UnicodeChar; // Unicode type

// A code point: one node in the tree of byte sequences of the source encoding.
// Each node stands for a single byte, optionally carries the Unicode value the
// byte sequence leading to it translates into, and owns one child node for
// every byte that may follow it.
class CCharCodePoint
{
	enum { FOLLOWING_COUNT = 256 }; // one slot per possible byte value

	CCharCodePoint * FollowingChar[FOLLOWING_COUNT]; // owned children, indexed by (unsigned char) byte value. NULL if that byte may not follow.
	char cCodePoint; // code point value
	UnicodeChar uUnicodeChar; // translated Unicode value, 0 if this node has no translation
	char sComment[256]; // comment written behind the mapping, empty string if none

	// Not copyable: the children are owned through raw pointers, so a
	// member-wise copy would delete every child twice. Declared private and
	// intentionally left undefined.
	CCharCodePoint(const CCharCodePoint &);
	CCharCodePoint & operator=(const CCharCodePoint &);
public:
	CCharCodePoint(char c);
	~CCharCodePoint();
	char GetCodePoint() const {return cCodePoint;}
	CCharCodePoint & SetUnicodeChar(UnicodeChar u) {uUnicodeChar = u; return *this;}
	UnicodeChar GetUnicodeChar() const {return uUnicodeChar;}
	const char * GetComment() const {return sComment;}
	// Stores a copy of s, truncated to fit the comment buffer. NULL clears the comment.
	CCharCodePoint & SetComment(const char * s)
	{
		strncpy(sComment, s ? s : "", sizeof(sComment) - 1);
		sComment[sizeof(sComment) - 1] = 0;
		return *this;
	}
	CCharCodePoint * SeekFollowingChar(char c) const;
	CCharCodePoint * CreateFollowingChar(char c);
	CCharCodePoint * SeekFollowingUnicode(UnicodeChar u) const;
	void WriteToFile(FILE * fout, const char * sPrefix = NULL) const;
};


// Creates a node for code point c with no children, no translation and no comment.
CCharCodePoint::CCharCodePoint(char c)
	: cCodePoint(c), uUnicodeChar(0)
{
	for (int i = 0; i < FOLLOWING_COUNT; i++)
		FollowingChar[i] = NULL;
	sComment[0] = 0;
}

// Destroys the node together with the whole subtree below it.
CCharCodePoint::~CCharCodePoint()
{
	for (int i = 0; i < FOLLOWING_COUNT; i++)
		delete FollowingChar[i]; // deleting NULL is a no-op
}

// Returns the child for byte c, or NULL if c may not follow this code point.
CCharCodePoint * CCharCodePoint::SeekFollowingChar(char c) const
{
	CCharCodePoint * pNext = FollowingChar[(unsigned char)c];
	assert(!pNext || pNext->GetCodePoint() == c);
	return pNext;
}

// Returns the child for byte c, creating it first if it does not exist yet.
CCharCodePoint * CCharCodePoint::CreateFollowingChar(char c)
{
	CCharCodePoint * pNext = SeekFollowingChar(c);
	if (!pNext)
	{
		pNext = new CCharCodePoint(c);
		FollowingChar[(unsigned char)c] = pNext;
	}
	return pNext;
}

// Returns the first direct child (lowest byte value) whose Unicode value is u,
// or NULL if there is none. Only direct children are searched.
CCharCodePoint * CCharCodePoint::SeekFollowingUnicode(UnicodeChar u) const
{
	for (int i = 0; i < FOLLOWING_COUNT; i++)
	{
		CCharCodePoint * pNext = FollowingChar[i];
		if (pNext && pNext->GetUnicodeChar() == u)
			return pNext;
	}
	return NULL;
}

// Writes this node and its whole subtree to fout.
// The byte sequence of a node is written as two-digit upper-case hex values
// joined by '+', starting with sPrefix (the sequence of the parent, NULL for
// a top-level node). Children are written before the node itself. A node
// produces a line only if it has a translation ("<sequence>=<hex>") and/or a
// comment ("#<comment>").
void CCharCodePoint::WriteToFile(FILE * fout, const char * sPrefix) const
{
	// Size the buffer from the actual prefix instead of using a fixed array,
	// so a long prefix can never overflow it:
	// prefix + '+' (only with a prefix) + two hex digits + terminator.
	const size_t nPrefixLen = sPrefix ? strlen(sPrefix) + 1 : 0;
	char * sPath = new char[nPrefixLen + 3];
	if (sPrefix)
	{
		strcpy(sPath, sPrefix);
		strcat(sPath, "+");
	}
	sprintf(sPath + nPrefixLen, "%02X", (unsigned)(unsigned char)cCodePoint);

	for (int i = 0; i < FOLLOWING_COUNT; i++)
		if (FollowingChar[i])
			FollowingChar[i]->WriteToFile(fout, sPath);

	if (uUnicodeChar)
		fprintf(fout, "%s=%04X", sPath, (unsigned)uUnicodeChar); // %X expects unsigned int
	if (*sComment)
		fprintf(fout, "#%s", sComment);
	if (*sComment || uUnicodeChar)
		fprintf(fout, "\n");

	delete [] sPath;
}




// Reads one line from f into s (without the newline) and counts it in iLineNum.
// s becomes an empty string for an empty line and when nothing could be read
// (end of file or read error).
// NOTE: no length limit is applied to the read; the caller must supply a
// buffer large enough for the longest line of the input.
void readln(FILE * f, char * s)
{
	// fscanf returns 0 for an empty line and EOF (a non-zero value) at end of
	// file; in both cases s was not written and must be cleared here.
	if (fscanf(f, "%[^\n]", s) != 1)
		*s = 0;
	fgetc(f); // consume the newline that %[^\n] left in the stream
	iLineNum++;
}

// Reads the next line that is not a pure comment line (one starting with '#')
// from f into s and cuts off a trailing "#comment" part.
// If Comment is given, *Comment is set to the text behind the '#' inside s,
// or to NULL when the line has no comment, so callers never see a pointer
// left over from a previous line.
// s is an empty string when the line is empty or the end of the file was
// reached. A last line without a trailing newline is returned like any other.
void readlnIgnoringComments(FILE * f, char * s, char ** Comment = NULL)
{
	if (Comment)
		*Comment = NULL;
	do
	{
		*s = 0; // a read that fails at end of file must not leave the previous line behind
		readln(f, s);
	}
	while (!feof(f) && *s == '#');

	char * c = strchr(s, '#');
	if (c)
	{
		if (Comment)
			*Comment = c+1;
		*c = 0;
	}
}

// Parses a hexadecimal number at the start of sBuffer and advances sBuffer.
// Returns the parsed value. cTerminator receives the character that followed
// the number (0 at the end of the string). sBuffer is moved behind that
// character, or set to NULL when the end of the string was reached.
// If no number can be parsed, 0 is returned, cTerminator is 0 and sBuffer is
// left unchanged. The same happens when sBuffer is already NULL, so the
// function may safely be called again after the string was used up.
long splitstr(char *& sBuffer, char & cTerminator)
{
	cTerminator = 0;
	if (!sBuffer)
		return 0; // a previous call consumed the whole string
	char * endptr = NULL;
	long lRet = strtol(sBuffer, & endptr, 16);
	if (endptr == sBuffer)
		return 0; // no hex digits found
	cTerminator = *endptr;
	sBuffer = cTerminator ? endptr+1 : NULL;
	return lRet;
}


// Registers a combined character below CodePointRoot.
// uni1, uni2 and (optionally) uni3 are Unicode values that are looked up among
// the direct children of CodePointRoot to find their code points. The chain
// code point(uni1) -> code point(uni2) [-> code point(uni3)] is created and its
// last node is mapped to lCombined. uni3 == 0 means there is no third
// character. Nothing is added if one of the given characters has no code
// point below the root. Comment is stored on the last node unless that node
// already has a comment.
void AddCombinedCharacter(CCharCodePoint & CodePointRoot, UnicodeChar uni1, UnicodeChar uni2, UnicodeChar uni3, UnicodeChar lCombined, const char * Comment = NULL)
{
	CCharCodePoint * Ansel1 = CodePointRoot.SeekFollowingUnicode(uni1);
	CCharCodePoint * Ansel2 = CodePointRoot.SeekFollowingUnicode(uni2);
	// 0 is the "no third character" marker and must not be looked up: it would
	// match a node whose Unicode value happens to be 0.
	CCharCodePoint * Ansel3 = uni3 ? CodePointRoot.SeekFollowingUnicode(uni3) : NULL;

	if (uni3 && !Ansel3)
		return;
	if (!Ansel1 || !Ansel2)
		return;

	CCharCodePoint * cp = CodePointRoot.CreateFollowingChar(Ansel1->GetCodePoint());
	cp = cp->CreateFollowingChar(Ansel2->GetCodePoint());
	if (Ansel3)
		cp = cp->CreateFollowingChar(Ansel3->GetCodePoint());
	cp->SetUnicodeChar(lCombined);
	if (Comment && !*cp->GetComment())
		cp->SetComment(Comment);
}


int main (int argc, char *argv[ ])
{
	FILE * fans2uni = fopen("ans2uni.out", "rt");
	if (!fans2uni) return -1;
	FILE * fdia2spac = fopen("dia2spac.out", "rt");
	if (!fdia2spac) return -2;
	FILE * funicomb = fopen("unicomb.out", "rt");
	if (!funicomb) return -3;

	CCharCodePoint CodePointRoot(0);
	char sBuffer[2048], * Comment;

	// read the ansel to unicode table
	while (!feof(fans2uni))
	{
		readlnIgnoringComments(fans2uni, sBuffer, & Comment);
		if (*sBuffer)
		{
			char * str = sBuffer, cTerminator1 = 0, cTerminator2 = 0;
			long Ansel = splitstr(str, cTerminator1);
			long Unicode = splitstr(str, cTerminator2);
			if (!Ansel || cTerminator1 != '=' || !Unicode || str)
			{
				printf ("invalid line #%d in ansi2uni.out\n", iLineNum);
				return -4;
			}
			CCharCodePoint * newCodePoint = CodePointRoot.CreateFollowingChar((char)Ansel);
			assert(newCodePoint);
			newCodePoint->SetUnicodeChar((UnicodeChar)Unicode);
			if (Comment && !*newCodePoint->GetComment())
				newCodePoint->SetComment(Comment);
		}
	}
	// add normal ASCII chars
	for (int i = 0x20; i <= 0x7f; i++)
	{
		CCharCodePoint * cp = CodePointRoot.SeekFollowingChar((char)i);
		if (!cp)
		{
			cp = CodePointRoot.CreateFollowingChar((char)i);
			assert(cp);
			cp->SetUnicodeChar(i);
			sprintf(sBuffer, "ASCII character %c", (char)i);
			cp->SetComment(sBuffer);
		}
	}

	// read the unicode combined letter table
	iLineNum = 0;
	while (!feof(funicomb))
	{
		readlnIgnoringComments(funicomb, sBuffer, & Comment);
		if (*sBuffer)
		{
			char * str = sBuffer, cTerminator1 = 0, cTerminator2 = 0;
			UnicodeChar uni1 = (UnicodeChar) splitstr(str, cTerminator1), uni2 = 0, uni3 = 0;
			if (cTerminator1 == '+')
				uni2 = (UnicodeChar) splitstr(str, cTerminator1);
			if (cTerminator1 == '+')
				uni3 = (UnicodeChar) splitstr(str, cTerminator1);
			UnicodeChar lCombined = (UnicodeChar) splitstr(str, cTerminator2);
			if (!uni1 || !uni2 || cTerminator1 != '=' || str)
			{
				printf ("invalid line #%d in unicomb.out\n", iLineNum);
				return -5;
			}
			if (uni3)
			{
				AddCombinedCharacter(CodePointRoot, uni3, uni2, uni1, lCombined, Comment);
				AddCombinedCharacter(CodePointRoot, uni2, uni3, uni1, lCombined, Comment);
			}
			else
			{
				AddCombinedCharacter(CodePointRoot, uni2, uni1, 0, lCombined, Comment);
			}
		}
	}


	// write result
	FILE * fout = fopen("ans2uni.con", "wt");
	if (!fout)
		return -7;
	for (i = 1; i < 255; i++) 
	{
		char c = (char)i;
		CCharCodePoint * cp = CodePointRoot.SeekFollowingChar(c);
		if (cp)
			cp->WriteToFile(fout);
	}

	fclose (fout);
	fclose (fans2uni);
	fclose (fdia2spac);
	fclose (funicomb);
	return 0;
}