view dep/anitomy/dep/srell/unicode/updataout3.cpp @ 347:a0aa8c8c4307

dep/anitomy: port to use UCS-4 rather than wide strings rationale: wide strings are not the same on every platform, and might not even be Unicode. (while they usually are, its possible that they are not) I was *going* to change StringToInt to use a string stream, but outputting to an integer doesn't seem to work at all with UCS-4, even though it ought to, so I just rolled my own that uses the arabic digits only.
author Paper <paper@paper.us.eu.org>
date Sun, 23 Jun 2024 10:32:09 -0400
parents
children
line wrap: on
line source

//
//  updataout.cpp: version 3.002 (2023/12/29).
//
//  This is a program that generates srell_updata3.h from:
//    DerivedCoreProperties.txt
//    DerivedNormalizationProps.txt
//    PropList.txt
//    PropertyValueAliases.txt
//    ScriptExtensions.txt
//    Scripts.txt
//    UnicodeData.txt
//    emoji-data.txt
//    emoji-sequences.txt
//    emoji-zwj-sequences.txt
//  provided by the Unicode Consortium. The latest versions of them are
//  available at:
//    emoji-data.txt: http://www.unicode.org/Public/UNIDATA/emoji/
//    emoji-sequences.txt and emoji-zwj-sequences.txt:
//      http://www.unicode.org/Public/emoji/
//    others: http://www.unicode.org/Public/UNIDATA/
//

#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <cstring>	//  For std::strcmp
#include <string>
#include <vector>
#include <map>
#include <stdexcept>
#include <algorithm>	//  For std::swap in C++98/03
#include <utility>	//  For std::swap in C++11-
#define SRELL_NO_UNICODE_DATA
#include "../srell.hpp"

#if defined(_MSC_VER) && _MSC_VER >= 1400
#pragma warning(disable:4996)
#endif

namespace updata
{
//  Names of the non-binary properties, written as colon-separated strings
//  (long name first, then the short alias).
//  NOTE(review): the trailing "" looks like a list terminator, as in the
//  other tables of this namespace; the consumer is not in the visible part.
static const char *const property_names[] = {	//  3
	"General_Category:gc", "Script:sc", "Script_Extensions:scx", ""
};
//  Binary property names, one entry per property. An entry is either a bare
//  name or "Name:Alias"; combine_properties() splits it at ':' and treats
//  the first part as the canonical name. The trailing comment on each line
//  tells which data file (or which note, *1 / *2) the property comes from.
//  The final "" terminates the list (the walking loop stops at an empty
//  string).
static const char *const binary_property_names[] = {	//  53 (52+1)
	//  *1: http://unicode.org/reports/tr18/#General_Category_Property
	//  *2: 9th field in UnicodeData.txt
	"ASCII",								//  *1
	"ASCII_Hex_Digit:AHex",					//  PropList.txt
	"Alphabetic:Alpha",						//  DerivedCoreProperties.txt
	"Any",									//  *1
	"Assigned",								//  *1
	"Bidi_Control:Bidi_C",					//  PropList.txt
	"Bidi_Mirrored:Bidi_M",					//  *2
	"Case_Ignorable:CI",					//  DerivedCoreProperties.txt
	"Cased",								//  DerivedCoreProperties.txt
	"Changes_When_Casefolded:CWCF",			//  DerivedCoreProperties.txt
	"Changes_When_Casemapped:CWCM",			//  DerivedCoreProperties.txt
	"Changes_When_Lowercased:CWL",			//  DerivedCoreProperties.txt
	"Changes_When_NFKC_Casefolded:CWKCF",	//  DerivedNormalizationProps.txt
	"Changes_When_Titlecased:CWT",			//  DerivedCoreProperties.txt
	"Changes_When_Uppercased:CWU",			//  DerivedCoreProperties.txt
	"Dash",									//  PropList.txt
	"Default_Ignorable_Code_Point:DI",		//  DerivedCoreProperties.txt
	"Deprecated:Dep",						//  PropList.txt
	"Diacritic:Dia",						//  PropList.txt
	"Emoji",								//  emoji-data.txt
	"Emoji_Component:EComp",				//  emoji-data.txt
	"Emoji_Modifier:EMod",					//  emoji-data.txt
	"Emoji_Modifier_Base:EBase",			//  emoji-data.txt
	"Emoji_Presentation:EPres",				//  emoji-data.txt
	"Extended_Pictographic:ExtPict",		//  emoji-data.txt
	"Extender:Ext",							//  PropList.txt
	"Grapheme_Base:Gr_Base",				//  DerivedCoreProperties.txt
	"Grapheme_Extend:Gr_Ext",				//  DerivedCoreProperties.txt
	"Hex_Digit:Hex",						//  PropList.txt
	"IDS_Binary_Operator:IDSB",				//  PropList.txt
	"IDS_Trinary_Operator:IDST",			//  PropList.txt
	"ID_Continue:IDC",						//  DerivedCoreProperties.txt
	"ID_Start:IDS",							//  DerivedCoreProperties.txt
	"Ideographic:Ideo",						//  PropList.txt
	"Join_Control:Join_C",					//  PropList.txt
	"Logical_Order_Exception:LOE",			//  PropList.txt
	"Lowercase:Lower",						//  DerivedCoreProperties.txt
	"Math",									//  DerivedCoreProperties.txt
	"Noncharacter_Code_Point:NChar",		//  PropList.txt
	"Pattern_Syntax:Pat_Syn",				//  PropList.txt
	"Pattern_White_Space:Pat_WS",			//  PropList.txt
	"Quotation_Mark:QMark",					//  PropList.txt
	"Radical",								//  PropList.txt
	"Regional_Indicator:RI",				//  PropList.txt
	"Sentence_Terminal:STerm",				//  PropList.txt
	"Soft_Dotted:SD",						//  PropList.txt
	"Terminal_Punctuation:Term",			//  PropList.txt
	"Unified_Ideograph:UIdeo",				//  PropList.txt
	"Uppercase:Upper",						//  DerivedCoreProperties.txt
	"Variation_Selector:VS",				//  PropList.txt
	"White_Space:space",					//  PropList.txt
	"XID_Continue:XIDC",					//  DerivedCoreProperties.txt
	"XID_Start:XIDS",						//  DerivedCoreProperties.txt
	//  ECMAScript 2019/Unicode 11:
	//    "Extended_Pictographic:ExtPict",
	//  ECMAScript 2021/Unicode 13:
	//    Aliases: EComp, EMod, EBase, EPres, and ExtPict
	""
};
//  Names of the emoji sequence properties ("properties of strings"), passed
//  to combine_pos(). "RGI_Emoji" must stay first: combine_pos() overwrites
//  element [0] of its output with the composite-class record. The final ""
//  terminates the list.
static const char *const emoseq_property_names[] = {
	"RGI_Emoji",
	"Basic_Emoji",							//  emoji-sequences.txt
	"Emoji_Keycap_Sequence",				//  emoji-sequences.txt
	"RGI_Emoji_Modifier_Sequence",			//  emoji-sequences.txt
	"RGI_Emoji_Flag_Sequence",				//  emoji-sequences.txt
	"RGI_Emoji_Tag_Sequence",				//  emoji-sequences.txt
	"RGI_Emoji_ZWJ_Sequence",				//  emoji-zwj-sequences.txt
	""
};
//  General_Category values as colon-separated name lists. combine_properties()
//  takes the first part as the canonical name and looks the data up under
//  whichever part is present in the range table (read_unidata() stores the
//  data under the two-letter abbreviation found in UnicodeData.txt).
//  The final "" terminates the list.
static const char *const gc_values[] = {	//  38
	"Other:C", "Control:Cc:cntrl", "Format:Cf", "Unassigned:Cn",
	"Private_Use:Co", "Surrogate:Cs", "Letter:L", "Cased_Letter:LC",
	"Lowercase_Letter:Ll", "Titlecase_Letter:Lt", "Uppercase_Letter:Lu", "Modifier_Letter:Lm",
	"Other_Letter:Lo", "Mark:M:Combining_Mark", "Spacing_Mark:Mc", "Enclosing_Mark:Me",
	"Nonspacing_Mark:Mn", "Number:N", "Decimal_Number:Nd:digit", "Letter_Number:Nl",
	"Other_Number:No", "Punctuation:P:punct", "Connector_Punctuation:Pc", "Dash_Punctuation:Pd",
	"Close_Punctuation:Pe", "Final_Punctuation:Pf", "Initial_Punctuation:Pi", "Other_Punctuation:Po",
	"Open_Punctuation:Ps", "Symbol:S", "Currency_Symbol:Sc", "Modifier_Symbol:Sk",
	"Math_Symbol:Sm", "Other_Symbol:So", "Separator:Z", "Line_Separator:Zl",
	"Paragraph_Separator:Zp", "Space_Separator:Zs", ""
};
}	//  namespace updata

namespace unishared
{
//  Converts an integer to text.
//
//  value:      number to convert; a negative value yields a leading '-'.
//  radix:      numeric base, 2..16. Any other radix yields an empty string.
//  precision:  minimum number of digits; shorter results are zero-padded on
//              the left. With precision <= 0 the value 0 yields "".
//
//  Digits above 9 are written in upper case.
template <typename T>
std::string to_string(T value, int radix = 10, const int precision = 1)
{
	typedef std::string::size_type size_type;
	static const char digits[] = "0123456789ABCDEF";
	std::string text;

	if (radix < 2 || radix > 16)
		return text;

	bool negative = false;

	if (value < 0)
	{
		negative = true;
		value = 0 - value;
	}

	//  Digits are produced least significant first and reversed at the end.
	while (value)
	{
		text.push_back(digits[value % radix]);
		value /= radix;
	}

	if (precision > 0)
	{
		const size_type mindigits = static_cast<size_type>(precision);

		if (text.size() < mindigits)
			text.append(mindigits - text.size(), '0');
	}

	if (negative)
		text.push_back('-');

	std::reverse(text.begin(), text.end());
	return text;
}

//  Formats a printf-style message and throws it as std::runtime_error.
//
//  s:    printf-style format string.
//  ...:  arguments consumed by the format string.
//
//  The message is built in a fixed 256-byte buffer. Callers embed whole
//  input lines in their messages, so the formatting is bounded with
//  std::vsnprintf(): an over-long message is truncated to 255 characters
//  instead of overrunning the buffer. This function never returns normally.
void throw_error(const char *const s, ...)
{
	char buffer[256];

	va_list va;
	va_start(va, s);
	const int written = std::vsnprintf(buffer, sizeof buffer, s, va);
	va_end(va);

	//  On a formatting error the buffer contents are unspecified; fall back
	//  to the raw format string so that the exception still says something.
	if (written < 0)
		throw std::runtime_error(s);

	throw std::runtime_error(buffer);
}

//  Appends the whole contents of a file to str.
//
//  str:       receives the data; existing contents are kept.
//  filename:  name of the file.
//  dir:       prefix put in front of filename verbatim (no separator is
//             added); may be null, which is treated as "".
//
//  Progress is reported on stdout. Throws std::runtime_error when the file
//  cannot be opened ("could not open!") or when reading fails part-way
//  ("could not read!"). The file is opened in text mode, as before.
//  The stream is closed on every path, including when appending to str throws.
void read_file(std::string &str, const char *const filename, const char *const dir)
{
	const std::string path(std::string(dir ? dir : "") + filename);
	FILE *const fp = std::fopen(path.c_str(), "r");

	std::fprintf(stdout, "Reading '%s'... ", path.c_str());

	if (fp)
	{
		char buffer[4096];
		bool readerror = false;

		try
		{
			for (;;)
			{
				const std::size_t size = std::fread(buffer, 1, sizeof buffer, fp);

				if (size)
					str.append(buffer, size);

				//  A short read means either end of file or an error.
				if (size < sizeof buffer)
				{
					readerror = std::ferror(fp) != 0;
					break;
				}
			}
		}
		catch (...)
		{
			std::fclose(fp);
			throw;
		}
		std::fclose(fp);

		if (!readerror)
		{
			std::fputs("done.\n", stdout);
			return;
		}
		std::fputs("failed...", stdout);
		throw std::runtime_error("could not read!");
	}
	std::fputs("failed...", stdout);
	throw std::runtime_error("could not open!");
}

//  Writes str to a file, replacing any previous contents.
//
//  filename:  path of the file to create (opened in binary mode).
//  str:       data to write.
//
//  Returns true only when every byte was written AND the stream was closed
//  successfully. Progress is reported on stdout.
bool write_file(const char *const filename, const std::string &str)
{
	FILE *const fp = std::fopen(filename, "wb");

	std::fprintf(stdout, "Writing '%s'... ", filename);

	if (fp)
	{
		const bool written = std::fwrite(str.data(), 1, str.size(), fp) == str.size();
		//  fclose() flushes the buffered data, so a failure here (e.g. disk
		//  full) means the file is incomplete even though fwrite() succeeded.
		const bool closed = std::fclose(fp) == 0;

		if (written && closed)
		{
			std::fputs("done.\n", stdout);
			return true;
		}
	}
	std::fputs("failed...\n", stdout);
	return false;
}
}	//  namespace unishared

//  Command-line options of the generator.
//
//  outfilename:  output file name ("-o"); defaults to "srell_updata3.h".
//  indir:        prefix for the input files ("-i" / "-id"); defaults to "".
//  version:      output format version ("-v"), given on the command line as
//                a decimal number and stored multiplied by 100; default 301.
//  errorno:      0 on success, 1 when help was requested, -1 after an
//                unknown option, -2 after an option without its argument.
//
//  Options may start with '-' or '/'. Parsing continues after an error (the
//  last error wins) but stops immediately when help is requested.
struct up_options
{
	const char *outfilename;
	const char *indir;
	int version;
	int errorno;

	up_options(const int argc, const char *const *const argv)
		: outfilename("srell_updata3.h")
		, indir("")
		, version(301)
		, errorno(0)
	{
		for (int pos = 1; pos < argc; ++pos)
		{
			const char *const arg = argv[pos];
			const bool is_switch = arg[0] == '-' || arg[0] == '/';
			bool unknown = !is_switch;

			if (is_switch)
			{
				const char *const name = arg + 1;
				const bool want_out = std::strcmp(name, "o") == 0;
				const bool want_ver = !want_out && std::strcmp(name, "v") == 0;
				const bool want_dir = !want_out && !want_ver
					&& (std::strcmp(name, "i") == 0 || std::strcmp(name, "id") == 0);

				if (want_out || want_ver || want_dir)
				{
					//  Each of these options consumes the following argument.
					if (pos + 1 >= argc)
					{
						std::fprintf(stdout, "[Error] no argument for \"%s\" specified.\n", argv[pos]);
						errorno = -2;
						continue;
					}

					const char *const value = argv[++pos];

					if (want_out)
						outfilename = value;
					else if (want_ver)
						version = static_cast<int>(std::strtod(value, NULL) * 100.0 + 0.5);
					else
						indir = value;
				}
				else if (std::strcmp(name, "?") == 0 || std::strcmp(name, "h") == 0)
				{
					std::fputs("Usage: updataout2 [options]\nOptions:\n", stdout);
					std::fputs("  -i <DIRECTORY>\tSame as -id.\n", stdout);
					std::fputs("  -id <DIRECTORY>\tAssume that input files exist in <DIRECTORY>.\n\t\t\t<DIRECTORY> must ends with '/' or '\\'.\n", stdout);
					std::fputs("  -o <FILE>\t\tOutput to <FILE>.\n", stdout);
					//  The "-v" option is accepted but deliberately not listed.
					errorno = 1;
					return;
				}
				else
					unknown = true;
			}

			if (unknown)
			{
				std::fprintf(stdout, "[Error] unknown option \"%s\" found.\n", argv[pos]);
				errorno = -1;
			}
		}
	}
};
//  struct up_options

class unicode_property
{
public:

	//  Default constructor. Its only work is to initialise the member
	//  re_colon_ with the pattern ":".
	//  NOTE(review): re_colon_ is declared outside the visible part of the
	//  class; presumably an srell regex used to split "a:b" names - confirm.
	unicode_property()
		: re_colon_(":")
	{
	}

	int create_updata(std::string &outdata, const up_options &opts)
	{
		int errorno = opts.errorno;
		const char *const unidatafilename = "UnicodeData.txt";
		const char *const propdatafiles[] = { "PropList.txt", "DerivedCoreProperties.txt", "emoji-data.txt", "DerivedNormalizationProps.txt", "" };
		const char *const emodatafiles[] = { "emoji-sequences.txt", "emoji-zwj-sequences.txt", "" };
		const char *const scfilename = "Scripts.txt";
		const char *const scxfilename = "ScriptExtensions.txt";
		const char *const pvafilename = "PropertyValueAliases.txt";
		canonicalname_mapper scriptname_maps;
		strings_type scriptname_aliases;
		std::string licensetext;
		rangeholder general_category_values;
		rangeholder binary_properties;
		seqholder emoseq_properties;
		rangeholder scripts;
		rangeholder scriptextensions;
		sortedrangeholder combined_properties;
		sortedseqholder combined_pos;
//		scriptnameholder ucs_to_scriptname;	//  codepoint->scriptname.

		if (errorno)
			return errorno;

		try
		{
			licensetext = "//  ";
			licensetext += unidatafilename;
			licensetext += "\n//\n";

			read_unidata(general_category_values, binary_properties, unidatafilename, opts.indir);
			set_additionalbinprops(binary_properties, general_category_values);	//  for ASCII, Any, Cn.
			create_compositecategories(general_category_values);	//  This needs "Cn".

			read_binprops(binary_properties, licensetext, propdatafiles, opts.indir);
#if !defined(SRELL_NO_VMODE)
			read_emoseq(emoseq_properties, licensetext, emodatafiles, opts.indir);
#endif

			read_scriptnames(scriptname_maps, scriptname_aliases, licensetext, scfilename, pvafilename, opts);

			read_scripts(scripts, licensetext, scfilename, opts.indir);

			scriptextensions = scripts;
			modify_for_scx(scriptextensions, scriptname_maps, licensetext, scxfilename, opts.indir);

			combine_properties(combined_properties, general_category_values, "gc", updata::gc_values);
			combine_properties(combined_properties, binary_properties, "bp", updata::binary_property_names);
			combine_properties(combined_properties, scripts, "sc", scriptname_aliases);
			combine_properties(combined_properties, scriptextensions, "scx", scriptname_aliases);
#if !defined(SRELL_NO_VMODE)
			combine_pos(combined_pos, emoseq_properties, "bp", updata::emoseq_property_names);
#endif

			do_formatting(outdata, combined_properties, combined_pos, opts.version);

			licensetext.append(1, '\n');
			outdata.insert(0, licensetext);
		}
		catch (srell::regex_error &e)
		{
			std::printf("\nError: %s,%d\n", e.what(), e.code());
			errorno = 1;
		}
		catch (std::runtime_error &e)
		{
			std::printf("\nError: %s\n", e.what());
			errorno = 2;
		}
		return errorno;
	}

private:

	//  Shorthands for srell's internal types.
	typedef srell::re_detail::ui_l32 ui_l32;
	typedef srell::re_detail::range_pairs ucprange_array;
	typedef srell::re_detail::range_pair u32pair;
	typedef u32pair ucprange;
	typedef srell::re_detail::range_pair_helper u32rp_helper;
	typedef u32rp_helper ucprange_helper;
	//  Property (value) name -> code point ranges.
	typedef std::map<std::string, ucprange_array> rangeholder;
	typedef srell::re_detail::simple_array<ui_l32> u32array;
	//  Sequence property name -> flat array of encoded sequence records.
	typedef std::map<std::string, u32array> seqholder;
	typedef std::vector<std::string> strings_type;
	//  Sub-matches used as (first, second) pointer pairs into a text buffer.
	typedef std::vector<srell::csub_match> matchranges_type;
	typedef std::map<ui_l32, std::string> scriptnameholder;
	typedef std::map<std::string, std::string> name_mapper;
	typedef std::map<std::string, ui_l32> namenumber_mapper;
	//  Any name or alias -> canonical name.
	typedef name_mapper canonicalname_mapper;
	static const ui_l32 invalid_u32value = srell::re_detail::constants::invalid_u32value;
	//  Marker stored at the head of a composite record (see
	//  create_compositecategory() and combine_pos()).
	static const ui_l32 compositeclass = invalid_u32value;

	//  One output entry of a code-point property: its type tag ("gc", "bp",
	//  "sc" or "scx"), canonical name, the full colon-separated alias string
	//  and the ranges.
	struct sorted_name_and_ranges
	{
		std::string ptype;
		std::string canonicalname;
		std::string namealiases;
		ucprange_array ucpranges;
	};
	typedef std::vector<sorted_name_and_ranges> sortedrangeholder;

	//  One output entry of a sequence property; same layout as above, but
	//  holding sequence data instead of ranges.
	struct sorted_name_and_seqs
	{
		std::string ptype;
		std::string canonicalname;
		std::string namealiases;
		u32array ucpseqs;
	};
	typedef std::vector<sorted_name_and_seqs> sortedseqholder;

	//  Splits data at every occurrence of splitter and appends one sub-match
	//  per piece to parts (parts is not cleared). Empty pieces are kept, so a
	//  string with N splitters always adds N + 1 entries.
	//
	//  The sub-matches point into data's own buffer: data must stay alive and
	//  unmodified for as long as parts is used.
	void split2(matchranges_type &parts, const std::string &data, const char splitter)
	{
		const char *const base = data.data();
		const std::string::size_type total = data.size();
		std::string::size_type start = 0;
		srell::csub_match piece;

		piece.matched = true;

		for (;;)
		{
			const std::string::size_type hit = data.find(splitter, start);
			const bool last = hit == std::string::npos;

			piece.first = base + start;
			piece.second = base + (last ? total : hit);
			parts.push_back(piece);

			if (last)
				break;

			start = hit + 1;	//  Continue right after the splitter.
		}
	}

	std::string join(const char c, const strings_type &parts, const bool add_final_also = false)
	{
		std::string out;

		for (strings_type::size_type i = 0; i < parts.size(); ++i)
			out.append(parts[i] + c);

		if (!add_final_also && out.size())
			out.resize(out.size() - 1);

		return out;
	}

	//  Reads UnicodeData.txt and collects General_Category and Bidi_Mirrored.
	//
	//  gc:               receives, per general category value found in the
	//                    file, the code point ranges having that value.
	//  bp:               bp["Bidi_Mirrored"] is set to the ranges whose
	//                    Bidi_Mirrored field is "Y".
	//  unidatafilename:  name of the data file.
	//  indir:            prefix for the file name (see unishared::read_file).
	//
	//  A pair of lines named "<X, First>" / "<X, Last>" is treated as one
	//  range; both lines must carry identical properties.
	//
	//  Throws std::runtime_error (via unishared::throw_error) when a line has
	//  an unknown format, code points are not strictly ascending, a First
	//  line is not followed by its Last line (including at the end of the
	//  input), or a Bidi_Mirrored value is neither "Y" nor "N".
	void read_unidata(rangeholder &gc, rangeholder &bp, const char *const unidatafilename, const char *const indir)
	{
		//  Sub-matches: 1 = code point, 2 = name, 3 = all remaining fields,
		//  4 = first remaining field (general category),
		//  5 = the field after six more (Bidi_Mirrored).
		const srell::regex re_dataline("^([0-9A-F]+);([^;]*);(([^;]*);(?:[^;]*;){6}([^;]*)(?:;[^;]*){5})$");
		const srell::regex re_rangefirst("^<(.*), First>$");

		const std::string stringY("Y");
		const std::string stringN("N");
		ui_l32 prevucp = invalid_u32value;
		std::string data;
		matchranges_type lines;
		srell::cmatch cmatch;
		std::string rangename;	//  Non-empty while a First line awaits its Last line.
		std::string rangefirstproperty;
		ui_l32 rangefirstcp = 0;
		ucprange range;
		ucprange_array bidi_mirrored_ranges;

		unishared::read_file(data, unidatafilename, indir);
		split2(lines, data, '\n');

		for (matchranges_type::size_type i = 0; i < lines.size(); ++i)
		{
			const srell::csub_match &line = lines[i];

			if (srell::regex_match(line.first, line.second, cmatch, re_dataline))
			{
				const srell::cmatch::value_type &codepoint = cmatch[1];
				const srell::cmatch::value_type &name = cmatch[2];
				const std::string name_string(name.str());
				const std::string property(cmatch[3].str());

				range.first = range.second = static_cast<ui_l32>(std::strtol(codepoint.first, NULL, 16));

				//  "%lX" requires unsigned long; ui_l32 may be a different
				//  type, so the arguments are converted explicitly.
				if (prevucp >= range.first && prevucp != invalid_u32value)
					unishared::throw_error("Out of order: %.4lX >= %.4lX", static_cast<unsigned long>(prevucp), static_cast<unsigned long>(range.first));

				//  Copied now because cmatch is reused for the name below.
				const std::string general_category(cmatch[4].str());
				const std::string bidi_mirrored(cmatch[5].str());

				prevucp = range.first;

				if (rangename.size())
				{
					if (name_string.compare("<" + rangename + ", Last>") != 0)
						unishared::throw_error("<%s, Last> does not follow its First line.\n\t%s follows insteadly.", rangename.c_str(), name_string.c_str());

					if (property != rangefirstproperty)
					{
						unishared::throw_error("\"%s\": properties of First and Last are different.\n\tFirst: %s\n\tLast:  %s", rangename.c_str(), rangefirstproperty.c_str(), property.c_str());
					}

					range.first = rangefirstcp;
					rangename.clear();
				}
				else if (srell::regex_match(name.first, name.second, cmatch, re_rangefirst))
				{
					//  Remember the start; the range is registered when the
					//  matching Last line arrives.
					rangename = cmatch[1];
					rangefirstproperty = property;
					rangefirstcp = range.first;
					continue;
				}

				//  Registers "general_category" value.
				gc[general_category].join(range);

				//  Registers "bidi_mirrored" value.
				if (bidi_mirrored == stringY)
				{
					bidi_mirrored_ranges.join(range);
				}
				else if (bidi_mirrored != stringN)
					unishared::throw_error("Unknown Bidi_Mirrored value [%s] in %s.", bidi_mirrored.c_str(), line.str().c_str());
			}
			else if (line.first != line.second)
				unishared::throw_error("Unknown format [%s]", line.str().c_str());
		}

		//  A First line left open at the end of the input would otherwise
		//  drop its whole range without any diagnostic.
		if (rangename.size())
			unishared::throw_error("<%s, First> is not followed by its Last line.", rangename.c_str());

		bp["Bidi_Mirrored"] = bidi_mirrored_ranges;
	}

	//  Builds the script name tables from Scripts.txt and
	//  PropertyValueAliases.txt.
	//
	//  sn_maps:      receives, for every name listed on a matching alias
	//                line, a mapping to the canonical script name.
	//  sn_aliases:   receives one "Canonical:alias:..." string per script.
	//                For opts.version >= 300 the strings are placed in order
	//                of first appearance in Scripts.txt (with "Unknown"
	//                last); for older versions they are appended in the
	//                sorted order of the canonical names.
	//  licensetext:  the header comment of PropertyValueAliases.txt is
	//                appended to it (see read_license()).
	//  scfilename / pvafilename:  names of the two input files.
	//  opts:         supplies the input directory and the format version.
	void read_scriptnames(canonicalname_mapper &sn_maps, strings_type &sn_aliases, std::string &licensetext, const char *const scfilename, const char *const pvafilename, const up_options &opts)
	{
		const srell::regex re_scline("^[0-9A-Fa-f.]+\\s*;\\s*(\\S+)");
		const srell::regex re_pvaline("scx?\\s*;\\s*(\\S.*)\\r?\\n?");
		const srell::regex re_split("[ ;]+");
		ui_l32 count = 0;
		std::string data;
		matchranges_type lines;
		srell::cmatch cmatch;
		namenumber_mapper seennames;

		unishared::read_file(data, scfilename, opts.indir);

		lines.clear();
		split2(lines, data, '\n');

		//  Pass 1 (Scripts.txt): number each script name in the order in
		//  which it is first seen.
		for (matchranges_type::size_type i = 0; i < lines.size(); ++i)
		{
			const srell::csub_match &line = lines[i];

			if (srell::regex_search(line.first, line.second, cmatch, re_scline, srell::regex_constants::match_continuous))
			{
				const std::string scname(cmatch.str(1));

				if (!seennames.count(scname))
				{
					seennames[scname] = count++;
				}
			}
		}

		if (opts.version >= 300)
		{
			//  "Unknown" gets the last number; sn_aliases is sized so that it
			//  can be indexed by these numbers below.
			seennames["Unknown"] = count++;
			sn_aliases.resize(count);
		}

		typedef std::vector<srell::csub_match> scnames_type;
		canonicalname_mapper aliases_tmp;
		scnames_type scnames;

		data.clear();
		unishared::read_file(data, pvafilename, opts.indir);

		lines.clear();
		split2(lines, data, '\n');

		matchranges_type::size_type i = read_license(licensetext, lines, 0);

		//  Pass 2 (PropertyValueAliases.txt): only lines for the "sc"/"scx"
		//  property are considered.
		for (; i < lines.size(); ++i)
		{
			const srell::csub_match &line = lines[i];

			if (srell::regex_match(line.first, line.second, cmatch, re_pvaline, srell::regex_constants::match_continuous))
			{
				scnames.clear();
				re_split.split(scnames, cmatch[1].first, cmatch[1].second);

				if (scnames.size() >= 2)
				{
					//  The second name on the line is taken as the canonical one.
					const std::string canonicalname(scnames[1]);

					//  Names that never occurred in Scripts.txt are skipped.
					if (seennames.count(canonicalname))
					{
						std::string aliases(canonicalname);

						//  Note: this index shadows the outer line index i.
						for (scnames_type::size_type i = 0; i < scnames.size(); ++i)
						{
							const std::string scname(scnames[i].str());

							sn_maps[scname] = canonicalname;
							//  Appends every name except the canonical one:
							//  by position for old versions, by value for
							//  version >= 300.
							if ((opts.version < 300 && i != 1)
								|| (opts.version >= 300 && scname != canonicalname))
							{
								aliases += ':';
								aliases += scname;
							}
						}
						if (opts.version >= 300)
							sn_aliases[seennames[canonicalname]] = aliases;
						else
							aliases_tmp[canonicalname] = aliases;
					}
				}
			}
		}

		if (opts.version < 300)
		{
			//  aliases_tmp is a std::map, so this emits the alias strings
			//  sorted by canonical name.
			for (canonicalname_mapper::const_iterator it = aliases_tmp.begin(); it != aliases_tmp.end(); ++it)
				sn_aliases.push_back(it->second);
		}
	}

	//  Copies the leading '#' comment block of a data file into licensetext,
	//  turning each "# text" line into "//  text".
	//
	//  Scanning starts at lines[pos]. Lines that are not '#' comments are
	//  skipped. The first comment line without any text ends the block: "//"
	//  is appended for it and its index is returned. When no such line
	//  exists, lines.size() is returned.
	matchranges_type::size_type read_license(std::string &licensetext, const matchranges_type &lines, matchranges_type::size_type pos)
	{
		static const srell::regex re_license("^#[ \\t]*(\\S.*)?$");
		srell::cmatch found;

		while (pos < lines.size())
		{
			const srell::csub_match &current = lines[pos];
			const bool is_comment = srell::regex_search(current.first, current.second, found, re_license, srell::regex_constants::match_continuous);

			if (is_comment)
			{
				const std::string text(found[1].str());

				if (text.empty())
				{
					licensetext += "//\n";
					return pos;
				}
				licensetext += "//  " + text + '\n';
			}
			++pos;
		}
		return pos;
	}

	//  binary properties created from UnicodeData.txt.
	//  Adds the properties that are not read from a file:
	//    bp["Any"]      = U+0000..U+10FFFF,
	//    bp["ASCII"]    = U+0000..U+007F,
	//    bp["Assigned"] = created, but left without data,
	//    gc["Cn"]       = negation of the union of all categories in gc.
	void set_additionalbinprops(rangeholder &bp, rangeholder &gc)
	{
		bp["Any"].join(ucprange_helper(0x0000, 0x10ffff));
		bp["ASCII"].join(ucprange_helper(0x0000, 0x007f));
		bp["Assigned"];	//  Only creates. No data.

		//  Collect everything that has a category, then invert the set.
		ucprange_array unassigned;

		for (rangeholder::iterator entry = gc.begin(); entry != gc.end(); ++entry)
			unassigned.merge(entry->second);

		unassigned.negation();
		gc["Cn"] = unassigned;
	}

	//  Stores under gc[newname] a record that describes a composite category
	//  by reference to its members instead of by ranges:
	//    pair 0:       (compositeclass, sum of the members' sizes)
	//    then, for each member:
	//      (first character of its name, second character of its name)
	//      (size of gc[member], 0)
	//
	//  categories is a list of member names ended by an empty string. A
	//  member missing from gc is created empty by the lookup.
	void create_compositecategory(rangeholder &gc, const char *const newname, const char *const *categories)
	{
		ucprange_array record;
		ui_l32 sum = 0;

		//  Header pair; its second value is filled in after the loop.
		record.append_newpair(ucprange_helper(compositeclass, 0));

		for (std::size_t k = 0; categories[k][0] != '\0'; ++k)
		{
			const char *const member = categories[k];
			const ui_l32 membersize = static_cast<ui_l32>(gc[member].size());

			record.append_newpair(ucprange_helper(member[0], member[1]));
			record.append_newpair(ucprange_helper(membersize, 0));
			sum += membersize;
		}

		record[0].second = sum;
		gc[newname] = record;
	}

	//  Creates the composite general categories LC, L, M, N, C, P, Z and S
	//  (in this order) from their two-letter member categories. "C" includes
	//  "Cn", so gc["Cn"] should have been set before this is called.
	void create_compositecategories(rangeholder &gc)
	{
		const char *const members_LC[] = { "Ll", "Lt", "Lu", "" };
		const char *const members_L[] = { "Ll", "Lt", "Lu", "Lm", "Lo", "" };
		const char *const members_M[] = { "Mc", "Me", "Mn", "" };
		const char *const members_N[] = { "Nd", "Nl", "No", "" };
		const char *const members_C[] = { "Cc", "Cf", "Cn", "Co", "Cs", "" };
		const char *const members_P[] = { "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "" };
		const char *const members_Z[] = { "Zl", "Zp", "Zs", "" };
		const char *const members_S[] = { "Sc", "Sk", "Sm", "So", "" };

		struct composite
		{
			const char *name;
			const char *const *members;
		};
		const composite table[] = {
			{ "LC", members_LC },
			{ "L", members_L },
			{ "M", members_M },
			{ "N", members_N },
			{ "C", members_C },
			{ "P", members_P },
			{ "Z", members_Z },
			{ "S", members_S }
		};

		for (std::size_t k = 0; k < sizeof table / sizeof table[0]; ++k)
			create_compositecategory(gc, table[k].name, table[k].members);
	}

	//  Reads binary properties from a list of data files.
	//
	//  bp:             every "XXXX[..YYYY] ; Name" line adds the code point
	//                  (or range) to bp[Name].
	//  licensetext:    receives the header comment of each file.
	//  propdatafiles:  file names, ended by an empty string.
	//  indir:          prefix for the file names.
	//
	//  Lines that do not have this form are ignored.
	void read_binprops(rangeholder &bp, std::string &licensetext, const char *const *propdatafiles, const char *const indir)
	{
		static const srell::regex re_propfmt("^\\s*([0-9A-Fa-f]{4,})(?:\\.\\.([0-9A-Fa-f]{4,}))?\\s*;\\s*([^\\s;#]+)\\s*");	//  (#.*)?$");
		ucprange span;
		std::string contents;
		matchranges_type rows;
		srell::cmatch found;

		for (const char *const *file = propdatafiles; **file; ++file)
		{
			contents.clear();
			unishared::read_file(contents, *file, indir);

			rows.clear();
			split2(rows, contents, '\n');

			//  Data lines are scanned from where the licence header ended.
			for (matchranges_type::size_type n = read_license(licensetext, rows, 0); n < rows.size(); ++n)
			{
				const srell::csub_match &row = rows[n];

				if (!srell::regex_search(row.first, row.second, found, re_propfmt, srell::regex_constants::match_continuous))
					continue;

				//  found[1] = first code point, found[2] = optional last
				//  code point, found[3] = property name.
				span.first = static_cast<ui_l32>(std::strtol(found[1].first, NULL, 16));

				if (found[2].matched)
					span.second = static_cast<ui_l32>(std::strtol(found[2].first, NULL, 16));
				else
					span.second = span.first;

				bp[found[3].str()].join(span);
			}
		}
	}

	//  Reads emoji sequence properties from a list of data files.
	//
	//  emsq:          for every data line a record is appended to
	//                 emsq[Name], where Name is the line's type field:
	//                   sequence "A B C ...":  (count, A, B, C, ...) where
	//                                          count starts at 2 and grows by
	//                                          one per code point after A,
	//                   range "A..B":          (1, A, B),
	//                   single code point A:   (2, A).
	//  licensetext:   receives the header comment of each file.
	//  emodatafiles:  file names, ended by an empty string.
	//  indir:         prefix for the file names.
	//
	//  Afterwards every array of odd length gets one padding 0 (reported on
	//  stdout), and a dummy compositeclass value is appended to
	//  emsq["RGI_Emoji"].
	void read_emoseq(seqholder &emsq, std::string &licensetext, const char *const *emodatafiles, const char *const indir)
	{
		const srell::regex re_emsqfmt("^\\s*([0-9A-Fa-f]{4,})(?:\\.\\.([0-9A-Fa-f]{4,})|((?:\\s+[0-9A-Fa-f]{4,})+))?\\s*;\\s*([^\\s;#]+)\\s*");	//  (?:\\s*;[^#]*)(#.*)?$");
		const srell::regex re_emsq2fmt("\\s*([0-9A-Fa-f]{4,})");
		std::string contents;
		matchranges_type rows;
		srell::cmatch found;

		for (const char *const *file = emodatafiles; **file; ++file)
		{
			contents.clear();
			unishared::read_file(contents, *file, indir);

			rows.clear();
			split2(rows, contents, '\n');

			for (matchranges_type::size_type n = read_license(licensetext, rows, 0); n < rows.size(); ++n)
			{
				const srell::csub_match &row = rows[n];

				if (!srell::regex_search(row.first, row.second, found, re_emsqfmt, srell::regex_constants::match_continuous))
					continue;

				//  found[1] = first code point, found[2] = end of a range,
				//  found[3] = rest of a sequence, found[4] = property name.
				const srell::cmatch::value_type &rangeend = found[2];
				const srell::cmatch::value_type &rest = found[3];
				const ui_l32 first = static_cast<ui_l32>(std::strtol(found[1].first, NULL, 16));
				u32array &target = emsq[found[4].str()];

				if (rest.matched)
				{
					const u32array::size_type headpos = target.size();
					ui_l32 count = 2;

					target.push_backncr(0);	//  Placeholder, overwritten with count below.
					target.push_back(first);

					for (srell::cregex_iterator2 it(rest.first, rest.second, re_emsq2fmt, srell::regex_constants::match_continuous); !it.done(); ++it, ++count)
					{
						const srell::cmatch::value_type &ucp = (*it)[1];

						target.push_back(static_cast<ui_l32>(std::strtol(ucp.first, NULL, 16)));
					}
					target[headpos] = count;
				}
				else if (rangeend.matched)
				{
					target.push_backncr(1);	//  Range.
					target.push_back(first);
					target.push_back(static_cast<ui_l32>(std::strtol(rangeend.first, NULL, 16)));
				}
				else
				{
					target.push_backncr(2);	//  Single code point.
					target.push_back(first);
				}
			}
		}

		//  Make the length of every array even.
		for (seqholder::iterator entry = emsq.begin(); entry != emsq.end(); ++entry)
		{
			u32array &seqs = entry->second;

			if (seqs.size() & 1)
			{
				std::printf("[Info] Padding added to \"%s\" (%u).\n", entry->first.c_str(), static_cast<unsigned int>(seqs.size()));
				seqs.push_backncr(0);
			}
		}

		emsq["RGI_Emoji"].push_backncr(compositeclass);	//  Dummy data.
	}

	//  Reads Scripts.txt.
	//
	//  sc:           every "XXXX[..YYYY] ; Name" line adds the code point (or
	//                range) to sc[Name]. sc["Unknown"] is set to everything
	//                that no line covered.
	//  licensetext:  receives the header comment of the file.
	//  filename:     name of the data file.
	//  indir:        prefix for the file name.
	void read_scripts(rangeholder &sc, std::string &licensetext, const char *const filename, const char *const indir)
	{
		const srell::regex re_scriptdata("^\\s*([0-9A-Fa-f]{4,})(?:\\.\\.([0-9A-Fa-f]{4,}))?\\s*;\\s*([^\\s;#]+)\\s*");	//  (#.*)?$");
		ucprange span;
		std::string contents;
		matchranges_type rows;
		srell::cmatch found;
		ucprange_array covered;

		unishared::read_file(contents, filename, indir);
		split2(rows, contents, '\n');

		for (matchranges_type::size_type n = read_license(licensetext, rows, 0); n < rows.size(); ++n)
		{
			const srell::csub_match &row = rows[n];

			if (!srell::regex_search(row.first, row.second, found, re_scriptdata, srell::regex_constants::match_continuous))
				continue;

			//  found[1] = first code point, found[2] = optional last code
			//  point, found[3] = script name.
			span.first = static_cast<ui_l32>(std::strtol(found[1].first, NULL, 16));

			if (found[2].matched)
				span.second = static_cast<ui_l32>(std::strtol(found[2].first, NULL, 16));
			else
				span.second = span.first;

			sc[found[3].str()].join(span);
			covered.join(span);
		}

		//  Whatever has no script at all is "Unknown".
		covered.negation();
		sc["Unknown"] = covered;
	}

	//  Builds a lookup table from a list of "Canonical:alias:..." strings.
	//
	//  names:  colon-separated name lists, ended by an empty string.
	//
	//  Returns a map in which every name of an entry (the canonical one
	//  included) maps to that entry's first name.
	canonicalname_mapper load_canonicalnames(const char *const *names)
	{
		canonicalname_mapper canonicalnames;
		matchranges_type parts;

		for (; **names; ++names)
		{
			//  split2() stores pointers into the string it is given, so the
			//  string must be a named object that outlives the use of parts.
			//  Passing *names directly would bind a temporary std::string
			//  that is destroyed as soon as split2() returns.
			const std::string entry(*names);

			parts.clear();
			split2(parts, entry, ':');

			const std::string canonicalname(parts[0].str());

			for (matchranges_type::size_type i = 0; i < parts.size(); ++i)
			{
				canonicalnames[parts[i].str()] = canonicalname;
			}
		}
		return canonicalnames;
	}

	//  Applies ScriptExtensions.txt to a copy of the Script data.
	//
	//  scx:             on entry a copy of the Script ranges; on return the
	//                   Script_Extensions ranges.
	//  canonicalnames:  maps each script name or alias to its canonical name.
	//  licensetext:     receives the header comment of the file.
	//  filename:        name of the data file.
	//  indir:           prefix for the file name.
	//
	//  For each data line, the range is removed from the "Common" and
	//  "Inherited" sets and added to every script named on the line. A name
	//  without a canonical form is reported once on stdout and skipped.
	void modify_for_scx(rangeholder &scx, const canonicalname_mapper &canonicalnames, std::string &licensetext, const char *const filename, const char *const indir)
	{
		const srell::regex re_scxdata("^\\s*([0-9A-Fa-f]{4,})(?:\\.\\.([0-9A-Fa-f]{4,}))?\\s*;\\s*([^\\s;#][^;#]*[^\\s;#])\\s*", srell::regex::multiline);	//  (#.*)?$");
		const srell::regex re_space(" ");
		const std::string name_common("Common");
		const std::string name_inherited("Inherited");
		//  Working copies; written back to scx at the end.
		ucprange_array common = scx[name_common];
		ucprange_array inherited = scx[name_inherited];
		ucprange range;
		std::map<std::string, bool> warning_out;	//  Names already reported.
		std::string data;
		matchranges_type lines;
		srell::cmatch cmatch;

		unishared::read_file(data, filename, indir);

		lines.clear();
		split2(lines, data, '\n');

		matchranges_type::size_type i = read_license(licensetext, lines, 0);

		for (; i < lines.size(); ++i)
		{
			const srell::csub_match &line = lines[i];

			if (srell::regex_search(line.first, line.second, cmatch, re_scxdata, srell::regex_constants::match_continuous))
			{
				//  cmatch[3] is the whole list of script names on the line.
				const srell::cmatch::value_type &begin = cmatch[1];
				const srell::cmatch::value_type &end = cmatch[2];
				const srell::cmatch::value_type &scxnames = cmatch[3];
//				const srell::cmatch::value_type &comment = cmatch[4];

				range.first = static_cast<ui_l32>(std::strtol(begin.str().c_str(), NULL, 16));
				range.second = end.matched ? static_cast<ui_l32>(std::strtol(end.str().c_str(), NULL, 16)) : range.first;

				common.remove_range(range);
				inherited.remove_range(range);

				srell::cregex_iterator2 rei2s(scxnames.first, scxnames.second, re_space);

				//  Walks the space-separated names; once the iterator is done,
				//  the text after the last space is handled as the final name.
				for (rei2s.split_begin();; rei2s.split_next())
				{
					const std::string scriptname(!rei2s.done() ? rei2s.split_range() : rei2s.split_remainder());

					if (scriptname.size())
					{
						const canonicalname_mapper::const_iterator it = canonicalnames.find(scriptname);

						if (it != canonicalnames.end())
							scx[it->second].join(range);
						else
						{
//							unishared::throw_error("Canonical name for \"%s\" is not found.", scriptname.c_str());
							if (!warning_out.count(scriptname))
							{
								std::printf("[Info] Canonical name for \"%s\" is not found. New script?\n", scriptname.c_str());
								warning_out[scriptname] = true;
							}
						}
					}
					if (rei2s.done())
						break;
				}
			}
		}
		scx[name_common] = common;
		scx[name_inherited] = inherited;
	}

	//  Convenience overload: takes the alias strings as an array ended by an
	//  empty string, copies them into a strings_type and forwards to the
	//  strings_type overload.
	void combine_properties(sortedrangeholder &base, const rangeholder &addition, const char *const ptype, const char *const *aliasnames)
	{
		strings_type collected;

		while (**aliasnames)
		{
			collected.push_back(*aliasnames);
			++aliasnames;
		}

		combine_properties(base, addition, ptype, collected);
	}

	//  Appends one entry to base for every alias string.
	//
	//  base:        output list; entries are appended in the order of
	//               aliasnames.
	//  addition:    range data, looked up under any of the entry's names
	//               (the first name that has data wins).
	//  ptype:       property type tag stored in each entry.
	//  aliasnames:  "Canonical:alias:..." strings; the part before the first
	//               ':' becomes the canonical name.
	//
	//  Throws (via unishared::throw_error) when none of an entry's names has
	//  data in addition.
	void combine_properties(sortedrangeholder &base, const rangeholder &addition, const char *const ptype, const strings_type &aliasnames)
	{
		sorted_name_and_ranges entry;
		matchranges_type names;

		entry.ptype = ptype;

		for (strings_type::const_iterator alias = aliasnames.begin(); alias != aliasnames.end(); ++alias)
		{
			names.clear();
			split2(names, *alias, ':');

			//  Try the names in order until one is present in addition.
			rangeholder::const_iterator hit = addition.end();

			for (matchranges_type::size_type j = 0; hit == addition.end() && j < names.size(); ++j)
				hit = addition.find(names[j].str());

			if (hit == addition.end())
				unishared::throw_error("No property value for \"%s\" found.", alias->c_str());

			entry.canonicalname = names[0].str();
			entry.namealiases = *alias;
			entry.ucpranges = hit->second;
			base.push_back(entry);
		}
	}

#if !defined(SRELL_NO_VMODE)

	//  Sequence counterpart of combine_properties(): for every entry of the
	//  empty-string-terminated table aliasnames, looks up its data in
	//  addition and appends a record to base.
	//  While doing so, builds a "composite class" array consisting of the
	//  compositeclass marker, the summed size of all member arrays, and then
	//  the size of each member array. Arrays that consist of the marker alone
	//  are not counted. The composite array finally replaces base[0].ucpseqs.
	void combine_pos(sortedseqholder &base, const seqholder &addition, const char *const ptype, const char *const *aliasnames)
	{
		sorted_name_and_seqs entry;
		matchranges_type parts;
		u32array composite;
		ui_l32 seqtotal = 0;

		//  Marker first, then a slot for the total that is filled in at the end.
		composite.push_backncr(compositeclass);
		composite.push_backncr(0);

		entry.ptype = ptype;

		for (const char *const *cursor = aliasnames; **cursor; ++cursor)
		{
			const std::string aliases(*cursor);

			parts.clear();
			split2(parts, aliases, ':');

			const std::string canonicalname(parts[0].str());
			seqholder::const_iterator found = addition.end();

			//  Try each name in turn until one of them is known.
			for (matchranges_type::size_type k = 0; found == addition.end() && k < parts.size(); ++k)
				found = addition.find(parts[k].str());

			if (found == addition.end())
			{
				unishared::throw_error("No property value for \"%s\" found.", aliases.c_str());
			}
			else
			{
				entry.ucpseqs = found->second;

				const bool markeronly = entry.ucpseqs.size() == 1 && entry.ucpseqs[0] == compositeclass;

				if (!markeronly)
				{
					composite.push_back(entry.ucpseqs.size());
					seqtotal += static_cast<ui_l32>(entry.ucpseqs.size());
				}
			}

			entry.namealiases = aliases;
			entry.canonicalname = canonicalname;
			base.push_back(entry);
		}

		//  Composite class.
		composite[1] = seqtotal;
		base[0].ucpseqs = composite;	//  [0] = RGI_Emoji.
	}

#endif	//  !defined(SRELL_NO_VMODE)

	//  Returns the table that maps a short property-type key ("gc", "bp",
	//  "sc", "scx") to the long identifier used in generated code.
	name_mapper create_ptype_mappings()
	{
		const char *const pairs[][2] = {
			{ "gc", "general_category" },
			{ "bp", "binary" },
			{ "sc", "script" },
			{ "scx", "script_extensions" }
		};
		name_mapper mappings;

		for (std::size_t k = 0; k < sizeof (pairs) / sizeof (pairs[0]); ++k)
			mappings[pairs[k][0]] = pairs[k][1];

		return mappings;
	}

	std::string create_ptypes(const name_mapper &ptypes, const int version)
	{
		std::string ptypedef(version >= 300 ? "" : (version >= 201 ? "\tuptype_unknown = 0,\n" : "\tstruct ptype\n\t{\n\t\tstatic const T2 unknown = 0;\n"));
		const char *names[] = { "bp", "gc", "sc", "scx", "" };
		const std::string t2head = version >= 201 ? "\t" : "\t\tstatic const T2 ";
		const std::string t2tail = version >= 201 ? "," : ";";
		const std::string t2finaltail = version >= 201 ? "" : ";";
		const std::string t2prefix = version >= 201 ? "uptype_" : "";

		for (unsigned int i = 0; *names[i];)
		{
			const char *const name = names[i];
			const name_mapper::const_iterator it = ptypes.find(name);

			if (it == ptypes.end())
				unishared::throw_error("Name for ptype \"%s\" is not found.", name);

			ptypedef += t2head + t2prefix + (version >= 300 ? name : it->second) + " = " + unishared::to_string(++i) + t2tail + "\n";
		}

		if (version >= 300)
		{
		}
		else if (version >= 201)
		{
			drop_finalcomma(ptypedef);
		}
		else
			ptypedef += "\t};\n";

		return ptypedef;
	}

	std::string ranges_to_string(const ucprange_array &array, const std::string &indent, const bool compositeclass)
	{
		std::string rangestring(indent);

		if (compositeclass)
		{
			rangestring += "//  ";

			for (ucprange_array::size_type i = 1; i < array.size(); ++i)
			{
				const ucprange &range = array[i];

				if (i > 1)
					rangestring += " + ";
				rangestring += static_cast<char>(range.first);
				rangestring += static_cast<char>(range.second);
				rangestring += ':' + unishared::to_string(array[++i].first);
			}
		}
		else
		{
			unsigned count = 0;

			for (ucprange_array::size_type i = 0; i < array.size(); ++i)
			{
				const ucprange &range = array[i];
				if (count == 4)
				{
					count = 0;
					rangestring += '\n' + indent;
				}
				else if (count)
				{
					rangestring += ' ';
				}
				rangestring += "0x" + unishared::to_string(range.first, 16, 4) + ", 0x" + unishared::to_string(range.second, 16, 4) + ',';
				++count;
			}
		}
		return rangestring;
	}

#if !defined(SRELL_NO_VMODE)
	//  Formats a sequence array as source text, one record per line, each
	//  line starting with indent. The trailing newline is removed.
	//
	//  Each record starts with a count N followed by data:
	//    N == compositeclass: stop without emitting anything more.
	//    N == 0:              emit a padding line and stop.
	//    N == 1:              a range; two code points follow.
	//    otherwise:           N - 1 code points follow.
	//  An array that consists of the compositeclass marker alone yields an
	//  empty string.
	//
	//  Truncated records are reported through unishared::throw_error().
	//  The values handed to its printf-style format are cast explicitly to
	//  the type each conversion specifier names, because ui_l32 is not
	//  necessarily unsigned int / unsigned long.
	//  NOTE(review): this assumes throw_error() forwards its arguments to a
	//  printf-family function; confirm against its definition.
	std::string seqs_to_string(const u32array &array, const std::string &indent)
	{
		std::string seqstring;

		//  A lone marker carries no sequence data.
		if (array.size() == 1 && array[0] == compositeclass)
			return seqstring;

		for (u32array::size_type i = 0; i < array.size();)
		{
			const ui_l32 num = array[i];

			if (num == compositeclass)
				break;

			if (num == 0)	//  Padding.
			{
				seqstring += indent + "0,\t//  Padding.\n";
				break;
			}

			if (++i == array.size())
				unishared::throw_error("[InternalError] No data follows %u.", static_cast<unsigned int>(num));

			seqstring += indent + unishared::to_string(num);
			seqstring += ", 0x" + unishared::to_string(array[i++], 16, 4);

			if (num == 1)	//  Range.
			{
				if (i == array.size())
					unishared::throw_error("[InternalError] No pair for %.4lX.", static_cast<unsigned long>(array[i - 1]));

				seqstring += ", 0x" + unishared::to_string(array[i++], 16, 4);
			}
			else
			{
				//  One code point has been written already; num counts itself too.
				for (ui_l32 j = 2; j < num; ++j)
				{
					if (i == array.size())
						unishared::throw_error("[InternalError] Broken after %.4lX.", static_cast<unsigned long>(array[i - 1]));

					seqstring += ", 0x" + unishared::to_string(array[i++], 16, 4);
				}
			}
			seqstring += ",\n";
		}

		//  Drop the final '\n'.
		if (seqstring.size())
			seqstring.resize(seqstring.size() - 1);

		return seqstring;
	}
#endif	//  !defined(SRELL_NO_VMODE)

	//  Removes the last ',' found in data, if there is one. Everything else,
	//  including any text after that comma, is left untouched.
	void drop_finalcomma(std::string &data)
	{
		//  Scan backwards; pos is one past the character being examined.
		for (std::string::size_type pos = data.size(); pos != 0; --pos)
		{
			if (data[pos - 1] == ',')
			{
				data.erase(pos - 1, 1);
				return;
			}
		}
	}

	//  Builds the body of the property-name table from updata::property_names.
	//
	//  version >= 300: every colon-separated name of every entry is mapped to
	//    the entry's number (numbering starts at 2). The result is a header
	//    line { "", <number of names> } followed by one { "name", number }
	//    line per name in the map's iteration order. count receives the
	//    number of names.
	//  otherwise: emits two "*" placeholder lines (#0 unknown, #1 binary),
	//    then each entry verbatim with its number, then a terminating "".
	//    count is left unmodified.
	std::string create_pnametable(ui_l32 &count, const int version, const std::string &indent)
	{
		const char *const *entry = updata::property_names;
		std::string table;

		if (version < 300)
		{
			table += indent + "\"*\",\t//  #0:unknown\n";
			table += indent + "\"*\",\t//  #1:binary\n";

			for (unsigned int number = 2; **entry; ++entry, ++number)
			{
				table += indent;
				table += '"';
				table += *entry;
				table += "\",\t//  #" + unishared::to_string(number) + '\n';
			}
			table += indent + "\"\"\n";
			return table;
		}

		namenumber_mapper numbers;

		count = 0u;
		for (unsigned int number = 2; **entry; ++entry, ++number)
		{
			const std::string names(*entry);
			srell::sregex_iterator2 splitter(names, re_colon_);

			splitter.split_begin();
			for (;;)
			{
				//  The piece after the last colon is obtained as the remainder.
				const std::string name(splitter.done() ? splitter.split_remainder() : splitter.split_range());

				numbers[name] = number;
				++count;

				if (splitter.done())
					break;
				splitter.split_next();
			}
		}

		table.assign(indent + "{ \"\", " + unishared::to_string(count) + " },\n");

		for (namenumber_mapper::const_iterator it = numbers.begin(); it != numbers.end(); ++it)
		{
			table += indent;
			table += "{ \"";
#if !defined(NO_LITERAL_ESCAPING)
			table += escape_string(it->first);
#else
			table += it->first;
#endif
			table += "\", " + unishared::to_string(it->second) + " },\n";
		}
		return table;
	}

	std::string join_dropcomma_append(const strings_type &s, const std::string &return_table)
	{
		std::string tmp(join('\n', s, true));

		drop_finalcomma(tmp);
		tmp.append(return_table);
		return tmp;
	}

	//  Assembles the text of the generated header and appends it to out.
	//
	//  alldata: code-point range properties; emsq: sequence properties. Both
	//  are turned into table fragments by do_formatting2(); this function
	//  wraps those fragments in the declarations that the requested output
	//  format version calls for:
	//    version == 100: tables are function-local statics returned by static
	//                    member functions of unicode_property_data.
	//    version <  201: constants are "static const" members of the struct.
	//    version >= 201: constants are enumerators emitted before the struct.
	//    version >= 300: three template parameters; the name table becomes
	//                    "propertynumbertable" and the per-type position
	//                    entries are merged into positiontable.
	//  For version > 100 a "#define SRELL_UPDATA_VERSION" line is appended.
	void do_formatting(std::string &out, const sortedrangeholder &alldata, const sortedseqholder &emsq, const int version)
	{
		//  Number of entries in property_names (its empty terminator included) plus one.
		const std::size_t numofproperties = sizeof (updata::property_names) / sizeof (updata::property_names[0]) + 1;
		//  Template header and qualified-name prefix for out-of-class table definitions.
		const std::string template1(version >= 300 ? "template <typename T3, typename T4, typename T5>\n" : (version >= 201 ? "template <typename T3, typename T4, typename T5, typename T6>\n" : "template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>\n"));
		const std::string template2(version >= 300 ? "unicode_property_data<T3, T4, T5>::" : (version >= 201 ? "unicode_property_data<T3, T4, T5, T6>::" : "unicode_property_data<T1, T2, T3, T4, T5, T6>::"));
		//  Text that closes a table.
		const std::string return_table(version == 100 ? "\t\t};\n\t\treturn table;\n\t}\n" : "};\n");
		const std::string indent(version == 100 ? "\t\t\t" : "\t");
		name_mapper ptype_mappings(create_ptype_mappings());
		const std::string ptypes(create_ptypes(ptype_mappings, version));	//  T2, property types.
		const std::string t1head = version >= 201 ? "\t" : "\tstatic const T1 ";
		const std::string t1tail = version >= 201 ? "," : ";";
		const std::string t1finaltail = version >= 201 ? "" : ";";
		const std::string t1prefix = version >= 201 ? "upid_" : "";
		const std::string t2scope = version >= 201 ? "{ uptype_" : "{ ptype::";
		const std::string maxorlast = version >= 200 ? "max" : "last";

		//  First property number handed out by do_formatting2().
		const ui_l32 pno_base = version >= 300 ? numofproperties : 1u;
		ui_l32 offset = 0u;
		ui_l32 property_number = pno_base;
		ui_l32 property_id_number = pno_base;

		std::string pnumbers(t1head + t1prefix + "unknown = 0" + t1tail + "\n");	//  T1, property numbers.
		strings_type rangetable;
		strings_type lookup_ranges;
		std::string lookup_numbers;
		namenumber_mapper rangeno_map;

		if (version >= 300)
		{
			//  In version 3 the property-type constants share the enum with the property numbers.
			pnumbers += t1head + t1prefix + "invalid = 0" + t1tail + "\n";
			pnumbers += t1head + t1prefix + "error = 0" + t1tail + "\n";
			pnumbers += ptypes;
		}

		do_formatting2(rangeno_map, lookup_numbers, lookup_ranges, rangetable, pnumbers, property_id_number, property_number, offset, pno_base, maxorlast, t2scope, t1prefix, t1finaltail, t1tail, t1head, ptype_mappings, indent, alldata, emsq, version);

		//  For version >= 300, create_pnametable() stores the number of property names in basepos.
		ui_l32 basepos = 0u;
		std::string pnames(create_pnametable(basepos, version, indent));

		if (version >= 300)
		{
			u32pair posinfo[numofproperties];

			//  Appends the per-type value tables to lookup_numbers and records
			//  each one's {offset, count} in posinfo; offsets start at basepos.
			sort_rangeno_table(posinfo, basepos, lookup_numbers, rangeno_map, indent);

			lookup_numbers.append(return_table);

			//  Puts the per-type entries in front of the per-property entries.
			merge_posinfo(lookup_ranges, posinfo, numofproperties, indent);
		}
		else if (version >= 200)
		{
			lookup_numbers.append(indent + t2scope + "unknown, 0, \"\" }\n");
			lookup_numbers.append(return_table);
			lookup_numbers.insert(0, template1 + "const T5 " + template2 + "rangenumbertable[] =\n{\n\t" + t2scope + "unknown, 0, \"*\" },\t//  #0\n");
		}
		else
		{
			lookup_numbers.append(indent + t2scope + "unknown, \"\", 0 }\n");
			lookup_numbers.append(return_table);
			lookup_numbers.insert(0, version == 100 ? "\tstatic const T5 *rangenumber_table()\n\t{\n\t\tstatic const T5 table[] =\n\t\t{\n\t\t\t" + t2scope + "unknown, \"*\", 0 },\t//  #0\n" : template1 + "const T5 " + template2 + "rangenumbertable[] =\n{\n\t" + t2scope + "unknown, \"*\", 0 },\t//  #0\n");
		}

		pnames.insert(0, version == 100 ? "\tstatic const T3 *propertyname_table()\n\t{\n\t\tstatic const T3 table[] =\n\t\t{\n" : template1 + "const T3 " + template2 + (version >= 300 ? "propertynumbertable" : "propertynametable") + "[] =\n{\n");
		//  In version 3 the table is left open here: lookup_numbers is emitted
		//  right after pnames below, has no header of its own, and carries the
		//  closing text.
		if (version < 300)
			pnames.append(return_table);

		if (version >= 201)
		{
			out.append("enum upid_type\n{\n");
			out.append(pnumbers);	//  T1
			out.append("};\n\n");
			if (version < 300)
			{
				out.append("enum up_type\n{\n");
				out.append(ptypes);
				out.append("};\n\n");
			}
			out.append(template1 + "struct unicode_property_data\n{\n");
		}
		else
		{
			out.append(template1 + "struct unicode_property_data\n{\n");
			out.append(pnumbers);
			out.append(ptypes);
		}
		if (version == 100)
		{
			//  Everything lives inside the struct as static member functions.
			out.append(pnames);
			out.append(std::string("\tstatic const T4 *ranges()\n\t{\n\t\tstatic const T4 table[] =\n\t\t{\n"));
			out.append(join_dropcomma_append(rangetable, return_table));
			out.append(lookup_numbers);
			out.append(std::string("\tstatic const T6 *position_table()\n\t{\n\t\tstatic const T6 table[] =\n\t\t{\n\t\t\t{ 0, 0 },\t//  #0 unknown\n"));
			out.append(join_dropcomma_append(lookup_ranges, return_table));
			out.append("};\n");
		}
		else
		{
			//  Declarations inside the struct; definitions follow after it.
			if (version >= 300)
			{
				out.append("\tstatic const T3 propertynumbertable[];\n");
				out.append("\tstatic const T4 positiontable[];\n");
				out.append("\tstatic const T5 rangetable[];\n");
			}
			else
			{
				out.append("\tstatic const T3 propertynametable[];\n");
				out.append("\tstatic const T4 rangetable[];\n");
				out.append("\tstatic const T5 rangenumbertable[];\n");
				out.append("\tstatic const T6 positiontable[];\n");
			}

			if (version <= 200)
			{
				//  Accessor functions with the same names as the version 100 output.
				out.append("\n\tstatic const T3 *propertyname_table()\n\t{\n\t\treturn propertynametable;\n\t}\n");
				out.append("\tstatic const T4 *ranges()\n\t{\n\t\treturn rangetable;\n\t}\n");
				out.append("\tstatic const T5 *rangenumber_table()\n\t{\n\t\treturn rangenumbertable;\n\t}\n");
				out.append("\tstatic const T6 *position_table()\n\t{\n\t\treturn positiontable;\n\t}\n");
			}
			out.append("};\n\n");
			out.append(pnames);	//  T3

			if (version < 300)
			{
				out.append("\n");
				out.append(template1 + "const T4 " + template2 + "rangetable[] =\n{\n");
				out.append(join_dropcomma_append(rangetable, return_table));	//  T4
				out.append("\n");
			}

			out.append(lookup_numbers);	//  T5 (continuation of the T3 table in version 3)
			out.append("\n");

			out.append(template1 + (version >= 300 ? "const T4 " : "const T6 ") + template2 + "positiontable[] =\n{\n\t{ 0, 0 },\t//  #0 unknown\n");
			out.append(join_dropcomma_append(lookup_ranges, return_table));	//  T6 (T4 in version 3)
			if (version >= 300)
			{
				out.append("\n");

				out.append(template1 + "const T5 " + template2 + "rangetable[] =\n{\n");
				out.append(join_dropcomma_append(rangetable, return_table));	//  T5 in version 3
			}
		}
		if (version > 100)
			out.append("#define SRELL_UPDATA_VERSION " + unishared::to_string(static_cast<unsigned int>(version)) + "\n");
	}

	//  Generates the table fragments for all properties.
	//
	//  Outputs (all appended to / updated in place):
	//    rangeno_map        version >= 300 only: "ptype:alias" -> property number.
	//    lookup_numbers     version < 300 only: one lookup entry per property.
	//    lookup_ranges      one { offset, count } line per distinct range set.
	//    rangetable         for each distinct range set, a comment line
	//                       followed by the formatted data.
	//    pnumbers           one constant per property, then the max/last markers.
	//    property_number    next number for a distinct data set.
	//    property_id_number running index over all properties.
	//    offset             running position inside the range table.
	//  pno_base is the number assigned to the first distinct data set; it is
	//  used to turn a property number back into an index of lookup_ranges /
	//  rangetable.
	//
	//  Properties of alldata whose formatted ranges are identical to those of
	//  an earlier property reuse that property's number instead of emitting
	//  the data again. Composite properties (first element marked with
	//  compositeclass) emit only a comment and do not advance offset.
	//
	//  Unless SRELL_NO_VMODE is defined, the sequence properties of emsq are
	//  appended afterwards inside an "#if !defined(SRELL_NO_UNICODE_POS)"
	//  section of the generated text.
	void do_formatting2(
		namenumber_mapper &rangeno_map, std::string &lookup_numbers, strings_type &lookup_ranges, strings_type &rangetable, std::string &pnumbers,
		ui_l32 &property_id_number, ui_l32 &property_number, ui_l32 &offset, const ui_l32 pno_base,
		const std::string &maxorlast, const std::string &t2scope, const std::string &t1prefix, const std::string &t1finaltail, const std::string &t1tail, const std::string &t1head, name_mapper &ptype_mappings, const std::string &indent, const sortedrangeholder &alldata, const sortedseqholder &emsq, const int version)
	{
		//  Formatted range text -> property number of its first occurrence.
		namenumber_mapper registered;
		//  NOTE(review): rangepos is filled below but never read in this function.
		srell::re_detail::simple_array<ucprange> rangepos;
		srell::sregex_iterator2 rei2;

		for (sortedrangeholder::size_type i = 0; i < alldata.size(); ++i)
		{
			const sorted_name_and_ranges &elem = alldata[i];
			const std::string ptype = elem.ptype;
			const std::string name = elem.canonicalname;
			const std::string aliases = elem.namealiases;
			const ucprange_array &array = elem.ucpranges;
			const std::string pnumber_keyname(ptype + '_' + name);
			const std::string position_comment(' ' + ptype + '=' + aliases);
			const bool compositeclass_found = array.size() && array[0].first == compositeclass;
			std::string rangestring(ranges_to_string(array, indent, compositeclass_found));
			ui_l32 numofranges = static_cast<ui_l32 >(array.size());
			ui_l32 pno = property_number;
			const namenumber_mapper::const_iterator rit = registered.find(rangestring);

			if (rit != registered.end())
			{
				//  Same data as an earlier property: share its number and
				//  only extend the comments of the existing lines.
				pno = rit->second;

				lookup_ranges[pno - pno_base] += position_comment;
				rangetable[(pno - pno_base) * 2] += position_comment;

				if (version >= 300)
				{
					rei2.assign(aliases, re_colon_);

					for (rei2.split_begin();; rei2.split_next())
					{
						const std::string alias(!rei2.done() ? rei2.split_range() : rei2.split_remainder());
						rangeno_map[ptype + ':' + alias] = pno;
						if (rei2.done())
							break;
					}
				}
				else if (version >= 200)
				{
					lookup_numbers.append(indent + t2scope + ptype_mappings[ptype] + ", " + unishared::to_string(pno) + ", \"" + aliases + "\" },\t//  #" + unishared::to_string(property_id_number) + "\n");
				}
				else
					lookup_numbers.append(indent + t2scope + ptype_mappings[ptype] + ", \"" + aliases + "\", " + unishared::to_string(pno) + " },\t//  #" + unishared::to_string(property_id_number) + "\n");
			}
			else
			{
				//  ucpranges of "Assigned" is empty.
				if (compositeclass_found)
				{
					std::printf("[Info] Composite property \"%s\" found.\n", aliases.c_str());
					numofranges = array[0].second;
				}
				else
				{
					registered[rangestring] = property_number;
				}

				if (version >= 300)
				{
					rei2.assign(aliases, re_colon_);

					for (rei2.split_begin();; rei2.split_next())
					{
						const std::string alias(!rei2.done() ? rei2.split_range() : rei2.split_remainder());
						rangeno_map[ptype + ':' + alias] = property_number;
						if (rei2.done())
							break;
					}
				}
				else if (version >= 200)
				{
					lookup_numbers.append(indent + t2scope + ptype_mappings[ptype] + ", " + unishared::to_string(property_number) + ", \"" + aliases + "\" },\t//  #" + unishared::to_string(property_id_number) + "\n");
				}
				else
					lookup_numbers.append(indent + t2scope + ptype_mappings[ptype] + ", \"" + aliases + "\", " + unishared::to_string(property_number) + " },\t//  #" + unishared::to_string(property_id_number) + "\n");

				lookup_ranges.push_back(indent + "{ " + unishared::to_string(offset) + ", " + unishared::to_string(numofranges) + " },\t//  #" + unishared::to_string(pno) + position_comment);

				//  Two rangetable lines per distinct property: comment, then data.
				rangetable.push_back(indent + "//  #" + unishared::to_string(pno) + " (" + unishared::to_string(offset) + '+' + unishared::to_string(numofranges) + "):" + position_comment);
				rangetable.push_back(rangestring);

				rangepos.push_back(ucprange_helper(offset, numofranges));

				if (!compositeclass_found)
					offset += numofranges;

				++property_number;
			}

			if (version >= 300)
				pnumbers.append(t1head + pnumber_keyname + " = " + unishared::to_string(pno) + t1tail + (pno != property_id_number ? ("\t//  #" + unishared::to_string(property_id_number)) : "") + '\n');
			else
				pnumbers.append(t1head + pnumber_keyname + " = " + unishared::to_string(property_id_number) + t1tail + "\t//  #" + unishared::to_string(pno) + '\n');
			++property_id_number;
		}

		pnumbers.append(t1head + t1prefix + maxorlast + "_property_number = " + unishared::to_string(property_number - 1) + t1tail + "\n");

#if !defined(SRELL_NO_VMODE)
		//  Move the separating comma inside the conditional section so that
		//  the table stays well-formed when the section is compiled out.
		if (rangetable.size())
			drop_finalcomma(rangetable[rangetable.size() - 1]);
		rangetable.push_back("#if !defined(SRELL_NO_UNICODE_POS)\n" + indent + ",");

		if (version < 300)
			lookup_numbers.append("#if !defined(SRELL_NO_UNICODE_POS)\n");

		for (sortedseqholder::size_type i = 0; i < emsq.size(); ++i)
		{
			const sorted_name_and_seqs &elem = emsq[i];
			const std::string ptype = elem.ptype;
			const std::string name = elem.canonicalname;
			const std::string aliases = elem.namealiases;
			const u32array &array = elem.ucpseqs;
			const bool compositeclass_found = array.size() && array[0] == compositeclass;
			const std::string pnumber_keyname(ptype + '_' + name);
			const std::string position_comment(' ' + ptype + '=' + aliases);
			ui_l32 numofseqs = static_cast<ui_l32>(array.size());
			std::string seqstring;

			if (compositeclass_found)
			{
				//  Layout: [0] marker, [1] total size, [2...] size of each member.
				std::printf("[Info] Composite property \"%s\" found.\n", aliases.c_str());
				numofseqs = array[1];
				seqstring = indent + "//  ";

				for (u32array::size_type k = 2; k < array.size(); ++k)
				{
					if (k > 2)
						seqstring += " + ";
					seqstring += unishared::to_string(array[k]) + "/2";
				}
			}
			else
			{
				seqstring = seqs_to_string(array, indent);
			}

			//  Sizes are halved before being written to the position table.
			const ui_l32 numofranges = numofseqs / 2;

			if (version >= 300)
				pnumbers.append(t1head + pnumber_keyname + " = " + unishared::to_string(property_number) + t1tail + "\t//  #" + unishared::to_string(property_id_number) + '\n');
			else
				pnumbers.append(t1head + pnumber_keyname + " = " + unishared::to_string(property_id_number) + t1tail + "\t//  #" + unishared::to_string(property_number) + '\n');

			if (version >= 300)
			{
				rei2.assign(aliases, re_colon_);

				for (rei2.split_begin();; rei2.split_next())
				{
					const std::string alias(!rei2.done() ? rei2.split_range() : rei2.split_remainder());
					//  Register each alias on its own, as the range-property
					//  loops above do; keying on the whole alias list would
					//  produce a single "ptype:A:B" entry.
					rangeno_map[ptype + ':' + alias] = property_number;
					if (rei2.done())
						break;
				}
			}
			else if (version >= 200)
			{
				lookup_numbers.append(indent + t2scope + ptype_mappings[ptype] + ", " + unishared::to_string(property_number) + ", \"" + aliases + "\" },\t//  #" + unishared::to_string(property_id_number) + "\n");
			}
			else
				lookup_numbers.append(indent + t2scope + ptype_mappings[ptype] + ", \"" + aliases + "\", " + unishared::to_string(property_number) + " },\t//  #" + unishared::to_string(property_id_number) + "\n");
			lookup_ranges.push_back(indent + "{ " + unishared::to_string(offset) + ", " + unishared::to_string(numofranges) + " },\t//  #" + unishared::to_string(property_number) + position_comment);
			rangetable.push_back(indent + "//  #" + unishared::to_string(property_number) + " (" + unishared::to_string(offset) + '+' + unishared::to_string(numofseqs) + "/2):" + position_comment);
			rangetable.push_back(seqstring);

			++property_number;
			++property_id_number;
			if (!compositeclass_found)
				offset += numofranges;
		}

		pnumbers.append(t1head + t1prefix + maxorlast + "_pos_number = " + unishared::to_string(--property_number) + t1finaltail + "\n");
		rangetable.push_back("#endif\t//  !defined(SRELL_NO_UNICODE_POS)");
		if (version < 300)
			lookup_numbers.append("#endif\t//  !defined(SRELL_NO_UNICODE_POS)\n");

#endif	//  !defined(SRELL_NO_VMODE)
	}

	//  Groups the "ptype:value" keys of rangeno_map by property type and
	//  appends one block of { "value", number } lines per type to
	//  lookup_numbers, in the order gc, bp, sc, scx. The {offset, count} of
	//  each block is stored in posinfo (bp -> [1], gc -> [2], sc -> [3],
	//  scx -> [4]); offsets start at the given offset and accumulate.
	//  Finally the last comma of lookup_numbers is removed.
	//  Keys that do not split into exactly two fields are ignored.
	void sort_rangeno_table(u32pair *const posinfo, ui_l32 offset, std::string &lookup_numbers, const namenumber_mapper &rangeno_map, const std::string &indent)
	{
		std::vector<srell::ssub_match> fields;
		name_mapper lines_by_type;
		namenumber_mapper count_by_type;

		for (namenumber_mapper::const_iterator it = rangeno_map.begin(); it != rangeno_map.end(); ++it)
		{
			fields.clear();
			re_colon_.split(fields, it->first, 2);

			if (fields.size() != 2)
				continue;

			const std::string ptype(fields[0].str());
			std::string line(indent);

			line += "{ \"";
#if !defined(NO_LITERAL_ESCAPING)
			line += escape_string(fields[1].str());
#else
			line += fields[1].str();
#endif
			line += "\", ";
			line += unishared::to_string(it->second);
			line += " },\n";

			lines_by_type[ptype] += line;
			++count_by_type[ptype];
		}

		//  Emission order and the posinfo slot that belongs to each type.
		const char *const types[] = { "gc", "bp", "sc", "scx" };
		const unsigned int slots[] = { 2, 1, 3, 4 };

		for (unsigned int k = 0; k < sizeof (slots) / sizeof (slots[0]); ++k)
		{
			const ui_l32 added = set_pvalue_and_count(lookup_numbers, posinfo[slots[k]], types[k], offset, count_by_type, lines_by_type, indent);

			offset += added;
		}
		drop_finalcomma(lookup_numbers);
	}

	//  Appends the block for one property type to lookup_numbers: a comment
	//  line "//  <category>: <count>" followed by the lines collected for that
	//  category. Stores {offset, count} in posinfo and returns the stored
	//  second member.
	ui_l32 set_pvalue_and_count(std::string &lookup_numbers, u32pair &posinfo, const std::string category, const ui_l32 offset, namenumber_mapper &pcounts, name_mapper &pvalues, const std::string &indent)
	{
		lookup_numbers.append(indent);
		lookup_numbers.append("//  ");
		lookup_numbers.append(category);
		lookup_numbers.append(": ");
		lookup_numbers.append(unishared::to_string(pcounts[category]));
		lookup_numbers.append("\n");
		lookup_numbers.append(pvalues[category]);

		posinfo.set(offset, pcounts[category]);
		return posinfo.second;
	}

	//  Inserts one "{ first, second }" line for each of posinfo[1] to
	//  posinfo[numofproperties - 1] at the front of lookup_ranges, keeping
	//  them in index order. Entry #1 is labelled "binary"; entry #n (n >= 2)
	//  is labelled with updata::property_names[n - 2].
	void merge_posinfo(strings_type &lookup_ranges, const u32pair *const posinfo, const std::size_t numofproperties, const std::string &indent)
	{
		for (std::size_t pno = 1; pno < numofproperties; ++pno)
		{
			const char *const label = (pno == 1) ? "binary" : updata::property_names[pno - 2];
			std::string line(indent);

			line += "{ ";
			line += unishared::to_string(posinfo[pno].first);
			line += ", ";
			line += unishared::to_string(posinfo[pno].second);
			line += " },\t//  #";
			line += unishared::to_string(pno);
			line += ' ';
			line += label;

			lookup_ranges.insert(lookup_ranges.begin() + (pno - 1), line);
		}
	}

	//  Rewrites every byte of s as a "\xHH" escape with two uppercase
	//  hexadecimal digits, e.g. "gc" becomes "\x67\x63".
	std::string escape_string(const std::string &s)
	{
		static const char digits[] = "0123456789ABCDEF";
		std::string escaped;

		for (std::string::const_iterator p = s.begin(); p != s.end(); ++p)
		{
			const unsigned int byte = static_cast<unsigned char>(*p);

			escaped += "\\x";
			escaped += digits[(byte >> 4) & 15];
			escaped += digits[byte & 15];
		}
		return escaped;
	}

	srell::regex re_colon_;
};
//  class unicode_property

//  Entry point: parses the command line into up_options, generates the
//  output text, and writes it to upopts.outfilename.
//  Returns the code from create_updata() if it is non-zero, 2 if writing the
//  file fails, and 0 otherwise.
int main(const int argc, const char *const *const argv)
{
	up_options upopts(argc, argv);
	std::string outdata;
	unicode_property up;
	const int status = up.create_updata(outdata, upopts);

	if (status != 0)
		return status;

	return unishared::write_file(upopts.outfilename, outdata) ? 0 : 2;
}