refactor: Refactorized whole project structure

2025-01-24 22:36:25 +03:00
parent d50f55086b
commit a479b5f85d
321 changed files with 288 additions and 219 deletions
--- a/source/NoNut/include/StringHelpers.h
+++ b/source/NoNut/include/StringHelpers.h
@@ -0,0 +1,347 @@
+#ifndef NONUT_CORE_STRING_HELPERS_H
+#define NONUT_CORE_STRING_HELPERS_H
+#include <string>
+#include <unordered_map>
+
+namespace nonut
+{
+	const std::unordered_map<unsigned char, std::string_view> WINDOWS1250_UTF8_MAP
+	{
+		{'\x80', "\xe2\x82\xac"}, // euro sign
+		{'\x82', "\xe2\x80\x9a"}, // lower quotation mark
+		{'\x84', "\xe2\x80\x9e"}, // lower quotation marks
+		{'\x85', "\xe2\x80\xa6"}, // ellipsis
+		{'\x86', "\xe2\x80\xa0"}, // dagger
+		{'\x87', "\xe2\x80\xa1"}, // double dagger
+		{'\x89', "\xe2\x80\xb0"}, // per mille
+		{'\x8a', "\xc5\xa0"}, // S with caron
+		{'\x8b', "\xe2\x80\xb9"}, // left guillemet
+		{'\x8c', "\xc5\x9a"}, // S with acute
+		{'\x8d', "\xc5\xa4"}, // T with caron
+		{'\x8e', "\xc5\xbd"}, // Z with caron
+		{'\x8f', "\xc5\xb9"}, // Z with acute
+
+		{'\x91', "\xe2\x80\x98"}, // upper quotation mark (opening)
+		{'\x92', "\xe2\x80\x99"}, // upper quotation mark (closing)
+		{'\x93', "\xe2\x80\x9c"}, // upper quotation marks (opening)
+		{'\x94', "\xe2\x80\x9d"}, // upper quotation marks (closing)
+		{'\x95', "\xe2\x80\xa2"}, // bullet sign
+		{'\x96', "\xe2\x80\x93"}, // en dash
+		{'\x97', "\xe2\x80\x94"}, // em dash
+		{'\x99', "\xe2\x84\xa2"}, // trademark sign
+		{'\x9a', "\xc5\xa1"}, // s with caron
+		{'\x9b', "\xe2\x80\xba"}, // right guillemet
+		{'\x9c', "\xc5\x9b"}, // s with acute
+		{'\x9d', "\xc5\xa5"}, // t with caron
+		{'\x9e', "\xc5\xbe"}, // z with caron
+		{'\x9f', "\xc5\xba"}, // z with acute
+
+		{'\xa0', "\x20"}, // NBSP
+		{'\xa1', "\xcb\x87"}, // caron
+		{'\xa2', "\xcb\x98"}, // breve
+		{'\xa3', "\xc5\x81"}, // L with stroke
+		{'\xa4', "\xc2\xa4"}, // currency sign
+		{'\xa5', "\xc4\x84"}, // A with ogonek
+		{'\xa6', "\xc2\xa6"}, // vertical bar
+		{'\xa7', "\xc2\xa7"}, // section sign
+		{'\xa8', "\xc2\xa8"}, // diaeresis
+		{'\xa9', "\xc2\xa9"}, // copyright sign
+		{'\xaa', "\xc5\x9e"}, // S-cedilla
+		{'\xab', "\xc2\xab"}, // left guillemets
+		{'\xac', "\xc2\xac"}, // negation
+		{'\xad', "\xc2\xad"}, // soft hyphen
+		{'\xae', "\xc2\xae"}, // registered trademark sign
+		{'\xaf', "\xc5\xbb"}, // Z with dot above
+
+		{'\xb0', "\xc2\xb0"}, // degree sign
+		{'\xb1', "\xc2\xb1"}, // plus-minus sign
+		{'\xb2', "\xcb\x9b"}, // ogonek
+		{'\xb3', "\xc5\x82"}, // l with stroke
+		{'\xb4', "\xc2\xb4"}, // acute accent
+		{'\xb5', "\xc2\xb5"}, // Mu letter
+		{'\xb6', "\xc2\xb6"}, // pilcrow
+		{'\xb7', "\xc2\xb7"}, // middle dot
+		{'\xb8', "\xc2\xb8"}, // cedilla
+		{'\xb9', "\xc4\x85"}, // a with ogonek
+		{'\xba', "\xc5\x9f"}, // s-cedilla
+		{'\xbb', "\xc2\xbb"}, // right guillemets
+		{'\xbc', "\xc4\xbd"}, // Lj-
+		{'\xbd', "\xcb\x9d"}, // double acute accent
+		{'\xbe', "\xc4\xbe"}, // lj-
+		{'\xbf', "\xc5\xbc"}, // z with dot above
+
+		{'\xc0', "\xc5\x94"}, // R with acute
+		{'\xc1', "\xc3\x81"}, // A with acute
+		{'\xc2', "\xc3\x82"}, // A-circumflex
+		{'\xc3', "\xc4\x82"}, // A-breve
+		{'\xc4', "\xc3\x84"}, // A with diaeresis
+		{'\xc5', "\xc4\xb9"}, // L with acute
+		{'\xc6', "\xc4\x86"}, // C with acute
+		{'\xc7', "\xc3\x87"}, // C-cedilla
+		{'\xc8', "\xc4\x8c"}, // C with caron
+		{'\xc9', "\xc3\x89"}, // C with acute
+		{'\xca', "\xc4\x98"}, // E with ogonek
+		{'\xcb', "\xc3\x8b"}, // E with diaeresis
+		{'\xcc', "\xc3\x8b"}, // E with caron
+		{'\xcd', "\xc3\x8d"}, // I with acute
+		{'\xce', "\xc3\x8e"}, // I-circumflex
+		{'\xcf', "\xc4\x8e"}, // D with caron
+
+		{'\xd0', "\xc4\x90"}, // crossed D
+		{'\xd1', "\xc5\x83"}, // N with acute
+		{'\xd2', "\xc5\x87"}, // N with caron
+		{'\xd3', "\xc3\x93"}, // O with acute
+		{'\xd4', "\xc3\x94"}, // O-circumflex
+		{'\xd5', "\xc5\x90"}, // O with dobule accute
+		{'\xd6', "\xc3\x96"}, // O with diaeresis
+		{'\xd7', "\xc3\x97"}, // multiplication sign
+		{'\xd8', "\xc5\x98"}, // R with caron
+		{'\xd9', "\xc5\xae"}, // U with diacritic
+		{'\xda', "\xc3\x9a"}, // U with acute
+		{'\xdb', "\xc5\xb0"}, // U with double accent
+		{'\xdc', "\xc3\x9c"}, // U with diaeresis
+		{'\xdd', "\xc3\x9d"}, // Y with acute
+		{'\xdf', "\xc5\xa2"}, // T-cedilla
+
+		{'\xe0', "\xc5\x95"}, // r with acute
+		{'\xe1', "\xc3\xa1"}, // a with acute
+		{'\xe2', "\xc3\xa2"}, // a-circumflex
+		{'\xe3', "\xc4\x83"}, // a-breve
+		{'\xe4', "\xc3\xa4"}, // a with diaeresis
+		{'\xe5', "\xc4\xba"}, // l with acute
+		{'\xe6', "\xc4\x87"}, // c with acute
+		{'\xe7', "\xc3\xa7"}, // c-cedilla
+		{'\xe8', "\xc4\x8d"}, // c with caron
+		{'\xe9', "\xc3\xa9"}, // c with acute
+		{'\xea', "\xc4\x99"}, // e with ogonek
+		{'\xeb', "\xc3\xab"}, // e with diaeresis
+		{'\xec', "\xc4\x9b"}, // e with caron
+		{'\xed', "\xc3\xad"}, // i with acute
+		{'\xee', "\xc3\xae"}, // i-circumflex
+		{'\xef', "\xc4\x8f"}, // d with caron
+
+		{'\xf0', "\xc4\x91"}, // crossed d
+		{'\xf1', "\xc5\x84"}, // n with acute
+		{'\xf2', "\xc5\x88"}, // n with caron
+		{'\xf3', "\xc3\xb3"}, // o with acute
+		{'\xf4', "\xc3\xb4"}, // o-circumflex
+		{'\xf5', "\xc5\x91"}, // o with double accent
+		{'\xf6', "\xc3\xb6"}, // o with diaeresis
+		{'\xf7', "\xc3\xb7"}, // division sign
+		{'\xf8', "\xc5\x99"}, // r with caron
+		{'\xf9', "\xc5\xaf"}, // u with diacritic
+		{'\xfa', "\xc3\xba"}, // u with acute
+		{'\xfb', "\xc5\xb1"}, // u with double accent
+		{'\xfc', "\xc3\xbc"}, // u with diaeresis
+		{'\xfd', "\xc3\xbd"}, // y with acute
+		{'\xfe', "\xc5\xa3"}, // t-cedilla
+		{'\xff', "\xcb\x99"}, // diactric dot
+	};
+
+	const std::unordered_map<std::string_view, unsigned char> UTF8_WINDOWS1250_MAP
+	{
+		{"\xe2\x82\xac", '\x80'}, // euro sign
+		{"\xe2\x80\x9a", '\x82'}, // lower quotation mark
+		{"\xe2\x80\x9e", '\x84'}, // lower quotation marks
+		{"\xe2\x80\xa6", '\x85'}, // ellipsis
+		{"\xe2\x80\xa0", '\x86'}, // dagger
+		{"\xe2\x80\xa1", '\x87'}, // double dagger
+		{"\xe2\x80\xb0", '\x89'}, // per mille
+		{"\xc5\xa0", '\x8a'}, // S with caron
+		{"\xe2\x80\xb9", '\x8b'}, // left guillemet
+		{"\xc5\x9a", '\x8c'}, // S with acute
+		{"\xc5\xa4", '\x8d'}, // T with caron
+		{"\xc5\xbd", '\x8e'}, // Z with caron
+		{"\xc5\xb9", '\x8f'}, // Z with acute
+
+		{"\xe2\x80\x98", '\x91'}, // upper quotation mark (opening)
+		{"\xe2\x80\x99", '\x92'}, // upper quotation mark (closing)
+		{"\xe2\x80\x9c", '\x93'}, // upper quotation marks (opening)
+		{"\xe2\x80\x9d", '\x94'}, // upper quotation marks (closing)
+		{"\xe2\x80\xa2", '\x95'}, // bullet sign
+		{"\xe2\x80\x93", '\x96'}, // en dash
+		{"\xe2\x80\x94", '\x97'}, // em dash
+		{"\xe2\x84\xa2", '\x99'}, // trademark sign
+		{"\xc5\xa1", '\x9a'}, // s with caron
+		{"\xe2\x80\xba", '\x9b'}, // right guillemet
+		{"\xc5\x9b", '\x9c'}, // s with acute
+		{"\xc5\xa5", '\x9d'}, // t with caron
+		{"\xc5\xbe", '\x9e'}, // z with caron
+		{"\xc5\xba", '\x9f'}, // z with acute
+
+		{"\x20", '\xa0'}, // NBSP
+		{"\xcb\x87", '\xa1'}, // caron
+		{"\xcb\x98", '\xa2'}, // breve
+		{"\xc5\x81", '\xa3'}, // L with stroke
+		{"\xc2\xa4", '\xa4'}, // currency sign
+		{"\xc4\x84", '\xa5'}, // A with ogonek
+		{"\xc2\xa6", '\xa6'}, // vertical bar
+		{"\xc2\xa7", '\xa7'}, // section sign
+		{"\xc2\xa8", '\xa8'}, // diaeresis
+		{"\xc2\xa9", '\xa9'}, // copyright sign
+		{"\xc5\x9e", '\xaa'}, // S-cedilla
+		{"\xc2\xab", '\xab'}, // left guillemets
+		{"\xc2\xac", '\xac'}, // negation
+		{"\xc2\xad", '\xad'}, // soft hyphen
+		{"\xc2\xae", '\xae'}, // registered trademark sign
+		{"\xc5\xbb", '\xaf'}, // Z with dot above
+
+		{"\xc2\xb0", '\xb0'}, // degree sign
+		{"\xc2\xb1", '\xb1'}, // plus-minus sign
+		{"\xcb\x9b", '\xb2'}, // ogonek
+		{"\xc5\x82", '\xb3'}, // l with stroke
+		{"\xc2\xb4", '\xb4'}, // acute accent
+		{"\xc2\xb5", '\xb5'}, // Mu letter
+		{"\xc2\xb6", '\xb6'}, // pilcrow
+		{"\xc2\xb7", '\xb7'}, // middle dot
+		{"\xc2\xb8", '\xb8'}, // cedilla
+		{"\xc4\x85", '\xb9'}, // a with ogonek
+		{"\xc5\x9f", '\xba'}, // s-cedilla
+		{"\xc2\xbb", '\xbb'}, // right guillemets
+		{"\xc4\xbd", '\xbc'}, // Lj-
+		{"\xcb\x9d", '\xbd'}, // double acute accent
+		{"\xc4\xbe", '\xbe'}, // lj-
+		{"\xc5\xbc", '\xbf'}, // z with dot above
+
+		{"\xc5\x94", '\xc0'}, // R with acute
+		{"\xc3\x81", '\xc1'}, // A with acute
+		{"\xc3\x82", '\xc2'}, // A-circumflex
+		{"\xc4\x82", '\xc3'}, // A-breve
+		{"\xc3\x84", '\xc4'}, // A with diaeresis
+		{"\xc4\xb9", '\xc5'}, // L with acute
+		{"\xc4\x86", '\xc6'}, // C with acute
+		{"\xc3\x87", '\xc7'}, // C-cedilla
+		{"\xc4\x8c", '\xc8'}, // C with caron
+		{"\xc3\x89", '\xc9'}, // C with acute
+		{"\xc4\x98", '\xca'}, // E with ogonek
+		{"\xc3\x8b", '\xcb'}, // E with diaeresis
+		{"\xc3\x8b", '\xcc'}, // E with caron
+		{"\xc3\x8d", '\xcd'}, // I with acute
+		{"\xc3\x8e", '\xce'}, // I-circumflex
+		{"\xc4\x8e", '\xcf'}, // D with caron
+
+		{"\xc4\x90", '\xd0'}, // crossed D
+		{"\xc5\x83", '\xd1'}, // N with acute
+		{"\xc5\x87", '\xd2'}, // N with caron
+		{"\xc3\x93", '\xd3'}, // O with acute
+		{"\xc3\x94", '\xd4'}, // O-circumflex
+		{"\xc5\x90", '\xd5'}, // O with dobule accute
+		{"\xc3\x96", '\xd6'}, // O with diaeresis
+		{"\xc3\x97", '\xd7'}, // multiplication sign
+		{"\xc5\x98", '\xd8'}, // R with caron
+		{"\xc5\xae", '\xd9'}, // U with diacritic
+		{"\xc3\x9a", '\xda'}, // U with acute
+		{"\xc5\xb0", '\xdb'}, // U with double accent
+		{"\xc3\x9c", '\xdc'}, // U with diaeresis
+		{"\xc3\x9d", '\xdd'}, // Y with acute
+		{"\xc5\xa2", '\xdf'}, // T-cedilla
+
+		{"\xc5\x95", '\xe0'}, // r with acute
+		{"\xc3\xa1", '\xe1'}, // a with acute
+		{"\xc3\xa2", '\xe2'}, // a-circumflex
+		{"\xc4\x83", '\xe3'}, // a-breve
+		{"\xc3\xa4", '\xe4'}, // a with diaeresis
+		{"\xc4\xba", '\xe5'}, // l with acute
+		{"\xc4\x87", '\xe6'}, // c with acute
+		{"\xc3\xa7", '\xe7'}, // c-cedilla
+		{"\xc4\x8d", '\xe8'}, // c with caron
+		{"\xc3\xa9", '\xe9'}, // c with acute
+		{"\xc4\x99", '\xea'}, // e with ogonek
+		{"\xc3\xab", '\xeb'}, // e with diaeresis
+		{"\xc4\x9b", '\xec'}, // e with caron
+		{"\xc3\xad", '\xed'}, // i with acute
+		{"\xc3\xae", '\xee'}, // i-circumflex
+		{"\xc4\x8f", '\xef'}, // d with caron
+
+		{"\xc4\x91", '\xf0'}, // crossed d
+		{"\xc5\x84", '\xf1'}, // n with acute
+		{"\xc5\x88", '\xf2'}, // n with caron
+		{"\xc3\xb3", '\xf3'}, // o with acute
+		{"\xc3\xb4", '\xf4'}, // o-circumflex
+		{"\xc5\x91", '\xf5'}, // o with double accent
+		{"\xc3\xb6", '\xf6'}, // o with diaeresis
+		{"\xc3\xb7", '\xf7'}, // division sign
+		{"\xc5\x99", '\xf8'}, // r with caron
+		{"\xc5\xaf", '\xf9'}, // u with diacritic
+		{"\xc3\xba", '\xfa'}, // u with acute
+		{"\xc5\xb1", '\xfb'}, // u with double accent
+		{"\xc3\xbc", '\xfc'}, // u with diaeresis
+		{"\xc3\xbd", '\xfd'}, // y with acute
+		{"\xc5\xa3", '\xfe'}, // t-cedilla
+		{"\xcb\x99", '\xff'}, // diactric dot
+	};
+
+	inline std::string win1250ToUTF8(const std::string& strRef)
+	{
+		std::string result;
+		result.reserve(strRef.size() * 2);
+
+		for (auto&& c : strRef)
+		{
+			if (WINDOWS1250_UTF8_MAP.contains(static_cast<unsigned char>(c)))
+				result += WINDOWS1250_UTF8_MAP.at(static_cast<unsigned char>(c));
+			else
+				result += c;
+		}
+
+		return result;
+	}
+
+	inline std::string UTF8ToWin1250(const std::string& strRef)
+	{
+		std::string result;
+		result.reserve(strRef.size());
+		size_t charSize = 1;
+
+		for (size_t i = 0; i < strRef.length(); ++i)
+		{
+			if (const int c = static_cast<unsigned char>(strRef[i]); c >= 128)
+			{
+				if (c < 224)
+					charSize = 2;
+				else if (c < 240)
+					charSize = 3;
+				else if (c < 248)
+					charSize = 4;
+				else if (c == 252)
+					charSize = 5;
+				else
+					charSize = 6;
+			}
+			// Update loop index according to UTF8 charSize;
+			i += charSize - 1;
+
+			if (charSize == 1)
+			{
+				result += strRef[i];
+				continue;
+			}
+
+			if (i + charSize > strRef.length())
+			{
+				result += '?';
+				return result;
+			}
+
+			String utfChar;
+			utfChar.reserve(charSize);
+
+			for (size_t j = 0; j < charSize; ++j)
+			{
+				utfChar += strRef[i + j];
+			}
+
+			if (UTF8_WINDOWS1250_MAP.contains(utfChar))
+			{
+				result += static_cast<char>(UTF8_WINDOWS1250_MAP.at(utfChar));
+			}
+			else
+			{
+				result += '?';
+			}
+		}
+
+		return result;
+	}
+}
+#endif // NONUT_CORE_STRING_HELPERS_H