310 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			310 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /* eslint no-bitwise: "off", max-statements: "off", max-lines: "off" */
 | |
| 
 | |
| // Taken from: https://github.com/walling/unorm/blob/master/lib/unorm.js
 | |
| 
 | |
| /*
 | |
|  * UnicodeNormalizer 1.0.0
 | |
|  * Copyright (c) 2008 Matsuza
 | |
|  * Dual licensed under the MIT (MIT-LICENSE.txt) and
 | |
|  * GPL (GPL-LICENSE.txt) licenses.
 | |
|  * $Date: 2008-06-05 16:44:17 +0200 (Thu, 05 Jun 2008) $
 | |
|  * $Rev: 13309 $
 | |
|  */
 | |
| 
 | |
| "use strict";
 | |
| 
 | |
| var primitiveSet = require("../../../object/primitive-set")
 | |
|   , validValue   = require("../../../object/valid-value")
 | |
|   , data         = require("./_data");
 | |
| 
 | |
// Local shorthand and the set of normalization form names accepted by the export.
var floor = Math.floor;
var forms = primitiveSet("NFC", "NFD", "NFKC", "NFKD");

// Feature triple handed out when no data is available for a code point.
// Slot 0 is read by getDecomp, slot 1 by the class/flag accessors, slot 2 by getComposite.
var DEFAULT_FEATURE = [null, 0, {}];

// A counter bucket must exceed this many feature-bearing lookups before
// fromCache starts memoizing results for code points in that bucket.
var CACHE_THRESHOLD = 10;

// Constants for the arithmetic Hangul handling in fromRuleBasedJamo
// (names follow the Unicode Standard's conjoining jamo algorithm).
var SBase = 0xac00;
var LBase = 0x1100;
var VBase = 0x1161;
var TBase = 0x11a7;
var LCount = 19;
var VCount = 21;
var TCount = 28;
var NCount = VCount * TCount;
var SCount = LCount * NCount;

// Memoized UChar instances keyed by code point, and the per-bucket lookup counters.
var cache = {};
var cacheCounter = [];

// Forward declarations; each of these is assigned further down in the file.
var UChar
  , fromCache
  , fromData
  , fromCpOnly
  , fromRuleBasedJamo
  , fromCpFilter
  , strategies
  , UCharIterator
  , RecursDecompIterator
  , DecompIterator
  , CompIterator
  , createIterator
  , normalize;
 | |
| 
 | |
/**
 * A single code point paired with its (possibly not yet loaded) feature triple.
 *
 * @param {number} cp - Code point value.
 * @param {Array|null} feature - Feature triple, or a falsy value when the
 *   features are to be resolved later by `prepFeature`.
 */
UChar = function (cp, feature) {
	this.codepoint = cp;
	this.feature = feature;
};
 | |
| 
 | |
| // Strategies
 | |
// Zero the 256 counter buckets that fromCache indexes with (cp >> 8) & 0xff.
(function () {
	var bucket = 0;
	while (bucket <= 0xff) {
		cacheCounter[bucket] = 0;
		++bucket;
	}
})();
 | |
| 
 | |
/**
 * Strategy: serve a memoized UChar when one exists, otherwise delegate and
 * possibly memoize the delegate's answer.
 *
 * Only results that carry a feature are counted. A result is stored once the
 * counter for its bucket, (cp >> 8) & 0xff, has exceeded CACHE_THRESHOLD.
 *
 * @param {Function} nextStep - Next strategy, called as nextStep(cp, needFeature).
 * @param {number} cp - Code point to resolve.
 * @param {boolean} [needFeature] - Forwarded unchanged to nextStep.
 * @returns {UChar} The cached or freshly resolved character.
 */
fromCache = function (nextStep, cp, needFeature) {
	var hit = cache[cp], resolved, bucket;
	if (hit) return hit;

	resolved = nextStep(cp, needFeature);
	if (!resolved.feature) return resolved;

	bucket = (cp >> 8) & 0xff;
	cacheCounter[bucket] += 1;
	if (cacheCounter[bucket] > CACHE_THRESHOLD) cache[cp] = resolved;
	return resolved;
};
 | |
| 
 | |
/**
 * Terminal strategy: look the code point up in the data tables.
 *
 * `UChar.udata` is indexed first by `cp & 0xff00` and then by the code point
 * itself. A missing entry yields DEFAULT_FEATURE.
 *
 * @param {Function} next - Unused; this strategy never delegates.
 * @param {number} cp - Code point to resolve.
 * @returns {UChar} Character carrying the table entry or DEFAULT_FEATURE.
 */
fromData = function (next, cp) {
	var page = UChar.udata[cp & 0xff00], entry;
	if (page) entry = page[cp];
	return new UChar(cp, entry || DEFAULT_FEATURE);
};
 | |
/**
 * Strategy: skip the feature lookup entirely unless the caller asked for it.
 *
 * Without `needFeature` the returned UChar has a null feature, which
 * `prepFeature` fills in on first use.
 *
 * @param {Function} next - Next strategy, called as next(cp, needFeature).
 * @param {number} cp - Code point to resolve.
 * @param {boolean} [needFeature] - Truthy to resolve features right away.
 * @returns {UChar} Feature-less character, or whatever `next` returns.
 */
fromCpOnly = function (next, cp, needFeature) {
	if (needFeature) return next(cp, needFeature);
	return new UChar(cp, null);
};
 | |
| 
 | |
/**
 * Strategy: build the features of Hangul leading consonants and precomposed
 * Hangul syllables arithmetically instead of reading them from the data tables.
 *
 * Handled ranges (both half-open):
 *   [LBase, LBase + LCount)  leading consonants; only a composition map is produced
 *   [SBase, SBase + SCount)  precomposed syllables; a decomposition is produced,
 *                            plus a composition map for syllables without a trailing consonant
 * Every other code point is delegated to `next`.
 *
 * @param {Function} next - Next strategy, called as next(cp, needFeature).
 * @param {number} cp - Code point to resolve.
 * @param {boolean} [needFeature] - Forwarded unchanged to `next`.
 * @returns {UChar} Character with a synthesized feature, or whatever `next` returns.
 */
fromRuleBasedJamo = function (next, cp, needFeature) {
	var char, base, i, arr, SIndex, TIndex, feature, j;
	// SBase + SCount is the first code point AFTER the last syllable, so the upper
	// bound must be inclusive here; otherwise that code point would be given a
	// bogus decomposition.
	if (cp < LBase || (LBase + LCount <= cp && cp < SBase) || SBase + SCount <= cp) {
		return next(cp, needFeature);
	}
	if (LBase <= cp && cp < LBase + LCount) {
		// Leading consonant: map each vowel to the syllable this consonant forms with it.
		char = {};
		base = (cp - LBase) * VCount;
		for (i = 0; i < VCount; ++i) {
			char[VBase + i] = SBase + TCount * (i + base);
		}
		// Slots 0 and 1 stay empty: no decomposition, no class/flags.
		arr = new Array(3);
		arr[2] = char;
		return new UChar(cp, arr);
	}

	SIndex = cp - SBase;
	TIndex = SIndex % TCount;
	feature = [];
	if (TIndex === 0) {
		// No trailing consonant: decompose into leading consonant + vowel ...
		feature[0] = [LBase + floor(SIndex / NCount), VBase + floor((SIndex % NCount) / TCount)];
		// ... and allow composition with any trailing consonant.
		feature[2] = {};
		for (j = 1; j < TCount; ++j) {
			feature[2][TBase + j] = cp + j;
		}
	} else {
		// With trailing consonant: decompose into the trailing-less syllable + trailing consonant.
		feature[0] = [SBase + SIndex - TIndex, TBase + TIndex];
	}
	return new UChar(cp, feature);
};
 | |
| 
 | |
/**
 * Strategy: short-circuit code points below 60, or strictly between 13311 and
 * 42607, with DEFAULT_FEATURE so that no later strategy is consulted for them.
 * NOTE(review): presumably these ranges carry no normalization data; confirm
 * against the data tables.
 *
 * @param {Function} next - Next strategy, called as next(cp, needFeature).
 * @param {number} cp - Code point to resolve.
 * @param {boolean} [needFeature] - Forwarded unchanged to `next`.
 * @returns {UChar} Default-featured character, or whatever `next` returns.
 */
fromCpFilter = function (next, cp, needFeature) {
	if (cp < 60) return new UChar(cp, DEFAULT_FEATURE);
	if (cp > 13311 && cp < 42607) return new UChar(cp, DEFAULT_FEATURE);
	return next(cp, needFeature);
};
 | |
| 
 | |
// Lookup strategies in the order they are tried; each may answer itself or
// hand over to the one after it.
strategies = [fromCpFilter, fromCache, fromCpOnly, fromRuleBasedJamo, fromData];

/**
 * Resolve a code point to a UChar by running it through the strategy chain.
 * The chain is assembled back to front, so the last strategy receives `null`
 * as its `next`.
 *
 * @param {number} cp - Code point to resolve.
 * @param {boolean} [needFeature] - Truthy to force the feature to be resolved.
 * @returns {UChar}
 */
UChar.fromCharCode = (function () {
	var chain = null, index;
	var link = function (strategy, next) {
		return function (cp, needFeature) { return strategy(next, cp, needFeature); };
	};
	for (index = strategies.length - 1; index >= 0; --index) {
		chain = link(strategies[index], chain);
	}
	return chain;
})();
 | |
| 
 | |
/**
 * @param {number} cp - UTF-16 code unit.
 * @returns {boolean} True when cp lies in 0xD800..0xDBFF.
 */
UChar.isHighSurrogate = function (cp) {
	return 0xd800 <= cp && cp <= 0xdbff;
};

/**
 * @param {number} cp - UTF-16 code unit.
 * @returns {boolean} True when cp lies in 0xDC00..0xDFFF.
 */
UChar.isLowSurrogate = function (cp) {
	return 0xdc00 <= cp && cp <= 0xdfff;
};
 | |
| 
 | |
/**
 * Make sure `this.feature` is populated, resolving it through
 * `UChar.fromCharCode(codepoint, true)` the first time it is needed.
 */
UChar.prototype.prepFeature = function () {
	if (this.feature) return;
	this.feature = UChar.fromCharCode(this.codepoint, true).feature;
};
 | |
| 
 | |
/**
 * Render the code point as a JavaScript string.
 *
 * @returns {string} One UTF-16 code unit for code points below 0x10000,
 *   otherwise a high/low surrogate pair.
 */
UChar.prototype.toString = function () {
	var cp = this.codepoint, offset;
	if (cp < 0x10000) return String.fromCharCode(cp);
	offset = cp - 0x10000;
	return String.fromCharCode(0xd800 + floor(offset / 0x400), 0xdc00 + (offset % 0x400));
};
 | |
| 
 | |
/**
 * @returns {Array|null} Slot 0 of the feature triple (the decomposition
 *   code points), or null when that slot is falsy.
 */
UChar.prototype.getDecomp = function () {
	var mapping;
	this.prepFeature();
	mapping = this.feature[0];
	return mapping ? mapping : null;
};
 | |
| 
 | |
/**
 * Test bit 8 of feature slot 1.
 *
 * @returns {boolean|number} `false` when slot 1 is falsy, otherwise the masked
 *   value (0 or 256); intended to be used for its truthiness.
 */
UChar.prototype.isCompatibility = function () {
	var flags;
	this.prepFeature();
	flags = this.feature[1];
	if (!flags) return false;
	return flags & (1 << 8);
};

/**
 * Test bit 9 of feature slot 1.
 *
 * @returns {boolean|number} `false` when slot 1 is falsy, otherwise the masked
 *   value (0 or 512); intended to be used for its truthiness.
 */
UChar.prototype.isExclude = function () {
	var flags;
	this.prepFeature();
	flags = this.feature[1];
	if (!flags) return false;
	return flags & (1 << 9);
};
 | |
/**
 * @returns {number} The low 8 bits of feature slot 1, or 0 when that slot is falsy.
 */
UChar.prototype.getCanonicalClass = function () {
	var flags;
	this.prepFeature();
	flags = this.feature[1];
	if (!flags) return 0;
	return flags & 0xff;
};
 | |
/**
 * Look up the character this one forms together with `following`.
 *
 * @param {UChar} following - Character that comes after this one.
 * @returns {UChar|null} The composed character, or null when feature slot 2
 *   is missing or has no entry for `following.codepoint`.
 */
UChar.prototype.getComposite = function (following) {
	var table, target;
	this.prepFeature();
	table = this.feature[2];
	if (!table) return null;
	target = table[following.codepoint];
	if (!target) return null;
	return UChar.fromCharCode(target);
};
 | |
| 
 | |
/**
 * Iterator that walks a string code point by code point.
 *
 * @param {string} str - Text to iterate over.
 */
UCharIterator = function (str) {
	this.str = str;
	this.cursor = 0;
};

/**
 * @returns {UChar|null} The next code point, or null once the string is
 *   exhausted (the string reference is dropped at that point). A high
 *   surrogate directly followed by a low surrogate is merged into one code
 *   point; any other code unit is returned as is.
 */
UCharIterator.prototype.next = function () {
	var text = this.str, lead, trail;
	if (!text || this.cursor >= text.length) {
		this.str = null;
		return null;
	}

	lead = text.charCodeAt(this.cursor++);
	if (UChar.isHighSurrogate(lead) && this.cursor < text.length) {
		trail = text.charCodeAt(this.cursor);
		if (UChar.isLowSurrogate(trail)) {
			++this.cursor;
			return UChar.fromCharCode((lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000);
		}
	}
	return UChar.fromCharCode(lead);
};
 | |
| 
 | |
/**
 * Iterator that replaces every character from `it` by its full, recursively
 * expanded decomposition.
 *
 * @param {Object} it - Upstream iterator whose next() yields UChar or a falsy value.
 * @param {boolean} cano - When truthy, characters whose isCompatibility() is
 *   truthy are left undecomposed.
 */
RecursDecompIterator = function (it, cano) {
	this.it = it;
	this.canonical = cano;
	this.resBuf = [];
};

/**
 * @returns {UChar|null|undefined} Next decomposed character; null when the
 *   upstream iterator is exhausted.
 */
RecursDecompIterator.prototype.next = function () {
	var expand, head;

	// Flatten one character into the list of characters it ultimately decomposes to.
	expand = function (canonicalOnly, item) {
		var mapping = item.getDecomp(), pieces, expanded, index, k, count;
		if (!mapping || (canonicalOnly && item.isCompatibility())) return [item];

		pieces = [];
		for (index = 0; index < mapping.length; ++index) {
			expanded = expand(canonicalOnly, UChar.fromCharCode(mapping[index]));
			// Append element by element: Array#concat would return a new array
			// instead of extending `pieces` in place.
			for (k = 0, count = expanded.length; k < count; ++k) pieces.push(expanded[k]);
		}
		return pieces;
	};

	if (this.resBuf.length === 0) {
		head = this.it.next();
		if (!head) return null;
		this.resBuf = expand(this.canonical, head);
	}
	return this.resBuf.shift();
};
 | |
| 
 | |
/**
 * Iterator that reorders the characters coming from `it` by canonical class.
 *
 * @param {Object} it - Upstream iterator whose next() yields UChar or a falsy value.
 */
DecompIterator = function (it) {
	this.it = it;
	this.resBuf = [];
};

/**
 * When the buffer is empty, pull characters from upstream up to and including
 * the next one of class 0 (or until upstream ends). Each character with a
 * non-zero class is inserted behind the last buffered character whose class
 * is not greater than its own. Then hand out the buffer front to back.
 *
 * @returns {UChar|undefined} Next character, or undefined when nothing is left.
 */
DecompIterator.prototype.next = function () {
	var buffer = this.resBuf, current, currentClass, position;
	if (buffer.length > 0) return buffer.shift();

	for (;;) {
		current = this.it.next();
		if (!current) break;
		currentClass = current.getCanonicalClass();
		position = buffer.length;
		if (currentClass !== 0) {
			// Walk backwards past every buffered character of strictly greater class.
			while (position > 0 && buffer[position - 1].getCanonicalClass() > currentClass) {
				--position;
			}
		}
		buffer.splice(position, 0, current);
		if (currentClass === 0) break;
	}
	return buffer.shift();
};
 | |
| 
 | |
/**
 * Iterator that recombines characters coming from `it` into composites.
 *
 * @param {Object} it - Upstream iterator whose next() yields UChar or a falsy value.
 */
CompIterator = function (it) {
	this.it = it;
	this.procBuf = [];
	this.resBuf = [];
	this.lastClass = null;
};

/**
 * Characters are collected in `procBuf`, whose first element is the candidate
 * to compose with. The collected run is moved to `resBuf` when a class-0
 * character arrives that does not compose, or when upstream ends.
 *
 * @returns {UChar|undefined} Next output character, or undefined when nothing is left.
 */
CompIterator.prototype.next = function () {
	var incoming, merged, incomingClass;
	while (this.resBuf.length === 0) {
		incoming = this.it.next();
		if (!incoming) {
			// Upstream exhausted: release whatever is still pending.
			this.resBuf = this.procBuf;
			this.procBuf = [];
			break;
		}

		if (this.procBuf.length === 0) {
			this.lastClass = incoming.getCanonicalClass();
			this.procBuf.push(incoming);
			continue;
		}

		merged = this.procBuf[0].getComposite(incoming);
		incomingClass = incoming.getCanonicalClass();
		// Compose only when a composite exists and the previously kept character
		// has class 0 or a class lower than the incoming one.
		if (merged && (this.lastClass === 0 || this.lastClass < incomingClass)) {
			this.procBuf[0] = merged;
			continue;
		}

		if (incomingClass === 0) {
			// A class-0 character that did not compose starts a new run.
			this.resBuf = this.procBuf;
			this.procBuf = [];
		}
		this.lastClass = incomingClass;
		this.procBuf.push(incoming);
	}
	return this.resBuf.shift();
};
 | |
| 
 | |
/**
 * Assemble the iterator pipeline for a normalization form.
 *
 * @param {string} mode - One of "NFD", "NFKD", "NFC", "NFKC".
 * @param {string} str - Text to normalize.
 * @returns {Object} Iterator whose next() yields the normalized characters.
 * @throws {Error} When `mode` is none of the four forms.
 */
createIterator = function (mode, str) {
	var canonical, compose, pipeline;
	if (mode === "NFD" || mode === "NFC") canonical = true;
	else if (mode === "NFKD" || mode === "NFKC") canonical = false;
	else throw new Error(mode + " is invalid");

	compose = mode === "NFC" || mode === "NFKC";
	pipeline = new DecompIterator(new RecursDecompIterator(new UCharIterator(str), canonical));
	return compose ? new CompIterator(pipeline) : pipeline;
};
 | |
/**
 * Normalize `str` by draining the iterator pipeline for `mode`.
 *
 * @param {string} mode - Normalization form understood by createIterator.
 * @param {string} str - Text to normalize.
 * @returns {string} The normalized text.
 */
normalize = function (mode, str) {
	var source = createIterator(mode, str), output = "", item = source.next();
	while (item) {
		output += item.toString();
		item = source.next();
	}
	return output;
};
 | |
| 
 | |
/* Unicode data: the tables from ./_data, read by the fromData strategy */
UChar.udata = data;
 | |
| 
 | |
| module.exports = function (/* Form*/) {
 | |
| 	var str = String(validValue(this)), form = arguments[0];
 | |
| 	if (form === undefined) form = "NFC";
 | |
| 	else form = String(form);
 | |
| 	if (!forms[form]) throw new RangeError("Invalid normalization form: " + form);
 | |
| 	return normalize(form, str);
 | |
| };
 |