You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1209 lines
34 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*! https://mths.be/regenerate v1.4.1 by @mathias | MIT license */
;(function(root) {
// Detect the free variable `exports` (CommonJS-style environments). The value
// is `false` when `exports` is not an object.
var freeExports = typeof exports == 'object' && exports;
// Detect the free variable `module`. It is only kept when `module.exports`
// is loosely equal to the `exports` value detected above.
var freeModule = typeof module == 'object' && module &&
module.exports == freeExports && module;
// Detect free variable `global`, from Node.js/io.js or Browserified code,
// and use it as `root`.
var freeGlobal = typeof global == 'object' && global;
// Only replace `root` when `freeGlobal` refers to itself through a `global`
// or `window` property. When `freeGlobal` is `false`, both property lookups
// yield `undefined`, so the condition is simply not met.
if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
root = freeGlobal;
}
/*--------------------------------------------------------------------------*/
// Messages for the errors thrown by the range helpers: `rangeOrder` is used
// when a range's end precedes its start, `codePointRange` when a value is
// outside U+0000..U+10FFFF.
var ERRORS = {
'rangeOrder': 'A range\u2019s `stop` value must be greater than or equal ' +
'to the `start` value.',
'codePointRange': 'Invalid code point value. Code points range from ' +
'U+000000 to U+10FFFF.'
};
// Inclusive bounds of the UTF-16 high and low surrogate code unit ranges.
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs
var HIGH_SURROGATE_MIN = 0xD800;
var HIGH_SURROGATE_MAX = 0xDBFF;
var LOW_SURROGATE_MIN = 0xDC00;
var LOW_SURROGATE_MAX = 0xDFFF;
// In Regenerate output, `\0` is never preceded by `\` because we sort by
// code point value, so let's keep this regular expression simple.
// It matches a literal `\x00` escape followed by either a non-digit character
// (captured, so it can be re-emitted) or the end of the string.
var regexNull = /\\x00([^0123456789]|$)/g;
// Plain object used only to grab references to inherited `Object.prototype`
// methods.
var object = {};
var hasOwnProperty = object.hasOwnProperty;
// Copies every own enumerable property of `source` onto `destination` and
// returns `destination`. Properties inherited by `source` are skipped.
var extend = function(destination, source) {
  for (var name in source) {
    if (!hasOwnProperty.call(source, name)) {
      continue;
    }
    destination[name] = source[name];
  }
  return destination;
};
// Calls `callback(item, index)` for each element of `array`, in order. The
// length is read once up front, so elements appended during iteration are not
// visited.
var forEach = function(array, callback) {
  for (var position = 0, count = array.length; position < count; position++) {
    callback(array[position], position);
  }
};
// Reference to the `toString` method inherited by the plain `object` above;
// used via `toString.call(value)` to obtain a value's `[object Type]` tag.
var toString = object.toString;
// Returns `true` when the `[object Type]` tag of `value` is
// `[object Array]`.
var isArray = function(value) {
  var tag = toString.call(value);
  return tag == '[object Array]';
};
// Returns `true` for number primitives and for values whose `[object Type]`
// tag is `[object Number]` (e.g. `Number` wrapper objects).
var isNumber = function(value) {
  if (typeof value == 'number') {
    return true;
  }
  return toString.call(value) == '[object Number]';
};
// Left-pads the string form of `number` with zeroes up to `totalCharacters`.
// Values that are already long enough are returned unchanged. At most four
// zeroes are available, which is enough for the 2- and 4-digit escapes that
// this file generates.
var zeroes = '0000';
var pad = function(number, totalCharacters) {
  var text = String(number);
  if (!(text.length < totalCharacters)) {
    return text;
  }
  var padded = zeroes + text;
  return padded.slice(-totalCharacters);
};
// Converts `number` to its uppercase hexadecimal string representation.
var hex = function(number) {
  var hexadecimal = Number(number).toString(16);
  return hexadecimal.toUpperCase();
};
// Reference to the array `slice` method; used as `slice.call(arguments)` to
// copy an `arguments` object into a real array.
var slice = [].slice;
/*--------------------------------------------------------------------------*/
// Converts a list of code points into the flat `[start, end, start, end, …]`
// representation used throughout this file, where each `end` is exclusive.
// Runs of consecutive values (each one exactly `previous + 1`) are collapsed
// into a single pair. The input is processed in the order given; no sorting
// or de-duplication is performed.
var dataFromCodePoints = function(codePoints) {
  var count = codePoints.length;
  var ranges = [];
  if (!count) {
    return ranges;
  }
  // Open the first range.
  var last = codePoints[0];
  ranges.push(last);
  for (var position = 1; position < count; position++) {
    var current = codePoints[position];
    if (!(current == last + 1)) {
      // Close the running range and open a new one at `current`.
      ranges.push(last + 1, current);
    }
    last = current;
  }
  // Close the final range.
  ranges.push(last + 1);
  return ranges;
};
// Removes a single `codePoint` from `data` (flat `[start, end)` pairs),
// mutating and returning `data`. The first pair containing the code point is
// dropped, trimmed at one side, or split in two; if no pair contains it,
// `data` is returned untouched.
var dataRemove = function(data, codePoint) {
  for (var position = 0, total = data.length; position < total; position += 2) {
    var lower = data[position];
    var upper = data[position + 1];
    if (!(codePoint >= lower && codePoint < upper)) {
      continue;
    }
    if (codePoint == lower) {
      if (upper == lower + 1) {
        // The pair only held this code point: drop it entirely.
        data.splice(position, 2);
      } else {
        // Trim the front of the pair.
        data[position] = codePoint + 1;
      }
    } else if (codePoint == upper - 1) {
      // Trim the back of the pair.
      data[position + 1] = codePoint;
    } else {
      // The code point is strictly inside: split the pair in two.
      data.splice(position, 2, lower, codePoint, codePoint + 1, upper);
    }
    return data;
  }
  return data;
};
// Removes the inclusive range `rangeStart`..`rangeEnd` from `data` (flat
// `[start, end)` pairs), mutating and returning `data`.
// Throws an `Error` with the `rangeOrder` message when `rangeEnd` is smaller
// than `rangeStart`.
var dataRemoveRange = function(data, rangeStart, rangeEnd) {
if (rangeEnd < rangeStart) {
throw Error(ERRORS.rangeOrder);
}
// Iterate over the data per `(start, end)` pair.
var index = 0;
var start;
var end;
// `data.length` is re-read on every iteration because pairs may be spliced
// out while looping.
while (index < data.length) {
start = data[index];
end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
// Exit as soon as no more matching pairs can be found.
if (start > rangeEnd) {
return data;
}
// Check if this range pair is equal to, or forms a subset of, the range
// to be removed.
// E.g. we have `[0, 11, 40, 51]` and want to remove 0-10 → `[40, 51]`.
// E.g. we have `[40, 51]` and want to remove 0-100 → `[]`.
if (rangeStart <= start && rangeEnd >= end) {
// Remove this pair. `index` is deliberately not advanced: the next pair
// has shifted into the current position.
data.splice(index, 2);
continue;
}
// Check if both `rangeStart` and `rangeEnd` are within the bounds of
// this pair.
// E.g. we have `[0, 11]` and want to remove 4-6 → `[0, 4, 7, 11]`.
if (rangeStart >= start && rangeEnd < end) {
if (rangeStart == start) {
// Replace `[start, end]` with `[startB, endB]`.
data[index] = rangeEnd + 1;
data[index + 1] = end + 1;
return data;
}
// Replace `[start, end]` with `[startA, endA, startB, endB]`.
data.splice(index, 2, start, rangeStart, rangeEnd + 1, end + 1);
return data;
}
// Check if only `rangeStart` is within the bounds of this pair.
// E.g. we have `[0, 11]` and want to remove 4-20 → `[0, 4]`.
if (rangeStart >= start && rangeStart <= end) {
// Replace `end` with `rangeStart`.
data[index + 1] = rangeStart;
// Note: we cannot `return` just yet, in case any following pairs still
// contain matching code points.
// E.g. we have `[0, 11, 14, 31]` and want to remove 4-20
// → `[0, 4, 21, 31]`.
}
// Check if only `rangeEnd` is within the bounds of this pair.
// E.g. we have `[14, 31]` and want to remove 4-20 → `[21, 31]`.
else if (rangeEnd >= start && rangeEnd <= end) {
// Just replace `start`.
data[index] = rangeEnd + 1;
return data;
}
index += 2;
}
return data;
};
// Adds a single `codePoint` to `data` (flat, sorted `[start, end)` pairs),
// mutating and returning `data`. Adjacent pairs are extended or merged rather
// than creating a new pair.
// Throws a `RangeError` when `codePoint` is below 0x0 or above 0x10FFFF.
var dataAdd = function(data, codePoint) {
  var total = data.length;
  if (codePoint < 0x0 || codePoint > 0x10FFFF) {
    throw RangeError(ERRORS.codePointRange);
  }
  for (var position = 0; position < total; position += 2) {
    var lower = data[position];
    var upper = data[position + 1];
    // Already part of the set.
    if (codePoint >= lower && codePoint < upper) {
      return data;
    }
    // Directly in front of this pair: grow the pair downwards.
    if (codePoint == lower - 1) {
      data[position] = codePoint;
      return data;
    }
    // This pair lies entirely after the code point, so a new single-value
    // pair belongs right before it.
    if (lower > codePoint) {
      data.splice(position, 0, codePoint, codePoint + 1);
      return data;
    }
    // Directly behind this pair.
    if (codePoint == upper) {
      if (codePoint + 1 == data[position + 2]) {
        // The code point bridges this pair and the next one, e.g.
        // `dataAdd([0, 4, 5, 10], 4)` → `[0, 10]`.
        data.splice(position, 4, lower, data[position + 3]);
      } else {
        // Grow the pair upwards.
        data[position + 1] = codePoint + 1;
      }
      return data;
    }
  }
  // No pair was adjacent to or after the code point: append a new pair.
  data.push(codePoint, codePoint + 1);
  return data;
};
// Returns the union of two data sets. `dataA` is copied first, then every
// `[start, end)` pair of `dataB` is added to the copy: single code points via
// `dataAdd`, longer ranges via `dataAddRange` (with an inclusive end).
var dataAddData = function(dataA, dataB) {
  var merged = dataA.slice();
  for (var position = 0, total = dataB.length; position < total; position += 2) {
    var first = dataB[position];
    var last = dataB[position + 1] - 1; // Inclusive end of the pair.
    merged = first == last
      ? dataAdd(merged, first)
      : dataAddRange(merged, first, last);
  }
  return merged;
};
// Returns a copy of `dataA` with every `[start, end)` pair of `dataB`
// removed: single code points via `dataRemove`, longer ranges via
// `dataRemoveRange` (with an inclusive end).
var dataRemoveData = function(dataA, dataB) {
  var remaining = dataA.slice();
  for (var position = 0, total = dataB.length; position < total; position += 2) {
    var first = dataB[position];
    var last = dataB[position + 1] - 1; // Inclusive end of the pair.
    remaining = first == last
      ? dataRemove(remaining, first)
      : dataRemoveRange(remaining, first, last);
  }
  return remaining;
};
// Adds the inclusive range `rangeStart`..`rangeEnd` to `data` (flat, sorted
// `[start, end)` pairs), mutating and returning `data`. Overlapping and
// adjacent pairs are merged so the result stays sorted and non-overlapping.
// Throws an `Error` (`rangeOrder`) when `rangeEnd < rangeStart`, and a
// `RangeError` (`codePointRange`) when either bound is outside
// 0x0..0x10FFFF.
var dataAddRange = function(data, rangeStart, rangeEnd) {
  if (rangeEnd < rangeStart) {
    throw Error(ERRORS.rangeOrder);
  }
  if (
    rangeStart < 0x0 || rangeStart > 0x10FFFF ||
    rangeEnd < 0x0 || rangeEnd > 0x10FFFF
  ) {
    throw RangeError(ERRORS.codePointRange);
  }
  // Iterate over the data per `(start, end)` pair.
  var index = 0;
  var start;
  var end;
  var added = false;
  // `data.length` is re-read on every iteration because pairs may be spliced
  // out while looping.
  while (index < data.length) {
    start = data[index];
    end = data[index + 1];
    if (added) {
      // The range has already been added to the set; at this point, we just
      // need to get rid of the following ranges in case they overlap.
      // Check if this range can be combined with the previous range.
      if (start == rangeEnd + 1) {
        data.splice(index - 1, 2);
        return data;
      }
      // Exit as soon as no more possibly overlapping pairs can be found.
      if (start > rangeEnd) {
        return data;
      }
      // E.g. `[0, 11, 12, 16]` and we've added 5-15, so we now have
      // `[0, 16, 12, 16]`. Remove the `12,16` part, as it lies within the
      // `0,16` range that was previously added.
      if (start >= rangeStart && start <= rangeEnd) {
        // `start` lies within the range that was previously added.
        if (end > rangeStart && end - 1 <= rangeEnd) {
          // `end` lies within the range that was previously added as well,
          // so remove this pair.
          data.splice(index, 2);
          index -= 2;
          // Note: we cannot `return` just yet, as there may still be other
          // overlapping pairs.
        } else {
          // `start` lies within the range that was previously added, but
          // `end` doesn't. E.g. `[0, 11, 12, 31]` and we've added 5-15, so
          // now we have `[0, 16, 12, 31]`. This must be written as `[0, 31]`.
          // Remove the previously added `end` and the current `start`.
          data.splice(index - 1, 2);
          index -= 2;
        }
        // Note: we cannot return yet.
      }
    }
    else if (start == rangeEnd + 1 || start == rangeEnd) {
      // The new range ends right before, or exactly on, the start of this
      // pair: extend the pair downwards.
      data[index] = rangeStart;
      return data;
    }
    // Check if a new pair must be inserted *before* the current one.
    else if (start > rangeEnd) {
      data.splice(index, 0, rangeStart, rangeEnd + 1);
      return data;
    }
    else if (rangeStart >= start && rangeStart < end && rangeEnd + 1 <= end) {
      // The new range lies entirely within an existing range pair. No action
      // needed.
      return data;
    }
    else if (rangeStart < start && rangeEnd >= start && rangeEnd + 1 < end) {
      // The new range starts before this pair and ends strictly inside it,
      // e.g. `[5, 10]` and you add 3-7 → `[3, 10]`. Earlier pairs cannot
      // overlap or touch the new range here (that would have set `added`),
      // so only this pair's start needs to move.
      data[index] = rangeStart;
      return data;
    }
    else if (
      // E.g. `[0, 11]` and you add 5-15 → `[0, 16]`.
      (rangeStart >= start && rangeStart < end) ||
      // E.g. `[0, 3]` and you add 3-6 → `[0, 7]`.
      end == rangeStart
    ) {
      // Replace `end` with the new value.
      data[index + 1] = rangeEnd + 1;
      // Make sure the next range pair doesn't overlap, e.g. `[0, 11, 12, 14]`
      // and you add 5-15 → `[0, 16]`, i.e. remove the `12,14` part.
      added = true;
      // Note: we cannot `return` just yet.
    }
    else if (rangeStart <= start && rangeEnd + 1 >= end) {
      // The new range is a superset of the old range.
      data[index] = rangeStart;
      data[index + 1] = rangeEnd + 1;
      added = true;
    }
    index += 2;
  }
  // The loop has finished without doing anything; add the new pair to the end
  // of the data set.
  if (!added) {
    data.push(rangeStart, rangeEnd + 1);
  }
  return data;
};
// Returns `true` when `codePoint` falls inside one of the `[start, end)`
// pairs of `data`, `false` otherwise.
var dataContains = function(data, codePoint) {
  var total = data.length;
  // Exit early if `codePoint` is not within `data`'s overall range.
  if (total >= 2 && (codePoint < data[0] || codePoint > data[total - 1])) {
    return false;
  }
  for (var position = 0; position < total; position += 2) {
    if (codePoint >= data[position] && codePoint < data[position + 1]) {
      return true;
    }
  }
  return false;
};
// Returns a new data set holding the code points from `codePoints` that are
// also contained in `data`.
// `dataFromCodePoints` builds ranges from its input in the order given, so
// the matches are sorted numerically and de-duplicated first; otherwise an
// unsorted or repeated input such as `[3, 1, 3]` would yield unsorted,
// overlapping pairs.
var dataIntersection = function(data, codePoints) {
  var index = 0;
  var length = codePoints.length;
  var codePoint;
  var matches = [];
  while (index < length) {
    codePoint = codePoints[index];
    if (dataContains(data, codePoint)) {
      matches.push(codePoint);
    }
    ++index;
  }
  // `matches` is a fresh array, so sorting it does not touch the caller's
  // `codePoints`.
  matches.sort(function(a, b) {
    return a - b;
  });
  var unique = [];
  index = 0;
  length = matches.length;
  while (index < length) {
    codePoint = matches[index];
    if (index == 0 || codePoint != matches[index - 1]) {
      unique.push(codePoint);
    }
    ++index;
  }
  return dataFromCodePoints(unique);
};
// Returns `true` when `data` holds no range pairs.
var dataIsEmpty = function(data) {
  return data.length ? false : true;
};
// Returns `true` when `data` consists of exactly one `[start, end)` pair that
// covers a single code point.
var dataIsSingleton = function(data) {
  if (data.length != 2) {
    return false;
  }
  var first = data[0];
  return first + 1 == data[1];
};
// Expands `data` (flat `[start, end)` pairs) into an array listing every
// individual code point, in the order the pairs appear.
var dataToArray = function(data) {
  var codePoints = [];
  for (var position = 0, total = data.length; position < total; position += 2) {
    var upper = data[position + 1];
    for (var value = data[position]; value < upper; ++value) {
      codePoints.push(value);
    }
  }
  return codePoints;
};
/*--------------------------------------------------------------------------*/
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
var floor = Math.floor;
// Returns the high (leading) surrogate code unit for `codePoint`, using the
// formula from the notes linked above.
var highSurrogate = function(codePoint) {
  var offset = floor((codePoint - 0x10000) / 0x400);
  return parseInt(offset + HIGH_SURROGATE_MIN, 10);
};
// Returns the low (trailing) surrogate code unit for `codePoint`: the offset
// from 0x10000 modulo 0x400, added to the start of the low surrogate range.
var lowSurrogate = function(codePoint) {
  var offset = (codePoint - 0x10000) % 0x400;
  return parseInt(offset + LOW_SURROGATE_MIN, 10);
};
var stringFromCharCode = String.fromCharCode;
// Returns the regular-expression source text for a single BMP code point:
// a short escape, a backslash-escaped character, the character itself, or a
// `\xHH` / `\uHHHH` escape.
//
// There's no need to account for astral symbols / surrogate pairs here,
// since `codePointToString` is private and only used for BMP code points.
// But if that's what you need, the surrogate-pair form would be:
//
// '\\u' + pad(hex(highSurrogate(codePoint)), 4)
// + '\\u' + pad(hex(lowSurrogate(codePoint)), 4);
var codePointToString = function(codePoint) {
  // https://mathiasbynens.be/notes/javascript-escapes#single
  // Note: the `\b` escape sequence for U+0008 BACKSPACE in strings has a
  // different meaning in regular expressions (word boundary), so it cannot
  // be used here.
  if (codePoint == 0x09) {
    return '\\t';
  }
  // Note: IE < 9 treats `'\v'` as `'v'`, so U+000B gets no `\v` short escape;
  // it falls through to the `\xHH` form below.
  if (codePoint == 0x0A) {
    return '\\n';
  }
  if (codePoint == 0x0C) {
    return '\\f';
  }
  if (codePoint == 0x0D) {
    return '\\r';
  }
  if (codePoint == 0x2D) {
    // https://mathiasbynens.be/notes/javascript-escapes#hexadecimal
    // Note: `-` (U+002D HYPHEN-MINUS) is escaped in this way rather
    // than by backslash-escaping, in case the output is used outside
    // of a character class in a `u` RegExp. /\-/u throws, but
    // /\x2D/u is fine.
    return '\\x2D';
  }
  if (codePoint == 0x5C) {
    return '\\\\';
  }
  var needsBackslash =
    codePoint == 0x24 ||
    (codePoint >= 0x28 && codePoint <= 0x2B) ||
    codePoint == 0x2E || codePoint == 0x2F ||
    codePoint == 0x3F ||
    (codePoint >= 0x5B && codePoint <= 0x5E) ||
    (codePoint >= 0x7B && codePoint <= 0x7D);
  if (needsBackslash) {
    // The code point maps to an unsafe printable ASCII character;
    // backslash-escape it. Here's the list of those symbols:
    //
    // $()*+./?[\]^{|}
    //
    // This matches SyntaxCharacters as well as `/` (U+002F SOLIDUS).
    // https://tc39.github.io/ecma262/#prod-SyntaxCharacter
    return '\\' + stringFromCharCode(codePoint);
  }
  if (codePoint >= 0x20 && codePoint <= 0x7E) {
    // Any remaining printable ASCII symbol (including the space character)
    // can safely be used directly. `-`, `\` and the symbols listed above
    // never reach this point, since they were handled earlier.
    return stringFromCharCode(codePoint);
  }
  if (codePoint <= 0xFF) {
    return '\\x' + pad(hex(codePoint), 2);
  }
  // `codePoint <= 0xFFFF` holds true.
  // https://mathiasbynens.be/notes/javascript-escapes#unicode
  return '\\u' + pad(hex(codePoint), 4);
};
// Returns the pattern text for `codePoint` when targeting a `u`-flag RegExp:
// BMP code points are delegated to `codePointToString`, anything above
// U+FFFF becomes a `\u{…}` code point escape with uppercase hex digits.
var codePointToStringUnicode = function(codePoint) {
  return codePoint <= 0xFFFF
    ? codePointToString(codePoint)
    : '\\u{' + codePoint.toString(16).toUpperCase() + '}';
};
// Returns the code point at the start of the string `symbol`. When the first
// code unit is a high surrogate and a second code unit exists, the two are
// combined into an astral code point; otherwise the first code unit is
// returned as-is.
var symbolToCodePoint = function(symbol) {
  var unitCount = symbol.length;
  var lead = symbol.charCodeAt(0);
  var startsPair =
    lead >= HIGH_SURROGATE_MIN && lead <= HIGH_SURROGATE_MAX &&
    unitCount > 1; // There is a next code unit.
  if (!startsPair) {
    return lead;
  }
  // `lead` is a high surrogate, and there is a next character. Assume
  // it's a low surrogate (else it's invalid usage of Regenerate anyway).
  var trail = symbol.charCodeAt(1);
  // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  return (lead - HIGH_SURROGATE_MIN) * 0x400 +
    trail - LOW_SURROGATE_MIN + 0x10000;
};
// Builds regular-expression source text for `data` (flat `[start, end)`
// pairs), escaping every code point with `codePointToString`. A set holding
// exactly one code point is returned as a bare escape; anything else is
// wrapped in a `[...]` character class. Two-element ranges are written out
// as two characters rather than as `a-b`.
var createBMPCharacterClasses = function(data) {
  if (dataIsSingleton(data)) {
    return codePointToString(data[0]);
  }
  var body = '';
  for (var position = 0, total = data.length; position < total; position += 2) {
    var first = data[position];
    var last = data[position + 1] - 1; // Note: the `- 1` makes `last` inclusive.
    body += codePointToString(first);
    if (first == last) {
      continue;
    }
    if (!(first + 1 == last)) {
      body += '-';
    }
    body += codePointToString(last);
  }
  return '[' + body + ']';
};
// Same as `createBMPCharacterClasses`, but escapes code points with
// `codePointToStringUnicode`, so astral code points can appear directly
// inside the character class.
var createUnicodeCharacterClasses = function(data) {
  if (dataIsSingleton(data)) {
    return codePointToStringUnicode(data[0]);
  }
  var body = '';
  for (var position = 0, total = data.length; position < total; position += 2) {
    var first = data[position];
    var last = data[position + 1] - 1; // Note: the `- 1` makes `last` inclusive.
    body += codePointToStringUnicode(first);
    if (first == last) {
      continue;
    }
    if (!(first + 1 == last)) {
      body += '-';
    }
    body += codePointToStringUnicode(last);
  }
  return '[' + body + ']';
};
// Splits `data` (flat `[start, end)` pairs) into four separate data sets and
// returns them in an object:
// - `loneHighSurrogates`: code points in the high surrogate range,
// - `loneLowSurrogates`: code points in the low surrogate range,
// - `bmp`: all other code points up to U+FFFF,
// - `astral`: code points above U+FFFF.
// A pair that crosses one or more of these boundaries is cut at each
// boundary, and every piece is pushed onto the matching set.
var splitAtBMP = function(data) {
// Iterate over the data per `(start, end)` pair.
var loneHighSurrogates = [];
var loneLowSurrogates = [];
var bmp = [];
var astral = [];
var index = 0;
var start;
var end;
var length = data.length;
while (index < length) {
start = data[index];
end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
// The outer branches classify where the pair starts; the inner `if`s
// (mutually exclusive within each branch) classify where it ends.
if (start < HIGH_SURROGATE_MIN) {
// The range starts and ends before the high surrogate range.
// E.g. (0, 0x10).
if (end < HIGH_SURROGATE_MIN) {
bmp.push(start, end + 1);
}
// The range starts before the high surrogate range and ends within it.
// E.g. (0, 0xD855).
if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) {
bmp.push(start, HIGH_SURROGATE_MIN);
loneHighSurrogates.push(HIGH_SURROGATE_MIN, end + 1);
}
// The range starts before the high surrogate range and ends in the low
// surrogate range. E.g. (0, 0xDCFF).
if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
bmp.push(start, HIGH_SURROGATE_MIN);
loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1);
}
// The range starts before the high surrogate range and ends after the
// low surrogate range. E.g. (0, 0x10FFFF).
if (end > LOW_SURROGATE_MAX) {
bmp.push(start, HIGH_SURROGATE_MIN);
loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
if (end <= 0xFFFF) {
bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
} else {
bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
astral.push(0xFFFF + 1, end + 1);
}
}
} else if (start >= HIGH_SURROGATE_MIN && start <= HIGH_SURROGATE_MAX) {
// The range starts and ends in the high surrogate range.
// E.g. (0xD855, 0xD866).
if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) {
loneHighSurrogates.push(start, end + 1);
}
// The range starts in the high surrogate range and ends in the low
// surrogate range. E.g. (0xD855, 0xDCFF).
if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1);
loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1);
}
// The range starts in the high surrogate range and ends after the low
// surrogate range. E.g. (0xD855, 0x10FFFF).
if (end > LOW_SURROGATE_MAX) {
loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1);
loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
if (end <= 0xFFFF) {
bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
} else {
bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
astral.push(0xFFFF + 1, end + 1);
}
}
} else if (start >= LOW_SURROGATE_MIN && start <= LOW_SURROGATE_MAX) {
// The range starts and ends in the low surrogate range.
// E.g. (0xDCFF, 0xDDFF).
if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
loneLowSurrogates.push(start, end + 1);
}
// The range starts in the low surrogate range and ends after the low
// surrogate range. E.g. (0xDCFF, 0x10FFFF).
if (end > LOW_SURROGATE_MAX) {
loneLowSurrogates.push(start, LOW_SURROGATE_MAX + 1);
if (end <= 0xFFFF) {
bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
} else {
bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
astral.push(0xFFFF + 1, end + 1);
}
}
} else if (start > LOW_SURROGATE_MAX && start <= 0xFFFF) {
// The range starts after the low surrogate range but still within the
// BMP; it may end in the BMP or in the astral range.
// E.g. (0xFFAA, 0x10FFFF).
if (end <= 0xFFFF) {
bmp.push(start, end + 1);
} else {
bmp.push(start, 0xFFFF + 1);
astral.push(0xFFFF + 1, end + 1);
}
} else {
// The range starts and ends in the astral range.
astral.push(start, end + 1);
}
index += 2;
}
return {
'loneHighSurrogates': loneHighSurrogates,
'loneLowSurrogates': loneLowSurrogates,
'bmp': bmp,
'astral': astral
};
};
// Takes a list of `[highSurrogates, lowSurrogates]` mappings and merges
// consecutive entries whose high surrogate pair (`[0]` and `[1]`) is
// identical, by adding the later entry's low surrogates to the earlier
// entry's low surrogate set. The merged list is then passed through
// `optimizeByLowSurrogates` and that result is returned.
var optimizeSurrogateMappings = function(surrogateMappings) {
var result = [];
var tmpLow = [];
var addLow = false;
var mapping;
var nextMapping;
var highSurrogates;
var lowSurrogates;
var nextHighSurrogates;
var nextLowSurrogates;
var index = -1;
var length = surrogateMappings.length;
while (++index < length) {
mapping = surrogateMappings[index];
nextMapping = surrogateMappings[index + 1];
// The last mapping has nothing after it to merge with.
if (!nextMapping) {
result.push(mapping);
continue;
}
highSurrogates = mapping[0];
lowSurrogates = mapping[1];
nextHighSurrogates = nextMapping[0];
nextLowSurrogates = nextMapping[1];
// Check for identical high surrogate ranges.
// Note: `tmpLow` references the same array as `lowSurrogates`, and
// `dataAdd`/`dataAddRange` modify their first argument in place.
tmpLow = lowSurrogates;
while (
nextHighSurrogates &&
highSurrogates[0] == nextHighSurrogates[0] &&
highSurrogates[1] == nextHighSurrogates[1]
) {
// Merge with the next item.
if (dataIsSingleton(nextLowSurrogates)) {
tmpLow = dataAdd(tmpLow, nextLowSurrogates[0]);
} else {
tmpLow = dataAddRange(
tmpLow,
nextLowSurrogates[0],
nextLowSurrogates[1] - 1
);
}
// Advance past the item that was just merged, then look at the one after
// it.
++index;
mapping = surrogateMappings[index];
highSurrogates = mapping[0];
lowSurrogates = mapping[1];
nextMapping = surrogateMappings[index + 1];
nextHighSurrogates = nextMapping && nextMapping[0];
nextLowSurrogates = nextMapping && nextMapping[1];
addLow = true;
}
result.push([
highSurrogates,
addLow ? tmpLow : lowSurrogates
]);
addLow = false;
}
return optimizeByLowSurrogates(result);
};
// Merges surrogate mappings that share the same low surrogate range by
// combining their high surrogates into one set, so the shared low range is
// emitted only once. `surrogateMappings` is modified in place and returned.
//
// Two mappings are merged only when BOTH low surrogate sets consist of a
// single `[start, end)` pair and those pairs are equal. Comparing just the
// first pair of a multi-range low set would merge mappings whose low sets
// differ, and the combined pattern would then match code points that were
// never added to the set.
var optimizeByLowSurrogates = function(surrogateMappings) {
  if (surrogateMappings.length == 1) {
    return surrogateMappings;
  }
  var index = -1;
  var innerIndex = -1;
  while (++index < surrogateMappings.length) {
    var mapping = surrogateMappings[index];
    var lowSurrogates = mapping[1];
    var lowSurrogateStart = lowSurrogates[0];
    var lowSurrogateEnd = lowSurrogates[1];
    innerIndex = index; // Note: the loop starts at the next index.
    while (++innerIndex < surrogateMappings.length) {
      var otherMapping = surrogateMappings[innerIndex];
      var otherLowSurrogates = otherMapping[1];
      var otherLowSurrogateStart = otherLowSurrogates[0];
      var otherLowSurrogateEnd = otherLowSurrogates[1];
      if (
        lowSurrogateStart == otherLowSurrogateStart &&
        lowSurrogateEnd == otherLowSurrogateEnd &&
        lowSurrogates.length === 2 &&
        otherLowSurrogates.length === 2
      ) {
        // Add the code points in the other item to this one.
        if (dataIsSingleton(otherMapping[0])) {
          mapping[0] = dataAdd(mapping[0], otherMapping[0][0]);
        } else {
          mapping[0] = dataAddRange(
            mapping[0],
            otherMapping[0][0],
            otherMapping[0][1] - 1
          );
        }
        // Remove the other, now redundant, item. Step `innerIndex` back so
        // the element that shifted into its place is examined next.
        surrogateMappings.splice(innerIndex, 1);
        --innerIndex;
      }
    }
  }
  return surrogateMappings;
};
// Converts `data` (flat `[start, end)` pairs) into a list of
// `[highSurrogates, lowSurrogates]` mappings, where each element is itself a
// `[start, end)` pair of surrogate code units. Each input range yields up to
// three mappings (first partial high surrogate, the full ones in between, and
// the last partial one). The list is then condensed by
// `optimizeSurrogateMappings`.
// The only caller in this file passes the `astral` part produced by
// `splitAtBMP`, i.e. code points above U+FFFF.
var surrogateSet = function(data) {
// Exit early if `data` is an empty set.
if (!data.length) {
return [];
}
// Iterate over the data per `(start, end)` pair.
var index = 0;
var start;
var end;
var startHigh;
var startLow;
var endHigh;
var endLow;
var surrogateMappings = [];
var length = data.length;
while (index < length) {
start = data[index];
end = data[index + 1] - 1;
startHigh = highSurrogate(start);
startLow = lowSurrogate(start);
endHigh = highSurrogate(end);
endLow = lowSurrogate(end);
var startsWithLowestLowSurrogate = startLow == LOW_SURROGATE_MIN;
var endsWithHighestLowSurrogate = endLow == LOW_SURROGATE_MAX;
// Set to `true` once the whole range has been covered, so the remaining
// steps are skipped.
var complete = false;
// Append the previous high-surrogate-to-low-surrogate mappings.
// Step 1: `(startHigh, startLow)` to `(startHigh, LOW_SURROGATE_MAX)`.
// If the range shares one high surrogate, or spans only complete low
// surrogate ranges, a single mapping covers it entirely.
if (
startHigh == endHigh ||
startsWithLowestLowSurrogate && endsWithHighestLowSurrogate
) {
surrogateMappings.push([
[startHigh, endHigh + 1],
[startLow, endLow + 1]
]);
complete = true;
} else {
surrogateMappings.push([
[startHigh, startHigh + 1],
[startLow, LOW_SURROGATE_MAX + 1]
]);
}
// Step 2: `(startHigh + 1, LOW_SURROGATE_MIN)` to
// `(endHigh - 1, LOW_SURROGATE_MAX)`.
if (!complete && startHigh + 1 < endHigh) {
if (endsWithHighestLowSurrogate) {
// Combine step 2 and step 3.
surrogateMappings.push([
[startHigh + 1, endHigh + 1],
[LOW_SURROGATE_MIN, endLow + 1]
]);
complete = true;
} else {
surrogateMappings.push([
[startHigh + 1, endHigh],
[LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1]
]);
}
}
// Step 3. `(endHigh, LOW_SURROGATE_MIN)` to `(endHigh, endLow)`.
if (!complete) {
surrogateMappings.push([
[endHigh, endHigh + 1],
[LOW_SURROGATE_MIN, endLow + 1]
]);
}
index += 2;
}
// The format of `surrogateMappings` is as follows:
//
// [ surrogateMapping1, surrogateMapping2 ]
//
// i.e.:
//
// [
// [ highSurrogates1, lowSurrogates1 ],
// [ highSurrogates2, lowSurrogates2 ]
// ]
return optimizeSurrogateMappings(surrogateMappings);
};
// Turns a list of `[highSurrogates, lowSurrogates]` mappings into pattern
// text: each mapping becomes the high-surrogate class immediately followed by
// the low-surrogate class, and the mappings are joined with `|`.
var createSurrogateCharacterClasses = function(surrogateMappings) {
  var alternatives = [];
  forEach(surrogateMappings, function(pair) {
    var highPart = createBMPCharacterClasses(pair[0]);
    var lowPart = createBMPCharacterClasses(pair[1]);
    alternatives.push(highPart + lowPart);
  });
  return alternatives.join('|');
};
// Produces the regular-expression source text for `data`.
// - With `hasUnicodeFlag`, a single `u`-flag character class is returned.
// - Otherwise the data is split into BMP code points, astral code points
//   (written as surrogate pairs) and lone surrogates, and the parts are
//   joined with `|`.
// - With `bmpOnly`, lone surrogates are folded into the BMP class instead of
//   being emitted with their guards.
var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) {
  if (hasUnicodeFlag) {
    return createUnicodeCharacterClasses(data);
  }
  var alternatives = [];
  var split = splitAtBMP(data);
  var loneHighs = split.loneHighSurrogates;
  var loneLows = split.loneLowSurrogates;
  var bmp = split.bmp;
  var emitLoneHighs = !dataIsEmpty(loneHighs);
  var emitLoneLows = !dataIsEmpty(loneLows);
  var surrogateMappings = surrogateSet(split.astral);
  if (bmpOnly) {
    // Treat lone surrogates as ordinary BMP code points.
    bmp = dataAddData(dataAddData(bmp, loneHighs), loneLows);
    emitLoneHighs = false;
    emitLoneLows = false;
  }
  if (!dataIsEmpty(bmp)) {
    // The data set contains BMP code points that are not high surrogates
    // needed for astral code points in the set.
    alternatives.push(createBMPCharacterClasses(bmp));
  }
  if (surrogateMappings.length) {
    // The data set contains astral code points; append character classes
    // based on their surrogate pairs.
    alternatives.push(createSurrogateCharacterClasses(surrogateMappings));
  }
  // https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
  if (emitLoneHighs) {
    // Make sure the high surrogates aren't part of a surrogate pair.
    var notFollowedByLow = '(?![\\uDC00-\\uDFFF])';
    alternatives.push(createBMPCharacterClasses(loneHighs) + notFollowedByLow);
  }
  if (emitLoneLows) {
    // No lookbehind is used here, so it is not possible to accurately assert
    // that the low surrogates aren't part of a surrogate pair; a consuming
    // prefix is the approximation.
    var notPrecededByHigh = '(?:[^\\uD800-\\uDBFF]|^)';
    alternatives.push(notPrecededByHigh + createBMPCharacterClasses(loneLows));
  }
  return alternatives.join('|');
};
/*--------------------------------------------------------------------------*/
// `regenerate` can be used as a constructor (and new methods can be added to
// its prototype) but also as a regular function, the latter of which is the
// documented and most common usage. For that reason, it's not capitalized.
//
// Creates a new set and adds `value` to it. When more than one argument is
// passed, all arguments are collected into an array and added together.
// Returns the new set.
var regenerate = function(value) {
  if (arguments.length > 1) {
    value = slice.call(arguments);
  }
  if (this instanceof regenerate) {
    this.data = [];
    // Only skip `add` for a missing value. A plain truthiness test would also
    // skip the code point `0`, so `new regenerate(0)` would yield an empty
    // set while `regenerate(0)` would not.
    return value != null ? this.add(value) : this;
  }
  return (new regenerate).add(value);
};
regenerate.version = '1.4.1';
var proto = regenerate.prototype;
extend(proto, {
'add': function(value) {
var $this = this;
if (value == null) {
return $this;
}
if (value instanceof regenerate) {
// Allow passing other Regenerate instances.
$this.data = dataAddData($this.data, value.data);
return $this;
}
if (arguments.length > 1) {
value = slice.call(arguments);
}
if (isArray(value)) {
forEach(value, function(item) {
$this.add(item);
});
return $this;
}
$this.data = dataAdd(
$this.data,
isNumber(value) ? value : symbolToCodePoint(value)
);
return $this;
},
'remove': function(value) {
var $this = this;
if (value == null) {
return $this;
}
if (value instanceof regenerate) {
// Allow passing other Regenerate instances.
$this.data = dataRemoveData($this.data, value.data);
return $this;
}
if (arguments.length > 1) {
value = slice.call(arguments);
}
if (isArray(value)) {
forEach(value, function(item) {
$this.remove(item);
});
return $this;
}
$this.data = dataRemove(
$this.data,
isNumber(value) ? value : symbolToCodePoint(value)
);
return $this;
},
'addRange': function(start, end) {
var $this = this;
$this.data = dataAddRange($this.data,
isNumber(start) ? start : symbolToCodePoint(start),
isNumber(end) ? end : symbolToCodePoint(end)
);
return $this;
},
'removeRange': function(start, end) {
var $this = this;
var startCodePoint = isNumber(start) ? start : symbolToCodePoint(start);
var endCodePoint = isNumber(end) ? end : symbolToCodePoint(end);
$this.data = dataRemoveRange(
$this.data,
startCodePoint,
endCodePoint
);
return $this;
},
'intersection': function(argument) {
var $this = this;
// Allow passing other Regenerate instances.
// TODO: Optimize this by writing and using `dataIntersectionData()`.
var array = argument instanceof regenerate ?
dataToArray(argument.data) :
argument;
$this.data = dataIntersection($this.data, array);
return $this;
},
'contains': function(codePoint) {
return dataContains(
this.data,
isNumber(codePoint) ? codePoint : symbolToCodePoint(codePoint)
);
},
'clone': function() {
var set = new regenerate;
set.data = this.data.slice(0);
return set;
},
'toString': function(options) {
var result = createCharacterClassesFromData(
this.data,
options ? options.bmpOnly : false,
options ? options.hasUnicodeFlag : false
);
if (!result) {
// For an empty set, return something that can be inserted `/here/` to
// form a valid regular expression. Avoid `(?:)` since that matches the
// empty string.
return '[]';
}
// Use `\0` instead of `\x00` where possible.
return result.replace(regexNull, '\\0$1');
},
'toRegExp': function(flags) {
var pattern = this.toString(
flags && flags.indexOf('u') != -1 ?
{ 'hasUnicodeFlag': true } :
null
);
return RegExp(pattern, flags || '');
},
'valueOf': function() { // Note: `valueOf` is aliased as `toArray`.
return dataToArray(this.data);
}
});
// Expose `valueOf` under the name `toArray` as well.
proto.toArray = proto.valueOf;
// Export `regenerate` through whichever module system is detected: AMD
// first, then CommonJS-style `module.exports` / `exports`, and finally as a
// property of `root`.
// Some AMD build optimizers, like r.js, check for specific condition patterns
// like the following:
if (
typeof define == 'function' &&
typeof define.amd == 'object' &&
define.amd
) {
define(function() {
return regenerate;
});
} else if (freeExports && !freeExports.nodeType) {
// The `nodeType` check skips an `exports` value that looks like a DOM node.
if (freeModule) { // in Node.js, io.js, or RingoJS v0.8.0+
freeModule.exports = regenerate;
} else { // in Narwhal or RingoJS v0.7.0-
freeExports.regenerate = regenerate;
}
} else { // in Rhino or a web browser
root.regenerate = regenerate;
}
}(this));