diff options
author | 2020-11-16 00:10:28 +0100 | |
---|---|---|
committer | 2020-11-16 00:10:28 +0100 | |
commit | e06ec920f7a5d784e674c4c4b4e6d1da3dc7391d (patch) | |
tree | 55713f725f77b44ebfec86e4eec3ce33e71458ca /node_modules/regenerate | |
download | website_creator-e06ec920f7a5d784e674c4c4b4e6d1da3dc7391d.tar.gz website_creator-e06ec920f7a5d784e674c4c4b4e6d1da3dc7391d.tar.bz2 website_creator-e06ec920f7a5d784e674c4c4b4e6d1da3dc7391d.zip |
api, login, auth
Diffstat (limited to 'node_modules/regenerate')
-rw-r--r-- | node_modules/regenerate/LICENSE-MIT.txt | 20 | ||||
-rw-r--r-- | node_modules/regenerate/README.md | 338 | ||||
-rw-r--r-- | node_modules/regenerate/package.json | 65 | ||||
-rw-r--r-- | node_modules/regenerate/regenerate.js | 1209 |
4 files changed, 1632 insertions, 0 deletions
diff --git a/node_modules/regenerate/LICENSE-MIT.txt b/node_modules/regenerate/LICENSE-MIT.txt new file mode 100644 index 0000000..a41e0a7 --- /dev/null +++ b/node_modules/regenerate/LICENSE-MIT.txt @@ -0,0 +1,20 @@ +Copyright Mathias Bynens <https://mathiasbynens.be/> + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/node_modules/regenerate/README.md b/node_modules/regenerate/README.md new file mode 100644 index 0000000..ec243ce --- /dev/null +++ b/node_modules/regenerate/README.md @@ -0,0 +1,338 @@ +# Regenerate [](https://travis-ci.org/mathiasbynens/regenerate) [](https://codecov.io/gh/mathiasbynens/regenerate) + +_Regenerate_ is a Unicode-aware regex generator for JavaScript. It allows you to easily generate ES5-compatible regular expressions based on a given set of Unicode symbols or code points. (This is trickier than you might think, because of [how JavaScript deals with astral symbols](https://mathiasbynens.be/notes/javascript-unicode).) + +## Installation + +Via [npm](https://npmjs.org/): + +```bash +npm install regenerate +``` + +Via [Bower](http://bower.io/): + +```bash +bower install regenerate +``` + +In a browser: + +```html +<script src="regenerate.js"></script> +``` + +In [Node.js](https://nodejs.org/), [io.js](https://iojs.org/), and [RingoJS ≥ v0.8.0](http://ringojs.org/): + +```js +var regenerate = require('regenerate'); +``` + +In [Narwhal](http://narwhaljs.org/) and [RingoJS ≤ v0.7.0](http://ringojs.org/): + +```js +var regenerate = require('regenerate').regenerate; +``` + +In [Rhino](http://www.mozilla.org/rhino/): + +```js +load('regenerate.js'); +``` + +Using an AMD loader like [RequireJS](http://requirejs.org/): + +```js +require( + { + 'paths': { + 'regenerate': 'path/to/regenerate' + } + }, + ['regenerate'], + function(regenerate) { + console.log(regenerate); + } +); +``` + +## API + +### `regenerate(value1, value2, value3, ...)` + +The main Regenerate function. Calling this function creates a new set that gets a chainable API. + +```js +var set = regenerate() + .addRange(0x60, 0x69) // add U+0060 to U+0069 + .remove(0x62, 0x64) // remove U+0062 and U+0064 + .add(0x1D306); // add U+1D306 +set.valueOf(); +// → [0x60, 0x61, 0x63, 0x65, 0x66, 0x67, 0x68, 0x69, 0x1D306] +set.toString(); +// → '[`ace-i]|\\uD834\\uDF06' +set.toRegExp(); +// → /[`ace-i]|\uD834\uDF06/ +``` + +Any arguments passed to `regenerate()` will be added to the set right away. Both code points (numbers) and symbols (strings consisting of a single Unicode symbol) are accepted, as well as arrays containing values of these types. + +```js +regenerate(0x1D306, 'A', '©', 0x2603).toString(); +// → '[A\\xA9\\u2603]|\\uD834\\uDF06' + +var items = [0x1D306, 'A', '©', 0x2603]; +regenerate(items).toString(); +// → '[A\\xA9\\u2603]|\\uD834\\uDF06' +``` + +### `regenerate.prototype.add(value1, value2, value3, ...)` + +Any arguments passed to `add()` are added to the set. Both code points (numbers) and symbols (strings consisting of a single Unicode symbol) are accepted, as well as arrays containing values of these types. + +```js +regenerate().add(0x1D306, 'A', '©', 0x2603).toString(); +// → '[A\\xA9\\u2603]|\\uD834\\uDF06' + +var items = [0x1D306, 'A', '©', 0x2603]; +regenerate().add(items).toString(); +// → '[A\\xA9\\u2603]|\\uD834\\uDF06' +``` + +It’s also possible to pass in a Regenerate instance. Doing so adds all code points in that instance to the current set. + +```js +var set = regenerate(0x1D306, 'A'); +regenerate().add('©', 0x2603).add(set).toString(); +// → '[A\\xA9\\u2603]|\\uD834\\uDF06' +``` + +Note that the initial call to `regenerate()` acts like `add()`. This allows you to create a new Regenerate instance and add some code points to it in one go: + +```js +regenerate(0x1D306, 'A', '©', 0x2603).toString(); +// → '[A\\xA9\\u2603]|\\uD834\\uDF06' +``` + +### `regenerate.prototype.remove(value1, value2, value3, ...)` + +Any arguments passed to `remove()` are removed from the set. Both code points (numbers) and symbols (strings consisting of a single Unicode symbol) are accepted, as well as arrays containing values of these types. + +```js +regenerate(0x1D306, 'A', '©', 0x2603).remove('☃').toString(); +// → '[A\\xA9]|\\uD834\\uDF06' +``` + +It’s also possible to pass in a Regenerate instance. Doing so removes all code points in that instance from the current set. + +```js +var set = regenerate('☃'); +regenerate(0x1D306, 'A', '©', 0x2603).remove(set).toString(); +// → '[A\\xA9]|\\uD834\\uDF06' +``` + +### `regenerate.prototype.addRange(start, end)` + +Adds a range of code points from `start` to `end` (inclusive) to the set. Both code points (numbers) and symbols (strings consisting of a single Unicode symbol) are accepted. + +```js +regenerate(0x1D306).addRange(0x00, 0xFF).toString(16); +// → '[\\0-\\xFF]|\\uD834\\uDF06' + +regenerate().addRange('A', 'z').toString(); +// → '[A-z]' +``` + +### `regenerate.prototype.removeRange(start, end)` + +Removes a range of code points from `start` to `end` (inclusive) from the set. Both code points (numbers) and symbols (strings consisting of a single Unicode symbol) are accepted. + +```js +regenerate() + .addRange(0x000000, 0x10FFFF) // add all Unicode code points + .removeRange('A', 'z') // remove all symbols from `A` to `z` + .toString(); +// → '[\\0-@\\{-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]' + +regenerate() + .addRange(0x000000, 0x10FFFF) // add all Unicode code points + .removeRange(0x0041, 0x007A) // remove all code points from U+0041 to U+007A + .toString(); +// → '[\\0-@\\{-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]' +``` + +### `regenerate.prototype.intersection(codePoints)` + +Removes any code points from the set that are not present in both the set and the given `codePoints` array. `codePoints` must be an array of numeric code point values, i.e. numbers. + +```js +regenerate() + .addRange(0x00, 0xFF) // add extended ASCII code points + .intersection([0x61, 0x69]) // remove all code points from the set except for these + .toString(); +// → '[ai]' +``` + +Instead of the `codePoints` array, it’s also possible to pass in a Regenerate instance. + +```js +var whitelist = regenerate(0x61, 0x69); + +regenerate() + .addRange(0x00, 0xFF) // add extended ASCII code points + .intersection(whitelist) // remove all code points from the set except for those in the `whitelist` set + .toString(); +// → '[ai]' +``` + +### `regenerate.prototype.contains(value)` + +Returns `true` if the given value is part of the set, and `false` otherwise. Both code points (numbers) and symbols (strings consisting of a single Unicode symbol) are accepted. + +```js +var set = regenerate().addRange(0x00, 0xFF); +set.contains('A'); +// → true +set.contains(0x1D306); +// → false +``` + +### `regenerate.prototype.clone()` + +Returns a clone of the current code point set. Any actions performed on the clone won’t mutate the original set. + +```js +var setA = regenerate(0x1D306); +var setB = setA.clone().add(0x1F4A9); +setA.toArray(); +// → [0x1D306] +setB.toArray(); +// → [0x1D306, 0x1F4A9] +``` + +### `regenerate.prototype.toString(options)` + +Returns a string representing (part of) a regular expression that matches all the symbols mapped to the code points within the set. + +```js +regenerate(0x1D306, 0x1F4A9).toString(); +// → '\\uD834\\uDF06|\\uD83D\\uDCA9' +``` + +If the `bmpOnly` property of the optional `options` object is set to `true`, the output matches surrogates individually, regardless of whether they’re lone surrogates or just part of a surrogate pair. This simplifies the output, but it can only be used in case you’re certain the strings it will be used on don’t contain any astral symbols. + +```js +var highSurrogates = regenerate().addRange(0xD800, 0xDBFF); +highSurrogates.toString(); +// → '[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])' +highSurrogates.toString({ 'bmpOnly': true }); +// → '[\\uD800-\\uDBFF]' + +var lowSurrogates = regenerate().addRange(0xDC00, 0xDFFF); +lowSurrogates.toString(); +// → '(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]' +lowSurrogates.toString({ 'bmpOnly': true }); +// → '[\\uDC00-\\uDFFF]' +``` + +Note that lone low surrogates cannot be matched accurately using regular expressions in JavaScript without the use of [lookbehind assertions](https://mathiasbynens.be/notes/es-regexp-proposals#lookbehinds), which aren't yet widely supported. Regenerate’s output makes a best-effort approach but [there can be false negatives in this regard](https://github.com/mathiasbynens/regenerate/issues/28#issuecomment-72224808). + +If the `hasUnicodeFlag` property of the optional `options` object is set to `true`, the output makes use of Unicode code point escapes (`\u{…}`) where applicable. This simplifies the output at the cost of compatibility and portability, since it means the output can only be used as a pattern in a regular expression with [the ES6 `u` flag](https://mathiasbynens.be/notes/es6-unicode-regex) enabled. + +```js +var set = regenerate().addRange(0x0, 0x10FFFF); + +set.toString(); +// → '[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]'' + +set.toString({ 'hasUnicodeFlag': true }); +// → '[\\0-\\u{10FFFF}]' +``` + +### `regenerate.prototype.toRegExp(flags = '')` + +Returns a regular expression that matches all the symbols mapped to the code points within the set. Optionally, you can pass [flags](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Parameters) to be added to the regular expression. + +```js +var regex = regenerate(0x1D306, 0x1F4A9).toRegExp(); +// → /\uD834\uDF06|\uD83D\uDCA9/ +regex.test('𝌆'); +// → true +regex.test('A'); +// → false + +// With flags: +var regex = regenerate(0x1D306, 0x1F4A9).toRegExp('g'); +// → /\uD834\uDF06|\uD83D\uDCA9/g +``` + +**Note:** This probably shouldn’t be used. Regenerate is intended as a tool that is used as part of a build process, not at runtime. + +### `regenerate.prototype.valueOf()` or `regenerate.prototype.toArray()` + +Returns a sorted array of unique code points in the set. + +```js +regenerate(0x1D306) + .addRange(0x60, 0x65) + .add(0x59, 0x60) // note: 0x59 is added after 0x65, and 0x60 is a duplicate + .valueOf(); +// → [0x59, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x1D306] +``` + +### `regenerate.version` + +A string representing the semantic version number. + +## Combine Regenerate with other libraries + +Regenerate gets even better when combined with other libraries such as [Punycode.js](https://mths.be/punycode). Here’s an example where [Punycode.js](https://mths.be/punycode) is used to convert a string into an array of code points, that is then passed on to Regenerate: + +```js +var regenerate = require('regenerate'); +var punycode = require('punycode'); + +var string = 'Lorem ipsum dolor sit amet.'; +// Get an array of all code points used in the string: +var codePoints = punycode.ucs2.decode(string); + +// Generate a regular expression that matches any of the symbols used in the string: +regenerate(codePoints).toString(); +// → '[ \\.Ladeilmopr-u]' +``` + +In ES6 you can do something similar with [`Array.from`](https://mths.be/array-from) which uses [the string’s iterator](https://mathiasbynens.be/notes/javascript-unicode#iterating-over-symbols) to split the given string into an array of strings that each contain a single symbol. [`regenerate()`](#regenerateprototypeaddvalue1-value2-value3-) accepts both strings and code points, remember? + +```js +var regenerate = require('regenerate'); + +var string = 'Lorem ipsum dolor sit amet.'; +// Get an array of all symbols used in the string: +var symbols = Array.from(string); + +// Generate a regular expression that matches any of the symbols used in the string: +regenerate(symbols).toString(); +// → '[ \\.Ladeilmopr-u]' +``` + +## Support + +Regenerate supports at least Chrome 27+, Firefox 3+, Safari 4+, Opera 10+, IE 6+, Node.js v0.10.0+, io.js v1.0.0+, Narwhal 0.3.2+, RingoJS 0.8+, PhantomJS 1.9.0+, and Rhino 1.7RC4+. + +## Unit tests & code coverage + +After cloning this repository, run `npm install` to install the dependencies needed for Regenerate development and testing. You may want to install Istanbul _globally_ using `npm install istanbul -g`. + +Once that’s done, you can run the unit tests in Node using `npm test` or `node tests/tests.js`. To run the tests in Rhino, Ringo, Narwhal, and web browsers as well, use `grunt test`. + +To generate the code coverage report, use `grunt cover`. + +## Author + +| [](https://twitter.com/mathias "Follow @mathias on Twitter") | +|---| +| [Mathias Bynens](https://mathiasbynens.be/) | + +## License + +Regenerate is available under the [MIT](https://mths.be/mit) license. diff --git a/node_modules/regenerate/package.json b/node_modules/regenerate/package.json new file mode 100644 index 0000000..436e5cd --- /dev/null +++ b/node_modules/regenerate/package.json @@ -0,0 +1,65 @@ +{ + "_from": "regenerate@^1.2.1", + "_id": "regenerate@1.4.2", + "_inBundle": false, + "_integrity": "sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A==", + "_location": "/regenerate", + "_phantomChildren": {}, + "_requested": { + "type": "range", + "registry": true, + "raw": "regenerate@^1.2.1", + "name": "regenerate", + "escapedName": "regenerate", + "rawSpec": "^1.2.1", + "saveSpec": null, + "fetchSpec": "^1.2.1" + }, + "_requiredBy": [ + "/regexpu-core" + ], + "_resolved": "https://registry.npmjs.org/regenerate/-/regenerate-1.4.2.tgz", + "_shasum": "b9346d8827e8f5a32f7ba29637d398b69014848a", + "_spec": "regenerate@^1.2.1", + "_where": "/home/pruss/Dev/3-minute-website/node_modules/regexpu-core", + "author": { + "name": "Mathias Bynens", + "url": "https://mathiasbynens.be/" + }, + "bugs": { + "url": "https://github.com/mathiasbynens/regenerate/issues" + }, + "bundleDependencies": false, + "deprecated": false, + "description": "Generate JavaScript-compatible regular expressions based on a given set of Unicode symbols or code points.", + "devDependencies": { + "codecov": "^1.0.1", + "grunt": "^0.4.5", + "grunt-shell": "^1.1.1", + "istanbul": "^0.4.3", + "qunit-extras": "^1.1.0", + "qunitjs": "~1.11.0", + "requirejs": "^2.1.15" + }, + "homepage": "https://mths.be/regenerate", + "keywords": [ + "regex", + "regexp", + "javascript", + "unicode", + "generator", + "tool" + ], + "license": "MIT", + "main": "regenerate.js", + "name": "regenerate", + "repository": { + "type": "git", + "url": "git+https://github.com/mathiasbynens/regenerate.git" + }, + "scripts": { + "cover": "istanbul cover --report html --verbose --dir coverage tests/tests.js", + "test": "node tests/tests.js" + }, + "version": "1.4.2" +} diff --git a/node_modules/regenerate/regenerate.js b/node_modules/regenerate/regenerate.js new file mode 100644 index 0000000..0686ade --- /dev/null +++ b/node_modules/regenerate/regenerate.js @@ -0,0 +1,1209 @@ +/*! https://mths.be/regenerate v1.4.2 by @mathias | MIT license */ +;(function(root) { + + // Detect free variables `exports`. + var freeExports = typeof exports == 'object' && exports; + + // Detect free variable `module`. + var freeModule = typeof module == 'object' && module && + module.exports == freeExports && module; + + // Detect free variable `global`, from Node.js/io.js or Browserified code, + // and use it as `root`. + var freeGlobal = typeof global == 'object' && global; + if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) { + root = freeGlobal; + } + + /*--------------------------------------------------------------------------*/ + + var ERRORS = { + 'rangeOrder': 'A range\u2019s `stop` value must be greater than or equal ' + + 'to the `start` value.', + 'codePointRange': 'Invalid code point value. Code points range from ' + + 'U+000000 to U+10FFFF.' + }; + + // https://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs + var HIGH_SURROGATE_MIN = 0xD800; + var HIGH_SURROGATE_MAX = 0xDBFF; + var LOW_SURROGATE_MIN = 0xDC00; + var LOW_SURROGATE_MAX = 0xDFFF; + + // In Regenerate output, `\0` is never preceded by `\` because we sort by + // code point value, so let’s keep this regular expression simple. + var regexNull = /\\x00([^0123456789]|$)/g; + + var object = {}; + var hasOwnProperty = object.hasOwnProperty; + var extend = function(destination, source) { + var key; + for (key in source) { + if (hasOwnProperty.call(source, key)) { + destination[key] = source[key]; + } + } + return destination; + }; + + var forEach = function(array, callback) { + var index = -1; + var length = array.length; + while (++index < length) { + callback(array[index], index); + } + }; + + var toString = object.toString; + var isArray = function(value) { + return toString.call(value) == '[object Array]'; + }; + var isNumber = function(value) { + return typeof value == 'number' || + toString.call(value) == '[object Number]'; + }; + + // This assumes that `number` is a positive integer that `toString()`s nicely + // (which is the case for all code point values). + var zeroes = '0000'; + var pad = function(number, totalCharacters) { + var string = String(number); + return string.length < totalCharacters + ? (zeroes + string).slice(-totalCharacters) + : string; + }; + + var hex = function(number) { + return Number(number).toString(16).toUpperCase(); + }; + + var slice = [].slice; + + /*--------------------------------------------------------------------------*/ + + var dataFromCodePoints = function(codePoints) { + var index = -1; + var length = codePoints.length; + var max = length - 1; + var result = []; + var isStart = true; + var tmp; + var previous = 0; + while (++index < length) { + tmp = codePoints[index]; + if (isStart) { + result.push(tmp); + previous = tmp; + isStart = false; + } else { + if (tmp == previous + 1) { + if (index != max) { + previous = tmp; + continue; + } else { + isStart = true; + result.push(tmp + 1); + } + } else { + // End the previous range and start a new one. + result.push(previous + 1, tmp); + previous = tmp; + } + } + } + if (!isStart) { + result.push(tmp + 1); + } + return result; + }; + + var dataRemove = function(data, codePoint) { + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var length = data.length; + while (index < length) { + start = data[index]; + end = data[index + 1]; + if (codePoint >= start && codePoint < end) { + // Modify this pair. + if (codePoint == start) { + if (end == start + 1) { + // Just remove `start` and `end`. + data.splice(index, 2); + return data; + } else { + // Just replace `start` with a new value. + data[index] = codePoint + 1; + return data; + } + } else if (codePoint == end - 1) { + // Just replace `end` with a new value. + data[index + 1] = codePoint; + return data; + } else { + // Replace `[start, end]` with `[startA, endA, startB, endB]`. + data.splice(index, 2, start, codePoint, codePoint + 1, end); + return data; + } + } + index += 2; + } + return data; + }; + + var dataRemoveRange = function(data, rangeStart, rangeEnd) { + if (rangeEnd < rangeStart) { + throw Error(ERRORS.rangeOrder); + } + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + while (index < data.length) { + start = data[index]; + end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. + + // Exit as soon as no more matching pairs can be found. + if (start > rangeEnd) { + return data; + } + + // Check if this range pair is equal to, or forms a subset of, the range + // to be removed. + // E.g. we have `[0, 11, 40, 51]` and want to remove 0-10 → `[40, 51]`. + // E.g. we have `[40, 51]` and want to remove 0-100 → `[]`. + if (rangeStart <= start && rangeEnd >= end) { + // Remove this pair. + data.splice(index, 2); + continue; + } + + // Check if both `rangeStart` and `rangeEnd` are within the bounds of + // this pair. + // E.g. we have `[0, 11]` and want to remove 4-6 → `[0, 4, 7, 11]`. + if (rangeStart >= start && rangeEnd < end) { + if (rangeStart == start) { + // Replace `[start, end]` with `[startB, endB]`. + data[index] = rangeEnd + 1; + data[index + 1] = end + 1; + return data; + } + // Replace `[start, end]` with `[startA, endA, startB, endB]`. + data.splice(index, 2, start, rangeStart, rangeEnd + 1, end + 1); + return data; + } + + // Check if only `rangeStart` is within the bounds of this pair. + // E.g. we have `[0, 11]` and want to remove 4-20 → `[0, 4]`. + if (rangeStart >= start && rangeStart <= end) { + // Replace `end` with `rangeStart`. + data[index + 1] = rangeStart; + // Note: we cannot `return` just yet, in case any following pairs still + // contain matching code points. + // E.g. we have `[0, 11, 14, 31]` and want to remove 4-20 + // → `[0, 4, 21, 31]`. + } + + // Check if only `rangeEnd` is within the bounds of this pair. + // E.g. we have `[14, 31]` and want to remove 4-20 → `[21, 31]`. + else if (rangeEnd >= start && rangeEnd <= end) { + // Just replace `start`. + data[index] = rangeEnd + 1; + return data; + } + + index += 2; + } + return data; + }; + + var dataAdd = function(data, codePoint) { + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var lastIndex = null; + var length = data.length; + if (codePoint < 0x0 || codePoint > 0x10FFFF) { + throw RangeError(ERRORS.codePointRange); + } + while (index < length) { + start = data[index]; + end = data[index + 1]; + + // Check if the code point is already in the set. + if (codePoint >= start && codePoint < end) { + return data; + } + + if (codePoint == start - 1) { + // Just replace `start` with a new value. + data[index] = codePoint; + return data; + } + + // At this point, if `start` is `greater` than `codePoint`, insert a new + // `[start, end]` pair before the current pair, or after the current pair + // if there is a known `lastIndex`. + if (start > codePoint) { + data.splice( + lastIndex != null ? lastIndex + 2 : 0, + 0, + codePoint, + codePoint + 1 + ); + return data; + } + + if (codePoint == end) { + // Check if adding this code point causes two separate ranges to become + // a single range, e.g. `dataAdd([0, 4, 5, 10], 4)` → `[0, 10]`. + if (codePoint + 1 == data[index + 2]) { + data.splice(index, 4, start, data[index + 3]); + return data; + } + // Else, just replace `end` with a new value. + data[index + 1] = codePoint + 1; + return data; + } + lastIndex = index; + index += 2; + } + // The loop has finished; add the new pair to the end of the data set. + data.push(codePoint, codePoint + 1); + return data; + }; + + var dataAddData = function(dataA, dataB) { + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var data = dataA.slice(); + var length = dataB.length; + while (index < length) { + start = dataB[index]; + end = dataB[index + 1] - 1; + if (start == end) { + data = dataAdd(data, start); + } else { + data = dataAddRange(data, start, end); + } + index += 2; + } + return data; + }; + + var dataRemoveData = function(dataA, dataB) { + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var data = dataA.slice(); + var length = dataB.length; + while (index < length) { + start = dataB[index]; + end = dataB[index + 1] - 1; + if (start == end) { + data = dataRemove(data, start); + } else { + data = dataRemoveRange(data, start, end); + } + index += 2; + } + return data; + }; + + var dataAddRange = function(data, rangeStart, rangeEnd) { + if (rangeEnd < rangeStart) { + throw Error(ERRORS.rangeOrder); + } + if ( + rangeStart < 0x0 || rangeStart > 0x10FFFF || + rangeEnd < 0x0 || rangeEnd > 0x10FFFF + ) { + throw RangeError(ERRORS.codePointRange); + } + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var added = false; + var length = data.length; + while (index < length) { + start = data[index]; + end = data[index + 1]; + + if (added) { + // The range has already been added to the set; at this point, we just + // need to get rid of the following ranges in case they overlap. + + // Check if this range can be combined with the previous range. + if (start == rangeEnd + 1) { + data.splice(index - 1, 2); + return data; + } + + // Exit as soon as no more possibly overlapping pairs can be found. + if (start > rangeEnd) { + return data; + } + + // E.g. `[0, 11, 12, 16]` and we’ve added 5-15, so we now have + // `[0, 16, 12, 16]`. Remove the `12,16` part, as it lies within the + // `0,16` range that was previously added. + if (start >= rangeStart && start <= rangeEnd) { + // `start` lies within the range that was previously added. + + if (end > rangeStart && end - 1 <= rangeEnd) { + // `end` lies within the range that was previously added as well, + // so remove this pair. + data.splice(index, 2); + index -= 2; + // Note: we cannot `return` just yet, as there may still be other + // overlapping pairs. + } else { + // `start` lies within the range that was previously added, but + // `end` doesn’t. E.g. `[0, 11, 12, 31]` and we’ve added 5-15, so + // now we have `[0, 16, 12, 31]`. This must be written as `[0, 31]`. + // Remove the previously added `end` and the current `start`. + data.splice(index - 1, 2); + index -= 2; + } + + // Note: we cannot return yet. + } + + } + + else if (start == rangeEnd + 1 || start == rangeEnd) { + data[index] = rangeStart; + return data; + } + + // Check if a new pair must be inserted *before* the current one. + else if (start > rangeEnd) { + data.splice(index, 0, rangeStart, rangeEnd + 1); + return data; + } + + else if (rangeStart >= start && rangeStart < end && rangeEnd + 1 <= end) { + // The new range lies entirely within an existing range pair. No action + // needed. + return data; + } + + else if ( + // E.g. `[0, 11]` and you add 5-15 → `[0, 16]`. + (rangeStart >= start && rangeStart < end) || + // E.g. `[0, 3]` and you add 3-6 → `[0, 7]`. + end == rangeStart + ) { + // Replace `end` with the new value. + data[index + 1] = rangeEnd + 1; + // Make sure the next range pair doesn’t overlap, e.g. `[0, 11, 12, 14]` + // and you add 5-15 → `[0, 16]`, i.e. remove the `12,14` part. + added = true; + // Note: we cannot `return` just yet. + } + + else if (rangeStart <= start && rangeEnd + 1 >= end) { + // The new range is a superset of the old range. + data[index] = rangeStart; + data[index + 1] = rangeEnd + 1; + added = true; + } + + index += 2; + } + // The loop has finished without doing anything; add the new pair to the end + // of the data set. + if (!added) { + data.push(rangeStart, rangeEnd + 1); + } + return data; + }; + + var dataContains = function(data, codePoint) { + var index = 0; + var length = data.length; + // Exit early if `codePoint` is not within `data`’s overall range. + var start = data[index]; + var end = data[length - 1]; + if (length >= 2) { + if (codePoint < start || codePoint > end) { + return false; + } + } + // Iterate over the data per `(start, end)` pair. + while (index < length) { + start = data[index]; + end = data[index + 1]; + if (codePoint >= start && codePoint < end) { + return true; + } + index += 2; + } + return false; + }; + + var dataIntersection = function(data, codePoints) { + var index = 0; + var length = codePoints.length; + var codePoint; + var result = []; + while (index < length) { + codePoint = codePoints[index]; + if (dataContains(data, codePoint)) { + result.push(codePoint); + } + ++index; + } + return dataFromCodePoints(result); + }; + + var dataIsEmpty = function(data) { + return !data.length; + }; + + var dataIsSingleton = function(data) { + // Check if the set only represents a single code point. + return data.length == 2 && data[0] + 1 == data[1]; + }; + + var dataToArray = function(data) { + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var result = []; + var length = data.length; + while (index < length) { + start = data[index]; + end = data[index + 1]; + while (start < end) { + result.push(start); + ++start; + } + index += 2; + } + return result; + }; + + /*--------------------------------------------------------------------------*/ + + // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + var floor = Math.floor; + var highSurrogate = function(codePoint) { + return parseInt( + floor((codePoint - 0x10000) / 0x400) + HIGH_SURROGATE_MIN, + 10 + ); + }; + + var lowSurrogate = function(codePoint) { + return parseInt( + (codePoint - 0x10000) % 0x400 + LOW_SURROGATE_MIN, + 10 + ); + }; + + var stringFromCharCode = String.fromCharCode; + var codePointToString = function(codePoint) { + var string; + // https://mathiasbynens.be/notes/javascript-escapes#single + // Note: the `\b` escape sequence for U+0008 BACKSPACE in strings has a + // different meaning in regular expressions (word boundary), so it cannot + // be used here. + if (codePoint == 0x09) { + string = '\\t'; + } + // Note: IE < 9 treats `'\v'` as `'v'`, so avoid using it. + // else if (codePoint == 0x0B) { + // string = '\\v'; + // } + else if (codePoint == 0x0A) { + string = '\\n'; + } + else if (codePoint == 0x0C) { + string = '\\f'; + } + else if (codePoint == 0x0D) { + string = '\\r'; + } + else if (codePoint == 0x2D) { + // https://mathiasbynens.be/notes/javascript-escapes#hexadecimal + // Note: `-` (U+002D HYPHEN-MINUS) is escaped in this way rather + // than by backslash-escaping, in case the output is used outside + // of a character class in a `u` RegExp. /\-/u throws, but + // /\x2D/u is fine. + string = '\\x2D'; + } + else if (codePoint == 0x5C) { + string = '\\\\'; + } + else if ( + codePoint == 0x24 || + (codePoint >= 0x28 && codePoint <= 0x2B) || + codePoint == 0x2E || codePoint == 0x2F || + codePoint == 0x3F || + (codePoint >= 0x5B && codePoint <= 0x5E) || + (codePoint >= 0x7B && codePoint <= 0x7D) + ) { + // The code point maps to an unsafe printable ASCII character; + // backslash-escape it. Here’s the list of those symbols: + // + // $()*+./?[\]^{|} + // + // This matches SyntaxCharacters as well as `/` (U+002F SOLIDUS). + // https://tc39.github.io/ecma262/#prod-SyntaxCharacter + string = '\\' + stringFromCharCode(codePoint); + } + else if (codePoint >= 0x20 && codePoint <= 0x7E) { + // The code point maps to one of these printable ASCII symbols + // (including the space character): + // + // !"#%&',/0123456789:;<=>@ABCDEFGHIJKLMNO + // PQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz~ + // + // These can safely be used directly. + string = stringFromCharCode(codePoint); + } + else if (codePoint <= 0xFF) { + string = '\\x' + pad(hex(codePoint), 2); + } + else { // `codePoint <= 0xFFFF` holds true. + // https://mathiasbynens.be/notes/javascript-escapes#unicode + string = '\\u' + pad(hex(codePoint), 4); + } + + // There’s no need to account for astral symbols / surrogate pairs here, + // since `codePointToString` is private and only used for BMP code points. + // But if that’s what you need, just add an `else` block with this code: + // + // string = '\\u' + pad(hex(highSurrogate(codePoint)), 4) + // + '\\u' + pad(hex(lowSurrogate(codePoint)), 4); + + return string; + }; + + var codePointToStringUnicode = function(codePoint) { + if (codePoint <= 0xFFFF) { + return codePointToString(codePoint); + } + return '\\u{' + codePoint.toString(16).toUpperCase() + '}'; + }; + + var symbolToCodePoint = function(symbol) { + var length = symbol.length; + var first = symbol.charCodeAt(0); + var second; + if ( + first >= HIGH_SURROGATE_MIN && first <= HIGH_SURROGATE_MAX && + length > 1 // There is a next code unit. + ) { + // `first` is a high surrogate, and there is a next character. Assume + // it’s a low surrogate (else it’s invalid usage of Regenerate anyway). + second = symbol.charCodeAt(1); + // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + return (first - HIGH_SURROGATE_MIN) * 0x400 + + second - LOW_SURROGATE_MIN + 0x10000; + } + return first; + }; + + var createBMPCharacterClasses = function(data) { + // Iterate over the data per `(start, end)` pair. + var result = ''; + var index = 0; + var start; + var end; + var length = data.length; + if (dataIsSingleton(data)) { + return codePointToString(data[0]); + } + while (index < length) { + start = data[index]; + end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. + if (start == end) { + result += codePointToString(start); + } else if (start + 1 == end) { + result += codePointToString(start) + codePointToString(end); + } else { + result += codePointToString(start) + '-' + codePointToString(end); + } + index += 2; + } + return '[' + result + ']'; + }; + + var createUnicodeCharacterClasses = function(data) { + // Iterate over the data per `(start, end)` pair. + var result = ''; + var index = 0; + var start; + var end; + var length = data.length; + if (dataIsSingleton(data)) { + return codePointToStringUnicode(data[0]); + } + while (index < length) { + start = data[index]; + end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. + if (start == end) { + result += codePointToStringUnicode(start); + } else if (start + 1 == end) { + result += codePointToStringUnicode(start) + codePointToStringUnicode(end); + } else { + result += codePointToStringUnicode(start) + '-' + codePointToStringUnicode(end); + } + index += 2; + } + return '[' + result + ']'; + }; + + var splitAtBMP = function(data) { + // Iterate over the data per `(start, end)` pair. + var loneHighSurrogates = []; + var loneLowSurrogates = []; + var bmp = []; + var astral = []; + var index = 0; + var start; + var end; + var length = data.length; + while (index < length) { + start = data[index]; + end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. + + if (start < HIGH_SURROGATE_MIN) { + + // The range starts and ends before the high surrogate range. + // E.g. (0, 0x10). + if (end < HIGH_SURROGATE_MIN) { + bmp.push(start, end + 1); + } + + // The range starts before the high surrogate range and ends within it. + // E.g. (0, 0xD855). + if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) { + bmp.push(start, HIGH_SURROGATE_MIN); + loneHighSurrogates.push(HIGH_SURROGATE_MIN, end + 1); + } + + // The range starts before the high surrogate range and ends in the low + // surrogate range. E.g. (0, 0xDCFF). + if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) { + bmp.push(start, HIGH_SURROGATE_MIN); + loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1); + loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1); + } + + // The range starts before the high surrogate range and ends after the + // low surrogate range. E.g. (0, 0x10FFFF). + if (end > LOW_SURROGATE_MAX) { + bmp.push(start, HIGH_SURROGATE_MIN); + loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1); + loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1); + if (end <= 0xFFFF) { + bmp.push(LOW_SURROGATE_MAX + 1, end + 1); + } else { + bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1); + astral.push(0xFFFF + 1, end + 1); + } + } + + } else if (start >= HIGH_SURROGATE_MIN && start <= HIGH_SURROGATE_MAX) { + + // The range starts and ends in the high surrogate range. + // E.g. (0xD855, 0xD866). + if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) { + loneHighSurrogates.push(start, end + 1); + } + + // The range starts in the high surrogate range and ends in the low + // surrogate range. E.g. (0xD855, 0xDCFF). + if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) { + loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1); + loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1); + } + + // The range starts in the high surrogate range and ends after the low + // surrogate range. E.g. (0xD855, 0x10FFFF). + if (end > LOW_SURROGATE_MAX) { + loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1); + loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1); + if (end <= 0xFFFF) { + bmp.push(LOW_SURROGATE_MAX + 1, end + 1); + } else { + bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1); + astral.push(0xFFFF + 1, end + 1); + } + } + + } else if (start >= LOW_SURROGATE_MIN && start <= LOW_SURROGATE_MAX) { + + // The range starts and ends in the low surrogate range. + // E.g. (0xDCFF, 0xDDFF). + if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) { + loneLowSurrogates.push(start, end + 1); + } + + // The range starts in the low surrogate range and ends after the low + // surrogate range. E.g. (0xDCFF, 0x10FFFF). + if (end > LOW_SURROGATE_MAX) { + loneLowSurrogates.push(start, LOW_SURROGATE_MAX + 1); + if (end <= 0xFFFF) { + bmp.push(LOW_SURROGATE_MAX + 1, end + 1); + } else { + bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1); + astral.push(0xFFFF + 1, end + 1); + } + } + + } else if (start > LOW_SURROGATE_MAX && start <= 0xFFFF) { + + // The range starts and ends after the low surrogate range. + // E.g. (0xFFAA, 0x10FFFF). + if (end <= 0xFFFF) { + bmp.push(start, end + 1); + } else { + bmp.push(start, 0xFFFF + 1); + astral.push(0xFFFF + 1, end + 1); + } + + } else { + + // The range starts and ends in the astral range. + astral.push(start, end + 1); + + } + + index += 2; + } + return { + 'loneHighSurrogates': loneHighSurrogates, + 'loneLowSurrogates': loneLowSurrogates, + 'bmp': bmp, + 'astral': astral + }; + }; + + var optimizeSurrogateMappings = function(surrogateMappings) { + var result = []; + var tmpLow = []; + var addLow = false; + var mapping; + var nextMapping; + var highSurrogates; + var lowSurrogates; + var nextHighSurrogates; + var nextLowSurrogates; + var index = -1; + var length = surrogateMappings.length; + while (++index < length) { + mapping = surrogateMappings[index]; + nextMapping = surrogateMappings[index + 1]; + if (!nextMapping) { + result.push(mapping); + continue; + } + highSurrogates = mapping[0]; + lowSurrogates = mapping[1]; + nextHighSurrogates = nextMapping[0]; + nextLowSurrogates = nextMapping[1]; + + // Check for identical high surrogate ranges. + tmpLow = lowSurrogates; + while ( + nextHighSurrogates && + highSurrogates[0] == nextHighSurrogates[0] && + highSurrogates[1] == nextHighSurrogates[1] + ) { + // Merge with the next item. + if (dataIsSingleton(nextLowSurrogates)) { + tmpLow = dataAdd(tmpLow, nextLowSurrogates[0]); + } else { + tmpLow = dataAddRange( + tmpLow, + nextLowSurrogates[0], + nextLowSurrogates[1] - 1 + ); + } + ++index; + mapping = surrogateMappings[index]; + highSurrogates = mapping[0]; + lowSurrogates = mapping[1]; + nextMapping = surrogateMappings[index + 1]; + nextHighSurrogates = nextMapping && nextMapping[0]; + nextLowSurrogates = nextMapping && nextMapping[1]; + addLow = true; + } + result.push([ + highSurrogates, + addLow ? tmpLow : lowSurrogates + ]); + addLow = false; + } + return optimizeByLowSurrogates(result); + }; + + var optimizeByLowSurrogates = function(surrogateMappings) { + if (surrogateMappings.length == 1) { + return surrogateMappings; + } + var index = -1; + var innerIndex = -1; + while (++index < surrogateMappings.length) { + var mapping = surrogateMappings[index]; + var lowSurrogates = mapping[1]; + var lowSurrogateStart = lowSurrogates[0]; + var lowSurrogateEnd = lowSurrogates[1]; + innerIndex = index; // Note: the loop starts at the next index. + while (++innerIndex < surrogateMappings.length) { + var otherMapping = surrogateMappings[innerIndex]; + var otherLowSurrogates = otherMapping[1]; + var otherLowSurrogateStart = otherLowSurrogates[0]; + var otherLowSurrogateEnd = otherLowSurrogates[1]; + if ( + lowSurrogateStart == otherLowSurrogateStart && + lowSurrogateEnd == otherLowSurrogateEnd && + otherLowSurrogates.length === 2 + ) { + // Add the code points in the other item to this one. + if (dataIsSingleton(otherMapping[0])) { + mapping[0] = dataAdd(mapping[0], otherMapping[0][0]); + } else { + mapping[0] = dataAddRange( + mapping[0], + otherMapping[0][0], + otherMapping[0][1] - 1 + ); + } + // Remove the other, now redundant, item. + surrogateMappings.splice(innerIndex, 1); + --innerIndex; + } + } + } + return surrogateMappings; + }; + + var surrogateSet = function(data) { + // Exit early if `data` is an empty set. + if (!data.length) { + return []; + } + + // Iterate over the data per `(start, end)` pair. + var index = 0; + var start; + var end; + var startHigh; + var startLow; + var endHigh; + var endLow; + var surrogateMappings = []; + var length = data.length; + while (index < length) { + start = data[index]; + end = data[index + 1] - 1; + + startHigh = highSurrogate(start); + startLow = lowSurrogate(start); + endHigh = highSurrogate(end); + endLow = lowSurrogate(end); + + var startsWithLowestLowSurrogate = startLow == LOW_SURROGATE_MIN; + var endsWithHighestLowSurrogate = endLow == LOW_SURROGATE_MAX; + var complete = false; + + // Append the previous high-surrogate-to-low-surrogate mappings. + // Step 1: `(startHigh, startLow)` to `(startHigh, LOW_SURROGATE_MAX)`. + if ( + startHigh == endHigh || + startsWithLowestLowSurrogate && endsWithHighestLowSurrogate + ) { + surrogateMappings.push([ + [startHigh, endHigh + 1], + [startLow, endLow + 1] + ]); + complete = true; + } else { + surrogateMappings.push([ + [startHigh, startHigh + 1], + [startLow, LOW_SURROGATE_MAX + 1] + ]); + } + + // Step 2: `(startHigh + 1, LOW_SURROGATE_MIN)` to + // `(endHigh - 1, LOW_SURROGATE_MAX)`. + if (!complete && startHigh + 1 < endHigh) { + if (endsWithHighestLowSurrogate) { + // Combine step 2 and step 3. + surrogateMappings.push([ + [startHigh + 1, endHigh + 1], + [LOW_SURROGATE_MIN, endLow + 1] + ]); + complete = true; + } else { + surrogateMappings.push([ + [startHigh + 1, endHigh], + [LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1] + ]); + } + } + + // Step 3. `(endHigh, LOW_SURROGATE_MIN)` to `(endHigh, endLow)`. + if (!complete) { + surrogateMappings.push([ + [endHigh, endHigh + 1], + [LOW_SURROGATE_MIN, endLow + 1] + ]); + } + + index += 2; + } + + // The format of `surrogateMappings` is as follows: + // + // [ surrogateMapping1, surrogateMapping2 ] + // + // i.e.: + // + // [ + // [ highSurrogates1, lowSurrogates1 ], + // [ highSurrogates2, lowSurrogates2 ] + // ] + return optimizeSurrogateMappings(surrogateMappings); + }; + + var createSurrogateCharacterClasses = function(surrogateMappings) { + var result = []; + forEach(surrogateMappings, function(surrogateMapping) { + var highSurrogates = surrogateMapping[0]; + var lowSurrogates = surrogateMapping[1]; + result.push( + createBMPCharacterClasses(highSurrogates) + + createBMPCharacterClasses(lowSurrogates) + ); + }); + return result.join('|'); + }; + + var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) { + if (hasUnicodeFlag) { + return createUnicodeCharacterClasses(data); + } + var result = []; + + var parts = splitAtBMP(data); + var loneHighSurrogates = parts.loneHighSurrogates; + var loneLowSurrogates = parts.loneLowSurrogates; + var bmp = parts.bmp; + var astral = parts.astral; + var hasLoneHighSurrogates = !dataIsEmpty(loneHighSurrogates); + var hasLoneLowSurrogates = !dataIsEmpty(loneLowSurrogates); + + var surrogateMappings = surrogateSet(astral); + + if (bmpOnly) { + bmp = dataAddData(bmp, loneHighSurrogates); + hasLoneHighSurrogates = false; + bmp = dataAddData(bmp, loneLowSurrogates); + hasLoneLowSurrogates = false; + } + + if (!dataIsEmpty(bmp)) { + // The data set contains BMP code points that are not high surrogates + // needed for astral code points in the set. + result.push(createBMPCharacterClasses(bmp)); + } + if (surrogateMappings.length) { + // The data set contains astral code points; append character classes + // based on their surrogate pairs. + result.push(createSurrogateCharacterClasses(surrogateMappings)); + } + // https://gist.github.com/mathiasbynens/bbe7f870208abcfec860 + if (hasLoneHighSurrogates) { + result.push( + createBMPCharacterClasses(loneHighSurrogates) + + // Make sure the high surrogates aren’t part of a surrogate pair. + '(?![\\uDC00-\\uDFFF])' + ); + } + if (hasLoneLowSurrogates) { + result.push( + // It is not possible to accurately assert the low surrogates aren’t + // part of a surrogate pair, since JavaScript regular expressions do + // not support lookbehind. + '(?:[^\\uD800-\\uDBFF]|^)' + + createBMPCharacterClasses(loneLowSurrogates) + ); + } + return result.join('|'); + }; + + /*--------------------------------------------------------------------------*/ + + // `regenerate` can be used as a constructor (and new methods can be added to + // its prototype) but also as a regular function, the latter of which is the + // documented and most common usage. For that reason, it’s not capitalized. + var regenerate = function(value) { + if (arguments.length > 1) { + value = slice.call(arguments); + } + if (this instanceof regenerate) { + this.data = []; + return value ? this.add(value) : this; + } + return (new regenerate).add(value); + }; + + regenerate.version = '1.4.2'; + + var proto = regenerate.prototype; + extend(proto, { + 'add': function(value) { + var $this = this; + if (value == null) { + return $this; + } + if (value instanceof regenerate) { + // Allow passing other Regenerate instances. + $this.data = dataAddData($this.data, value.data); + return $this; + } + if (arguments.length > 1) { + value = slice.call(arguments); + } + if (isArray(value)) { + forEach(value, function(item) { + $this.add(item); + }); + return $this; + } + $this.data = dataAdd( + $this.data, + isNumber(value) ? value : symbolToCodePoint(value) + ); + return $this; + }, + 'remove': function(value) { + var $this = this; + if (value == null) { + return $this; + } + if (value instanceof regenerate) { + // Allow passing other Regenerate instances. + $this.data = dataRemoveData($this.data, value.data); + return $this; + } + if (arguments.length > 1) { + value = slice.call(arguments); + } + if (isArray(value)) { + forEach(value, function(item) { + $this.remove(item); + }); + return $this; + } + $this.data = dataRemove( + $this.data, + isNumber(value) ? value : symbolToCodePoint(value) + ); + return $this; + }, + 'addRange': function(start, end) { + var $this = this; + $this.data = dataAddRange($this.data, + isNumber(start) ? start : symbolToCodePoint(start), + isNumber(end) ? end : symbolToCodePoint(end) + ); + return $this; + }, + 'removeRange': function(start, end) { + var $this = this; + var startCodePoint = isNumber(start) ? start : symbolToCodePoint(start); + var endCodePoint = isNumber(end) ? end : symbolToCodePoint(end); + $this.data = dataRemoveRange( + $this.data, + startCodePoint, + endCodePoint + ); + return $this; + }, + 'intersection': function(argument) { + var $this = this; + // Allow passing other Regenerate instances. + // TODO: Optimize this by writing and using `dataIntersectionData()`. + var array = argument instanceof regenerate ? + dataToArray(argument.data) : + argument; + $this.data = dataIntersection($this.data, array); + return $this; + }, + 'contains': function(codePoint) { + return dataContains( + this.data, + isNumber(codePoint) ? codePoint : symbolToCodePoint(codePoint) + ); + }, + 'clone': function() { + var set = new regenerate; + set.data = this.data.slice(0); + return set; + }, + 'toString': function(options) { + var result = createCharacterClassesFromData( + this.data, + options ? options.bmpOnly : false, + options ? options.hasUnicodeFlag : false + ); + if (!result) { + // For an empty set, return something that can be inserted `/here/` to + // form a valid regular expression. Avoid `(?:)` since that matches the + // empty string. + return '[]'; + } + // Use `\0` instead of `\x00` where possible. + return result.replace(regexNull, '\\0$1'); + }, + 'toRegExp': function(flags) { + var pattern = this.toString( + flags && flags.indexOf('u') != -1 ? + { 'hasUnicodeFlag': true } : + null + ); + return RegExp(pattern, flags || ''); + }, + 'valueOf': function() { // Note: `valueOf` is aliased as `toArray`. + return dataToArray(this.data); + } + }); + + proto.toArray = proto.valueOf; + + // Some AMD build optimizers, like r.js, check for specific condition patterns + // like the following: + if ( + typeof define == 'function' && + typeof define.amd == 'object' && + define.amd + ) { + define(function() { + return regenerate; + }); + } else if (freeExports && !freeExports.nodeType) { + if (freeModule) { // in Node.js, io.js, or RingoJS v0.8.0+ + freeModule.exports = regenerate; + } else { // in Narwhal or RingoJS v0.7.0- + freeExports.regenerate = regenerate; + } + } else { // in Rhino or a web browser + root.regenerate = regenerate; + } + +}(this)); |