diff options
| author | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
|---|---|---|
| committer | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
| commit | 76cb9c2a39d477a64824a985ade40507e3bbade1 (patch) | |
| tree | 41e997aa9c6f538d3a136af61dae9424db2005a9 /vanilla/node_modules/bidi-js/src | |
| parent | 819a39a21ac992b1393244a4c283bbb125208c69 (diff) | |
| download | neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.gz neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.bz2 neko-76cb9c2a39d477a64824a985ade40507e3bbade1.zip | |
feat(vanilla): add testing infrastructure and tests (NK-wjnczv)
Diffstat (limited to 'vanilla/node_modules/bidi-js/src')
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/brackets.js | 30 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/charTypes.js | 66 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/data/bidiBrackets.data.js | 5 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/data/bidiCharTypes.data.js | 25 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/data/bidiMirroring.data.js | 2 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/embeddingLevels.js | 690 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/index.js | 5 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/mirroring.js | 48 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/reordering.js | 99 | ||||
| -rw-r--r-- | vanilla/node_modules/bidi-js/src/util/parseCharacterMap.js | 30 |
10 files changed, 1000 insertions, 0 deletions
diff --git a/vanilla/node_modules/bidi-js/src/brackets.js b/vanilla/node_modules/bidi-js/src/brackets.js new file mode 100644 index 0000000..c598bc1 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/brackets.js @@ -0,0 +1,30 @@ +import data from './data/bidiBrackets.data.js' +import { parseCharacterMap } from './util/parseCharacterMap.js' + +let openToClose, closeToOpen, canonical + +function parse () { + if (!openToClose) { + //const start = performance.now() + let { map, reverseMap } = parseCharacterMap(data.pairs, true) + openToClose = map + closeToOpen = reverseMap + canonical = parseCharacterMap(data.canonical, false).map + //console.log(`brackets parsed in ${performance.now() - start}ms`) + } +} + +export function openingToClosingBracket (char) { + parse() + return openToClose.get(char) || null +} + +export function closingToOpeningBracket (char) { + parse() + return closeToOpen.get(char) || null +} + +export function getCanonicalBracket (char) { + parse() + return canonical.get(char) || null +} diff --git a/vanilla/node_modules/bidi-js/src/charTypes.js b/vanilla/node_modules/bidi-js/src/charTypes.js new file mode 100644 index 0000000..057e871 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/charTypes.js @@ -0,0 +1,66 @@ +import DATA from './data/bidiCharTypes.data.js' + +const TYPES = {} +const TYPES_TO_NAMES = {} +TYPES.L = 1 //L is the default +TYPES_TO_NAMES[1] = 'L' +Object.keys(DATA).forEach((type, i) => { + TYPES[type] = 1 << (i + 1) + TYPES_TO_NAMES[TYPES[type]] = type +}) +Object.freeze(TYPES) + +const ISOLATE_INIT_TYPES = TYPES.LRI | TYPES.RLI | TYPES.FSI +const STRONG_TYPES = TYPES.L | TYPES.R | TYPES.AL +const NEUTRAL_ISOLATE_TYPES = TYPES.B | TYPES.S | TYPES.WS | TYPES.ON | TYPES.FSI | TYPES.LRI | TYPES.RLI | TYPES.PDI +const BN_LIKE_TYPES = TYPES.BN | TYPES.RLE | TYPES.LRE | TYPES.RLO | TYPES.LRO | TYPES.PDF +const TRAILING_TYPES = TYPES.S | TYPES.WS | TYPES.B | ISOLATE_INIT_TYPES | TYPES.PDI | BN_LIKE_TYPES + +let map = null + +function 
parseData () { + if (!map) { + //const start = performance.now() + map = new Map() + for (let type in DATA) { + if (DATA.hasOwnProperty(type)) { + let lastCode = 0 + DATA[type].split(',').forEach(range => { + let [skip, step] = range.split('+') + skip = parseInt(skip, 36) + step = step ? parseInt(step, 36) : 0 + map.set(lastCode += skip, TYPES[type]) + for (let i = 0; i < step; i++) { + map.set(++lastCode, TYPES[type]) + } + }) + } + } + //console.log(`char types parsed in ${performance.now() - start}ms`) + } +} + +/** + * @param {string} char + * @return {number} + */ +function getBidiCharType (char) { + parseData() + return map.get(char.codePointAt(0)) || TYPES.L +} + +function getBidiCharTypeName(char) { + return TYPES_TO_NAMES[getBidiCharType(char)] +} + +export { + getBidiCharType, + getBidiCharTypeName, + TYPES, + TYPES_TO_NAMES, + ISOLATE_INIT_TYPES, + STRONG_TYPES, + NEUTRAL_ISOLATE_TYPES, + BN_LIKE_TYPES, + TRAILING_TYPES +} diff --git a/vanilla/node_modules/bidi-js/src/data/bidiBrackets.data.js b/vanilla/node_modules/bidi-js/src/data/bidiBrackets.data.js new file mode 100644 index 0000000..885ae97 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/data/bidiBrackets.data.js @@ -0,0 +1,5 @@ +// Bidi bracket pairs data, auto generated +export default { + "pairs": "14>1,1e>2,u>2,2wt>1,1>1,1ge>1,1wp>1,1j>1,f>1,hm>1,1>1,u>1,u6>1,1>1,+5,28>1,w>1,1>1,+3,b8>1,1>1,+3,1>3,-1>-1,3>1,1>1,+2,1s>1,1>1,x>1,th>1,1>1,+2,db>1,1>1,+3,3>1,1>1,+2,14qm>1,1>1,+1,4q>1,1e>2,u>2,2>1,+1", + "canonical": "6f1>-6dx,6dy>-6dx,6ec>-6ed,6ee>-6ed,6ww>2jj,-2ji>2jj,14r4>-1e7l,1e7m>-1e7l,1e7m>-1e5c,1e5d>-1e5b,1e5c>-14qx,14qy>-14qx,14vn>-1ecg,1ech>-1ecg,1edu>-1ecg,1eci>-1ecg,1eda>-1ecg,1eci>-1ecg,1eci>-168q,168r>-168q,168s>-14ye,14yf>-14ye" +} diff --git a/vanilla/node_modules/bidi-js/src/data/bidiCharTypes.data.js b/vanilla/node_modules/bidi-js/src/data/bidiCharTypes.data.js new file mode 100644 index 0000000..b263c80 --- /dev/null +++ 
b/vanilla/node_modules/bidi-js/src/data/bidiCharTypes.data.js @@ -0,0 +1,25 @@ +// Bidi character types data, auto generated +export default { + "R": "13k,1a,2,3,3,2+1j,ch+16,a+1,5+2,2+n,5,a,4,6+16,4+3,h+1b,4mo,179q,2+9,2+11,2i9+7y,2+68,4,3+4,5+13,4+3,2+4k,3+29,8+cf,1t+7z,w+17,3+3m,1t+3z,16o1+5r,8+30,8+mc,29+1r,29+4v,75+73", + "EN": "1c+9,3d+1,6,187+9,513,4+5,7+9,sf+j,175h+9,qw+q,161f+1d,4xt+a,25i+9", + "ES": "17,2,6dp+1,f+1,av,16vr,mx+1,4o,2", + "ET": "z+2,3h+3,b+1,ym,3e+1,2o,p4+1,8,6u,7c,g6,1wc,1n9+4,30+1b,2n,6d,qhx+1,h0m,a+1,49+2,63+1,4+1,6bb+3,12jj", + "AN": "16o+5,2j+9,2+1,35,ed,1ff2+9,87+u", + "CS": "18,2+1,b,2u,12k,55v,l,17v0,2,3,53,2+1,b", + "B": "a,3,f+2,2v,690", + "S": "9,2,k", + "WS": "c,k,4f4,1vk+a,u,1j,335", + "ON": "x+1,4+4,h+5,r+5,r+3,z,5+3,2+1,2+1,5,2+2,3+4,o,w,ci+1,8+d,3+d,6+8,2+g,39+1,9,6+1,2,33,b8,3+1,3c+1,7+1,5r,b,7h+3,sa+5,2,3i+6,jg+3,ur+9,2v,ij+1,9g+9,7+a,8m,4+1,49+x,14u,2+2,c+2,e+2,e+2,e+1,i+n,e+e,2+p,u+2,e+2,36+1,2+3,2+1,b,2+2,6+5,2,2,2,h+1,5+4,6+3,3+f,16+2,5+3l,3+81,1y+p,2+40,q+a,m+13,2r+ch,2+9e,75+hf,3+v,2+2w,6e+5,f+6,75+2a,1a+p,2+2g,d+5x,r+b,6+3,4+o,g,6+1,6+2,2k+1,4,2j,5h+z,1m+1,1e+f,t+2,1f+e,d+3,4o+3,2s+1,w,535+1r,h3l+1i,93+2,2s,b+1,3l+x,2v,4g+3,21+3,kz+1,g5v+1,5a,j+9,n+v,2,3,2+8,2+1,3+2,2,3,46+1,4+4,h+5,r+5,r+a,3h+2,4+6,b+4,78,1r+24,4+c,4,1hb,ey+6,103+j,16j+c,1ux+7,5+g,fsh,jdq+1t,4,57+2e,p1,1m,1m,1m,1m,4kt+1,7j+17,5+2r,d+e,3+e,2+e,2+10,m+4,w,1n+5,1q,4z+5,4b+rb,9+c,4+c,4+37,d+2g,8+b,l+b,5+1j,9+9,7+13,9+t,3+1,27+3c,2+29,2+3q,d+d,3+4,4+2,6+6,a+o,8+6,a+2,e+6,16+42,2+1i", + "BN": "0+8,6+d,2s+5,2+p,e,4m9,1kt+2,2b+5,5+5,17q9+v,7k,6p+8,6+1,119d+3,440+7,96s+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+75,6p+2rz,1ben+1,1ekf+1,1ekf+1", + "NSM": 
"lc+33,7o+6,7c+18,2,2+1,2+1,2,21+a,1d+k,h,2u+6,3+5,3+1,2+3,10,v+q,2k+a,1n+8,a,p+3,2+8,2+2,2+4,18+2,3c+e,2+v,1k,2,5+7,5,4+6,b+1,u,1n,5+3,9,l+1,r,3+1,1m,5+1,5+1,3+2,4,v+1,4,c+1,1m,5+4,2+1,5,l+1,n+5,2,1n,3,2+3,9,8+1,c+1,v,1q,d,1f,4,1m+2,6+2,2+3,8+1,c+1,u,1n,g+1,l+1,t+1,1m+1,5+3,9,l+1,u,21,8+2,2,2j,3+6,d+7,2r,3+8,c+5,23+1,s,2,2,1k+d,2+4,2+1,6+a,2+z,a,2v+3,2+5,2+1,3+1,q+1,5+2,h+3,e,3+1,7,g,jk+2,qb+2,u+2,u+1,v+1,1t+1,2+6,9,3+a,a,1a+2,3c+1,z,3b+2,5+1,a,7+2,64+1,3,1n,2+6,2,2,3+7,7+9,3,1d+g,1s+3,1d,2+4,2,6,15+8,d+1,x+3,3+1,2+2,1l,2+1,4,2+2,1n+7,3+1,49+2,2+c,2+6,5,7,4+1,5j+1l,2+4,k1+w,2db+2,3y,2p+v,ff+3,30+1,n9x+3,2+9,x+1,29+1,7l,4,5,q+1,6,48+1,r+h,e,13+7,q+a,1b+2,1d,3+3,3+1,14,1w+5,3+1,3+1,d,9,1c,1g,2+2,3+1,6+1,2,17+1,9,6n,3,5,fn5,ki+f,h+f,r2,6b,46+4,1af+2,2+1,6+3,15+2,5,4m+1,fy+3,as+1,4a+a,4x,1j+e,1l+2,1e+3,3+1,1y+2,11+4,2+7,1r,d+1,1h+8,b+3,3,2o+2,3,2+1,7,4h,4+7,m+1,1m+1,4,12+6,4+4,5g+7,3+2,2,o,2d+5,2,5+1,2+1,6n+3,7+1,2+1,s+1,2e+7,3,2+1,2z,2,3+5,2,2u+2,3+3,2+4,78+8,2+1,75+1,2,5,41+3,3+1,5,x+5,3+1,15+5,3+3,9,a+5,3+2,1b+c,2+1,bb+6,2+5,2d+l,3+6,2+1,2+1,3f+5,4,2+1,2+6,2,21+1,4,2,9o+1,f0c+4,1o+6,t5,1s+3,2a,f5l+1,43t+2,i+7,3+6,v+3,45+2,1j0+1i,5+1d,9,f,n+4,2+e,11t+6,2+g,3+6,2+1,2+4,7a+6,c6+3,15t+6,32+6,gzhy+6n", + "AL": "16w,3,2,e+1b,z+2,2+2s,g+1,8+1,b+m,2+t,s+2i,c+e,4h+f,1d+1e,1bwe+dp,3+3z,x+c,2+1,35+3y,2rm+z,5+7,b+5,dt+l,c+u,17nl+27,1t+27,4x+6n,3+d", + "LRO": "6ct", + "RLO": "6cu", + "LRE": "6cq", + "RLE": "6cr", + "PDF": "6cs", + "LRI": "6ee", + "RLI": "6ef", + "FSI": "6eg", + "PDI": "6eh" +} diff --git a/vanilla/node_modules/bidi-js/src/data/bidiMirroring.data.js b/vanilla/node_modules/bidi-js/src/data/bidiMirroring.data.js new file mode 100644 index 0000000..b9cf987 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/data/bidiMirroring.data.js @@ -0,0 +1,2 @@ +// Bidi mirrored chars data, auto generated +export default 
"14>1,j>2,t>2,u>2,1a>g,2v3>1,1>1,1ge>1,1wd>1,b>1,1j>1,f>1,ai>3,-2>3,+1,8>1k0,-1jq>1y7,-1y6>1hf,-1he>1h6,-1h5>1ha,-1h8>1qi,-1pu>1,6>3u,-3s>7,6>1,1>1,f>1,1>1,+2,3>1,1>1,+13,4>1,1>1,6>1eo,-1ee>1,3>1mg,-1me>1mk,-1mj>1mi,-1mg>1mi,-1md>1,1>1,+2,1>10k,-103>1,1>1,4>1,5>1,1>1,+10,3>1,1>8,-7>8,+1,-6>7,+1,a>1,1>1,u>1,u6>1,1>1,+5,26>1,1>1,2>1,2>2,8>1,7>1,4>1,1>1,+5,b8>1,1>1,+3,1>3,-2>1,2>1,1>1,+2,c>1,3>1,1>1,+2,h>1,3>1,a>1,1>1,2>1,3>1,1>1,d>1,f>1,3>1,1a>1,1>1,6>1,7>1,13>1,k>1,1>1,+19,4>1,1>1,+2,2>1,1>1,+18,m>1,a>1,1>1,lk>1,1>1,4>1,2>1,f>1,3>1,1>1,+3,db>1,1>1,+3,3>1,1>1,+2,14qm>1,1>1,+1,6>1,4j>1,j>2,t>2,u>2,2>1,+1" diff --git a/vanilla/node_modules/bidi-js/src/embeddingLevels.js b/vanilla/node_modules/bidi-js/src/embeddingLevels.js new file mode 100644 index 0000000..3815393 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/embeddingLevels.js @@ -0,0 +1,690 @@ +import { + BN_LIKE_TYPES, + getBidiCharType, + ISOLATE_INIT_TYPES, + NEUTRAL_ISOLATE_TYPES, + STRONG_TYPES, + TRAILING_TYPES, + TYPES +} from './charTypes.js' +import { closingToOpeningBracket, getCanonicalBracket, openingToClosingBracket } from './brackets.js' + +// Local type aliases +const { + L: TYPE_L, + R: TYPE_R, + EN: TYPE_EN, + ES: TYPE_ES, + ET: TYPE_ET, + AN: TYPE_AN, + CS: TYPE_CS, + B: TYPE_B, + S: TYPE_S, + ON: TYPE_ON, + BN: TYPE_BN, + NSM: TYPE_NSM, + AL: TYPE_AL, + LRO: TYPE_LRO, + RLO: TYPE_RLO, + LRE: TYPE_LRE, + RLE: TYPE_RLE, + PDF: TYPE_PDF, + LRI: TYPE_LRI, + RLI: TYPE_RLI, + FSI: TYPE_FSI, + PDI: TYPE_PDI +} = TYPES + +/** + * @typedef {object} GetEmbeddingLevelsResult + * @property {{start, end, level}[]} paragraphs + * @property {Uint8Array} levels + */ + +/** + * This function applies the Bidirectional Algorithm to a string, returning the resolved embedding levels + * in a single Uint8Array plus a list of objects holding each paragraph's start and end indices and resolved + * base embedding level. 
+ * + * @param {string} string - The input string + * @param {"ltr"|"rtl"|"auto"} [baseDirection] - Use "ltr" or "rtl" to force a base paragraph direction, + * otherwise a direction will be chosen automatically from each paragraph's contents. + * @return {GetEmbeddingLevelsResult} + */ +export function getEmbeddingLevels (string, baseDirection) { + const MAX_DEPTH = 125 + + // Start by mapping all characters to their unicode type, as a bitmask integer + const charTypes = new Uint32Array(string.length) + for (let i = 0; i < string.length; i++) { + charTypes[i] = getBidiCharType(string[i]) + } + + const charTypeCounts = new Map() //will be cleared at start of each paragraph + function changeCharType(i, type) { + const oldType = charTypes[i] + charTypes[i] = type + charTypeCounts.set(oldType, charTypeCounts.get(oldType) - 1) + if (oldType & NEUTRAL_ISOLATE_TYPES) { + charTypeCounts.set(NEUTRAL_ISOLATE_TYPES, charTypeCounts.get(NEUTRAL_ISOLATE_TYPES) - 1) + } + charTypeCounts.set(type, (charTypeCounts.get(type) || 0) + 1) + if (type & NEUTRAL_ISOLATE_TYPES) { + charTypeCounts.set(NEUTRAL_ISOLATE_TYPES, (charTypeCounts.get(NEUTRAL_ISOLATE_TYPES) || 0) + 1) + } + } + + const embedLevels = new Uint8Array(string.length) + const isolationPairs = new Map() //init->pdi and pdi->init + + // === 3.3.1 The Paragraph Level === + // 3.3.1 P1: Split the text into paragraphs + const paragraphs = [] // [{start, end, level}, ...] + let paragraph = null + for (let i = 0; i < string.length; i++) { + if (!paragraph) { + paragraphs.push(paragraph = { + start: i, + end: string.length - 1, + // 3.3.1 P2-P3: Determine the paragraph level + level: baseDirection === 'rtl' ? 1 : baseDirection === 'ltr' ? 0 : determineAutoEmbedLevel(i, false) + }) + } + if (charTypes[i] & TYPE_B) { + paragraph.end = i + paragraph = null + } + } + + const FORMATTING_TYPES = TYPE_RLE | TYPE_LRE | TYPE_RLO | TYPE_LRO | ISOLATE_INIT_TYPES | TYPE_PDI | TYPE_PDF | TYPE_B + const nextEven = n => n + ((n & 1) ? 
1 : 2) + const nextOdd = n => n + ((n & 1) ? 2 : 1) + + // Everything from here on will operate per paragraph. + for (let paraIdx = 0; paraIdx < paragraphs.length; paraIdx++) { + paragraph = paragraphs[paraIdx] + const statusStack = [{ + _level: paragraph.level, + _override: 0, //0=neutral, 1=L, 2=R + _isolate: 0 //bool + }] + let stackTop + let overflowIsolateCount = 0 + let overflowEmbeddingCount = 0 + let validIsolateCount = 0 + charTypeCounts.clear() + + // === 3.3.2 Explicit Levels and Directions === + for (let i = paragraph.start; i <= paragraph.end; i++) { + let charType = charTypes[i] + stackTop = statusStack[statusStack.length - 1] + + // Set initial counts + charTypeCounts.set(charType, (charTypeCounts.get(charType) || 0) + 1) + if (charType & NEUTRAL_ISOLATE_TYPES) { + charTypeCounts.set(NEUTRAL_ISOLATE_TYPES, (charTypeCounts.get(NEUTRAL_ISOLATE_TYPES) || 0) + 1) + } + + // Explicit Embeddings: 3.3.2 X2 - X3 + if (charType & FORMATTING_TYPES) { //prefilter all formatters + if (charType & (TYPE_RLE | TYPE_LRE)) { + embedLevels[i] = stackTop._level // 5.2 + const level = (charType === TYPE_RLE ? nextOdd : nextEven)(stackTop._level) + if (level <= MAX_DEPTH && !overflowIsolateCount && !overflowEmbeddingCount) { + statusStack.push({ + _level: level, + _override: 0, + _isolate: 0 + }) + } else if (!overflowIsolateCount) { + overflowEmbeddingCount++ + } + } + + // Explicit Overrides: 3.3.2 X4 - X5 + else if (charType & (TYPE_RLO | TYPE_LRO)) { + embedLevels[i] = stackTop._level // 5.2 + const level = (charType === TYPE_RLO ? nextOdd : nextEven)(stackTop._level) + if (level <= MAX_DEPTH && !overflowIsolateCount && !overflowEmbeddingCount) { + statusStack.push({ + _level: level, + _override: (charType & TYPE_RLO) ? 
TYPE_R : TYPE_L, + _isolate: 0 + }) + } else if (!overflowIsolateCount) { + overflowEmbeddingCount++ + } + } + + // Isolates: 3.3.2 X5a - X5c + else if (charType & ISOLATE_INIT_TYPES) { + // X5c - FSI becomes either RLI or LRI + if (charType & TYPE_FSI) { + charType = determineAutoEmbedLevel(i + 1, true) === 1 ? TYPE_RLI : TYPE_LRI + } + + embedLevels[i] = stackTop._level + if (stackTop._override) { + changeCharType(i, stackTop._override) + } + const level = (charType === TYPE_RLI ? nextOdd : nextEven)(stackTop._level) + if (level <= MAX_DEPTH && overflowIsolateCount === 0 && overflowEmbeddingCount === 0) { + validIsolateCount++ + statusStack.push({ + _level: level, + _override: 0, + _isolate: 1, + _isolInitIndex: i + }) + } else { + overflowIsolateCount++ + } + } + + // Terminating Isolates: 3.3.2 X6a + else if (charType & TYPE_PDI) { + if (overflowIsolateCount > 0) { + overflowIsolateCount-- + } else if (validIsolateCount > 0) { + overflowEmbeddingCount = 0 + while (!statusStack[statusStack.length - 1]._isolate) { + statusStack.pop() + } + // Add to isolation pairs bidirectional mapping: + const isolInitIndex = statusStack[statusStack.length - 1]._isolInitIndex + if (isolInitIndex != null) { + isolationPairs.set(isolInitIndex, i) + isolationPairs.set(i, isolInitIndex) + } + statusStack.pop() + validIsolateCount-- + } + stackTop = statusStack[statusStack.length - 1] + embedLevels[i] = stackTop._level + if (stackTop._override) { + changeCharType(i, stackTop._override) + } + } + + + // Terminating Embeddings and Overrides: 3.3.2 X7 + else if (charType & TYPE_PDF) { + if (overflowIsolateCount === 0) { + if (overflowEmbeddingCount > 0) { + overflowEmbeddingCount-- + } else if (!stackTop._isolate && statusStack.length > 1) { + statusStack.pop() + stackTop = statusStack[statusStack.length - 1] + } + } + embedLevels[i] = stackTop._level // 5.2 + } + + // End of Paragraph: 3.3.2 X8 + else if (charType & TYPE_B) { + embedLevels[i] = paragraph.level + } + } + + // 
Non-formatting characters: 3.3.2 X6 + else { + embedLevels[i] = stackTop._level + // NOTE: This exclusion of BN seems to go against what section 5.2 says, but is required for test passage + if (stackTop._override && charType !== TYPE_BN) { + changeCharType(i, stackTop._override) + } + } + } + + // === 3.3.3 Preparations for Implicit Processing === + + // Remove all RLE, LRE, RLO, LRO, PDF, and BN characters: 3.3.3 X9 + // Note: Due to section 5.2, we won't remove them, but we'll use the BN_LIKE_TYPES bitset to + // easily ignore them all from here on out. + + // 3.3.3 X10 + // Compute the set of isolating run sequences as specified by BD13 + const levelRuns = [] + let currentRun = null + let isolationLevel = 0 + for (let i = paragraph.start; i <= paragraph.end; i++) { + const charType = charTypes[i] + if (!(charType & BN_LIKE_TYPES)) { + const lvl = embedLevels[i] + const isIsolInit = charType & ISOLATE_INIT_TYPES + const isPDI = charType === TYPE_PDI + if (isIsolInit) { + isolationLevel++ + } + if (currentRun && lvl === currentRun._level) { + currentRun._end = i + currentRun._endsWithIsolInit = isIsolInit + } else { + levelRuns.push(currentRun = { + _start: i, + _end: i, + _level: lvl, + _startsWithPDI: isPDI, + _endsWithIsolInit: isIsolInit + }) + } + if (isPDI) { + isolationLevel-- + } + } + } + const isolatingRunSeqs = [] // [{seqIndices: [], sosType: L|R, eosType: L|R}] + for (let runIdx = 0; runIdx < levelRuns.length; runIdx++) { + const run = levelRuns[runIdx] + if (!run._startsWithPDI || (run._startsWithPDI && !isolationPairs.has(run._start))) { + const seqRuns = [currentRun = run] + for (let pdiIndex; currentRun && currentRun._endsWithIsolInit && (pdiIndex = isolationPairs.get(currentRun._end)) != null;) { + for (let i = runIdx + 1; i < levelRuns.length; i++) { + if (levelRuns[i]._start === pdiIndex) { + seqRuns.push(currentRun = levelRuns[i]) + break + } + } + } + // build flat list of indices across all runs: + const seqIndices = [] + for (let i = 0; i < 
seqRuns.length; i++) { + const run = seqRuns[i] + for (let j = run._start; j <= run._end; j++) { + seqIndices.push(j) + } + } + // determine the sos/eos types: + let firstLevel = embedLevels[seqIndices[0]] + let prevLevel = paragraph.level + for (let i = seqIndices[0] - 1; i >= 0; i--) { + if (!(charTypes[i] & BN_LIKE_TYPES)) { //5.2 + prevLevel = embedLevels[i] + break + } + } + const lastIndex = seqIndices[seqIndices.length - 1] + let lastLevel = embedLevels[lastIndex] + let nextLevel = paragraph.level + if (!(charTypes[lastIndex] & ISOLATE_INIT_TYPES)) { + for (let i = lastIndex + 1; i <= paragraph.end; i++) { + if (!(charTypes[i] & BN_LIKE_TYPES)) { //5.2 + nextLevel = embedLevels[i] + break + } + } + } + isolatingRunSeqs.push({ + _seqIndices: seqIndices, + _sosType: Math.max(prevLevel, firstLevel) % 2 ? TYPE_R : TYPE_L, + _eosType: Math.max(nextLevel, lastLevel) % 2 ? TYPE_R : TYPE_L + }) + } + } + + // The next steps are done per isolating run sequence + for (let seqIdx = 0; seqIdx < isolatingRunSeqs.length; seqIdx++) { + const { _seqIndices: seqIndices, _sosType: sosType, _eosType: eosType } = isolatingRunSeqs[seqIdx] + /** + * All the level runs in an isolating run sequence have the same embedding level. + * + * DO NOT change any `embedLevels[i]` within the current scope. + */ + const embedDirection = ((embedLevels[seqIndices[0]]) & 1) ? TYPE_R : TYPE_L; + + // === 3.3.4 Resolving Weak Types === + + // W1 + 5.2. Search backward from each NSM to the first character in the isolating run sequence whose + // bidirectional type is not BN, and set the NSM to ON if it is an isolate initiator or PDI, and to its + // type otherwise. If the NSM is the first non-BN character, change the NSM to the type of sos. 
+ if (charTypeCounts.get(TYPE_NSM)) { + for (let si = 0; si < seqIndices.length; si++) { + const i = seqIndices[si] + if (charTypes[i] & TYPE_NSM) { + let prevType = sosType + for (let sj = si - 1; sj >= 0; sj--) { + if (!(charTypes[seqIndices[sj]] & BN_LIKE_TYPES)) { //5.2 scan back to first non-BN + prevType = charTypes[seqIndices[sj]] + break + } + } + changeCharType(i, (prevType & (ISOLATE_INIT_TYPES | TYPE_PDI)) ? TYPE_ON : prevType) + } + } + } + + // W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) + // is found. If an AL is found, change the type of the European number to Arabic number. + if (charTypeCounts.get(TYPE_EN)) { + for (let si = 0; si < seqIndices.length; si++) { + const i = seqIndices[si] + if (charTypes[i] & TYPE_EN) { + for (let sj = si - 1; sj >= -1; sj--) { + const prevCharType = sj === -1 ? sosType : charTypes[seqIndices[sj]] + if (prevCharType & STRONG_TYPES) { + if (prevCharType === TYPE_AL) { + changeCharType(i, TYPE_AN) + } + break + } + } + } + } + } + + // W3. Change all ALs to R + if (charTypeCounts.get(TYPE_AL)) { + for (let si = 0; si < seqIndices.length; si++) { + const i = seqIndices[si] + if (charTypes[i] & TYPE_AL) { + changeCharType(i, TYPE_R) + } + } + } + + // W4. A single European separator between two European numbers changes to a European number. A single common + // separator between two numbers of the same type changes to that type. 
+ if (charTypeCounts.get(TYPE_ES) || charTypeCounts.get(TYPE_CS)) { + for (let si = 1; si < seqIndices.length - 1; si++) { + const i = seqIndices[si] + if (charTypes[i] & (TYPE_ES | TYPE_CS)) { + let prevType = 0, nextType = 0 + for (let sj = si - 1; sj >= 0; sj--) { + prevType = charTypes[seqIndices[sj]] + if (!(prevType & BN_LIKE_TYPES)) { //5.2 + break + } + } + for (let sj = si + 1; sj < seqIndices.length; sj++) { + nextType = charTypes[seqIndices[sj]] + if (!(nextType & BN_LIKE_TYPES)) { //5.2 + break + } + } + if (prevType === nextType && (charTypes[i] === TYPE_ES ? prevType === TYPE_EN : (prevType & (TYPE_EN | TYPE_AN)))) { + changeCharType(i, prevType) + } + } + } + } + + // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers. + if (charTypeCounts.get(TYPE_EN)) { + for (let si = 0; si < seqIndices.length; si++) { + const i = seqIndices[si] + if (charTypes[i] & TYPE_EN) { + for (let sj = si - 1; sj >= 0 && (charTypes[seqIndices[sj]] & (TYPE_ET | BN_LIKE_TYPES)); sj--) { + changeCharType(seqIndices[sj], TYPE_EN) + } + for (si++; si < seqIndices.length && (charTypes[seqIndices[si]] & (TYPE_ET | BN_LIKE_TYPES | TYPE_EN)); si++) { + if (charTypes[seqIndices[si]] !== TYPE_EN) { + changeCharType(seqIndices[si], TYPE_EN) + } + } + } + } + } + + // W6. Otherwise, separators and terminators change to Other Neutral. + if (charTypeCounts.get(TYPE_ET) || charTypeCounts.get(TYPE_ES) || charTypeCounts.get(TYPE_CS)) { + for (let si = 0; si < seqIndices.length; si++) { + const i = seqIndices[si] + if (charTypes[i] & (TYPE_ET | TYPE_ES | TYPE_CS)) { + changeCharType(i, TYPE_ON) + // 5.2 transform adjacent BNs too: + for (let sj = si - 1; sj >= 0 && (charTypes[seqIndices[sj]] & BN_LIKE_TYPES); sj--) { + changeCharType(seqIndices[sj], TYPE_ON) + } + for (let sj = si + 1; sj < seqIndices.length && (charTypes[seqIndices[sj]] & BN_LIKE_TYPES); sj++) { + changeCharType(seqIndices[sj], TYPE_ON) + } + } + } + } + + // W7. 
Search backward from each instance of a European number until the first strong type (R, L, or sos) + // is found. If an L is found, then change the type of the European number to L. + // NOTE: implemented in single forward pass for efficiency + if (charTypeCounts.get(TYPE_EN)) { + for (let si = 0, prevStrongType = sosType; si < seqIndices.length; si++) { + const i = seqIndices[si] + const type = charTypes[i] + if (type & TYPE_EN) { + if (prevStrongType === TYPE_L) { + changeCharType(i, TYPE_L) + } + } else if (type & STRONG_TYPES) { + prevStrongType = type + } + } + } + + // === 3.3.5 Resolving Neutral and Isolate Formatting Types === + + if (charTypeCounts.get(NEUTRAL_ISOLATE_TYPES)) { + // N0. Process bracket pairs in an isolating run sequence sequentially in the logical order of the text + // positions of the opening paired brackets using the logic given below. Within this scope, bidirectional + // types EN and AN are treated as R. + const R_TYPES_FOR_N_STEPS = (TYPE_R | TYPE_EN | TYPE_AN) + const STRONG_TYPES_FOR_N_STEPS = R_TYPES_FOR_N_STEPS | TYPE_L + + // * Identify the bracket pairs in the current isolating run sequence according to BD16. + const bracketPairs = [] + { + const openerStack = [] + for (let si = 0; si < seqIndices.length; si++) { + // NOTE: for any potential bracket character we also test that it still carries a NI + // type, as that may have been changed earlier. This doesn't seem to be explicitly + // called out in the spec, but is required for passage of certain tests. 
+ if (charTypes[seqIndices[si]] & NEUTRAL_ISOLATE_TYPES) { + const char = string[seqIndices[si]] + let oppositeBracket + // Opening bracket + if (openingToClosingBracket(char) !== null) { + if (openerStack.length < 63) { + openerStack.push({ char, seqIndex: si }) + } else { + break + } + } + // Closing bracket + else if ((oppositeBracket = closingToOpeningBracket(char)) !== null) { + for (let stackIdx = openerStack.length - 1; stackIdx >= 0; stackIdx--) { + const stackChar = openerStack[stackIdx].char + if (stackChar === oppositeBracket || + stackChar === closingToOpeningBracket(getCanonicalBracket(char)) || + openingToClosingBracket(getCanonicalBracket(stackChar)) === char + ) { + bracketPairs.push([openerStack[stackIdx].seqIndex, si]) + openerStack.length = stackIdx //pop the matching bracket and all following + break + } + } + } + } + } + bracketPairs.sort((a, b) => a[0] - b[0]) + } + // * For each bracket-pair element in the list of pairs of text positions + for (let pairIdx = 0; pairIdx < bracketPairs.length; pairIdx++) { + const [openSeqIdx, closeSeqIdx] = bracketPairs[pairIdx] + // a. Inspect the bidirectional types of the characters enclosed within the bracket pair. + // b. If any strong type (either L or R) matching the embedding direction is found, set the type for both + // brackets in the pair to match the embedding direction. + let foundStrongType = false + let useStrongType = 0 + for (let si = openSeqIdx + 1; si < closeSeqIdx; si++) { + const i = seqIndices[si] + if (charTypes[i] & STRONG_TYPES_FOR_N_STEPS) { + foundStrongType = true + const lr = (charTypes[i] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L + if (lr === embedDirection) { + useStrongType = lr + break + } + } + } + // c. Otherwise, if there is a strong type it must be opposite the embedding direction. Therefore, test + // for an established context with a preceding strong type by checking backwards before the opening paired + // bracket until the first strong type (L, R, or sos) is found. 
+ // 1. If the preceding strong type is also opposite the embedding direction, context is established, so + // set the type for both brackets in the pair to that direction. + // 2. Otherwise set the type for both brackets in the pair to the embedding direction. + if (foundStrongType && !useStrongType) { + useStrongType = sosType + for (let si = openSeqIdx - 1; si >= 0; si--) { + const i = seqIndices[si] + if (charTypes[i] & STRONG_TYPES_FOR_N_STEPS) { + const lr = (charTypes[i] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L + if (lr !== embedDirection) { + useStrongType = lr + } else { + useStrongType = embedDirection + } + break + } + } + } + if (useStrongType) { + charTypes[seqIndices[openSeqIdx]] = charTypes[seqIndices[closeSeqIdx]] = useStrongType + // * Any number of characters that had original bidirectional character type NSM prior to the application + // of W1 that immediately follow a paired bracket which changed to L or R under N0 should change to match + // the type of their preceding bracket. + if (useStrongType !== embedDirection) { + for (let si = openSeqIdx + 1; si < seqIndices.length; si++) { + if (!(charTypes[seqIndices[si]] & BN_LIKE_TYPES)) { + if (getBidiCharType(string[seqIndices[si]]) & TYPE_NSM) { + charTypes[seqIndices[si]] = useStrongType + } + break + } + } + } + if (useStrongType !== embedDirection) { + for (let si = closeSeqIdx + 1; si < seqIndices.length; si++) { + if (!(charTypes[seqIndices[si]] & BN_LIKE_TYPES)) { + if (getBidiCharType(string[seqIndices[si]]) & TYPE_NSM) { + charTypes[seqIndices[si]] = useStrongType + } + break + } + } + } + } + } + + // N1. A sequence of NIs takes the direction of the surrounding strong text if the text on both sides has the + // same direction. + // N2. Any remaining NIs take the embedding direction. + for (let si = 0; si < seqIndices.length; si++) { + if (charTypes[seqIndices[si]] & NEUTRAL_ISOLATE_TYPES) { + let niRunStart = si, niRunEnd = si + let prevType = sosType //si === 0 ? 
sosType : (charTypes[seqIndices[si - 1]] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L + for (let si2 = si - 1; si2 >= 0; si2--) { + if (charTypes[seqIndices[si2]] & BN_LIKE_TYPES) { + niRunStart = si2 //5.2 treat BNs adjacent to NIs as NIs + } else { + prevType = (charTypes[seqIndices[si2]] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L + break + } + } + let nextType = eosType + for (let si2 = si + 1; si2 < seqIndices.length; si2++) { + if (charTypes[seqIndices[si2]] & (NEUTRAL_ISOLATE_TYPES | BN_LIKE_TYPES)) { + niRunEnd = si2 + } else { + nextType = (charTypes[seqIndices[si2]] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L + break + } + } + for (let sj = niRunStart; sj <= niRunEnd; sj++) { + charTypes[seqIndices[sj]] = prevType === nextType ? prevType : embedDirection + } + si = niRunEnd + } + } + } + } + + // === 3.3.6 Resolving Implicit Levels === + + for (let i = paragraph.start; i <= paragraph.end; i++) { + const level = embedLevels[i] + const type = charTypes[i] + // I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level. + if (level & 1) { + if (type & (TYPE_L | TYPE_EN | TYPE_AN)) { + embedLevels[i]++ + } + } + // I1. For all characters with an even (left-to-right) embedding level, those of type R go up one level + // and those of type AN or EN go up two levels. + else { + if (type & TYPE_R) { + embedLevels[i]++ + } else if (type & (TYPE_AN | TYPE_EN)) { + embedLevels[i] += 2 + } + } + + // 5.2: Resolve any LRE, RLE, LRO, RLO, PDF, or BN to the level of the preceding character if there is one, + // and otherwise to the base level. + if (type & BN_LIKE_TYPES) { + embedLevels[i] = i === 0 ? paragraph.level : embedLevels[i - 1] + } + + // 3.4 L1.1-4: Reset the embedding level of segment/paragraph separators, and any sequence of whitespace or + // isolate formatting characters preceding them or the end of the paragraph, to the paragraph level. 
+ // NOTE: this will also need to be applied to each individual line ending after line wrapping occurs. + if (i === paragraph.end || getBidiCharType(string[i]) & (TYPE_S | TYPE_B)) { + for (let j = i; j >= 0 && (getBidiCharType(string[j]) & TRAILING_TYPES); j--) { + embedLevels[j] = paragraph.level + } + } + } + } + + // DONE! The resolved levels can then be used, after line wrapping, to flip runs of characters + // according to section 3.4 Reordering Resolved Levels + return { + levels: embedLevels, + paragraphs + } + + function determineAutoEmbedLevel (start, isFSI) { + // 3.3.1 P2 - P3 + for (let i = start; i < string.length; i++) { + const charType = charTypes[i] + if (charType & (TYPE_R | TYPE_AL)) { + return 1 + } + if ((charType & (TYPE_B | TYPE_L)) || (isFSI && charType === TYPE_PDI)) { + return 0 + } + if (charType & ISOLATE_INIT_TYPES) { + const pdi = indexOfMatchingPDI(i) + i = pdi === -1 ? string.length : pdi + } + } + return 0 + } + + function indexOfMatchingPDI (isolateStart) { + // 3.1.2 BD9 + let isolationLevel = 1 + for (let i = isolateStart + 1; i < string.length; i++) { + const charType = charTypes[i] + if (charType & TYPE_B) { + break + } + if (charType & TYPE_PDI) { + if (--isolationLevel === 0) { + return i + } + } else if (charType & ISOLATE_INIT_TYPES) { + isolationLevel++ + } + } + return -1 + } +} diff --git a/vanilla/node_modules/bidi-js/src/index.js b/vanilla/node_modules/bidi-js/src/index.js new file mode 100644 index 0000000..d146bb0 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/index.js @@ -0,0 +1,5 @@ +export { getEmbeddingLevels } from './embeddingLevels.js' +export { getReorderSegments, getReorderedIndices, getReorderedString } from './reordering.js' +export { getBidiCharType, getBidiCharTypeName } from './charTypes.js' +export { getMirroredCharacter, getMirroredCharactersMap } from './mirroring.js' +export { closingToOpeningBracket, openingToClosingBracket, getCanonicalBracket } from './brackets.js' diff --git 
a/vanilla/node_modules/bidi-js/src/mirroring.js b/vanilla/node_modules/bidi-js/src/mirroring.js new file mode 100644 index 0000000..c214b04 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/mirroring.js @@ -0,0 +1,48 @@ +import data from './data/bidiMirroring.data.js' +import { parseCharacterMap } from './util/parseCharacterMap.js' + +let mirrorMap + +function parse () { + if (!mirrorMap) { + //const start = performance.now() + const { map, reverseMap } = parseCharacterMap(data, true) + // Combine both maps into one + reverseMap.forEach((value, key) => { + map.set(key, value) + }) + mirrorMap = map + //console.log(`mirrored chars parsed in ${performance.now() - start}ms`) + } +} + +export function getMirroredCharacter (char) { + parse() + return mirrorMap.get(char) || null +} + +/** + * Given a string and its resolved embedding levels, build a map of indices to replacement chars + * for any characters in right-to-left segments that have defined mirrored characters. + * @param string + * @param embeddingLevels + * @param [start] + * @param [end] + * @return {Map<number, string>} + */ +export function getMirroredCharactersMap(string, embeddingLevels, start, end) { + let strLen = string.length + start = Math.max(0, start == null ? 0 : +start) + end = Math.min(strLen - 1, end == null ? 
strLen - 1 : +end) + + const map = new Map() + for (let i = start; i <= end; i++) { + if (embeddingLevels[i] & 1) { //only odd (rtl) levels + const mirror = getMirroredCharacter(string[i]) + if (mirror !== null) { + map.set(i, mirror) + } + } + } + return map +} diff --git a/vanilla/node_modules/bidi-js/src/reordering.js b/vanilla/node_modules/bidi-js/src/reordering.js new file mode 100644 index 0000000..94a42ed --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/reordering.js @@ -0,0 +1,99 @@ +import { getBidiCharType, TRAILING_TYPES } from './charTypes.js' +import { getMirroredCharacter } from './mirroring.js' + +/** + * Given a start and end denoting a single line within a string, and a set of precalculated + * bidi embedding levels, produce a list of segments whose ordering should be flipped, in sequence. + * @param {string} string - the full input string + * @param {GetEmbeddingLevelsResult} embeddingLevelsResult - the result object from getEmbeddingLevels + * @param {number} [start] - first character in a subset of the full string + * @param {number} [end] - last character in a subset of the full string + * @return {number[][]} - the list of start/end segments that should be flipped, in order. + */ +export function getReorderSegments(string, embeddingLevelsResult, start, end) { + let strLen = string.length + start = Math.max(0, start == null ? 0 : +start) + end = Math.min(strLen - 1, end == null ? strLen - 1 : +end) + + const segments = [] + embeddingLevelsResult.paragraphs.forEach(paragraph => { + const lineStart = Math.max(start, paragraph.start) + const lineEnd = Math.min(end, paragraph.end) + if (lineStart < lineEnd) { + // Local slice for mutation + const lineLevels = embeddingLevelsResult.levels.slice(lineStart, lineEnd + 1) + + // 3.4 L1.4: Reset any sequence of whitespace characters and/or isolate formatting characters at the + // end of the line to the paragraph level. 
+ for (let i = lineEnd; i >= lineStart && (getBidiCharType(string[i]) & TRAILING_TYPES); i--) { + lineLevels[i] = paragraph.level + } + + // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels + // not actually present in the text, reverse any contiguous sequence of characters that are at that level or higher. + let maxLevel = paragraph.level + let minOddLevel = Infinity + for (let i = 0; i < lineLevels.length; i++) { + const level = lineLevels[i] + if (level > maxLevel) maxLevel = level + if (level < minOddLevel) minOddLevel = level | 1 + } + for (let lvl = maxLevel; lvl >= minOddLevel; lvl--) { + for (let i = 0; i < lineLevels.length; i++) { + if (lineLevels[i] >= lvl) { + const segStart = i + while (i + 1 < lineLevels.length && lineLevels[i + 1] >= lvl) { + i++ + } + if (i > segStart) { + segments.push([segStart + lineStart, i + lineStart]) + } + } + } + } + } + }) + return segments +} + +/** + * @param {string} string + * @param {GetEmbeddingLevelsResult} embedLevelsResult + * @param {number} [start] + * @param {number} [end] + * @return {string} the new string with bidi segments reordered + */ +export function getReorderedString(string, embedLevelsResult, start, end) { + const indices = getReorderedIndices(string, embedLevelsResult, start, end) + const chars = [...string] + indices.forEach((charIndex, i) => { + chars[i] = ( + (embedLevelsResult.levels[charIndex] & 1) ? 
getMirroredCharacter(string[charIndex]) : null + ) || string[charIndex] + }) + return chars.join('') +} + +/** + * @param {string} string + * @param {GetEmbeddingLevelsResult} embedLevelsResult + * @param {number} [start] + * @param {number} [end] + * @return {number[]} an array with character indices in their new bidi order + */ +export function getReorderedIndices(string, embedLevelsResult, start, end) { + const segments = getReorderSegments(string, embedLevelsResult, start, end) + // Fill an array with indices + const indices = [] + for (let i = 0; i < string.length; i++) { + indices[i] = i + } + // Reverse each segment in order + segments.forEach(([start, end]) => { + const slice = indices.slice(start, end + 1) + for (let i = slice.length; i--;) { + indices[end - i] = slice[i] + } + }) + return indices +} diff --git a/vanilla/node_modules/bidi-js/src/util/parseCharacterMap.js b/vanilla/node_modules/bidi-js/src/util/parseCharacterMap.js new file mode 100644 index 0000000..86a96b8 --- /dev/null +++ b/vanilla/node_modules/bidi-js/src/util/parseCharacterMap.js @@ -0,0 +1,30 @@ +/** + * Parses an string that holds encoded codepoint mappings, e.g. for bracket pairs or + * mirroring characters, as encoded by scripts/generateBidiData.js. Returns an object + * holding the `map`, and optionally a `reverseMap` if `includeReverse:true`. 
/**
 * Parses a string that holds encoded codepoint mappings, e.g. for bracket pairs or
 * mirroring characters, as encoded by scripts/generateBidiData.js. Returns an object
 * holding the `map`, and optionally a `reverseMap` if `includeReverse:true`.
 *
 * The encoded string is a comma-separated list of entries. An entry of the form `a>b` holds two
 * base-36 deltas: each one is added to a running code point total, yielding first the key
 * character and then the value character. An entry containing `+` (e.g. `+3`) repeats the
 * most recent `a>b` entry that many times, continuing from the current running total.
 *
 * @param {string} encodedString
 * @param {boolean} includeReverse - true if you want reverseMap in the output
 * @return {{map: Map<string, string>, reverseMap?: Map<string, string>}} single-character string
 *   mappings; `reverseMap` is undefined unless `includeReverse` is truthy
 */
export function parseCharacterMap (encodedString, includeReverse) {
  const radix = 36
  const map = new Map()
  const reverseMap = includeReverse ? new Map() : undefined

  // Nothing to decode; avoids feeding NaN to String.fromCodePoint for an empty entry.
  if (!encodedString) {
    return { map, reverseMap }
  }

  let lastCode = 0 // running code point total that every delta is applied to
  let prevPair // most recent `a>b` entry, replayed by `+n` entries

  const visit = (entry) => {
    if (entry.includes('+')) {
      // Unary plus turns e.g. "+3" into the repeat count 3
      for (let i = +entry; i--;) {
        visit(prevPair)
      }
      return
    }
    prevPair = entry
    const [keyDelta, valueDelta] = entry.split('>')
    lastCode += parseInt(keyDelta, radix)
    const key = String.fromCodePoint(lastCode)
    lastCode += parseInt(valueDelta, radix)
    const value = String.fromCodePoint(lastCode)
    map.set(key, value)
    if (reverseMap) {
      reverseMap.set(value, key)
    }
  }

  encodedString.split(',').forEach(entry => visit(entry))
  return { map, reverseMap }
}
