feat: support any Unicode grapheme cluster.

This commit is contained in:
Shibo Lyu 2025-01-28 14:31:14 +08:00
parent 8940f26f17
commit 4dd8121ebb
4 changed files with 14 additions and 17 deletions

View file

@ -1,8 +1,9 @@
{
"name": "@textplace/core",
"version": "0.3.2",
"version": "0.4.0",
"exports": "./mod.ts",
"imports": {
"@deno/dnt": "jsr:@deno/dnt@^0.41.3"
"@deno/dnt": "jsr:@deno/dnt@^0.41.3",
"@std/cli": "jsr:@std/cli@^1.0.11"
}
}

7
deno.lock generated
View file

@ -7,6 +7,7 @@
"jsr:@std/assert@0.223": "0.223.0",
"jsr:@std/assert@0.226": "0.226.0",
"jsr:@std/bytes@0.223": "0.223.0",
"jsr:@std/cli@^1.0.11": "1.0.11",
"jsr:@std/fmt@0.223": "0.223.0",
"jsr:@std/fmt@1": "1.0.3",
"jsr:@std/fs@0.223": "0.223.0",
@ -54,6 +55,9 @@
"@std/bytes@0.223.0": {
"integrity": "84b75052cd8680942c397c2631318772b295019098f40aac5c36cead4cba51a8"
},
"@std/cli@1.0.11": {
"integrity": "ec219619fdcd31bcf0d8e53bee1e2706ec9a02f70255365a094f69755dadd340"
},
"@std/fmt@0.223.0": {
"integrity": "6deb37794127dfc7d7bded2586b9fc6f5d50e62a8134846608baf71ffc1a5208"
},
@ -151,7 +155,8 @@
},
"workspace": {
"dependencies": [
"jsr:@deno/dnt@~0.41.3"
"jsr:@deno/dnt@~0.41.3",
"jsr:@std/cli@^1.0.11"
]
}
}

View file

@ -1,7 +1,6 @@
import { unicodeWidth } from "@std/cli/unicode-width";
const segmenter = new Intl.Segmenter("en", { granularity: "grapheme" });
const cjkRegex =
/[\p{Unified_Ideograph}\u30A0-\u30FF\u3040-\u309F\u31F0-\u31FF]/u;
const printableASCIIRegex = /^[\x20-\x7E]$/;
export function getCharacterWidth(ch: string): number {
const segments = [...segmenter.segment(ch)];
@ -11,11 +10,5 @@ export function getCharacterWidth(ch: string): number {
);
}
const matchesASCII = ch.match(printableASCIIRegex);
const matchesCJK = ch.match(cjkRegex);
if (!matchesASCII && !matchesCJK) throw new Error(`Invalid character: ${ch}`);
// TODO: Support Emojis.
return matchesCJK ? 2 : 1;
return unicodeWidth(ch);
}

View file

@ -26,14 +26,12 @@ Deno.test("getCharacterWidth CJK", () => {
assertEquals(getCharacterWidth("グ"), 2);
assertEquals(getCharacterWidth("ソ"), 2);
assertThrows(() => getCharacterWidth(""));
assertThrows(() => getCharacterWidth(""));
assertEquals(getCharacterWidth(""), 2);
assertEquals(getCharacterWidth(""), 2);
assertThrows(() => getCharacterWidth("你好"));
assertThrows(() => getCharacterWidth("ヨスガノ"));
});
Deno.test("getCharacterWidth previously faulty cases", () => {
assertEquals(getCharacterWidth("𤲶"), 2);
assertThrows(() => getCharacterWidth("𤲶"[0]));
});