From 4dd8121ebb60c023465199d565557699cf70fea6 Mon Sep 17 00:00:00 2001 From: Shibo Lyu Date: Tue, 28 Jan 2025 14:31:14 +0800 Subject: [PATCH] feat: support any unicode grapheme clusters. --- deno.json | 5 +++-- deno.lock | 7 ++++++- logic/character.ts | 13 +++---------- tests/character.test.ts | 6 ++---- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/deno.json b/deno.json index 948c947..2ac2e2b 100644 --- a/deno.json +++ b/deno.json @@ -1,8 +1,9 @@ { "name": "@textplace/core", - "version": "0.3.2", + "version": "0.4.0", "exports": "./mod.ts", "imports": { - "@deno/dnt": "jsr:@deno/dnt@^0.41.3" + "@deno/dnt": "jsr:@deno/dnt@^0.41.3", + "@std/cli": "jsr:@std/cli@^1.0.11" } } diff --git a/deno.lock b/deno.lock index 491b9f0..afae4c4 100644 --- a/deno.lock +++ b/deno.lock @@ -7,6 +7,7 @@ "jsr:@std/assert@0.223": "0.223.0", "jsr:@std/assert@0.226": "0.226.0", "jsr:@std/bytes@0.223": "0.223.0", + "jsr:@std/cli@^1.0.11": "1.0.11", "jsr:@std/fmt@0.223": "0.223.0", "jsr:@std/fmt@1": "1.0.3", "jsr:@std/fs@0.223": "0.223.0", @@ -54,6 +55,9 @@ "@std/bytes@0.223.0": { "integrity": "84b75052cd8680942c397c2631318772b295019098f40aac5c36cead4cba51a8" }, + "@std/cli@1.0.11": { + "integrity": "ec219619fdcd31bcf0d8e53bee1e2706ec9a02f70255365a094f69755dadd340" + }, "@std/fmt@0.223.0": { "integrity": "6deb37794127dfc7d7bded2586b9fc6f5d50e62a8134846608baf71ffc1a5208" }, @@ -151,7 +155,8 @@ }, "workspace": { "dependencies": [ - "jsr:@deno/dnt@~0.41.3" + "jsr:@deno/dnt@~0.41.3", + "jsr:@std/cli@^1.0.11" ] } } diff --git a/logic/character.ts b/logic/character.ts index d1b9f69..074001d 100644 --- a/logic/character.ts +++ b/logic/character.ts @@ -1,7 +1,6 @@ +import { unicodeWidth } from "@std/cli/unicode-width"; + const segmenter = new Intl.Segmenter("en", { granularity: "grapheme" }); -const cjkRegex = - /[\p{Unified_Ideograph}\u30A0-\u30FF\u3040-\u309F\u31F0-\u31FF]/u; -const printableASCIIRegex = /^[\x20-\x7E]$/; export function getCharacterWidth(ch: string): number { 
const segments = [...segmenter.segment(ch)]; @@ -11,11 +10,5 @@ export function getCharacterWidth(ch: string): number { ); } - const matchesASCII = ch.match(printableASCIIRegex); - const matchesCJK = ch.match(cjkRegex); - - if (!matchesASCII && !matchesCJK) throw new Error(`Invalid character: ${ch}`); - - // TODO: Support Emojis. - return matchesCJK ? 2 : 1; + return unicodeWidth(ch); } diff --git a/tests/character.test.ts b/tests/character.test.ts index 886d7ad..d1330f9 100644 --- a/tests/character.test.ts +++ b/tests/character.test.ts @@ -26,14 +26,12 @@ Deno.test("getCharacterWidth CJK", () => { assertEquals(getCharacterWidth("グ"), 2); assertEquals(getCharacterWidth("ソ"), 2); - assertThrows(() => getCharacterWidth("？")); - assertThrows(() => getCharacterWidth("！")); + assertEquals(getCharacterWidth("？"), 2); + assertEquals(getCharacterWidth("！"), 2); assertThrows(() => getCharacterWidth("你好")); assertThrows(() => getCharacterWidth("ヨスガノ")); }); Deno.test("getCharacterWidth previously faulty cases", () => { assertEquals(getCharacterWidth("𤲶"), 2); - - assertThrows(() => getCharacterWidth("𤲶"[0])); });