feat: support arbitrary Unicode grapheme clusters.

This commit is contained in:
Shibo Lyu 2025-01-28 14:31:14 +08:00
parent 8940f26f17
commit 4dd8121ebb
4 changed files with 14 additions and 17 deletions

View file

@ -1,7 +1,6 @@
import { unicodeWidth } from "@std/cli/unicode-width";
// Shared grapheme-cluster segmenter (locale "en"): splits a string into
// user-perceived characters rather than UTF-16 code units or code points.
const segmenter = new Intl.Segmenter("en", { granularity: "grapheme" });
// Matches any string that contains at least one CJK character. The pattern is
// not anchored. The character class covers:
//   \p{Unified_Ideograph} - Han ideographs
//   U+30A0-U+30FF         - Katakana
//   U+3040-U+309F         - Hiragana
//   U+31F0-U+31FF         - Katakana Phonetic Extensions
const cjkRegex =
/[\p{Unified_Ideograph}\u30A0-\u30FF\u3040-\u309F\u31F0-\u31FF]/u;
// Matches a string that is exactly one printable ASCII character
// (U+0020 space through U+007E tilde); anchored at both ends.
const printableASCIIRegex = /^[\x20-\x7E]$/;
export function getCharacterWidth(ch: string): number {
const segments = [...segmenter.segment(ch)];
@ -11,11 +10,5 @@ export function getCharacterWidth(ch: string): number {
);
}
const matchesASCII = ch.match(printableASCIIRegex);
const matchesCJK = ch.match(cjkRegex);
if (!matchesASCII && !matchesCJK) throw new Error(`Invalid character: ${ch}`);
// TODO: Support Emojis.
return matchesCJK ? 2 : 1;
return unicodeWidth(ch);
}