From ec26ebcc9e53f67f6242266556ed13445e2f9688 Mon Sep 17 00:00:00 2001
From: うろちょろ <112748593+chiyonn@users.noreply.github.com>
Date: Tue, 02 Dec 2025 18:04:38 +0000
Subject: [PATCH] feat: improve search tokenization for CJK languages (#2231)

---
 quartz/components/scripts/search.inline.ts |   48 +++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/quartz/components/scripts/search.inline.ts b/quartz/components/scripts/search.inline.ts
index 6a84a50..717f17f 100644
--- a/quartz/components/scripts/search.inline.ts
+++ b/quartz/components/scripts/search.inline.ts
@@ -16,11 +16,49 @@
 type SearchType = "basic" | "tags"
 let searchType: SearchType = "basic"
 let currentSearchTerm: string = ""
-const encoder = (str: string) => {
-  return str
-    .toLowerCase()
-    .split(/\s+/)
-    .filter((token) => token.length > 0)
+/**
+ * Lowercases `str` and tokenizes it for search: every CJK character is its own
+ * token; each other run of non-whitespace characters is one token.
+ *
+ * @param str - raw text to tokenize
+ * @returns tokens in order of appearance (empty array for blank input)
+ */
+const encoder = (str: string): string[] => {
+  const tokens: string[] = []
+  const lower = str.toLowerCase()
+  // Same whitespace class as the previous `split(/\s+/)` encoder, so U+3000
+  // (ideographic space), U+00A0, form feed etc. still separate tokens.
+  const whitespace = /\s/
+  let runStart = -1 // UTF-16 offset where the pending non-CJK run starts
+  let offset = 0 // UTF-16 offset of `char` within `lower`
+
+  // Emits the pending run (if any), which ends just before offset `end`.
+  const flush = (end: number) => {
+    if (runStart !== -1) tokens.push(lower.slice(runStart, end))
+    runStart = -1
+  }
+
+  for (const char of lower) {
+    const code = char.codePointAt(0)!
+    const isCJK =
+      (code >= 0x3040 && code <= 0x309f) || // hiragana
+      (code >= 0x30a0 && code <= 0x30ff) || // katakana
+      (code >= 0x4e00 && code <= 0x9fff) || // CJK unified ideographs
+      (code >= 0xac00 && code <= 0xd7af) || // hangul syllables
+      (code >= 0x20000 && code <= 0x2a6df) // CJK unified ideographs ext. B
+
+    if (isCJK) {
+      flush(offset)
+      tokens.push(char)
+    } else if (whitespace.test(char)) {
+      flush(offset)
+    } else if (runStart === -1) {
+      runStart = offset
+    }
+    offset += char.length
+  }
+  flush(offset)
+  return tokens
 }
 
 let index = new FlexSearch.Document<Item>({

--
Gitblit v1.10.0