diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d06c9a..84893d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [1.8.5] - 2026-04-14
 
+### Changed
+- Embedding chunking is now token-based instead of character-based, improving chunk sizing consistency for CJK and mixed-language content (#542, #749)
+- `OPEN_NOTEBOOK_CHUNK_SIZE` and `OPEN_NOTEBOOK_CHUNK_OVERLAP` semantics changed from characters to tokens; the default changed from 1200 characters to 400 tokens to stay safely below the 512-token ceiling of BERT-family embedders (e.g. mxbai-embed-large) after accounting for tokenizer mismatch and splitter overshoot. Existing stored embeddings are unaffected; only new ingestions use the new chunking.
+
 ### Fixed
 - Credentials endpoint no longer crashes (500) when encryption key doesn't match stored credentials (#740)
 - Broken credentials are now shown with a decryption warning and can still be deleted
diff --git a/open_notebook/utils/CLAUDE.md b/open_notebook/utils/CLAUDE.md
index ae71ff7..ec4d083 100644
--- a/open_notebook/utils/CLAUDE.md
+++ b/open_notebook/utils/CLAUDE.md
@@ -24,20 +24,20 @@ Each utility is stateless and can be imported independently.
 
 The chunking behavior can be configured via environment variables:
 
-- **OPEN_NOTEBOOK_CHUNK_SIZE**: Maximum chunk size in characters (default: 1200)
-  - Minimum: 100 characters
-  - Warnings: Values > 8192 characters or invalid values
-  - Use case: Smaller models (e.g., mxbai-embed-large with limited context window)
+- **OPEN_NOTEBOOK_CHUNK_SIZE**: Maximum chunk size in tokens (default: 400)
+  - Minimum: 100 tokens
+  - Warnings: Values > 8192 tokens or invalid values
+  - Use case: Conservative baseline that leaves headroom below 512-token embedders (e.g. mxbai-embed-large). The buffer accounts for tokenizer mismatch between our `o200k_base` measurement and the embedder's own tokenizer, plus occasional splitter overshoot and special tokens.
 
-- **OPEN_NOTEBOOK_CHUNK_OVERLAP**: Overlap between chunks in characters (default: 15% of CHUNK_SIZE)
+- **OPEN_NOTEBOOK_CHUNK_OVERLAP**: Overlap between chunks in tokens (default: 15% of CHUNK_SIZE)
   - Must be: >= 0 and < CHUNK_SIZE
   - Warnings: Invalid values or values >= CHUNK_SIZE
   - Use case: Control how much context is shared between adjacent chunks
 
-Example for models with small context windows:
+Example for embedders with larger context windows (e.g. OpenAI text-embedding-3 family, 8191 tokens):
 ```bash
-export OPEN_NOTEBOOK_CHUNK_SIZE=512
-export OPEN_NOTEBOOK_CHUNK_OVERLAP=50
+export OPEN_NOTEBOOK_CHUNK_SIZE=1500
+export OPEN_NOTEBOOK_CHUNK_OVERLAP=150
 ```
 
 Note: Changes require restart of the application.
@@ -63,7 +63,7 @@ Note: Changes require restart of the application.
 
 ### chunking.py
 - **ContentType**: Enum (HTML, MARKDOWN, PLAIN)
-- **CHUNK_SIZE**: Configurable via `OPEN_NOTEBOOK_CHUNK_SIZE` env var (default: 1200)
+- **CHUNK_SIZE**: Configurable via `OPEN_NOTEBOOK_CHUNK_SIZE` env var (default: 400)
 - **CHUNK_OVERLAP**: Configurable via `OPEN_NOTEBOOK_CHUNK_OVERLAP` env var (default: 15% of CHUNK_SIZE)
 - **detect_content_type_from_extension(file_path)**: Detect type from file extension
 - **detect_content_type_from_heuristics(text)**: Detect type from content patterns (returns type + confidence)
@@ -74,7 +74,7 @@ Note: Changes require restart of the application.
 - Uses LangChain splitters: HTMLHeaderTextSplitter, MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
 - Extension-based detection is primary; heuristics can override PLAIN extensions with 0.8+ confidence
 - Secondary chunking applied when HTML/Markdown splitters produce oversized chunks
-- Returns list of strings, each ≤ CHUNK_SIZE characters
+- Returns list of strings, each approximately ≤ CHUNK_SIZE tokens
 
 ### embedding.py
 - **mean_pool_embeddings(embeddings)**: Combine multiple embeddings via normalized mean pooling
@@ -83,7 +83,7 @@ Note: Changes require restart of the application.
 
 **Key behavior**:
 - Uses model_manager.get_model("embedding") for embedding model
-- Short text (≤ CHUNK_SIZE): direct embedding
+- Short text (≤ CHUNK_SIZE tokens): direct embedding
 - Long text: chunk → embed each → mean pool results
 - Mean pooling: normalize each → mean → normalize result (using numpy)
 - Raises ValueError for empty/whitespace-only text
@@ -103,7 +103,7 @@ Note: Changes require restart of the application.
 - **token_count(text)**: Returns estimated token count for string (via tiktoken)
 - **token_cost(text, model)**: Calculate cost estimate for text with given model
 
-**Key behavior**: Uses cl100k_base encoding; may differ slightly from actual model tokenization
+**Key behavior**: Uses `o200k_base` encoding; may differ slightly from actual model tokenization. If `tiktoken` is unavailable, `token_count()` falls back to a coarse estimate (pre-existing behavior, unchanged by the move to token-based chunking).
 
 ### version_utils.py
 - **compare_versions(v1, v2)**: Returns -1 (v1 < v2), 0 (equal), 1 (v1 > v2)
@@ -135,8 +135,9 @@ Note: Changes require restart of the application.
 
 ## Important Quirks & Gotchas
 
-- **Token count estimation**: Uses cl100k_base encoding; may differ 5-10% from actual model tokens
-- **Chunk size for Ollama**: 1500 chars chosen to fit within Ollama embedding model context limits
+- **Token count estimation**: Uses `o200k_base` encoding; may differ slightly from actual model tokens
+- **Chunk size semantics changed**: `OPEN_NOTEBOOK_CHUNK_SIZE` and `OPEN_NOTEBOOK_CHUNK_OVERLAP` are token-based, not character-based
+- **Default chunk size**: The token-based default is 400, which leaves a ~20% margin below the 512-token ceiling of BERT-family embedders (e.g. mxbai-embed-large) to absorb tokenizer mismatch (we measure with `o200k_base`, they tokenize with WordPiece), splitter overshoot, and special tokens
 - **Content type detection order**: Extension checked first, then heuristics; high-confidence heuristics (≥0.8) can override PLAIN extensions
 - **Mean pooling normalization**: Each embedding normalized before mean, result normalized after
 - **Priority weights default**: If not specified, ContextConfig uses default weights (source=1, note=0.8, insight=1.2)
diff --git a/open_notebook/utils/chunking.py b/open_notebook/utils/chunking.py
index 3f33ba4..b0eb6fe 100644
--- a/open_notebook/utils/chunking.py
+++ b/open_notebook/utils/chunking.py
@@ -9,8 +9,8 @@ Key functions:
 - chunk_text(): Splits text into chunks using appropriate splitter for content type
 
 Environment Variables:
-    OPEN_NOTEBOOK_CHUNK_SIZE: Maximum chunk size in characters (default: 1200)
-    OPEN_NOTEBOOK_CHUNK_OVERLAP: Overlap between chunks in characters (default: 15% of CHUNK_SIZE)
+    OPEN_NOTEBOOK_CHUNK_SIZE: Maximum chunk size in tokens (default: 400)
+    OPEN_NOTEBOOK_CHUNK_OVERLAP: Overlap between chunks in tokens (default: 15% of CHUNK_SIZE)
 """
 
 import os
@@ -26,6 +26,8 @@ from langchain_text_splitters import (
 )
 from loguru import logger
 
+from .token_utils import token_count
+
 
 def _get_chunk_size() -> int:
     """Get chunk size from environment variable or use default."""
@@ -44,14 +46,14 @@ def _get_chunk_size() -> int:
                     f"OPEN_NOTEBOOK_CHUNK_SIZE ({chunk_size}) is very large. "
                     f"This may cause issues with some embedding models."
                 )
-            logger.info(f"Using custom chunk size: {chunk_size} characters")
+            logger.info(f"Using custom chunk size: {chunk_size} tokens")
             return chunk_size
         except ValueError:
             logger.warning(
                 f"Invalid OPEN_NOTEBOOK_CHUNK_SIZE value: '{chunk_size_str}'. "
-                f"Using default: 1200"
+                f"Using default: 400"
             )
-    return 1200
+    return 400
 
 
 def _get_chunk_overlap(chunk_size: int) -> int:
@@ -72,7 +74,7 @@ def _get_chunk_overlap(chunk_size: int) -> int:
                     f"Using 15% of chunk size: {int(chunk_size * 0.15)}"
                 )
                 return int(chunk_size * 0.15)
-            logger.info(f"Using custom chunk overlap: {overlap} characters")
+            logger.info(f"Using custom chunk overlap: {overlap} tokens")
             return overlap
         except ValueError:
             logger.warning(
@@ -358,14 +360,14 @@ def _get_plain_splitter() -> RecursiveCharacterTextSplitter:
     return RecursiveCharacterTextSplitter(
         chunk_size=CHUNK_SIZE,
         chunk_overlap=CHUNK_OVERLAP,
-        length_function=len,
+        length_function=token_count,
         separators=["\n\n", "\n", ". ", ", ", " ", ""],
     )
 
 
 def _apply_secondary_chunking(chunks: List[str]) -> List[str]:
     """
-    Apply secondary chunking to ensure no chunk exceeds CHUNK_SIZE.
+    Apply secondary chunking to ensure no chunk exceeds CHUNK_SIZE tokens.
 
     Used when primary splitters (HTML/Markdown) produce oversized chunks.
     """
@@ -373,7 +375,7 @@ def _apply_secondary_chunking(chunks: List[str]) -> List[str]:
     secondary_splitter = _get_plain_splitter()
 
     for chunk in chunks:
-        if len(chunk) > CHUNK_SIZE:
+        if token_count(chunk) > CHUNK_SIZE:
             # Split oversized chunk
             sub_chunks = secondary_splitter.split_text(chunk)
             result.extend(sub_chunks)
@@ -397,13 +399,14 @@ def chunk_text(
         file_path: Optional file path for content type detection
 
     Returns:
-        List of text chunks, each <= CHUNK_SIZE characters
+        List of text chunks, each approximately <= CHUNK_SIZE tokens
     """
     if not text or not text.strip():
         return []
 
     # Short text doesn't need chunking
-    if len(text) <= CHUNK_SIZE:
+    text_tokens = token_count(text)
+    if text_tokens <= CHUNK_SIZE:
         return [text]
 
     # Detect content type if not provided
@@ -441,5 +444,5 @@ def chunk_text(
     # Filter out empty chunks
     chunks = [c.strip() for c in chunks if c and c.strip()]
 
-    logger.debug(f"Created {len(chunks)} chunks from {len(text)} characters")
+    logger.debug(f"Created {len(chunks)} chunks from {text_tokens} tokens")
     return chunks
diff --git a/open_notebook/utils/embedding.py b/open_notebook/utils/embedding.py
index 7c746f1..af2035b 100644
--- a/open_notebook/utils/embedding.py
+++ b/open_notebook/utils/embedding.py
@@ -17,6 +17,7 @@ import numpy as np
 from loguru import logger
 
 from .chunking import CHUNK_SIZE, ContentType, chunk_text
+from .token_utils import token_count
 
 EMBEDDING_BATCH_SIZE = 50
 EMBEDDING_MAX_RETRIES = 3
@@ -120,11 +121,28 @@ async def generate_embeddings(
     model_name = getattr(embedding_model, "model_name", "unknown")
 
     # Log text sizes for debugging
-    text_sizes = [len(t) for t in texts]
-    logger.debug(
-        f"Generating embeddings for {len(texts)} texts "
-        f"(sizes: min={min(text_sizes)}, max={max(text_sizes)}, "
-        f"total={sum(text_sizes)} chars)"
+    metrics: tuple[int, int, int, int] | None = None
+
+    def _get_size_metrics() -> tuple[int, int, int, int]:
+        nonlocal metrics
+        if metrics is None:
+            token_sizes = [token_count(t) for t in texts]
+            metrics = (
+                min(token_sizes),
+                max(token_sizes),
+                sum(token_sizes),
+                sum(len(t) for t in texts),
+            )
+        return metrics
+
+    logger.opt(lazy=True).debug(
+        "Generating embeddings for {} texts "
+        "(tokens: min={}, max={}, total={}; chars: total={})",
+        lambda: len(texts),
+        lambda: _get_size_metrics()[0],
+        lambda: _get_size_metrics()[1],
+        lambda: _get_size_metrics()[2],
+        lambda: _get_size_metrics()[3],
     )
 
     all_embeddings: List[List[float]] = []
@@ -174,10 +192,10 @@ async def generate_embedding(
     """
     Generate a single embedding for text, handling large content via chunking and mean pooling.
 
-    For short text (<= CHUNK_SIZE):
+    For short text (<= CHUNK_SIZE tokens):
         - Embeds directly and returns the embedding
 
-    For long text (> CHUNK_SIZE):
+    For long text (> CHUNK_SIZE tokens):
         - Chunks the text using appropriate splitter for content type
         - Embeds all chunks in batches
         - Combines embeddings via mean pooling
@@ -199,16 +217,17 @@ async def generate_embedding(
         raise ValueError("Cannot generate embedding for empty text")
 
     text = text.strip()
+    text_tokens = token_count(text)
 
     # Check if chunking is needed
-    if len(text) <= CHUNK_SIZE:
+    if text_tokens <= CHUNK_SIZE:
         # Short text - embed directly
-        logger.debug(f"Embedding short text ({len(text)} chars) directly")
+        logger.debug(f"Embedding short text ({text_tokens} tokens) directly")
         embeddings = await generate_embeddings([text], command_id=command_id)
         return embeddings[0]
 
     # Long text - chunk and mean pool
-    logger.debug(f"Text exceeds chunk size ({len(text)} chars), chunking...")
+    logger.debug(f"Text exceeds chunk size ({text_tokens} tokens), chunking...")
 
     chunks = chunk_text(text, content_type=content_type, file_path=file_path)
 
diff --git a/tests/test_chunking.py b/tests/test_chunking.py
index c717c11..df7d2e6 100644
--- a/tests/test_chunking.py
+++ b/tests/test_chunking.py
@@ -14,6 +14,32 @@ from open_notebook.utils.chunking import (
     detect_content_type_from_extension,
     detect_content_type_from_heuristics,
 )
+from open_notebook.utils.token_utils import token_count
+
+
+def _build_text_with_max_tokens(fragment: str, max_tokens: int) -> str:
+    """Build text that stays within a token budget."""
+    text = ""
+    while True:
+        candidate = text + fragment
+        if token_count(candidate) > max_tokens:
+            return text
+        text = candidate
+
+
+def _build_text_exceeding_tokens(fragment: str, threshold_tokens: int) -> str:
+    """Build text that exceeds a token threshold."""
+    text = fragment
+    while token_count(text) <= threshold_tokens:
+        text += fragment
+    return text
+
+
+def _assert_chunks_within_token_limit(chunks: list[str]) -> None:
+    """Assert chunks stay within the configured token window."""
+    assert chunks
+    for chunk in chunks:
+        assert token_count(chunk) <= CHUNK_SIZE
 
 # ============================================================================
 # TEST SUITE 1: Content Type Detection from Extension
@@ -222,20 +248,33 @@ class TestChunkText:
         assert chunks[0] == text
 
     def test_text_at_chunk_limit(self):
-        """Test text at exactly chunk size limit."""
-        text = "x" * CHUNK_SIZE
+        """Test text within the token chunk size limit."""
+        text = _build_text_with_max_tokens("This is a sentence. ", CHUNK_SIZE)
+        assert token_count(text) <= CHUNK_SIZE
         chunks = chunk_text(text)
         assert len(chunks) == 1
 
     def test_long_text_is_chunked(self):
-        """Test that long text is chunked."""
-        # Create text longer than chunk size
-        text = "This is a sentence. " * 200  # ~4000 chars
+        """Test that long English text is chunked by token budget."""
+        text = _build_text_exceeding_tokens("This is a sentence. ", CHUNK_SIZE)
         chunks = chunk_text(text)
         assert len(chunks) > 1
-        # Each chunk should be <= CHUNK_SIZE
-        for chunk in chunks:
-            assert len(chunk) <= CHUNK_SIZE + 100  # Allow some flexibility for overlap
+        _assert_chunks_within_token_limit(chunks)
+
+    def test_cjk_text_is_chunked_by_tokens(self):
+        """Test that long CJK text is chunked using token measurement."""
+        text = _build_text_exceeding_tokens("這是一段中文內容，用來驗證分塊邏輯。", CHUNK_SIZE)
+        chunks = chunk_text(text, content_type=ContentType.PLAIN)
+        assert len(chunks) > 1
+        _assert_chunks_within_token_limit(chunks)
+
+    def test_mixed_language_text_is_chunked_by_tokens(self):
+        """Test that mixed-language text is chunked using token measurement."""
+        fragment = "This paragraph mixes English and 中文內容 to verify token-based chunking. "
+        text = _build_text_exceeding_tokens(fragment, CHUNK_SIZE)
+        chunks = chunk_text(text, content_type=ContentType.PLAIN)
+        assert len(chunks) > 1
+        _assert_chunks_within_token_limit(chunks)
 
     def test_explicit_content_type_html(self):
         """Test chunking with explicit HTML content type."""
@@ -269,9 +308,10 @@ Content for section 2.
 
     def test_explicit_content_type_plain(self):
         """Test chunking with explicit plain content type."""
-        plain_text = "Word " * 500  # ~2500 chars
+        plain_text = _build_text_exceeding_tokens("Word ", CHUNK_SIZE)
         chunks = chunk_text(plain_text, content_type=ContentType.PLAIN)
-        assert len(chunks) >= 1
+        assert len(chunks) > 1
+        _assert_chunks_within_token_limit(chunks)
 
     def test_file_path_detection(self):
         """Test chunking with file path for content type detection."""
@@ -280,16 +320,14 @@ Content for section 2.
         assert len(chunks) == 1
 
     def test_secondary_chunking_for_large_sections(self):
-        """Test that large sections from HTML/MD splitters are further chunked."""
-        # Create text that would produce a single large section
-        large_section = "x" * 3000  # Larger than CHUNK_SIZE
+        """Test that large Markdown sections are further chunked by tokens."""
+        large_section = _build_text_exceeding_tokens(
+            "這是一段很長的章節內容，用來測試次級分塊。", CHUNK_SIZE
+        )
         md_text = f"# Title\n\n{large_section}"
         chunks = chunk_text(md_text, content_type=ContentType.MARKDOWN)
-        # Should have multiple chunks due to secondary chunking
-        assert len(chunks) >= 1
-        for chunk in chunks:
-            # Allow some flexibility but chunks should be reasonable size
-            assert len(chunk) <= CHUNK_SIZE + 300
+        assert len(chunks) > 1
+        _assert_chunks_within_token_limit(chunks)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_embedding.py b/tests/test_embedding.py
index 61da171..86f8c21 100644
--- a/tests/test_embedding.py
+++ b/tests/test_embedding.py
@@ -6,11 +6,21 @@ Tests embedding generation and mean pooling functionality.
 
 import pytest
 
+from open_notebook.utils.chunking import CHUNK_SIZE
 from open_notebook.utils.embedding import (
     generate_embedding,
     generate_embeddings,
     mean_pool_embeddings,
 )
+from open_notebook.utils.token_utils import token_count
+
+
+def _build_text_exceeding_tokens(fragment: str, threshold_tokens: int) -> str:
+    """Build text that exceeds a token threshold."""
+    text = fragment
+    while token_count(text) <= threshold_tokens:
+        text += fragment
+    return text
 
 # ============================================================================
 # TEST SUITE 1: Mean Pooling
@@ -184,8 +194,7 @@ class TestGenerateEmbedding:
         """Test that long text is chunked and mean pooled."""
         from unittest.mock import AsyncMock, MagicMock, patch
 
-        # Create text longer than chunk size
-        long_text = "This is a sentence. " * 200  # ~4000 chars
+        long_text = _build_text_exceeding_tokens("This is a sentence. ", CHUNK_SIZE)
 
         mock_model = MagicMock()
         # Return multiple embeddings (one per chunk)