3 changes: 3 additions & 0 deletions src/constants/ipc-constants.ts
@@ -38,6 +38,9 @@ export const IPC_CHANNELS = {
// Window channels
WINDOW_SET_TITLE: "window:setTitle",

// Token channels
TOKENS_COUNT_BULK: "tokens:countBulk",

// Dynamic channel prefixes
WORKSPACE_CHAT_PREFIX: "workspace:chat:",
WORKSPACE_METADATA: "workspace:metadata",
15 changes: 15 additions & 0 deletions src/main.ts
@@ -436,6 +436,21 @@ if (gotTheLock) {
}
});

// Cleanup worker threads on quit
app.on("will-quit", () => {
console.log("App will quit - cleaning up worker threads");
void (async () => {
try {
// Dynamic import is acceptable here - only loaded if worker was used
/* eslint-disable-next-line no-restricted-syntax */
const { tokenizerWorkerPool } = await import("@/services/tokenizerWorkerPool");
tokenizerWorkerPool.terminate();
} catch (error) {
console.error("Error terminating worker pool:", error);
}
})();
});

app.on("activate", () => {
// Only create window if app is ready and no window exists
// This prevents "Cannot create BrowserWindow before app is ready" error
4 changes: 4 additions & 0 deletions src/preload.ts
@@ -110,6 +110,10 @@ const api: IPCApi = {
window: {
setTitle: (title: string) => ipcRenderer.invoke(IPC_CHANNELS.WINDOW_SET_TITLE, title),
},
tokens: {
countBulk: (model: string, texts: string[]) =>
ipcRenderer.invoke(IPC_CHANNELS.TOKENS_COUNT_BULK, model, texts),
},
};

// Expose the API along with platform/versions
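A minimal renderer-side sketch of how the new bridge might be consumed. The `window.api` exposure name, the example model string, and the chars/4 fallback are assumptions for illustration; only `tokens.countBulk` itself comes from this diff.

// Hypothetical renderer usage - illustrative only.
import type { IPCApi } from "@/types/ipc";

// Assumed exposure name; the actual exposeInMainWorld call is outside this diff.
declare global {
  interface Window {
    api: IPCApi;
  }
}

async function countMessageTokens(model: string, texts: string[]): Promise<number[]> {
  // The main-process handler returns null if the tokenizer failed to load or the
  // worker errored, so fall back to a rough chars/4 approximation in that case.
  const counts = await window.api.tokens.countBulk(model, texts);
  return counts ?? texts.map((t) => Math.ceil(t.length / 4));
}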
19 changes: 19 additions & 0 deletions src/services/ipcMain.ts
@@ -140,6 +140,7 @@ export class IpcMain {

this.registerDialogHandlers(ipcMain);
this.registerWindowHandlers(ipcMain);
this.registerTokenHandlers(ipcMain);
this.registerWorkspaceHandlers(ipcMain);
this.registerProviderHandlers(ipcMain);
this.registerProjectHandlers(ipcMain);
@@ -174,6 +175,24 @@
});
}

private registerTokenHandlers(ipcMain: ElectronIpcMain): void {
ipcMain.handle(
IPC_CHANNELS.TOKENS_COUNT_BULK,
async (_event, model: string, texts: string[]) => {
try {
// Offload to worker thread - keeps main process responsive
// Dynamic import is acceptable here - worker pool is lazy-loaded on first use
/* eslint-disable-next-line no-restricted-syntax */
const { tokenizerWorkerPool } = await import("@/services/tokenizerWorkerPool");
return await tokenizerWorkerPool.countTokens(model, texts);
} catch (error) {
log.error(`Failed to count tokens for model ${model}:`, error);
return null; // Tokenizer not loaded or error occurred
}
}
);
}

private registerWorkspaceHandlers(ipcMain: ElectronIpcMain): void {
ipcMain.handle(
IPC_CHANNELS.WORKSPACE_CREATE,
164 changes: 164 additions & 0 deletions src/services/tokenizerWorkerPool.ts
@@ -0,0 +1,164 @@
/**
* Tokenizer Worker Pool
* Manages Node.js worker thread for off-main-thread tokenization
*/

import { Worker } from "worker_threads";
import path from "path";
import { log } from "@/services/log";

interface PendingRequest {
resolve: (counts: number[]) => void;
reject: (error: Error) => void;
timeoutId: NodeJS.Timeout;
}

interface TokenizeRequest {
requestId: number;
model: string;
texts: string[];
}

interface TokenizeResponse {
requestId: number;
success: boolean;
counts?: number[];
error?: string;
}

class TokenizerWorkerPool {
private worker: Worker | null = null;
private requestCounter = 0;
private pendingRequests = new Map<number, PendingRequest>();
private isTerminating = false;

/**
* Get or create the worker thread
*/
private getWorker(): Worker {
if (this.worker && !this.isTerminating) {
return this.worker;
}

// Worker script path - compiled by tsc to dist/workers/tokenizerWorker.js
// __dirname in production will be dist/services, so we go up one level then into workers
const workerPath = path.join(__dirname, "..", "workers", "tokenizerWorker.js");

this.worker = new Worker(workerPath);
this.isTerminating = false;

// Allow Node to exit even if worker is still running (important for tests)
this.worker.unref();

this.worker.on("message", (response: TokenizeResponse) => {
this.handleResponse(response);
});

this.worker.on("error", (error: Error) => {
log.error("Tokenizer worker error:", error);
// Reject all pending requests
for (const [requestId, pending] of this.pendingRequests) {
clearTimeout(pending.timeoutId);
pending.reject(new Error(`Worker error: ${error.message}`));
this.pendingRequests.delete(requestId);
}
});

this.worker.on("exit", (code: number) => {
if (!this.isTerminating && code !== 0) {
log.error(`Tokenizer worker exited with code ${code}`);
}
this.worker = null;
});

return this.worker;
}

/**
* Handle response from worker
*/
private handleResponse(response: TokenizeResponse): void {
const pending = this.pendingRequests.get(response.requestId);
if (!pending) {
return; // Request was cancelled or timed out
}

clearTimeout(pending.timeoutId);
this.pendingRequests.delete(response.requestId);

if (response.success && response.counts) {
pending.resolve(response.counts);
} else {
pending.reject(new Error(response.error ?? "Unknown worker error"));
}
}

/**
* Count tokens for multiple texts using worker thread
* @param model - Model identifier for tokenizer selection
* @param texts - Array of texts to tokenize
* @returns Promise resolving to array of token counts
*/
async countTokens(model: string, texts: string[]): Promise<number[]> {
const requestId = this.requestCounter++;
const worker = this.getWorker();

return new Promise<number[]>((resolve, reject) => {
// Set timeout for request (30 seconds)
const timeoutId = setTimeout(() => {
const pending = this.pendingRequests.get(requestId);
if (pending) {
this.pendingRequests.delete(requestId);
reject(new Error("Tokenization request timeout (30s)"));
}
}, 30000);

// Store pending request
this.pendingRequests.set(requestId, {
resolve,
reject,
timeoutId,
});

// Send request to worker
const request: TokenizeRequest = {
requestId,
model,
texts,
};

try {
worker.postMessage(request);
} catch (error) {
clearTimeout(timeoutId);
this.pendingRequests.delete(requestId);
reject(error instanceof Error ? error : new Error(String(error)));
}
});
}

/**
* Terminate the worker thread and reject all pending requests
*/
terminate(): void {
this.isTerminating = true;

// Reject all pending requests
for (const [requestId, pending] of this.pendingRequests) {
clearTimeout(pending.timeoutId);
pending.reject(new Error("Worker pool terminated"));
this.pendingRequests.delete(requestId);
}

// Terminate worker
if (this.worker) {
this.worker.terminate().catch((error) => {
log.error("Error terminating tokenizer worker:", error);
});
this.worker = null;
}
}
}

// Singleton instance
export const tokenizerWorkerPool = new TokenizerWorkerPool();
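The worker script the pool spawns (compiled to dist/workers/tokenizerWorker.js) is not part of this excerpt. Below is a minimal sketch of the message protocol it has to satisfy, assuming it reuses loadTokenizerModules and getTokenizerForModel from @/utils/main/tokenizer; the real worker may differ.

// Hypothetical src/workers/tokenizerWorker.ts - illustrative only.
import { parentPort } from "worker_threads";
import { loadTokenizerModules, getTokenizerForModel } from "@/utils/main/tokenizer";

interface TokenizeRequest { requestId: number; model: string; texts: string[]; }
interface TokenizeResponse { requestId: number; success: boolean; counts?: number[]; error?: string; }

parentPort?.on("message", (request: TokenizeRequest) => {
  void (async () => {
    try {
      // Ensure tokenizer modules are loaded so counts are exact, not approximations
      await loadTokenizerModules();
      const tokenizer = getTokenizerForModel(request.model);
      const counts = request.texts.map((text) => tokenizer.countTokens(text));
      const response: TokenizeResponse = { requestId: request.requestId, success: true, counts };
      parentPort?.postMessage(response);
    } catch (error) {
      const response: TokenizeResponse = {
        requestId: request.requestId,
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
      parentPort?.postMessage(response);
    }
  })();
});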
3 changes: 3 additions & 0 deletions src/types/ipc.ts
@@ -230,4 +230,7 @@ export interface IPCApi {
window: {
setTitle(title: string): Promise<void>;
};
tokens: {
countBulk(model: string, texts: string[]): Promise<number[] | null>;
};
}
53 changes: 53 additions & 0 deletions src/utils/main/tokenizer.test.ts
@@ -0,0 +1,53 @@
/**
* Tests for tokenizer cache behavior
*/

import { describe, it, expect } from "@jest/globals";
import { getTokenizerForModel } from "./tokenizer";

describe("tokenizer cache", () => {
const testText = "Hello, world!";

it("should use different cache keys for different models", () => {
// Get tokenizers for different models
const gpt4Tokenizer = getTokenizerForModel("openai:gpt-4");
const claudeTokenizer = getTokenizerForModel("anthropic:claude-opus-4");

// Count tokens with first model
const gpt4Count = gpt4Tokenizer.countTokens(testText);

// Count tokens with second model
const claudeCount = claudeTokenizer.countTokens(testText);

// Counts may differ because the models use different encodings
// This test mainly ensures no crash and cache isolation
expect(typeof gpt4Count).toBe("number");
expect(typeof claudeCount).toBe("number");
expect(gpt4Count).toBeGreaterThan(0);
expect(claudeCount).toBeGreaterThan(0);
});

it("should return same count for same (model, text) pair from cache", () => {
const tokenizer = getTokenizerForModel("openai:gpt-4");

// First call
const count1 = tokenizer.countTokens(testText);

// Second call should hit cache
const count2 = tokenizer.countTokens(testText);

expect(count1).toBe(count2);
});

it("should normalize model keys for cache consistency", () => {
// These should map to the same cache key
const tokenizer1 = getTokenizerForModel("anthropic:claude-opus-4");
const tokenizer2 = getTokenizerForModel("anthropic/claude-opus-4");

const count1 = tokenizer1.countTokens(testText);
const count2 = tokenizer2.countTokens(testText);

// Should get same count since they normalize to same model
expect(count1).toBe(count2);
});
});
37 changes: 27 additions & 10 deletions src/utils/main/tokenizer.ts
@@ -66,9 +66,14 @@ export async function loadTokenizerModules(): Promise<void> {
}

/**
* LRU cache for token counts by text checksum
* Avoids re-tokenizing identical strings (system messages, tool definitions, etc.)
* Key: CRC32 checksum of text, Value: token count
* LRU cache for token counts by (model, text) pairs
* Avoids re-tokenizing identical strings with the same encoding
*
* Key: CRC32 checksum of "model:text" to ensure counts are model-specific
* Value: token count
*
* IMPORTANT: Cache key includes model because different encodings produce different counts.
* For async tokenization (approx → exact), the key stays stable so exact overwrites approx.
*/
const tokenCountCache = new LRUCache<number, number>({
max: 500000, // Max entries (safety limit)
@@ -83,11 +88,22 @@ const tokenCountCache = new LRUCache<number, number>({
* Count tokens with caching via CRC32 checksum
* Avoids re-tokenizing identical strings (system messages, tool definitions, etc.)
*
* Cache key includes model to prevent cross-model count reuse.
*
* NOTE: For async tokenization, this returns an approximation immediately and caches
* the accurate count in the background. Subsequent calls will use the cached accurate count.
* the accurate count in the background. Subsequent calls with the same (model, text) pair
* will use the cached accurate count once ready.
*/
function countTokensCached(text: string, tokenizeFn: () => number | Promise<number>): number {
const checksum = CRC32.str(text);
function countTokensCached(
text: string,
modelString: string,
tokenizeFn: () => number | Promise<number>
): number {
// Include model in cache key to prevent different encodings from reusing counts
// Normalize model key for consistent cache hits (e.g., "anthropic:claude" → "anthropic/claude")
const normalizedModel = normalizeModelKey(modelString);
const cacheKey = `${normalizedModel}:${text}`;
const checksum = CRC32.str(cacheKey);
const cached = tokenCountCache.get(checksum);
if (cached !== undefined) {
return cached;
@@ -102,6 +118,7 @@ function countTokensCached(text: string, tokenizeFn: () => number | Promise<numb
}

// Async case: return approximation now, cache accurate value when ready
// Using same cache key ensures exact count overwrites approximation for this (model, text) pair
const approximation = Math.ceil(text.length / 4);
void result.then((count) => tokenCountCache.set(checksum, count));
return approximation;
@@ -179,8 +196,8 @@
* @returns Tokenizer interface with name and countTokens function
*/
export function getTokenizerForModel(modelString: string): Tokenizer {
// Start loading tokenizer modules in background (idempotent)
void loadTokenizerModules();
// Tokenizer modules are loaded on-demand when countTokens is first called
// This avoids blocking app startup with 8MB+ of tokenizer downloads

return {
get encoding() {
@@ -189,7 +206,7 @@ export function getTokenizerForModel(modelString: string): Tokenizer {
countTokens: (text: string) => {
// If tokenizer already loaded, use synchronous path for accurate counts
if (tokenizerModules) {
return countTokensCached(text, () => {
return countTokensCached(text, modelString, () => {
try {
return countTokensWithLoadedModules(text, modelString, tokenizerModules!);
} catch (error) {
@@ -201,7 +218,7 @@ export function getTokenizerForModel(modelString: string): Tokenizer {
}

// Tokenizer not yet loaded - use async path (returns approximation immediately)
return countTokensCached(text, async () => {
return countTokensCached(text, modelString, async () => {
await loadTokenizerModules();
try {
return countTokensWithLoadedModules(text, modelString, tokenizerModules!);
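For reference, a small sketch of how the model-scoped cache key behaves. It assumes normalizeModelKey and the CRC32 import from the surrounding file; the normalized forms shown are illustrative, not taken from this diff.

// Same text, different models: distinct checksums, so counts never collide.
const text = "Hello, world!";
const keyClaude = CRC32.str(`${normalizeModelKey("anthropic:claude-opus-4")}:${text}`);
const keyGpt4 = CRC32.str(`${normalizeModelKey("openai:gpt-4")}:${text}`);
// keyClaude !== keyGpt4

// Same model written two ways: normalization yields one cache entry,
// so the exact count later overwrites the approximation for this (model, text) pair.
const keyA = CRC32.str(`${normalizeModelKey("anthropic:claude-opus-4")}:${text}`);
const keyB = CRC32.str(`${normalizeModelKey("anthropic/claude-opus-4")}:${text}`);
// keyA === keyB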