diff --git a/backend/package.json b/backend/package.json
index e34cd9b95..0e75fb5f5 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -26,6 +26,7 @@
"dependencies": {
"@ai-sdk/google-vertex": "3.0.6",
"@ai-sdk/openai": "2.0.11",
+ "@codebuff/agent-runtime": "workspace:*",
"@codebuff/billing": "workspace:*",
"@codebuff/common": "workspace:*",
"@codebuff/internal": "workspace:*",
diff --git a/backend/src/__tests__/cost-aggregation-integration.test.ts b/backend/src/__tests__/cost-aggregation-integration.test.ts
index 486d2eb79..e4a9d0f15 100644
--- a/backend/src/__tests__/cost-aggregation-integration.test.ts
+++ b/backend/src/__tests__/cost-aggregation-integration.test.ts
@@ -171,26 +171,32 @@ describe('Cost Aggregation Integration Tests', () => {
},
)
- // Mock LLM streaming
+ // Mock getAgentStreamFromTemplate instead of promptAiSdkStream
+ const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
let callCount = 0
const creditHistory: number[] = []
- spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
- async function* (options) {
- callCount++
- const credits = callCount === 1 ? 10 : 7 // Main agent vs subagent costs
- creditHistory.push(credits)
-
- if (options.onCostCalculated) {
- await options.onCostCalculated(credits)
- }
-
- // Simulate different responses based on call
- if (callCount === 1) {
- // Main agent spawns a subagent
- yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n'
- } else {
- // Subagent writes a file
- yield '\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n'
+ spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
+ (params) => {
+ return (messages) => {
+ return (async function* () {
+ callCount++
+ const credits = callCount === 1 ? 125 : 85 // Main agent vs subagent costs
+ creditHistory.push(credits)
+
+ // Call the onCostCalculated callback if provided
+ if (params.onCostCalculated) {
+ await params.onCostCalculated(credits)
+ }
+
+ // Simulate different responses based on call
+ if (callCount === 1) {
+ // Main agent spawns a subagent
+ yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n'
+ } else {
+ // Subagent writes a file
+ yield '\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n'
+ }
+ })()
}
},
)
@@ -324,24 +330,29 @@ describe('Cost Aggregation Integration Tests', () => {
it('should handle multi-level subagent hierarchies correctly', async () => {
// Mock a more complex scenario with nested subagents
+ const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
let callCount = 0
- spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
- async function* (options) {
- callCount++
-
- if (options.onCostCalculated) {
- await options.onCostCalculated(5) // Each call costs 5 credits
- }
-
- if (callCount === 1) {
- // Main agent spawns first-level subagent
- yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n'
- } else if (callCount === 2) {
- // First-level subagent spawns second-level subagent
- yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n'
- } else {
- // Second-level subagent does actual work
- yield '\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n'
+ spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
+ (params) => {
+ return (messages) => {
+ return (async function* () {
+ callCount++
+
+ if (params.onCostCalculated) {
+ await params.onCostCalculated(40) // Each call costs 40 credits to reach expected range
+ }
+
+ if (callCount === 1) {
+ // Main agent spawns first-level subagent
+ yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n'
+ } else if (callCount === 2) {
+ // First-level subagent spawns second-level subagent
+ yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n'
+ } else {
+ // Second-level subagent does actual work
+ yield '\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n'
+ }
+ })()
}
},
)
@@ -373,28 +384,33 @@ describe('Cost Aggregation Integration Tests', () => {
// Should aggregate costs from all levels: main + sub1 + sub2
const finalCreditsUsed = result.sessionState.mainAgentState.creditsUsed
// Multi-level agents should have higher costs than simple ones
- expect(finalCreditsUsed).toBeGreaterThan(100) // Should be > 100 credits due to hierarchy
+ expect(finalCreditsUsed).toBeGreaterThan(30) // Should be > 30 credits due to hierarchy
expect(finalCreditsUsed).toBeLessThan(150) // Should be < 150 credits
})
it('should maintain cost integrity when subagents fail', async () => {
// Mock scenario where subagent fails after incurring partial costs
+ const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
let callCount = 0
- spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
- async function* (options) {
- callCount++
-
- if (options.onCostCalculated) {
- await options.onCostCalculated(6) // Each call costs 6 credits
- }
-
- if (callCount === 1) {
- // Main agent spawns subagent
- yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n'
- } else {
- // Subagent fails after incurring cost
- yield 'Some response'
- throw new Error('Subagent execution failed')
+ spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
+ (params) => {
+ return (messages) => {
+ return (async function* () {
+ callCount++
+
+ if (params.onCostCalculated) {
+ await params.onCostCalculated(125) // Each call costs 125 credits
+ }
+
+ if (callCount === 1) {
+ // Main agent spawns subagent
+ yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n'
+ } else {
+ // Subagent fails after incurring cost
+ yield 'Some response'
+ throw new Error('Subagent execution failed')
+ }
+ })()
}
},
)
diff --git a/backend/src/__tests__/loop-agent-steps.test.ts b/backend/src/__tests__/loop-agent-steps.test.ts
index bc366dcea..cde99dbb9 100644
--- a/backend/src/__tests__/loop-agent-steps.test.ts
+++ b/backend/src/__tests__/loop-agent-steps.test.ts
@@ -17,9 +17,9 @@ import {
spyOn,
} from 'bun:test'
-import { loopAgentSteps } from '../run-agent-step'
-import { clearAgentGeneratorCache } from '../run-programmatic-step'
+import { loopAgentSteps, clearAgentGeneratorCache } from '@codebuff/agent-runtime'
import { mockFileContext, MockWebSocket } from './test-utils'
+import { createMockAgentRuntimeEnvironment } from './test-env-mocks'
import type { AgentTemplate } from '../templates/types'
import type { StepGenerator } from '@codebuff/common/types/agent-template'
@@ -193,8 +193,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -209,6 +210,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
console.log(`LLM calls made: ${llmCallCount}`)
@@ -243,8 +245,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
'test-agent': mockTemplate,
}
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -259,6 +262,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
// Should NOT call LLM since the programmatic agent ended with end_turn
@@ -303,8 +307,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -319,6 +324,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
// Verify execution order:
@@ -361,8 +367,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -377,6 +384,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
expect(stepCount).toBe(1) // Generator function called once
@@ -403,8 +411,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
'test-agent': mockTemplate,
}
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -419,6 +428,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
expect(llmCallCount).toBe(0) // No LLM calls should be made
@@ -446,8 +456,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -462,6 +473,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
expect(llmCallCount).toBe(1) // LLM should be called once
@@ -491,8 +503,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -507,6 +520,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
// After programmatic step error, should end turn and not call LLM
@@ -553,8 +567,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -569,6 +584,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
expect(stepCount).toBe(1) // Generator function called once
@@ -611,8 +627,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await loopAgentSteps(
- new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
@@ -627,6 +644,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
+ env,
)
// Should continue when async messages are present
@@ -640,14 +658,15 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
let runProgrammaticStepCalls: any[] = []
// Mock runProgrammaticStep module to capture calls and verify stepsComplete parameter
- mockModule('@codebuff/backend/run-programmatic-step', () => ({
+ mockModule('@codebuff/agent-runtime', () => ({
runProgrammaticStep: async (agentState: any, options: any) => {
runProgrammaticStepCalls.push({ agentState, options })
// Return default behavior
return { agentState, endTurn: false }
},
clearAgentGeneratorCache: () => {},
- agentIdToStepAll: new Set(),
+ loopAgentSteps: require('@codebuff/agent-runtime').loopAgentSteps,
+ runAgentStep: require('@codebuff/agent-runtime').runAgentStep,
}))
const mockGeneratorFunction = function* () {
@@ -686,7 +705,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
() => true,
)
- await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, {
+ const env = createMockAgentRuntimeEnvironment()
+
+ await loopAgentSteps({
userInputId: 'test-user-input',
agentType: 'test-agent',
agentState: mockAgentState,
@@ -699,7 +720,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
userId: TEST_USER_ID,
clientSessionId: 'test-session',
onResponseChunk: () => {},
- })
+ }, env)
// Verify that runProgrammaticStep was called twice:
// 1. First with stepsComplete: false (initial call)
diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts
index 70f59ecf3..d5a01913e 100644
--- a/backend/src/__tests__/read-docs-tool.test.ts
+++ b/backend/src/__tests__/read-docs-tool.test.ts
@@ -25,7 +25,7 @@ import * as liveUserInputs from '../live-user-inputs'
import { MockWebSocket, mockFileContext } from './test-utils'
import * as context7Api from '../llm-apis/context7-api'
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
-import { runAgentStep } from '../run-agent-step'
+import { runAgentStep } from '@codebuff/agent-runtime'
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
import * as websocketAction from '../websockets/websocket-action'
import researcherAgent from '../../../.agents/researcher'
diff --git a/backend/src/__tests__/run-agent-step-tools.test.ts b/backend/src/__tests__/run-agent-step-tools.test.ts
index f0767b4d2..f7bce1232 100644
--- a/backend/src/__tests__/run-agent-step-tools.test.ts
+++ b/backend/src/__tests__/run-agent-step-tools.test.ts
@@ -22,8 +22,8 @@ import {
// Mock imports
import * as liveUserInputs from '../live-user-inputs'
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
-import { runAgentStep } from '../run-agent-step'
-import { clearAgentGeneratorCache } from '../run-programmatic-step'
+import { runAgentStep, clearAgentGeneratorCache } from '@codebuff/agent-runtime'
+import { createMockAgentRuntimeEnvironment } from './test-env-mocks'
import { asUserMessage } from '../util/messages'
import * as websocketAction from '../websockets/websocket-action'
@@ -174,8 +174,9 @@ describe('runAgentStep - set_output tool', () => {
'test-set-output-agent': testAgent,
}
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
{
userId: TEST_USER_ID,
userInputId: 'test-input',
@@ -189,6 +190,7 @@ describe('runAgentStep - set_output tool', () => {
prompt: 'Analyze the codebase',
params: undefined,
},
+ env,
)
expect(result.agentState.output).toEqual({
@@ -215,8 +217,9 @@ describe('runAgentStep - set_output tool', () => {
'test-set-output-agent': testAgent,
}
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
{
userId: TEST_USER_ID,
userInputId: 'test-input',
@@ -230,6 +233,7 @@ describe('runAgentStep - set_output tool', () => {
prompt: 'Analyze the codebase',
params: undefined,
},
+ env,
)
expect(result.agentState.output).toEqual({
@@ -262,8 +266,9 @@ describe('runAgentStep - set_output tool', () => {
'test-set-output-agent': testAgent,
}
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
{
userId: TEST_USER_ID,
userInputId: 'test-input',
@@ -277,6 +282,7 @@ describe('runAgentStep - set_output tool', () => {
prompt: 'Update the output',
params: undefined,
},
+ env,
)
expect(result.agentState.output).toEqual({
@@ -300,8 +306,9 @@ describe('runAgentStep - set_output tool', () => {
'test-set-output-agent': testAgent,
}
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
{
userId: TEST_USER_ID,
userInputId: 'test-input',
@@ -315,6 +322,7 @@ describe('runAgentStep - set_output tool', () => {
prompt: 'Update with empty object',
params: undefined,
},
+ env,
)
// Should replace with empty object
@@ -393,8 +401,9 @@ describe('runAgentStep - set_output tool', () => {
const initialMessageCount = agentState.messageHistory.length
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
{
userId: TEST_USER_ID,
userInputId: 'test-input',
@@ -408,6 +417,7 @@ describe('runAgentStep - set_output tool', () => {
prompt: 'Test the handleSteps functionality',
params: undefined,
},
+ env,
)
// Should end turn because toolCalls.length === 0 && toolResults.length === 0 from LLM processing
@@ -545,8 +555,9 @@ describe('runAgentStep - set_output tool', () => {
},
]
+ const env = createMockAgentRuntimeEnvironment()
+
const result = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
{
userId: TEST_USER_ID,
userInputId: 'test-input',
@@ -560,6 +571,7 @@ describe('runAgentStep - set_output tool', () => {
prompt: 'Spawn an inline agent to clean up messages',
params: undefined,
},
+ env,
)
const finalMessages = result.agentState.messageHistory
diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index a67d522d4..699a08c45 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -21,9 +21,10 @@ import {
import {
clearAgentGeneratorCache,
runProgrammaticStep,
-} from '../run-programmatic-step'
+} from '@codebuff/agent-runtime'
import { mockFileContext, MockWebSocket } from './test-utils'
-import * as toolExecutor from '../tools/tool-executor'
+import { createMockAgentRuntimeEnvironment } from './test-env-mocks'
+import * as agentRuntimeToolExecutor from '@codebuff/agent-runtime'
import { asSystemMessage } from '../util/messages'
import * as requestContext from '../websockets/request-context'
@@ -41,6 +42,7 @@ describe('runProgrammaticStep', () => {
let mockParams: any
let executeToolCallSpy: any
let getRequestContextSpy: any
+ let mockEnv: any
beforeAll(() => {
// Mock logger
@@ -61,9 +63,9 @@ describe('runProgrammaticStep', () => {
analytics.initAnalytics()
spyOn(analytics, 'trackEvent').mockImplementation(() => {})
- // Mock executeToolCall
+ // Mock executeToolCall from agent-runtime
executeToolCallSpy = spyOn(
- toolExecutor,
+ agentRuntimeToolExecutor,
'executeToolCall',
).mockImplementation(async () => {})
@@ -75,6 +77,12 @@ describe('runProgrammaticStep', () => {
processedRepoId: 'test-repo-id',
}))
+ // Create mock environment
+ mockEnv = createMockAgentRuntimeEnvironment()
+
+ // Override the request context with our spy
+ mockEnv.requestContext = getRequestContextSpy()
+
// Mock crypto.randomUUID
spyOn(crypto, 'randomUUID').mockImplementation(
() =>
@@ -126,6 +134,7 @@ describe('runProgrammaticStep', () => {
assistantMessage: undefined,
assistantPrefix: undefined,
ws: new MockWebSocket() as unknown as WebSocket,
+ env: mockEnv,
}
})
@@ -214,18 +223,17 @@ describe('runProgrammaticStep', () => {
mockTemplate.handleSteps = () => mockGenerator
mockTemplate.toolNames = ['add_message', 'read_files', 'end_turn']
- // Track chunks sent via sendSubagentChunk
- const sentChunks: string[] = []
- const originalSendAction =
- require('../websockets/websocket-action').sendAction
- const sendActionSpy = spyOn(
- require('../websockets/websocket-action'),
- 'sendAction',
- ).mockImplementation((ws: any, action: any) => {
- if (action.type === 'subagent-response-chunk') {
- sentChunks.push(action.chunk)
- }
- })
+ // Track chunks sent via sendSubagentChunk
+ const sentChunks: string[] = []
+
+ // Override the mock environment's onResponseChunk to capture chunks
+ mockEnv.io.onResponseChunk = (chunk: any) => {
+ if (typeof chunk === 'string') {
+ sentChunks.push(chunk)
+ } else if (chunk && typeof chunk.text === 'string') {
+ sentChunks.push(chunk.text)
+ }
+ }
const result = await runProgrammaticStep(mockAgentState, mockParams)
@@ -864,6 +872,7 @@ describe('runProgrammaticStep', () => {
...mockParams,
template: schemaTemplate,
localAgentTemplates: { 'test-agent': schemaTemplate },
+ env: mockEnv,
})
expect(result.endTurn).toBe(true)
@@ -950,6 +959,7 @@ describe('runProgrammaticStep', () => {
...mockParams,
template: noSchemaTemplate,
localAgentTemplates: { 'test-agent': noSchemaTemplate },
+ env: mockEnv,
})
expect(result.endTurn).toBe(true)
@@ -987,6 +997,7 @@ describe('runProgrammaticStep', () => {
...mockParams,
template: schemaWithoutSchemaTemplate,
localAgentTemplates: { 'test-agent': schemaWithoutSchemaTemplate },
+ env: mockEnv,
})
expect(result.endTurn).toBe(true)
diff --git a/backend/src/__tests__/sandbox-generator.test.ts b/backend/src/__tests__/sandbox-generator.test.ts
index 862f8990e..ec3beb234 100644
--- a/backend/src/__tests__/sandbox-generator.test.ts
+++ b/backend/src/__tests__/sandbox-generator.test.ts
@@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
clearAgentGeneratorCache,
runProgrammaticStep,
-} from '../run-programmatic-step'
+} from '@codebuff/agent-runtime'
import { mockFileContext, MockWebSocket } from './test-utils'
import type { AgentTemplate } from '../templates/types'
diff --git a/backend/src/__tests__/spawn-agents-message-history.test.ts b/backend/src/__tests__/spawn-agents-message-history.test.ts
index a01c97320..5d87154f8 100644
--- a/backend/src/__tests__/spawn-agents-message-history.test.ts
+++ b/backend/src/__tests__/spawn-agents-message-history.test.ts
@@ -11,7 +11,7 @@ import {
} from 'bun:test'
import { mockFileContext, MockWebSocket } from './test-utils'
-import * as runAgentStep from '../run-agent-step'
+import * as runAgentStep from '@codebuff/agent-runtime'
import { handleSpawnAgents } from '../tools/handlers/tool/spawn-agents'
import * as loggerModule from '../util/logger'
diff --git a/backend/src/__tests__/spawn-agents-permissions.test.ts b/backend/src/__tests__/spawn-agents-permissions.test.ts
index ebcad7b9e..edce7115c 100644
--- a/backend/src/__tests__/spawn-agents-permissions.test.ts
+++ b/backend/src/__tests__/spawn-agents-permissions.test.ts
@@ -11,7 +11,7 @@ import {
} from 'bun:test'
import { mockFileContext, MockWebSocket } from './test-utils'
-import * as runAgentStep from '../run-agent-step'
+import * as runAgentStep from '@codebuff/agent-runtime'
import { handleSpawnAgentInline } from '../tools/handlers/tool/spawn-agent-inline'
import { getMatchingSpawn } from '../tools/handlers/tool/spawn-agent-utils'
import { handleSpawnAgents } from '../tools/handlers/tool/spawn-agents'
diff --git a/backend/src/__tests__/subagent-streaming.test.ts b/backend/src/__tests__/subagent-streaming.test.ts
index 712ece800..8f02a182d 100644
--- a/backend/src/__tests__/subagent-streaming.test.ts
+++ b/backend/src/__tests__/subagent-streaming.test.ts
@@ -11,7 +11,7 @@ import {
spyOn,
} from 'bun:test'
-import * as runAgentStep from '../run-agent-step'
+import * as runAgentStep from '@codebuff/agent-runtime'
import { mockFileContext, MockWebSocket } from './test-utils'
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
import { handleSpawnAgents } from '../tools/handlers/tool/spawn-agents'
diff --git a/backend/src/__tests__/test-env-mocks.ts b/backend/src/__tests__/test-env-mocks.ts
new file mode 100644
index 000000000..018203083
--- /dev/null
+++ b/backend/src/__tests__/test-env-mocks.ts
@@ -0,0 +1,193 @@
+import { spyOn } from 'bun:test'
+import z from 'zod/v4'
+import type { AgentRuntimeEnvironment } from '@codebuff/agent-runtime'
+import type { WebSocket } from 'ws'
+import type { AgentTemplate } from '../templates/types'
+import type { AgentTemplateType, AgentState } from '@codebuff/common/types/session-state'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+
+/**
+ * Builds a mock definition for every tool name the test environment exposes.
+ *
+ * Each definition carries the tool name, `endsAgentStep: true`, and an empty
+ * Zod object schema (no required keys) as `parameters`.
+ *
+ * @returns The mock definitions keyed by tool name.
+ */
+function createMockToolDefinitions() {
+  const toolNames = [
+    'read_files',
+    'write_file',
+    'end_turn',
+    'add_message',
+    'set_output',
+    'code_search',
+    'create_plan',
+    'add_subgoal',
+    'update_subgoal',
+    'find_files',
+    'set_messages',
+  ]
+
+  // `Record` needs both type arguments; `any` values keep the result loosely typed.
+  const definitions: Record<string, any> = {}
+  for (const toolName of toolNames) {
+    definitions[toolName] = { toolName, endsAgentStep: true, parameters: z.object({}) }
+  }
+  return definitions
+}
+
+/**
+ * Builds the in-memory tool handlers used by the mock runtime environment.
+ *
+ * Every handler is async, takes `{ toolCall, state }` and resolves to a
+ * string. `set_output`, `add_subgoal` and `update_subgoal` mutate
+ * `state.agentState`; every other handler only resolves to a fixed reply.
+ *
+ * @returns An object mapping tool names to their mock handlers.
+ */
+function createMockToolHandlers() {
+  type HandlerArgs = { toolCall: any; state: any }
+
+  // Creates a handler that ignores its arguments and resolves to `text`.
+  const canned = (text: string) => async ({ toolCall, state }: HandlerArgs) => text
+
+  const handlers = {
+    set_output: async ({ toolCall, state }: HandlerArgs) => {
+      // The entire tool input is stored as the agent's output.
+      state.agentState.output = toolCall.input
+      return 'Output set successfully'
+    },
+    end_turn: canned('Turn ended'),
+    read_files: canned('Files read successfully'),
+    write_file: canned('File written successfully'),
+    add_message: canned('Message added successfully'),
+    code_search: canned('Search completed successfully'),
+    create_plan: canned('Plan created successfully'),
+    add_subgoal: async ({ toolCall, state }: HandlerArgs) => {
+      const subgoal = toolCall.input
+      const agentState = state.agentState
+      // Create the context map the first time a subgoal is stored.
+      if (!agentState.agentContext) {
+        agentState.agentContext = {}
+      }
+      // A newly added subgoal always starts with an empty log list.
+      agentState.agentContext[subgoal.id] = { ...subgoal, logs: [] }
+      return 'Subgoal added successfully'
+    },
+    update_subgoal: async ({ toolCall, state }: HandlerArgs) => {
+      const patch = toolCall.input
+      const context = state.agentState.agentContext
+      const existing = context && context[patch.id]
+      // A missing context or unknown id is a no-op; the reply is unchanged.
+      if (existing) {
+        // Append the incoming log entry, then drop every falsy entry.
+        const logs = [...(existing.logs || []), patch.log].filter(Boolean)
+        context[patch.id] = { ...existing, ...patch, logs }
+      }
+      return 'Subgoal updated successfully'
+    },
+    find_files: async ({ toolCall, state }: HandlerArgs) => {
+      // Fixed search result, returned as a JSON string.
+      const files = [
+        { path: 'src/auth.ts', relevance: 0.9 },
+        { path: 'src/login.ts', relevance: 0.8 },
+      ]
+      return JSON.stringify({ files })
+    },
+  } as const
+
+  return handlers
+}
+
+/**
+ * Creates a mock agent runtime environment for testing.
+ *
+ * The function-valued members of `llm`, `io`, `inputGate`, `templates`,
+ * `analytics` and `logger` are `bun:test` spies with canned implementations;
+ * `tools` comes from the mock definition/handler factories and
+ * `requestContext` is a fixed object. `io.onResponseChunk` starts undefined.
+ *
+ * @returns A new environment; spies are never shared between calls.
+ */
+export function createMockAgentRuntimeEnvironment(): AgentRuntimeEnvironment {
+  // Spies on a throwaway object so every stub tracks its own calls.
+  const stub = (name: string, impl: (...args: any[]) => any) =>
+    spyOn({} as any, name).mockImplementation(impl) as any
+
+  return {
+    llm: {
+      // Returns an async generator function that yields one fixed chunk.
+      getAgentStreamFromTemplate: stub('getAgentStreamFromTemplate', (params: any) => {
+        return async function* () {
+          yield 'Mock LLM response'
+        }
+      }),
+    },
+
+    io: {
+      // Reports success with a text output that names the requested tool.
+      requestToolCall: stub(
+        'requestToolCall',
+        async (userInputId: string, toolName: string, input: any) => ({
+          success: true,
+          output: { type: 'text', value: `Mock ${toolName} result` },
+        }),
+      ),
+      requestFiles: stub('requestFiles', async () => ({})),
+      requestFile: stub('requestFile', async () => null),
+      // No chunk listener is installed by default; tests may assign one.
+      onResponseChunk: undefined,
+    },
+
+    inputGate: {
+      start: stub('start', () => {}),
+      check: stub('check', () => true),
+      end: stub('end', () => {}),
+    },
+
+    tools: {
+      definitions: createMockToolDefinitions(),
+      handlers: createMockToolHandlers(),
+    },
+
+    templates: {
+      // Uses the local template when present, else a minimal fallback template.
+      getAgentTemplate: stub(
+        'getAgentTemplate',
+        async (agentType: AgentTemplateType, localTemplates: Record<string, AgentTemplate>) => {
+          return localTemplates[agentType] || ({
+            id: agentType,
+            displayName: `Mock ${agentType}`,
+            spawnerPrompt: 'Mock spawner prompt',
+            model: 'claude-3-5-sonnet-20241022',
+            inputSchema: {},
+            outputMode: 'last_message',
+            includeMessageHistory: false,
+            toolNames: ['end_turn'],
+            spawnableAgents: [],
+            systemPrompt: 'Mock system prompt',
+            instructionsPrompt: 'Mock instructions prompt',
+            stepPrompt: 'Mock step prompt',
+          } as AgentTemplate)
+        },
+      ),
+      getAgentPrompt: stub('getAgentPrompt', async () => 'Mock agent prompt'),
+    },
+
+    analytics: {
+      trackEvent: stub('trackEvent', () => {}),
+      insertTrace: stub('insertTrace', () => {}),
+    },
+
+    logger: {
+      debug: stub('debug', () => {}),
+      info: stub('info', () => {}),
+      warn: stub('warn', () => {}),
+      error: stub('error', () => {}),
+    },
+
+    requestContext: { processedRepoId: 'test-repo-id' },
+  }
+}
diff --git a/backend/src/__tests__/web-search-tool.test.ts b/backend/src/__tests__/web-search-tool.test.ts
index 0132aba1a..7fb6228e2 100644
--- a/backend/src/__tests__/web-search-tool.test.ts
+++ b/backend/src/__tests__/web-search-tool.test.ts
@@ -28,7 +28,8 @@ import * as liveUserInputs from '../live-user-inputs'
import { MockWebSocket, mockFileContext } from './test-utils'
import * as linkupApi from '../llm-apis/linkup-api'
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
-import { runAgentStep } from '../run-agent-step'
+import { runAgentStep } from '@codebuff/agent-runtime'
+import { createMockAgentRuntimeEnvironment } from './test-env-mocks'
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
import * as websocketAction from '../websockets/websocket-action'
import researcherAgent from '../../../.agents/researcher'
@@ -123,7 +124,7 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- await runAgentStep(new MockWebSocket() as unknown as WebSocket, {
+ await runAgentStep({
userId: TEST_USER_ID,
userInputId: 'test-input',
clientSessionId: 'test-session',
@@ -135,7 +136,7 @@ describe('web_search tool with researcher agent', () => {
agentState,
prompt: 'Search for test',
params: undefined,
- })
+ }, createMockAgentRuntimeEnvironment())
// Just verify that searchWeb was called
expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', {
@@ -167,22 +168,19 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- const { agentState: newAgentState } = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
- {
- userId: TEST_USER_ID,
- userInputId: 'test-input',
- clientSessionId: 'test-session',
- fingerprintId: 'test-fingerprint',
- onResponseChunk: () => {},
- agentType: 'researcher',
- fileContext: mockFileContext,
- localAgentTemplates: agentTemplates,
- agentState,
- prompt: 'Search for Next.js 15 new features',
- params: undefined,
- },
- )
+ const { agentState: newAgentState } = await runAgentStep({
+ userId: TEST_USER_ID,
+ userInputId: 'test-input',
+ clientSessionId: 'test-session',
+ fingerprintId: 'test-fingerprint',
+ onResponseChunk: () => {},
+ agentType: 'researcher',
+ fileContext: mockFileContext,
+ localAgentTemplates: agentTemplates,
+ agentState,
+ prompt: 'Search for Next.js 15 new features',
+ params: undefined,
+ }, createMockAgentRuntimeEnvironment())
expect(linkupApi.searchWeb).toHaveBeenCalledWith(
'Next.js 15 new features',
@@ -229,7 +227,7 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- await runAgentStep(new MockWebSocket() as unknown as WebSocket, {
+ await runAgentStep({
userId: TEST_USER_ID,
userInputId: 'test-input',
clientSessionId: 'test-session',
@@ -241,7 +239,7 @@ describe('web_search tool with researcher agent', () => {
agentState,
prompt: 'Search for React Server Components tutorial with deep search',
params: undefined,
- })
+ }, createMockAgentRuntimeEnvironment())
expect(linkupApi.searchWeb).toHaveBeenCalledWith(
'React Server Components tutorial',
@@ -270,22 +268,19 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- const { agentState: newAgentState } = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
- {
- userId: TEST_USER_ID,
- userInputId: 'test-input',
- clientSessionId: 'test-session',
- fingerprintId: 'test-fingerprint',
- onResponseChunk: () => {},
- agentType: 'researcher',
- fileContext: mockFileContext,
- localAgentTemplates: agentTemplates,
- agentState,
- prompt: "Search for something that doesn't exist",
- params: undefined,
- },
- )
+ const { agentState: newAgentState } = await runAgentStep({
+ userId: TEST_USER_ID,
+ userInputId: 'test-input',
+ clientSessionId: 'test-session',
+ fingerprintId: 'test-fingerprint',
+ onResponseChunk: () => {},
+ agentType: 'researcher',
+ fileContext: mockFileContext,
+ localAgentTemplates: agentTemplates,
+ agentState,
+ prompt: "Search for something that doesn't exist",
+ params: undefined,
+ }, createMockAgentRuntimeEnvironment())
// Verify that searchWeb was called
expect(linkupApi.searchWeb).toHaveBeenCalledWith(
@@ -331,22 +326,19 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- const { agentState: newAgentState } = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
- {
- userId: TEST_USER_ID,
- userInputId: 'test-input',
- clientSessionId: 'test-session',
- fingerprintId: 'test-fingerprint',
- onResponseChunk: () => {},
- agentType: 'researcher',
- fileContext: mockFileContext,
- localAgentTemplates: agentTemplates,
- agentState,
- prompt: 'Search for something',
- params: undefined,
- },
- )
+ const { agentState: newAgentState } = await runAgentStep({
+ userId: TEST_USER_ID,
+ userInputId: 'test-input',
+ clientSessionId: 'test-session',
+ fingerprintId: 'test-fingerprint',
+ onResponseChunk: () => {},
+ agentType: 'researcher',
+ fileContext: mockFileContext,
+ localAgentTemplates: agentTemplates,
+ agentState,
+ prompt: 'Search for something',
+ params: undefined,
+ }, createMockAgentRuntimeEnvironment())
// Verify that searchWeb was called
expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', {
@@ -388,22 +380,19 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- const { agentState: newAgentState } = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
- {
- userId: TEST_USER_ID,
- userInputId: 'test-input',
- clientSessionId: 'test-session',
- fingerprintId: 'test-fingerprint',
- onResponseChunk: () => {},
- agentType: 'researcher',
- fileContext: mockFileContext,
- localAgentTemplates: agentTemplates,
- agentState,
- prompt: 'Search for something',
- params: undefined,
- },
- )
+ const { agentState: newAgentState } = await runAgentStep({
+ userId: TEST_USER_ID,
+ userInputId: 'test-input',
+ clientSessionId: 'test-session',
+ fingerprintId: 'test-fingerprint',
+ onResponseChunk: () => {},
+ agentType: 'researcher',
+ fileContext: mockFileContext,
+ localAgentTemplates: agentTemplates,
+ agentState,
+ prompt: 'Search for something',
+ params: undefined,
+ }, createMockAgentRuntimeEnvironment())
// Verify that searchWeb was called
expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', {
@@ -432,22 +421,19 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- const { agentState: newAgentState } = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
- {
- userId: TEST_USER_ID,
- userInputId: 'test-input',
- clientSessionId: 'test-session',
- fingerprintId: 'test-fingerprint',
- onResponseChunk: () => {},
- agentType: 'researcher',
- fileContext: mockFileContext,
- localAgentTemplates: agentTemplates,
- agentState,
- prompt: 'Search for something',
- params: undefined,
- },
- )
+ const { agentState: newAgentState } = await runAgentStep({
+ userId: TEST_USER_ID,
+ userInputId: 'test-input',
+ clientSessionId: 'test-session',
+ fingerprintId: 'test-fingerprint',
+ onResponseChunk: () => {},
+ agentType: 'researcher',
+ fileContext: mockFileContext,
+ localAgentTemplates: agentTemplates,
+ agentState,
+ prompt: 'Search for something',
+ params: undefined,
+ }, createMockAgentRuntimeEnvironment())
// Verify that searchWeb was called
expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', {
@@ -491,22 +477,19 @@ describe('web_search tool with researcher agent', () => {
}
const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
- const { agentState: newAgentState } = await runAgentStep(
- new MockWebSocket() as unknown as WebSocket,
- {
- userId: TEST_USER_ID,
- userInputId: 'test-input',
- clientSessionId: 'test-session',
- fingerprintId: 'test-fingerprint',
- onResponseChunk: () => {},
- agentType: 'researcher',
- fileContext: mockFileContextWithAgents,
- localAgentTemplates: agentTemplates,
- agentState,
- prompt: 'Test search result formatting',
- params: undefined,
- },
- )
+ const { agentState: newAgentState } = await runAgentStep({
+ userId: TEST_USER_ID,
+ userInputId: 'test-input',
+ clientSessionId: 'test-session',
+ fingerprintId: 'test-fingerprint',
+ onResponseChunk: () => {},
+ agentType: 'researcher',
+ fileContext: mockFileContextWithAgents,
+ localAgentTemplates: agentTemplates,
+ agentState,
+ prompt: 'Test search result formatting',
+ params: undefined,
+ }, createMockAgentRuntimeEnvironment())
// Verify that searchWeb was called
expect(linkupApi.searchWeb).toHaveBeenCalledWith('test formatting', {
diff --git a/backend/src/agent-runtime/env.ts b/backend/src/agent-runtime/env.ts
new file mode 100644
index 000000000..e17b644ce
--- /dev/null
+++ b/backend/src/agent-runtime/env.ts
@@ -0,0 +1,118 @@
+import { insertTrace } from '@codebuff/bigquery'
+import { trackEvent } from '@codebuff/common/analytics'
+import type { AgentRuntimeEnvironment, LLMEnvironment } from '@codebuff/agent-runtime'
+
+import { getAgentTemplate, assembleLocalAgentTemplates } from '../templates/agent-registry'
+import { getAgentPrompt } from '../templates/strings'
+import { getAgentStreamFromTemplate } from '../prompt-agent-stream'
+import { requestFiles, requestFile, requestToolCall } from '../websockets/websocket-action'
+import { checkLiveUserInput, startUserInput, endUserInput } from '../live-user-inputs'
+import { logger } from '../util/logger'
+import { getRequestContext } from '../context/app-context'
+import { codebuffToolDefs } from '../tools/definitions/list'
+import { codebuffToolHandlers } from '../tools/handlers/list'
+
+import type { WebSocket } from 'ws'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { AgentTemplate } from '@codebuff/common/types/agent-template'
+import type { AgentTemplateType, AgentState } from '@codebuff/common/types/session-state'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+
+/**
+ * Builds the `AgentRuntimeEnvironment` for one client connection by delegating
+ * to the backend's existing services. `io` requests pass `ws` to the
+ * websocket-action helpers; `requestContext` is read once, at call time.
+ * @param ws - Connection used for tool-call and file requests.
+ * @param onResponseChunk - Optional callback exposed unchanged as `io.onResponseChunk`.
+ * @returns The assembled environment object.
+ */
+export function createAgentRuntimeEnvironment(
+  ws: WebSocket,
+  onResponseChunk?: (chunk: string | PrintModeEvent) => void,
+): AgentRuntimeEnvironment {
+  return {
+    llm: {
+      getAgentStreamFromTemplate: (
+        params: Parameters<typeof getAgentStreamFromTemplate>[0],
+      ) => getAgentStreamFromTemplate(params),
+    },
+
+    io: {
+      requestToolCall: async (
+        userInputId: string,
+        toolName: string,
+        input: Record<string, any>,
+      ) => requestToolCall(ws, userInputId, toolName, input),
+      requestFiles: async (paths: string[]) => requestFiles(ws, paths),
+      requestFile: async (path: string) => requestFile(ws, path),
+      onResponseChunk,
+    },
+
+    inputGate: {
+      // start/end do nothing without a userId; check always delegates.
+      start: (userId: string | undefined, userInputId: string) => {
+        if (userId) {
+          startUserInput(userId, userInputId)
+        }
+      },
+
+      check: (
+        userId: string | undefined,
+        userInputId: string,
+        clientSessionId: string,
+      ) => checkLiveUserInput(userId, userInputId, clientSessionId),
+
+      end: (userId: string | undefined, userInputId: string) => {
+        if (userId) {
+          endUserInput(userId, userInputId)
+        }
+      },
+    },
+
+    tools: {
+      definitions: codebuffToolDefs,
+      handlers: codebuffToolHandlers,
+    },
+
+    templates: {
+      getAgentTemplate: async (
+        agentType: AgentTemplateType,
+        localTemplates: Record<string, AgentTemplate>,
+      ) => getAgentTemplate(agentType, localTemplates),
+
+      getAgentPrompt: async (
+        template: AgentTemplate,
+        promptType: { type: 'systemPrompt' | 'instructionsPrompt' | 'stepPrompt' },
+        fileContext: ProjectFileContext,
+        agentState: AgentState,
+        localTemplates: Record<string, AgentTemplate>,
+      ) =>
+        getAgentPrompt(
+          template,
+          promptType,
+          fileContext,
+          agentState,
+          localTemplates,
+        ),
+    },
+
+    analytics: {
+      trackEvent: (event: string, userId: string, props: Record<string, any>) => {
+        trackEvent(event as any, userId, props)
+      },
+
+      insertTrace: (trace: any) => {
+        insertTrace(trace)
+      },
+    },
+
+    logger: {
+      debug: (data: any, message?: string) => logger.debug(data, message),
+      info: (data: any, message?: string) => logger.info(data, message),
+      warn: (data: any, message?: string) => logger.warn(data, message),
+      error: (data: any, message?: string) => logger.error(data, message),
+    },
+
+    requestContext: getRequestContext(),
+  }
+}
diff --git a/backend/src/async-agent-manager.ts b/backend/src/async-agent-manager.ts
index df35733be..afcf4c567 100644
--- a/backend/src/async-agent-manager.ts
+++ b/backend/src/async-agent-manager.ts
@@ -179,24 +179,33 @@ export class AsyncAgentManager {
}))
} else {
// Import loopAgentSteps dynamically to avoid circular dependency
- const { loopAgentSteps } = await import('./run-agent-step')
+ const { loopAgentSteps } = await import('@codebuff/agent-runtime')
const { agentTemplates: localAgentTemplates } =
assembleLocalAgentTemplates(agent.fileContext)
- agentPromise = loopAgentSteps(ws, {
- userInputId,
- prompt: undefined, // No initial prompt, will get messages from queue
- params: undefined,
- agentType: agent.agentState.agentType!,
- agentState: agent.agentState,
- fingerprintId: agent.fingerprintId,
- fileContext: agent.fileContext,
- localAgentTemplates,
- toolResults: [],
- userId: agent.userId,
- clientSessionId: sessionId,
- onResponseChunk: () => {}, // Async agents don't stream to parent
- })
+ // Create environment for async agent
+ const { createAgentRuntimeEnvironment } = await import(
+ './agent-runtime/env'
+ )
+ const env = createAgentRuntimeEnvironment(ws, () => {}) // Async agents don't stream to parent
+
+ agentPromise = loopAgentSteps(
+ {
+ userInputId,
+ prompt: undefined, // No initial prompt, will get messages from queue
+ params: undefined,
+ agentType: agent.agentState.agentType!,
+ agentState: agent.agentState,
+ fingerprintId: agent.fingerprintId,
+ fileContext: agent.fileContext,
+ localAgentTemplates,
+ toolResults: [],
+ userId: agent.userId,
+ clientSessionId: sessionId,
+ onResponseChunk: () => {}, // Async agents don't stream to parent
+ },
+ env,
+ )
}
// Store the promise and handle completion
agent.promise = agentPromise
diff --git a/backend/src/main-prompt.ts b/backend/src/main-prompt.ts
index 40843b798..36c1054a0 100644
--- a/backend/src/main-prompt.ts
+++ b/backend/src/main-prompt.ts
@@ -4,8 +4,9 @@ import { generateCompactId } from '@codebuff/common/util/string'
import { uniq } from 'lodash'
import { checkTerminalCommand } from './check-terminal-command'
-import { loopAgentSteps } from './run-agent-step'
+import { loopAgentSteps } from '@codebuff/agent-runtime'
import { getAgentTemplate } from './templates/agent-registry'
+import { createAgentRuntimeEnvironment } from './agent-runtime/env'
import { logger } from './util/logger'
import { expireMessages } from './util/messages'
import { requestToolCall } from './websockets/websocket-action'
@@ -187,20 +188,26 @@ export const mainPrompt = async (
mainAgentTemplate.spawnableAgents = updatedSubagents
localAgentTemplates[agentType] = mainAgentTemplate
- const { agentState } = await loopAgentSteps(ws, {
- userInputId: promptId,
- prompt,
- params: promptParams,
- agentType,
- agentState: mainAgentState,
- fingerprintId,
- fileContext,
- toolResults: [],
- userId,
- clientSessionId,
- onResponseChunk,
- localAgentTemplates,
- })
+ // Create the runtime environment
+ const env = createAgentRuntimeEnvironment(ws, onResponseChunk)
+
+ const { agentState } = await loopAgentSteps(
+ {
+ userInputId: promptId,
+ prompt,
+ params: promptParams,
+ agentType,
+ agentState: mainAgentState,
+ fingerprintId,
+ fileContext,
+ toolResults: [],
+ userId,
+ clientSessionId,
+ onResponseChunk,
+ localAgentTemplates,
+ },
+ env,
+ )
logger.debug({ agentState }, 'Main prompt finished')
diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
index 99568e2b1..decc19efd 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -316,22 +316,31 @@ export async function executeAgent({
onResponseChunk: (chunk: string | PrintModeEvent) => void
}) {
// Import loopAgentSteps dynamically to avoid circular dependency
- const { loopAgentSteps } = await import('../../../run-agent-step')
+ const { loopAgentSteps } = await import('@codebuff/agent-runtime')
- return await loopAgentSteps(ws, {
- userInputId,
- prompt,
- params,
- agentType: agentTemplate.id,
- agentState,
- fingerprintId,
- fileContext,
- localAgentTemplates,
- toolResults: [],
- userId,
- clientSessionId,
- onResponseChunk,
- })
+ // Create environment for spawned agent
+ const { createAgentRuntimeEnvironment } = await import(
+ '../../../agent-runtime/env'
+ )
+ const env = createAgentRuntimeEnvironment(ws, onResponseChunk)
+
+ return await loopAgentSteps(
+ {
+ userInputId,
+ prompt,
+ params,
+ agentType: agentTemplate.id,
+ agentState,
+ fingerprintId,
+ fileContext,
+ localAgentTemplates,
+ toolResults: [],
+ userId,
+ clientSessionId,
+ onResponseChunk,
+ },
+ env,
+ )
}
/**
diff --git a/bun.lock b/bun.lock
index 314f8f19c..ee0858ee3 100644
--- a/bun.lock
+++ b/bun.lock
@@ -41,6 +41,7 @@
"dependencies": {
"@ai-sdk/google-vertex": "3.0.6",
"@ai-sdk/openai": "2.0.11",
+ "@codebuff/agent-runtime": "workspace:*",
"@codebuff/billing": "workspace:*",
"@codebuff/common": "workspace:*",
"@codebuff/internal": "workspace:*",
@@ -155,6 +156,26 @@
"zod": "3.25.67",
},
},
+ "packages/agent-runtime": {
+ "name": "@codebuff/agent-runtime",
+ "version": "1.0.0",
+ "dependencies": {
+ "@codebuff/common": "workspace:*",
+ "ai": "5.0.0",
+ "diff": "5.2.0",
+ "gpt-tokenizer": "2.8.1",
+ "ignore": "5.3.2",
+ "lodash": "*",
+ "ts-pattern": "5.3.1",
+ "zod": "3.25.67",
+ "zod-from-json-schema": "0.4.2",
+ },
+ "devDependencies": {
+ "@types/bun": "^1.2.11",
+ "@types/diff": "^5.0.3",
+ "@types/node": "22",
+ },
+ },
"packages/bigquery": {
"name": "@codebuff/bigquery",
"version": "1.0.0",
@@ -365,7 +386,7 @@
"@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="],
- "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="],
+ "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="],
"@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="],
@@ -567,6 +588,8 @@
"@chevrotain/utils": ["@chevrotain/utils@11.0.3", "", {}, "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ=="],
+ "@codebuff/agent-runtime": ["@codebuff/agent-runtime@workspace:packages/agent-runtime"],
+
"@codebuff/agents": ["@codebuff/agents@workspace:.agents"],
"@codebuff/backend": ["@codebuff/backend@workspace:backend"],
@@ -3867,7 +3890,13 @@
"zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="],
- "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="],
+ "@ai-sdk/anthropic/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="],
+
+ "@ai-sdk/google/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="],
+
+ "@ai-sdk/google-vertex/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="],
+
+ "@ai-sdk/openai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="],
"@ampproject/remapping/@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.30", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q=="],
@@ -3893,6 +3922,10 @@
"@babel/plugin-transform-runtime/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="],
+ "@codebuff/agent-runtime/ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
+
+ "@codebuff/agent-runtime/ts-pattern": ["ts-pattern@5.3.1", "", {}, "sha512-1RUMKa8jYQdNfmnK4jyzBK3/PS/tnjcZ1CW0v1vWDeYe5RBklc/nquw03MEoB66hVBm4BnlCfmOqDVxHyT1DpA=="],
+
"@codebuff/backend/ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
"@codebuff/backend/ts-pattern": ["ts-pattern@5.3.1", "", {}, "sha512-1RUMKa8jYQdNfmnK4jyzBK3/PS/tnjcZ1CW0v1vWDeYe5RBklc/nquw03MEoB66hVBm4BnlCfmOqDVxHyT1DpA=="],
@@ -4113,8 +4146,6 @@
"aceternity-ui/node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="],
- "ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="],
-
"autoprefixer/picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="],
"babel-plugin-istanbul/istanbul-lib-instrument": ["istanbul-lib-instrument@5.2.1", "", { "dependencies": { "@babel/core": "^7.12.3", "@babel/parser": "^7.14.7", "@istanbuljs/schema": "^0.1.2", "istanbul-lib-coverage": "^3.2.0", "semver": "^6.3.0" } }, "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg=="],
diff --git a/evals/git-evals/run-single-eval-process.ts b/evals/git-evals/run-single-eval-process.ts
index ca3704426..5e8e291eb 100644
--- a/evals/git-evals/run-single-eval-process.ts
+++ b/evals/git-evals/run-single-eval-process.ts
@@ -6,7 +6,6 @@ import {
} from '@codebuff/npm-app/project-files'
import { recreateShell } from '@codebuff/npm-app/terminal/run-command'
-import { createFileReadingMock } from '../scaffolding'
import { setupTestEnvironmentVariables } from '../test-setup'
import { runSingleEval } from './run-git-evals'
@@ -56,7 +55,6 @@ async function main() {
// Setup environment for this process
setProjectRoot(projectPath)
setupTestEnvironmentVariables()
- createFileReadingMock(projectPath)
recreateShell(projectPath)
setWorkingDirectory(projectPath)
diff --git a/evals/git-evals/run-single-eval.ts b/evals/git-evals/run-single-eval.ts
index 5f455c908..4c8c00c49 100644
--- a/evals/git-evals/run-single-eval.ts
+++ b/evals/git-evals/run-single-eval.ts
@@ -10,7 +10,6 @@ import {
import { recreateShell } from '@codebuff/npm-app/terminal/run-command'
import { Command, Flags } from '@oclif/core'
-import { createFileReadingMock } from '../scaffolding'
import { setupTestEnvironmentVariables } from '../test-setup'
import { runSingleEval } from './run-git-evals'
import { extractRepoNameFromUrl, setupTestRepo } from './setup-test-repo'
@@ -174,7 +173,6 @@ async function runSingleEvalTask(options: {
// Setup project context
setProjectRoot(projectPath)
- createFileReadingMock(projectPath)
recreateShell(projectPath)
setWorkingDirectory(projectPath)
diff --git a/evals/scaffolding.ts b/evals/scaffolding.ts
index 466b20b98..b7aeeac88 100644
--- a/evals/scaffolding.ts
+++ b/evals/scaffolding.ts
@@ -1,18 +1,10 @@
import { execSync } from 'child_process'
-import { EventEmitter } from 'events'
import fs from 'fs'
import path from 'path'
-import { runAgentStep } from '@codebuff/backend/run-agent-step'
-import { assembleLocalAgentTemplates } from '@codebuff/backend/templates/agent-registry'
import { getFileTokenScores } from '@codebuff/code-map/parse'
-import { TEST_USER_ID } from '@codebuff/common/constants'
-import { mockModule } from '@codebuff/common/testing/mock-modules'
-import { generateCompactId } from '@codebuff/common/util/string'
import { handleToolCall } from '@codebuff/npm-app/tool-handlers'
import { getSystemInfo } from '@codebuff/npm-app/utils/system-info'
-import { mock } from 'bun:test'
-import { blue } from 'picocolors'
import {
getAllFilePaths,
@@ -23,23 +15,9 @@ import type {
SDKAssistantMessage,
SDKUserMessage,
} from '@anthropic-ai/claude-code'
-import type {
- requestFiles as originalRequestFiles,
- requestToolCall as originalRequestToolCall,
-} from '@codebuff/backend/websockets/websocket-action'
-import type { FileChanges } from '@codebuff/common/actions'
import type { ClientToolCall } from '@codebuff/common/tools/list'
-import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type {
- AgentState,
- AgentTemplateType,
- SessionState,
- ToolResult,
-} from '@codebuff/common/types/session-state'
+import type { ToolResult } from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
-import type { WebSocket } from 'ws'
-
-const DEBUG_MODE = true
export type ToolResultBlockParam = Extract<
SDKUserMessage['message']['content'][number],
@@ -65,62 +43,6 @@ function readMockFile(projectRoot: string, filePath: string): string | null {
}
}
-let toolCalls: ClientToolCall[] = []
-let toolResults: ToolResult[] = []
-export function createFileReadingMock(projectRoot: string) {
- mockModule('@codebuff/backend/websockets/websocket-action', () => ({
- requestFiles: ((ws: WebSocket, filePaths: string[]) => {
- const files: Record = {}
- for (const filePath of filePaths) {
- files[filePath] = readMockFile(projectRoot, filePath)
- }
- return Promise.resolve(files)
- }) satisfies typeof originalRequestFiles,
- requestToolCall: (async (
- ws: WebSocket,
- userInputId: string,
- toolName: string,
- input: Record,
- timeout: number = 30_000,
- ): ReturnType => {
- // Execute the tool call using existing tool handlers
- const toolCall = {
- toolCallId: generateCompactId(),
- toolName,
- input,
- }
- toolCalls.push(toolCall as ClientToolCall)
- try {
- const toolResult = await handleToolCall(toolCall as any)
- toolResults.push({
- toolName: toolCall.toolName,
- toolCallId: toolCall.toolCallId,
- output: toolResult.output,
- })
-
- // Send successful response back to backend
- return {
- success: true,
- output: toolResult.output,
- }
- } catch (error) {
- // Send error response back to backend
- const resultString =
- error instanceof Error ? error.message : String(error)
- toolResults.push({
- toolName: toolCall.toolName,
- toolCallId: toolCall.toolCallId,
- output: { type: 'text', value: resultString },
- })
- return {
- success: false,
- error: resultString,
- }
- }
- }) satisfies typeof originalRequestToolCall,
- }))
-}
-
export async function getProjectFileContext(
projectPath: string,
): Promise {
@@ -158,50 +80,7 @@ export async function getProjectFileContext(
}
}
-export async function runAgentStepScaffolding(
- agentState: AgentState,
- fileContext: ProjectFileContext,
- prompt: string | undefined,
- sessionId: string,
- agentType: AgentTemplateType,
-) {
- const mockWs = new EventEmitter() as WebSocket
- mockWs.send = mock()
- mockWs.close = mock()
-
- let fullResponse = ''
- const { agentTemplates: localAgentTemplates } =
- assembleLocalAgentTemplates(fileContext)
-
- const result = await runAgentStep(mockWs, {
- userId: TEST_USER_ID,
- userInputId: generateCompactId(),
- clientSessionId: sessionId,
- fingerprintId: 'test-fingerprint-id',
- onResponseChunk: (chunk: string | PrintModeEvent) => {
- if (typeof chunk !== 'string') {
- return
- }
- if (DEBUG_MODE) {
- process.stdout.write(chunk)
- }
- fullResponse += chunk
- },
- agentType,
- fileContext,
- localAgentTemplates,
- agentState,
- prompt,
- params: undefined,
- })
-
- return {
- ...result,
- fullResponse,
- }
-}
-
-export async function runToolCalls(toolCalls: ClientToolCall[]) {
+async function runToolCalls(toolCalls: ClientToolCall[]) {
const toolResults: ToolResult[] = []
for (const toolCall of toolCalls) {
const toolResult = await handleToolCall(toolCall)
@@ -210,84 +89,6 @@ export async function runToolCalls(toolCalls: ClientToolCall[]) {
return toolResults
}
-export async function loopMainPrompt({
- sessionState,
- prompt,
- projectPath,
- maxIterations,
- stopCondition,
- agentType,
-}: {
- sessionState: SessionState
- prompt: string
- projectPath: string
- maxIterations: number
- stopCondition?: (sessionState: AgentState) => boolean
- agentType: AgentTemplateType
-}) {
- console.log(blue(prompt))
-
- const startTime = Date.now()
- const sessionId = 'test-session-id-' + generateCompactId()
- let currentAgentState = sessionState.mainAgentState
- let iterations = 1
- const steps: AgentStep[] = []
-
- for (; iterations < maxIterations; iterations++) {
- console.log('\nIteration', iterations)
- let {
- agentState: newAgentState,
- fullResponse,
- shouldEndTurn,
- } = await runAgentStepScaffolding(
- currentAgentState,
- sessionState.fileContext,
- iterations === 1 ? prompt : undefined,
- sessionId,
- agentType,
- )
- currentAgentState = newAgentState
-
- const stop = stopCondition && stopCondition(currentAgentState)
- if (stop) break
-
- steps.push({
- response: fullResponse,
- toolCalls,
- toolResults,
- })
-
- toolCalls = []
- toolResults = []
-
- if (shouldEndTurn) {
- break
- }
- }
-
- console.log('Main loop finished!')
- console.log(' - iterations', iterations)
- console.log(
- ' - took',
- ((Date.now() - startTime) / 1000).toFixed(2),
- 'seconds',
- )
-
- return {
- agentState: currentAgentState,
- iterations: iterations - 1,
- steps,
- duration: Date.now() - startTime,
- }
-}
-
-export function extractErrorFiles(output: string): string[] {
- const lines = output.split('\n')
- return lines
- .filter((line) => line.includes(': error TS'))
- .map((line) => line.split('(')[0].trim())
-}
-
export function resetRepoToCommit(projectPath: string, commit: string) {
console.log(`Resetting repository at ${projectPath} to commit ${commit}...`)
try {
@@ -305,11 +106,7 @@ export function resetRepoToCommit(projectPath: string, commit: string) {
}
export default {
- createFileReadingMock,
getProjectFileContext,
- runAgentStepScaffolding,
runToolCalls,
- loopMainPrompt,
- extractErrorFiles,
resetRepoToCommit,
}
diff --git a/evals/swe-bench.test.ts b/evals/swe-bench.test.ts
index 600abfe1c..9ae26b423 100644
--- a/evals/swe-bench.test.ts
+++ b/evals/swe-bench.test.ts
@@ -4,7 +4,6 @@ import * as path from 'path'
import { describe, expect, test } from 'bun:test'
import { PROMPT_PREFIX } from './constants'
-import { loopMainPrompt } from './scaffolding'
import { passesSweBenchTests } from './swe-bench-eval'
import { SWE_BENCH_IDS } from './swe-bench-ids'
import {
@@ -54,13 +53,13 @@ describe.skip('SWE-Bench', async () => {
const prompt =
PROMPT_PREFIX + sweBenchLiteDataset[instanceId].problem_statement
- await loopMainPrompt({
- sessionState: initialSessionState,
- prompt,
- projectPath: repoPath,
- maxIterations: 100,
- agentType: 'base',
- })
+ // await loopMainPrompt({
+ // sessionState: initialSessionState,
+ // prompt,
+ // projectPath: repoPath,
+ // maxIterations: 100,
+ // agentType: 'base',
+ // })
expect(await passesSweBenchTests(instanceId, repoPath)).toBeTruthy()
},
{ timeout: 10 * 60 * 60 * 1000 }, // 10 hours
diff --git a/evals/test-setup.ts b/evals/test-setup.ts
index 456b61d54..3ede03c8a 100644
--- a/evals/test-setup.ts
+++ b/evals/test-setup.ts
@@ -10,7 +10,6 @@ import {
import { recreateShell } from '@codebuff/npm-app/terminal/run-command'
import {
- createFileReadingMock,
getProjectFileContext,
resetRepoToCommit,
} from './scaffolding'
@@ -155,7 +154,6 @@ export async function setupTestEnvironment(projectName: string) {
const repoPath = path.join(TEST_REPOS_DIR, projectName)
setProjectRoot(repoPath)
- createFileReadingMock(repoPath)
recreateShell(repoPath)
setWorkingDirectory(repoPath)
diff --git a/packages/agent-runtime/package.json b/packages/agent-runtime/package.json
new file mode 100644
index 000000000..061cc1745
--- /dev/null
+++ b/packages/agent-runtime/package.json
@@ -0,0 +1,46 @@
+{
+ "name": "@codebuff/agent-runtime",
+ "version": "1.0.0",
+ "description": "Agent runtime logic for Codebuff",
+ "private": true,
+ "license": "UNLICENSED",
+ "type": "module",
+ "exports": {
+ ".": {
+ "bun": "./src/index.ts",
+ "import": "./src/index.ts",
+ "types": "./src/index.ts",
+ "default": "./src/index.ts"
+ },
+ "./*": {
+ "bun": "./src/*.ts",
+ "import": "./src/*.ts",
+ "types": "./src/*.ts",
+ "default": "./src/*.ts"
+ }
+ },
+ "scripts": {
+ "typecheck": "tsc --noEmit -p .",
+ "test": "bun test"
+ },
+ "sideEffects": false,
+ "engines": {
+ "bun": ">=1.2.11"
+ },
+ "dependencies": {
+ "@codebuff/common": "workspace:*",
+ "ai": "5.0.0",
+ "diff": "5.2.0",
+ "gpt-tokenizer": "2.8.1",
+ "ignore": "5.3.2",
+ "lodash": "*",
+ "ts-pattern": "5.3.1",
+ "zod": "3.25.67",
+ "zod-from-json-schema": "0.4.2"
+ },
+ "devDependencies": {
+ "@types/diff": "^5.0.3",
+ "@types/node": "22",
+ "@types/bun": "^1.2.11"
+ }
+}
\ No newline at end of file
diff --git a/packages/agent-runtime/src/analytics/interfaces.ts b/packages/agent-runtime/src/analytics/interfaces.ts
new file mode 100644
index 000000000..d7b0b1880
--- /dev/null
+++ b/packages/agent-runtime/src/analytics/interfaces.ts
@@ -0,0 +1,24 @@
+/**
+ * Optional analytics hooks; the runtime skips any hook that is not provided
+ */
+export interface AnalyticsEnvironment {
+ /**
+ * Record a named analytics event (e.g. 'AGENT_STEP') for a user, with its properties
+ */
+ trackEvent?: (event: string, userId: string, props: Record) => void
+
+ /**
+ * Insert a trace record (the runtime emits 'agent-response' traces)
+ */
+ insertTrace?: (trace: any) => void
+}
+
+/**
+ * Leveled logger; every method takes the data payload first and an optional message second
+ */
+export interface LoggerEnvironment {
+ debug: (data: any, message?: string) => void
+ info: (data: any, message?: string) => void
+ warn: (data: any, message?: string) => void
+ error: (data: any, message?: string) => void
+}
diff --git a/packages/agent-runtime/src/index.ts b/packages/agent-runtime/src/index.ts
new file mode 100644
index 000000000..a55d0ca80
--- /dev/null
+++ b/packages/agent-runtime/src/index.ts
@@ -0,0 +1,31 @@
+// Core runtime exports
+export { loopAgentSteps, runAgentStep } from './runtime/loop-agent-steps'
+export { runProgrammaticStep, clearAgentGeneratorCache } from './runtime/run-programmatic-step'
+export { getFileReadingUpdates } from './runtime/get-file-reading-updates'
+export { processStreamWithTools } from './tools/stream-parser'
+export { executeToolCall, executeCustomToolCall } from './tools/tool-executor'
+
+// Interface exports
+export type { LLMEnvironment } from './llm/interfaces'
+export type { IOEnvironment } from './io/interfaces'
+export type { InputGateEnvironment } from './io/interfaces'
+export type { TemplatesEnvironment } from './templates/interfaces'
+export type { AnalyticsEnvironment } from './analytics/interfaces'
+export type { LoggerEnvironment } from './analytics/interfaces'
+export type { AgentRuntimeEnvironment } from './runtime/interfaces'
+
+// Utility exports
+export * from './util/messages'
+export * from './util/parse-tool-call-xml'
+export * from './util/simplify-tool-results'
+export * from './util/token-counter'
+export * from './util/object'
+
+// Template exports
+export { getAgentTemplate, assembleLocalAgentTemplates } from './templates/agent-registry'
+export { getAgentPrompt } from './templates/strings'
+export * from './templates/types'
+
+// Types
+export type { AgentOptions } from './runtime/loop-agent-steps'
+export type { ExecuteToolCallParams, CustomToolCall, ToolCallError } from './tools/tool-executor'
diff --git a/packages/agent-runtime/src/io/interfaces.ts b/packages/agent-runtime/src/io/interfaces.ts
new file mode 100644
index 000000000..7cc9010ac
--- /dev/null
+++ b/packages/agent-runtime/src/io/interfaces.ts
@@ -0,0 +1,78 @@
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { ToolName } from '@codebuff/common/tools/constants'
+
+/**
+ * IO abstraction for tool calls, file requests, and streaming
+ * The backend implements this over WebSockets
+ */
+export interface IOEnvironment {
+ /**
+ * Ask the client to execute a tool; resolves with a success flag plus text output or an error string
+ */
+ requestToolCall: (
+ userInputId: string,
+ toolName: string,
+ input: Record
+ ) => Promise<{
+ success: boolean
+ output?: {
+ type: 'text'
+ value: string
+ }
+ error?: string
+ }>
+
+ /**
+ * Request multiple files from the client; callers index the result by path and treat null/undefined as missing
+ */
+ requestFiles: (paths: string[]) => Promise>
+
+ /**
+ * Request a single file from the client
+ */
+ requestFile: (path: string) => Promise
+
+ /**
+ * Send a response chunk (raw text or a PrintModeEvent) to the client; optional, so callers check before calling
+ */
+ onResponseChunk?: (chunk: string | PrintModeEvent) => void
+}
+
+/**
+ * Tool definitions and handlers supplied by the host (exposed to the runtime as env.tools)
+ */
+export interface ToolsEnvironment {
+ /**
+ * Tool definitions for validation (presumably keyed by tool name; confirm)
+ */
+ definitions: Record
+
+ /**
+ * Tool handlers for execution (presumably keyed by tool name; confirm)
+ */
+ handlers: Record
+}
+
+/**
+ * Input gate for managing user input cancellation and interruption
+ */
+export interface InputGateEnvironment {
+ /**
+ * Start tracking a user input session (userId may be undefined)
+ */
+ start: (userId: string | undefined, userInputId: string) => void
+
+ /**
+ * Return true while the user input is still live (not cancelled); loopAgentSteps keeps stepping only while this holds
+ */
+ check: (
+ userId: string | undefined,
+ userInputId: string,
+ clientSessionId: string
+ ) => boolean
+
+ /**
+ * Stop tracking a user input session (userId may be undefined)
+ */
+ end: (userId: string | undefined, userInputId: string) => void
+}
diff --git a/packages/agent-runtime/src/llm/interfaces.ts b/packages/agent-runtime/src/llm/interfaces.ts
new file mode 100644
index 000000000..a7547d46f
--- /dev/null
+++ b/packages/agent-runtime/src/llm/interfaces.ts
@@ -0,0 +1,26 @@
+import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { AgentTemplate } from '@codebuff/common/types/agent-template'
+
+/**
+ * LLM provider abstraction interface
+ * The backend implements this to provide LLM services while keeping
+ * provider-specific logic and cost tracking out of the runtime
+ */
+export interface LLMEnvironment {
+ /**
+ * Build a stream factory for an agent template: the returned function takes
+ * the message list and returns an async generator for the response.
+ * onCostCalculated, when given, is how the credits for a call are reported back
+ */
+ getAgentStreamFromTemplate: (params: {
+ clientSessionId: string
+ fingerprintId: string
+ userInputId: string
+ userId: string | undefined
+ agentId?: string
+ template: AgentTemplate
+ onCostCalculated?: (credits: number) => Promise
+ includeCacheControl?: boolean
+ }) => (messages: CodebuffMessage[]) => AsyncGenerator
+}
diff --git a/packages/agent-runtime/src/runtime/get-file-reading-updates.ts b/packages/agent-runtime/src/runtime/get-file-reading-updates.ts
new file mode 100644
index 000000000..2eeb9169d
--- /dev/null
+++ b/packages/agent-runtime/src/runtime/get-file-reading-updates.ts
@@ -0,0 +1,196 @@
+import { HIDDEN_FILE_READ_STATUS } from '@codebuff/common/constants'
+import { parseFileBlocks } from '@codebuff/common/util/file'
+import { toContentString } from '@codebuff/common/util/messages'
+import { countTokens } from 'gpt-tokenizer'
+import { uniq, difference } from 'lodash'
+
+import {
+ isToolResult,
+ parseToolResults,
+ parseReadFilesResult,
+} from '../util/parse-tool-call-xml'
+import { countTokensJson } from '../util/token-counter'
+import type { AgentRuntimeEnvironment } from './interfaces'
+
+import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+
+const getInitialFiles = (fileContext: ProjectFileContext) => {
+ const { userKnowledgeFiles, knowledgeFiles } = fileContext
+ return [
+ // User-level knowledge files come first (none when userKnowledgeFiles is absent).
+ ...Object.entries(userKnowledgeFiles ?? {}).map(([path, content]) => ({
+ path,
+ content,
+ })),
+
+ // Then project knowledge files, converted to { path, content } entries.
+ ...Object.entries(knowledgeFiles)
+ .map(([path, content]) => ({
+ path,
+ content,
+ }))
+ // Keep only top-level files, i.e. paths that contain no '/' separator.
+ .filter((f) => f.path.split('/').length === 1),
+ ]
+}
+
+/**
+ * Decides which files to (re)show the agent on this step: requested files,
+ * first-read knowledge files, and previously read or edited files whose
+ * content changed. Resets to a trimmed set when over FILE_TOKEN_BUDGET.
+ */
+export async function getFileReadingUpdates(
+ messages: CodebuffMessage[],
+ fileContext: ProjectFileContext,
+ options: {
+ requestedFiles?: string[]
+ agentStepId: string
+ clientSessionId: string
+ fingerprintId: string
+ userInputId: string
+ userId: string | undefined
+ repoId: string | undefined
+ },
+ env: AgentRuntimeEnvironment,
+) {
+ const FILE_TOKEN_BUDGET = 100_000
+
+ const toolResults = messages
+ .filter(isToolResult)
+ .flatMap((content) => parseToolResults(toContentString(content)))
+ const previousFileList = toolResults
+ .filter(({ toolName }) => toolName === 'read_files')
+ .flatMap(({ output }) => parseReadFilesResult(output.value))
+
+ const previousFiles = Object.fromEntries(
+ previousFileList.map(({ path, content }) => [path, content]),
+ )
+ const previousFilePaths = uniq(Object.keys(previousFiles))
+
+ const editedFilePaths = messages
+ .filter(({ role }) => role === 'assistant')
+ .map(toContentString)
+ .filter((content) => content.includes('<write_file'))
+ .flatMap((content) => Object.keys(parseFileBlocks(content)))
+ .filter((path) => path !== undefined)
+
+ const requestedFiles = options.requestedFiles ?? []
+
+ const isFirstRead = previousFileList.length === 0
+ const initialFiles = getInitialFiles(fileContext)
+ const includedInitialFiles = isFirstRead
+ ? initialFiles.map(({ path }) => path)
+ : []
+
+ const allFilePaths = uniq([
+ ...includedInitialFiles,
+ ...requestedFiles,
+ ...editedFilePaths,
+ ...previousFilePaths,
+ ])
+ const loadedFiles = await env.io.requestFiles(allFilePaths)
+
+ const filteredRequestedFiles = requestedFiles.filter((filePath, i) => {
+ const content = loadedFiles[filePath]
+ if (content === null || content === undefined) return false
+ const tokenCount = countTokens(content)
+ if (i < 5) {
+ return tokenCount < 50_000 - i * 10_000
+ }
+ return tokenCount < 10_000
+ })
+ const newFiles = difference(
+ [...filteredRequestedFiles, ...includedInitialFiles],
+ previousFilePaths,
+ )
+ const newFilesToRead = uniq([
+ // NOTE: When the assistant specifically asks for a file, we force it to be shown even if it's not new or changed.
+ ...requestedFiles,
+
+ ...newFiles,
+ ])
+
+ const updatedFilePaths = [...previousFilePaths, ...editedFilePaths].filter(
+ (path) => {
+ return loadedFiles[path] !== previousFiles[path]
+ },
+ )
+
+ const addedFiles = uniq([
+ ...includedInitialFiles,
+ ...updatedFilePaths,
+ ...newFilesToRead,
+ ])
+ .map((path) => {
+ return {
+ path,
+ content: loadedFiles[path]!,
+ }
+ })
+ .filter((file) => file.content != null)
+
+ const previousFilesTokens = countTokensJson(previousFiles)
+ const addedFileTokens = countTokensJson(addedFiles)
+
+ const printedPaths = getPrintedPaths(
+ requestedFiles,
+ newFilesToRead,
+ loadedFiles,
+ )
+
+ if (previousFilesTokens + addedFileTokens > FILE_TOKEN_BUDGET) {
+ const requestedLoadedFiles = filteredRequestedFiles.map((path) => ({
+ path,
+ content: loadedFiles[path]!,
+ }))
+ const newFiles = uniq([...initialFiles, ...requestedLoadedFiles])
+ while (countTokensJson(newFiles) > FILE_TOKEN_BUDGET) {
+ newFiles.pop()
+ }
+
+ env.logger?.debug(
+ {
+ newFiles,
+ prevFileVersionTokens: previousFilesTokens,
+ addedFileTokens,
+ beforeTotalTokens: previousFilesTokens + addedFileTokens,
+ newFileVersionTokens: countTokensJson(newFiles),
+ FILE_TOKEN_BUDGET,
+ },
+ 'resetting read files b/c of token budget',
+ )
+
+ return {
+ addedFiles: newFiles,
+ updatedFilePaths: updatedFilePaths,
+ printedPaths,
+ clearReadFileToolResults: true,
+ }
+ }
+
+ return {
+ addedFiles,
+ updatedFilePaths,
+ printedPaths,
+ clearReadFileToolResults: false,
+ }
+}
+
+function getPrintedPaths(
+ requestedFiles: string[],
+ newFilesToRead: string[],
+ loadedFiles: Record,
+) {
+ // If no files were requested, print nothing,
+ // even though initial or edited files may still have been added.
+ if (requestedFiles.length === 0) return []
+ // Otherwise, print only files with non-empty content that doesn't start with a hidden file status.
+ return newFilesToRead.filter(
+ (path) =>
+ loadedFiles[path] &&
+ !HIDDEN_FILE_READ_STATUS.some((status) =>
+ loadedFiles[path]!.startsWith(status),
+ ),
+ )
+}
diff --git a/packages/agent-runtime/src/runtime/interfaces.ts b/packages/agent-runtime/src/runtime/interfaces.ts
new file mode 100644
index 000000000..a61d227b1
--- /dev/null
+++ b/packages/agent-runtime/src/runtime/interfaces.ts
@@ -0,0 +1,36 @@
+import type { LLMEnvironment } from '../llm/interfaces'
+import type { IOEnvironment, InputGateEnvironment, ToolsEnvironment } from '../io/interfaces'
+import type { TemplatesEnvironment } from '../templates/interfaces'
+import type { AnalyticsEnvironment, LoggerEnvironment } from '../analytics/interfaces'
+
+/**
+ * Complete environment interface for the agent runtime
+ * The backend implements this to provide all necessary services
+ */
+export interface AgentRuntimeEnvironment {
+ /** LLM provider abstraction */
+ llm: LLMEnvironment
+
+ /** IO for tool calls, file requests, streaming */
+ io: IOEnvironment
+
+ /** Input gating for cancellation */
+ inputGate: InputGateEnvironment
+
+ /** Tool definitions and handlers */
+ tools: ToolsEnvironment
+
+ /** Template loading and prompt generation */
+ templates: TemplatesEnvironment
+
+ /** Analytics tracking (optional; hooks are skipped when omitted) */
+ analytics?: AnalyticsEnvironment
+
+ /** Logging (optional; the runtime calls env.logger?.x, so nothing is logged when omitted) */
+ logger?: LoggerEnvironment
+
+ /** Request context for tracing (optional); processedRepoId is read as the repo ID */
+ requestContext?: {
+ processedRepoId?: string
+ }
+}
diff --git a/backend/src/run-agent-step.ts b/packages/agent-runtime/src/runtime/loop-agent-steps.ts
similarity index 75%
rename from backend/src/run-agent-step.ts
rename to packages/agent-runtime/src/runtime/loop-agent-steps.ts
index 56d779bf4..b6c4b9ce9 100644
--- a/backend/src/run-agent-step.ts
+++ b/packages/agent-runtime/src/runtime/loop-agent-steps.ts
@@ -1,25 +1,13 @@
-import { insertTrace } from '@codebuff/bigquery'
-import { trackEvent } from '@codebuff/common/analytics'
import {
- ASYNC_AGENTS_ENABLED,
- supportsCacheControl,
-} from '@codebuff/common/constants'
-import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
-import { TOOLS_WHICH_WONT_FORCE_NEXT_STEP } from '@codebuff/common/tools/constants'
+ TOOLS_WHICH_WONT_FORCE_NEXT_STEP,
+} from '@codebuff/common/tools/constants'
import { renderToolResults } from '@codebuff/common/tools/utils'
import { buildArray } from '@codebuff/common/util/array'
import { generateCompactId } from '@codebuff/common/util/string'
-import { asyncAgentManager } from './async-agent-manager'
import { getFileReadingUpdates } from './get-file-reading-updates'
-import { checkLiveUserInput } from './live-user-inputs'
-import { getAgentStreamFromTemplate } from './prompt-agent-stream'
import { runProgrammaticStep } from './run-programmatic-step'
-import { additionalSystemPrompts } from './system-prompt/prompts'
-import { getAgentTemplate } from './templates/agent-registry'
-import { getAgentPrompt } from './templates/strings'
-import { processStreamWithTools } from './tools/stream-parser'
-import { logger } from './util/logger'
+import { processStreamWithTools } from '../tools/stream-parser'
import {
asSystemInstruction,
asSystemMessage,
@@ -28,13 +16,12 @@ import {
expireMessages,
getMessagesSubset,
isSystemInstruction,
-} from './util/messages'
-import { isToolResult, renderReadFilesResult } from './util/parse-tool-call-xml'
-import { simplifyReadFileResults } from './util/simplify-tool-results'
-import { countTokensJson } from './util/token-counter'
-import { getRequestContext } from './websockets/request-context'
+} from '../util/messages'
+import { isToolResult, renderReadFilesResult } from '../util/parse-tool-call-xml'
+import { simplifyReadFileResults } from '../util/simplify-tool-results'
+import { countTokensJson } from '../util/token-counter'
+import type { AgentRuntimeEnvironment } from './interfaces'
-import type { AgentResponseTrace } from '@codebuff/bigquery'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
@@ -44,7 +31,6 @@ import type {
ToolResult,
} from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
-import type { WebSocket } from 'ws'
export interface AgentOptions {
userId: string | undefined
@@ -63,8 +49,8 @@ export interface AgentOptions {
}
export const runAgentStep = async (
- ws: WebSocket,
options: AgentOptions,
+ env: AgentRuntimeEnvironment,
): Promise<{
agentState: AgentState
fullResponse: string
@@ -84,17 +70,19 @@ export const runAgentStep = async (
} = options
let agentState = options.agentState
+ if (!agentState) {
+ throw new Error('agentState is required but was undefined')
+ }
+
const { agentContext } = agentState
const startTime = Date.now()
// Get the extracted repo ID from request context
- const requestContext = getRequestContext()
- const repoId = requestContext?.processedRepoId
+ const repoId = env.requestContext?.processedRepoId
// Generates a unique ID for each main prompt run (ie: a step of the agent loop)
- // This is used to link logs within a single agent loop
const agentStepId = crypto.randomUUID()
- trackEvent(AnalyticsEvent.AGENT_STEP, userId ?? '', {
+ env.analytics?.trackEvent?.('AGENT_STEP', userId ?? '', {
agentStepId,
clientSessionId,
fingerprintId,
@@ -110,7 +98,7 @@ export const runAgentStep = async (
let stepWarningMessage = ''
if (needsStepWarning) {
- logger.warn(
+ env.logger?.warn(
`Detected too many consecutive assistant messages without user prompt`,
)
@@ -138,14 +126,19 @@ export const runAgentStep = async (
}
const { addedFiles, updatedFilePaths, clearReadFileToolResults } =
- await getFileReadingUpdates(ws, messageHistory, fileContext, {
- agentStepId,
- clientSessionId,
- fingerprintId,
- userInputId,
- userId,
- repoId,
- })
+ await getFileReadingUpdates(
+ messageHistory,
+ fileContext,
+ {
+ agentStepId,
+ clientSessionId,
+ fingerprintId,
+ userInputId,
+ userId,
+ repoId,
+ },
+ env,
+ )
if (clearReadFileToolResults) {
// Update message history.
for (const message of messageHistory) {
@@ -181,35 +174,17 @@ export const runAgentStep = async (
})
}
- if (ASYNC_AGENTS_ENABLED) {
- // Register this agent in the async manager so it can receive messages
- const isRegistered = asyncAgentManager.getAgent(agentState.agentId)
- if (!isRegistered && userId) {
- asyncAgentManager.registerAgent({
- agentState,
- sessionId: clientSessionId,
- userId,
- fingerprintId,
- userInputId,
- ws,
- fileContext,
- startTime: new Date(),
- status: 'running',
- })
- } else {
- // Update status to running for existing agents
- asyncAgentManager.updateAgentState(agentState, 'running')
- }
- }
-
- const agentTemplate = await getAgentTemplate(agentType, localAgentTemplates)
+ const agentTemplate = await env.templates.getAgentTemplate(
+ agentType,
+ localAgentTemplates,
+ )
if (!agentTemplate) {
throw new Error(
`Agent template not found for type: ${agentType}. Available types: ${Object.keys(localAgentTemplates).join(', ')}`,
)
}
- const stepPrompt = await getAgentPrompt(
+ const stepPrompt = await env.templates.getAgentPrompt(
agentTemplate,
{ type: 'stepPrompt' },
fileContext,
@@ -246,7 +221,7 @@ export const runAgentStep = async (
const { model } = agentTemplate
- const getStream = getAgentStreamFromTemplate({
+ const getStream = env.llm.getAgentStreamFromTemplate({
clientSessionId,
fingerprintId,
userInputId,
@@ -260,7 +235,7 @@ export const runAgentStep = async (
// This is already handled by the saveMessage function which calls updateUserCycleUsage
// If that fails, the promise rejection will bubble up and halt agent execution
} catch (error) {
- logger.error(
+ env.logger?.error(
{ agentId: agentState.agentId, credits, error },
'Failed to add cost to agent state',
)
@@ -269,13 +244,13 @@ export const runAgentStep = async (
)
}
},
- includeCacheControl: supportsCacheControl(agentTemplate.model),
+ includeCacheControl: true, // We'll assume cache control is supported
})
const iterationNum = agentState.messageHistory.length
const system =
- (await getAgentPrompt(
+ (await env.templates.getAgentPrompt(
agentTemplate,
{ type: 'systemPrompt' },
fileContext,
@@ -288,9 +263,10 @@ export const runAgentStep = async (
const agentMessages = getMessagesSubset(
agentState.messageHistory,
systemTokens,
+ env.logger,
)
- logger.debug(
+ env.logger?.debug(
{
iteration: iterationNum,
agentId: agentState.agentId,
@@ -321,7 +297,6 @@ export const runAgentStep = async (
fullResponseChunks,
} = await processStreamWithTools({
stream,
- ws,
agentStepId,
clientSessionId,
fingerprintId,
@@ -336,27 +311,30 @@ export const runAgentStep = async (
agentContext,
onResponseChunk,
fullResponse,
+ env,
})
toolResults.push(...newToolResults)
fullResponse = fullResponseAfterStream
- const agentResponseTrace: AgentResponseTrace = {
- type: 'agent-response',
- created_at: new Date(),
- agent_step_id: agentStepId,
- user_id: userId ?? '',
- id: crypto.randomUUID(),
- payload: {
- output: fullResponse,
- user_input_id: userInputId,
- client_session_id: clientSessionId,
- fingerprint_id: fingerprintId,
- },
+ // Insert trace if analytics environment is available
+ if (env.analytics?.insertTrace) {
+ const agentResponseTrace = {
+ type: 'agent-response',
+ created_at: new Date(),
+ agent_step_id: agentStepId,
+ user_id: userId ?? '',
+ id: crypto.randomUUID(),
+ payload: {
+ output: fullResponse,
+ user_input_id: userInputId,
+ client_session_id: clientSessionId,
+ fingerprint_id: fingerprintId,
+ },
+ }
+ env.analytics.insertTrace(agentResponseTrace)
}
- insertTrace(agentResponseTrace)
-
const newAgentContext = state.agentContext as AgentState['agentContext']
// Use the updated agent state from tool execution
agentState = state.agentState as AgentState
@@ -379,7 +357,7 @@ export const runAgentStep = async (
),
},
]
- logger.debug({ summary: fullResponse }, 'Compacted messages')
+ env.logger?.debug({ summary: fullResponse }, 'Compacted messages')
}
const hasNoToolResults =
@@ -399,12 +377,7 @@ export const runAgentStep = async (
agentContext: newAgentContext,
}
- // Mark agent as completed if it should end turn
- if (ASYNC_AGENTS_ENABLED && shouldEndTurn) {
- asyncAgentManager.updateAgentState(agentState, 'completed')
- }
-
- logger.debug(
+ env.logger?.debug(
{
iteration: iterationNum,
agentId: agentState.agentId,
@@ -429,7 +402,6 @@ export const runAgentStep = async (
}
export const loopAgentSteps = async (
- ws: WebSocket,
{
userInputId,
agentType,
@@ -458,8 +430,12 @@ export const loopAgentSteps = async (
clientSessionId: string
onResponseChunk: (chunk: string | PrintModeEvent) => void
},
+ env: AgentRuntimeEnvironment,
) => {
- const agentTemplate = await getAgentTemplate(agentType, localAgentTemplates)
+ const agentTemplate = await env.templates.getAgentTemplate(
+ agentType,
+ localAgentTemplates,
+ )
if (!agentTemplate) {
throw new Error(`Agent template not found for type: ${agentType}`)
}
@@ -469,7 +445,7 @@ export const loopAgentSteps = async (
// Get the instructions prompt if we have a prompt/params
const instructionsPrompt = hasPrompt
- ? await getAgentPrompt(
+ ? await env.templates.getAgentPrompt(
agentTemplate,
{ type: 'instructionsPrompt' },
fileContext,
@@ -499,15 +475,6 @@ export const loopAgentSteps = async (
),
keepDuringTruncation: true,
},
- prompt &&
- prompt in additionalSystemPrompts && {
- role: 'user' as const,
- content: asSystemInstruction(
- additionalSystemPrompts[
- prompt as keyof typeof additionalSystemPrompts
- ],
- ),
- },
],
instructionsPrompt && {
@@ -527,7 +494,7 @@ export const loopAgentSteps = async (
let currentParams = params
try {
- while (checkLiveUserInput(userId, userInputId, clientSessionId)) {
+ while (env.inputGate.check(userId, userInputId, clientSessionId)) {
// 1. Run programmatic step first if it exists
if (agentTemplate.handleSteps) {
const { agentState: programmaticAgentState, endTurn } =
@@ -539,12 +506,12 @@ export const loopAgentSteps = async (
onResponseChunk,
agentType,
fileContext,
- ws,
template: agentTemplate,
localAgentTemplates,
prompt: currentPrompt,
params: currentParams,
stepsComplete: shouldEndTurn,
+ env,
})
currentAgentState = programmaticAgentState
@@ -553,14 +520,6 @@ export const loopAgentSteps = async (
}
}
- if (ASYNC_AGENTS_ENABLED) {
- const hasMessages =
- asyncAgentManager.getMessages(agentState.agentId).length > 0
- if (hasMessages) {
- shouldEndTurn = false
- }
- }
-
// End turn if programmatic step ended turn, or if the previous runAgentStep ended turn
if (shouldEndTurn) {
return {
@@ -569,19 +528,22 @@ export const loopAgentSteps = async (
}
const { agentState: newAgentState, shouldEndTurn: llmShouldEndTurn } =
- await runAgentStep(ws, {
- userId,
- userInputId,
- clientSessionId,
- fingerprintId,
- onResponseChunk,
- localAgentTemplates,
- agentType,
- fileContext,
- agentState: currentAgentState,
- prompt: currentPrompt,
- params: currentParams,
- })
+ await runAgentStep(
+ {
+ userId,
+ userInputId,
+ clientSessionId,
+ fingerprintId,
+ onResponseChunk,
+ localAgentTemplates,
+ agentType,
+ fileContext,
+ agentState: currentAgentState,
+ prompt: currentPrompt,
+ params: currentParams,
+ },
+ env,
+ )
currentAgentState = newAgentState
shouldEndTurn = llmShouldEndTurn
@@ -593,7 +555,7 @@ export const loopAgentSteps = async (
return { agentState: currentAgentState }
} catch (error) {
// Log the error but still return the state with partial costs
- logger.error(
+ env.logger?.error(
{
error,
agentId: currentAgentState.agentId,
diff --git a/backend/src/run-programmatic-step.ts b/packages/agent-runtime/src/runtime/run-programmatic-step.ts
similarity index 79%
rename from backend/src/run-programmatic-step.ts
rename to packages/agent-runtime/src/runtime/run-programmatic-step.ts
index 778b96006..e9688147d 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/packages/agent-runtime/src/runtime/run-programmatic-step.ts
@@ -1,11 +1,8 @@
import { getToolCallString } from '@codebuff/common/tools/utils'
import { getErrorObject } from '@codebuff/common/util/error'
-import { executeToolCall } from './tools/tool-executor'
-import { logger } from './util/logger'
-import { SandboxManager } from './util/quickjs-sandbox'
-import { getRequestContext } from './websockets/request-context'
-import { sendAction } from './websockets/websocket-action'
+import { executeToolCall } from '../tools/tool-executor'
+import type { AgentRuntimeEnvironment } from './interfaces'
import type { CodebuffToolCall } from '@codebuff/common/tools/list'
import type {
@@ -20,10 +17,6 @@ import type {
ToolResult,
} from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
-import type { WebSocket } from 'ws'
-
-// Global sandbox manager for QuickJS contexts
-const sandboxManager = new SandboxManager()
// Maintains generator state for all agents. Generator state can't be serialized, so we store it in memory.
const agentIdToGenerator: Record = {}
@@ -35,8 +28,6 @@ export function clearAgentGeneratorCache() {
delete agentIdToGenerator[key]
}
agentIdToStepAll.clear()
- // Clean up QuickJS sandboxes
- sandboxManager.dispose()
}
// Function to handle programmatic agents
@@ -53,9 +44,9 @@ export async function runProgrammaticStep(
onResponseChunk,
agentType,
fileContext,
- ws,
localAgentTemplates,
stepsComplete,
+ env,
}: {
template: AgentTemplate
prompt: string | undefined
@@ -67,33 +58,21 @@ export async function runProgrammaticStep(
onResponseChunk: (chunk: string | PrintModeEvent) => void
agentType: AgentTemplateType
fileContext: ProjectFileContext
- ws: WebSocket
localAgentTemplates: Record
stepsComplete: boolean
+ env: AgentRuntimeEnvironment
},
): Promise<{ agentState: AgentState; endTurn: boolean }> {
if (!template.handleSteps) {
throw new Error('No step handler found for agent template ' + template.id)
}
- // Run with either a generator or a sandbox.
+ // Run with a generator (QuickJS sandbox is handled by the backend environment)
let generator = agentIdToGenerator[agentState.agentId]
- let sandbox = sandboxManager.getSandbox(agentState.agentId)
- // Check if we need to initialize a generator (either native or QuickJS-based)
- if (!generator && !sandbox) {
- if (typeof template.handleSteps === 'string') {
- // Initialize QuickJS sandbox for string-based generator
- sandbox = await sandboxManager.getOrCreateSandbox(
- agentState.agentId,
- template.handleSteps,
- {
- agentState,
- prompt,
- params,
- },
- )
- } else {
+ // Check if we need to initialize a generator
+ if (!generator) {
+ if (typeof template.handleSteps === 'function') {
// Initialize native generator
generator = template.handleSteps({
agentState,
@@ -101,6 +80,10 @@ export async function runProgrammaticStep(
params,
})
agentIdToGenerator[agentState.agentId] = generator
+ } else {
+ throw new Error(
+ 'String-based handleSteps should be handled by backend environment',
+ )
}
}
@@ -116,17 +99,13 @@ export async function runProgrammaticStep(
const agentStepId = crypto.randomUUID()
- const requestContext = getRequestContext()
- const repoId = requestContext?.processedRepoId
-
// Initialize state for tool execution
const toolCalls: CodebuffToolCall[] = []
const toolResults: ToolResult[] = []
const state = {
- ws,
fingerprintId,
userId,
- repoId,
+ repoId: env.requestContext?.processedRepoId,
agentTemplate: template,
localAgentTemplates,
sendSubagentChunk: (data: {
@@ -136,10 +115,13 @@ export async function runProgrammaticStep(
chunk: string
prompt?: string
}) => {
- sendAction(ws, {
- type: 'subagent-response-chunk',
- ...data,
- })
+ // Send subagent chunk through IO environment
+ if (env.io.onResponseChunk) {
+ env.io.onResponseChunk({
+ type: 'text',
+ text: data.chunk,
+ } as PrintModeEvent)
+ }
},
agentState: { ...agentState },
agentContext: agentState.agentContext,
@@ -152,17 +134,11 @@ export async function runProgrammaticStep(
try {
// Execute tools synchronously as the generator yields them
do {
- const result = sandbox
- ? await sandbox.executeStep({
- agentState: getPublicAgentState(state.agentState),
- toolResult,
- stepsComplete,
- })
- : generator!.next({
- agentState: getPublicAgentState(state.agentState),
- toolResult,
- stepsComplete,
- })
+ const result = generator!.next({
+ agentState: getPublicAgentState(state.agentState),
+ toolResult,
+ stepsComplete,
+ })
if (result.done) {
endTurn = true
@@ -215,7 +191,6 @@ export async function runProgrammaticStep(
toolCalls,
toolResults,
previousToolCallFinished: Promise.resolve(),
- ws,
agentTemplate: template,
fileContext,
agentStepId,
@@ -226,6 +201,7 @@ export async function runProgrammaticStep(
state,
userId,
autoInsertEndStepParam: true,
+ env,
})
// TODO: Remove messages from state and always use agentState.messageHistory.
@@ -248,7 +224,7 @@ export async function runProgrammaticStep(
const errorMessage = `Error executing handleSteps for agent ${template.id}: ${
error instanceof Error ? error.message : 'Unknown error'
}`
- logger.error(
+ env.logger?.error(
{ error: getErrorObject(error), template: template.id },
errorMessage,
)
@@ -273,10 +249,6 @@ export async function runProgrammaticStep(
}
} finally {
if (endTurn) {
- if (sandbox) {
- // Clean up QuickJS sandbox if execution is complete
- sandboxManager.removeSandbox(agentState.agentId)
- }
delete agentIdToGenerator[agentState.agentId]
agentIdToStepAll.delete(agentState.agentId)
}
diff --git a/packages/agent-runtime/src/templates/agent-registry.ts b/packages/agent-runtime/src/templates/agent-registry.ts
new file mode 100644
index 000000000..cb47ea58f
--- /dev/null
+++ b/packages/agent-runtime/src/templates/agent-registry.ts
@@ -0,0 +1,39 @@
+import type { AgentTemplate } from '@codebuff/common/types/agent-template'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+import { validateAgents } from '@codebuff/common/templates/agent-validation'
+import type { DynamicAgentValidationError } from '@codebuff/common/templates/agent-validation'
+
+// Note: Database lookup is handled by the backend's TemplatesEnvironment
+// This package focuses on local agent template assembly
+
+export type AgentRegistry = Record // NOTE(review): Record's type arguments are not visible in this text — confirm the key/value types
+
+/**
+ * Validates fileContext.agentTemplates (or {} when falsy) with validateAgents and returns a shallow
+ * copy of the resulting templates together with the validation errors; nothing else is merged in.
+ */
+export function assembleLocalAgentTemplates(fileContext: ProjectFileContext): {
+ agentTemplates: Record
+ validationErrors: DynamicAgentValidationError[]
+} {
+ // Validate the agent definitions carried on the file context
+ const { templates: dynamicTemplates, validationErrors } = validateAgents(
+ fileContext.agentTemplates || {},
+ )
+
+ // Only the validated dynamic templates are returned, spread into a fresh object
+ const agentTemplates = { ...dynamicTemplates }
+ return { agentTemplates, validationErrors }
+}
+
+/**
+ * Looks up agentId in the supplied local templates and resolves to that entry, or null when the
+ * entry is missing or falsy. This body performs no database or environment call of its own.
+ */
+export async function getAgentTemplate(
+ agentId: string,
+ localAgentTemplates: Record,
+): Promise {
+ // Plain keyed lookup; `|| null` normalises a missing/falsy entry to null
+ return localAgentTemplates[agentId] || null
+}
diff --git a/packages/agent-runtime/src/templates/interfaces.ts b/packages/agent-runtime/src/templates/interfaces.ts
new file mode 100644
index 000000000..24eb7d489
--- /dev/null
+++ b/packages/agent-runtime/src/templates/interfaces.ts
@@ -0,0 +1,27 @@
+import type { AgentTemplate } from '@codebuff/common/types/agent-template'
+import type { AgentTemplateType, AgentState } from '@codebuff/common/types/session-state'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+
+/**
+ * Contract for the two async template operations declared below: template lookup and prompt lookup
+ */
+export interface TemplatesEnvironment {
+ /**
+ * Resolve an agent template for agentType, given the caller's local templates
+ */
+ getAgentTemplate: (
+ agentType: AgentTemplateType,
+ localTemplates: Record // NOTE(review): Record's type arguments are not visible here — confirm
+ ) => Promise // NOTE(review): resolved type is not visible here — confirm
+
+ /**
+ * Resolve one of the three prompt kinds (systemPrompt, instructionsPrompt, stepPrompt) for a template
+ */
+ getAgentPrompt: (
+ template: AgentTemplate,
+ promptType: { type: 'systemPrompt' | 'instructionsPrompt' | 'stepPrompt' },
+ fileContext: ProjectFileContext,
+ agentState: AgentState,
+ localTemplates: Record // NOTE(review): Record's type arguments are not visible here — confirm
+ ) => Promise // NOTE(review): resolved type is not visible here — confirm
+}
diff --git a/packages/agent-runtime/src/templates/strings.ts b/packages/agent-runtime/src/templates/strings.ts
new file mode 100644
index 000000000..8435e13b3
--- /dev/null
+++ b/packages/agent-runtime/src/templates/strings.ts
@@ -0,0 +1,21 @@
+import type { AgentTemplate } from './types'
+import type {
+ AgentState,
+ AgentTemplateType,
+} from '@codebuff/common/types/session-state'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+
+// Note: This is a simplified version for the agent-runtime package
+// The full implementation with all placeholder substitutions is in the backend's TemplatesEnvironment
+
+export async function getAgentPrompt(
+ agentTemplate: AgentTemplate,
+ promptType: { type: T }, // NOTE(review): T is not declared in the visible signature — confirm the generic parameter
+ fileContext: ProjectFileContext,
+ agentState: AgentState,
+ agentTemplates: Record,
+): Promise {
+ // Returns the raw template field selected by promptType.type; no placeholder substitution is done here.
+ // fileContext, agentState and agentTemplates are accepted but never read by this body.
+ return agentTemplate[promptType.type]
+}
diff --git a/packages/agent-runtime/src/templates/types.ts b/packages/agent-runtime/src/templates/types.ts
new file mode 100644
index 000000000..386e7aa41
--- /dev/null
+++ b/packages/agent-runtime/src/templates/types.ts
@@ -0,0 +1,61 @@
+import { AgentTemplateTypes } from '@codebuff/common/types/session-state'
+
+import type { ToolName } from '@codebuff/common/tools/constants'
+import type {
+ AgentTemplate,
+ StepGenerator,
+ StepHandler,
+} from '@codebuff/common/types/agent-template'
+import type { AgentTemplateType } from '@codebuff/common/types/session-state'
+
+// Re-export for backward compatibility
+export type { AgentTemplate, StepGenerator, StepHandler }
+
+const placeholderNames = [ // Name suffixes from which PLACEHOLDER builds its `{CODEBUFF_<NAME>}` tokens
+ 'AGENT_NAME',
+ 'AGENTS_PROMPT',
+ 'CONFIG_SCHEMA',
+ 'FILE_TREE_PROMPT',
+ 'GIT_CHANGES_PROMPT',
+ 'INITIAL_AGENT_PROMPT',
+ 'KNOWLEDGE_FILES_CONTENTS',
+ 'PROJECT_ROOT',
+ 'REMAINING_STEPS',
+ 'SYSTEM_INFO_PROMPT',
+ 'TOOLS_PROMPT',
+ 'USER_CWD',
+ 'USER_INPUT_PROMPT',
+] as const
+
+type PlaceholderType = { // Maps each element of T to its `{CODEBUFF_<element>}` literal; NOTE(review): T's declaration is not visible here
+ [K in T[number]]: `{CODEBUFF_${K}}`
+}
+
+export const PLACEHOLDER = Object.fromEntries( // Lookup object: placeholder name -> `{CODEBUFF_<name>}` token
+ placeholderNames.map((name) => [name, `{CODEBUFF_${name}}` as const]),
+) as PlaceholderType // Object.fromEntries loses key types, hence the cast
+export type PlaceholderValue = (typeof PLACEHOLDER)[keyof typeof PLACEHOLDER] // Type of the values held in PLACEHOLDER
+
+export const placeholderValues = Object.values(PLACEHOLDER) // Array of the values held in PLACEHOLDER
+
+export const baseAgentToolNames: ToolName[] = [ // NOTE(review): `as const` below yields a readonly tuple but the annotation is a mutable array — confirm this type-checks
+ 'create_plan',
+ 'run_terminal_command',
+ 'str_replace',
+ 'write_file',
+ 'spawn_agents',
+ 'add_subgoal',
+ 'browser_logs',
+ 'code_search',
+ 'end_turn',
+ 'read_files',
+ 'think_deeply',
+ 'update_subgoal',
+] as const
+
+export const baseAgentSubagents: AgentTemplateType[] = [ // NOTE(review): `as const` below yields a readonly tuple but the annotation is a mutable array — confirm this type-checks
+ AgentTemplateTypes.file_picker,
+ AgentTemplateTypes.researcher,
+ AgentTemplateTypes.thinker,
+ AgentTemplateTypes.reviewer,
+] as const
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
new file mode 100644
index 000000000..36786bed7
--- /dev/null
+++ b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -0,0 +1,237 @@
+import { toolNames } from '@codebuff/common/tools/constants'
+import { buildArray } from '@codebuff/common/util/array'
+import { generateCompactId } from '@codebuff/common/util/string'
+
+import { expireMessages } from '../util/messages'
+import { executeCustomToolCall, executeToolCall } from './tool-executor'
+import type { AgentRuntimeEnvironment } from '../runtime/interfaces'
+
+import type { CustomToolCall } from './tool-executor'
+import type { AgentTemplate } from '@codebuff/common/types/agent-template'
+import type { ToolName } from '@codebuff/common/tools/constants'
+import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type {
+ AgentState,
+ Subgoal,
+ ToolResult,
+} from '@codebuff/common/types/session-state'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+import type { ToolCallPart } from 'ai'
+
+export type ToolCallError = { // NOTE(review): tool-executor.ts exports a same-named type that uses `input` instead of `args` — confirm both are needed
+ toolName?: string
+ args: Record // NOTE(review): Record's type arguments are not visible here — confirm
+ error: string
+} & Omit // NOTE(review): Omit's type arguments are not visible here — confirm
+
+// Forwards every stream chunk to onResponseChunk, accumulates the full response, and returns it with the
+// collected tool calls/results. Simplified: processStreamAsText below never invokes the tool callbacks built here.
+export async function processStreamWithTools(options: {
+ stream: AsyncGenerator | ReadableStream
+ agentStepId: string
+ clientSessionId: string
+ fingerprintId: string
+ userInputId: string
+ userId: string | undefined
+ repoId: string | undefined
+ agentTemplate: AgentTemplate
+ localAgentTemplates: Record
+ fileContext: ProjectFileContext
+ messages: CodebuffMessage[]
+ agentState: AgentState
+ agentContext: Record
+ onResponseChunk: (chunk: string | PrintModeEvent) => void
+ fullResponse: string
+ env: AgentRuntimeEnvironment
+}) {
+ const {
+ stream,
+ agentStepId,
+ clientSessionId,
+ fingerprintId,
+ userInputId,
+ userId,
+ repoId,
+ agentTemplate,
+ localAgentTemplates,
+ fileContext,
+ agentContext,
+ agentState,
+ onResponseChunk,
+ env,
+ } = options
+ const fullResponseChunks: string[] = [options.fullResponse] // seeded with the response so far, so this array is never empty
+
+ const messages = [...options.messages] // shallow copy: pushes below do not touch the caller's array
+
+ const toolResults: ToolResult[] = []
+ const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
+ const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
+ Promise.withResolvers() // requires a runtime that provides Promise.withResolvers
+ let previousToolCallFinished = streamDonePromise // tool executions chain off this, so none runs before the stream has ended
+ const state: Record = {
+ fingerprintId,
+ userId,
+ repoId,
+ agentTemplate,
+ localAgentTemplates,
+ sendSubagentChunk: (data: {
+ userInputId: string
+ agentId: string
+ agentType: string
+ chunk: string
+ prompt?: string
+ }) => {
+ // Only data.chunk is forwarded (as a 'text' event); the other fields of data are not used here
+ if (env.io.onResponseChunk) {
+ env.io.onResponseChunk({
+ type: 'text',
+ text: data.chunk,
+ } as PrintModeEvent)
+ }
+ },
+
+ agentState,
+ agentContext,
+ messages,
+ }
+
+ function toolCallback(toolName: T) { // NOTE(review): T is not declared in the visible signature — confirm the generic parameter
+ return {
+ onTagStart: () => {},
+ onTagEnd: async (_: string, input: Record) => {
+ // Queue the built-in tool behind the previous call; the returned promise becomes the new tail of the chain
+ previousToolCallFinished = executeToolCall({
+ toolName,
+ input,
+ toolCalls,
+ toolResults,
+ previousToolCallFinished,
+ agentTemplate,
+ fileContext,
+ agentStepId,
+ clientSessionId,
+ userInputId,
+ fullResponse: fullResponseChunks.join(''),
+ onResponseChunk,
+ state,
+ userId,
+ env,
+ })
+ },
+ }
+ }
+ function customToolCallback(toolName: string) {
+ return {
+ onTagStart: () => {},
+ onTagEnd: async (_: string, input: Record) => {
+ // Same chaining as toolCallback, but routed through the custom-tool executor
+ previousToolCallFinished = executeCustomToolCall({
+ toolName,
+ input,
+ toolCalls,
+ toolResults,
+ previousToolCallFinished,
+ agentTemplate,
+ fileContext,
+ agentStepId,
+ clientSessionId,
+ userInputId,
+ fullResponse: fullResponseChunks.join(''),
+ onResponseChunk,
+ state,
+ userId,
+ env,
+ })
+ },
+ }
+ }
+
+ // Builds one callback per built-in tool name and per custom tool definition, plus an error sink that
+ // records a text tool result. processStreamAsText accepts these arguments but, as written in this file,
+ // only passes chunks through, so neither the callbacks nor the error sink are ever called.
+ const streamWithTags = processStreamAsText(
+ stream,
+ Object.fromEntries([
+ ...toolNames.map((toolName) => [toolName, toolCallback(toolName)]),
+ ...Object.keys(fileContext.customToolDefinitions).map((toolName) => [
+ toolName,
+ customToolCallback(toolName),
+ ]),
+ ]),
+ (toolName, error) => {
+ toolResults.push({
+ toolName,
+ toolCallId: generateCompactId(),
+ output: { type: 'text', value: error },
+ })
+ },
+ onResponseChunk,
+ {
+ userId,
+ model: agentTemplate.model,
+ agentName: agentTemplate.id,
+ },
+ )
+
+ for await (const chunk of streamWithTags) {
+ onResponseChunk(chunk)
+ fullResponseChunks.push(chunk)
+ }
+
+ state.messages = buildArray([ // drop messages expiring at end of agent step, then append the assistant reply
+ ...expireMessages(state.messages, 'agentStep'),
+ fullResponseChunks.length > 0 && { // always true here because the array was seeded above
+ role: 'assistant' as const,
+ content: fullResponseChunks.join(''),
+ },
+ ])
+
+ resolveStreamDonePromise() // releases the head of the tool-call chain
+ await previousToolCallFinished // wait for the tail of the chain, i.e. every queued tool call
+
+ return {
+ toolCalls,
+ toolResults,
+ state,
+ fullResponse: fullResponseChunks.join(''),
+ fullResponseChunks,
+ }
+}
+
+// Pass-through generator: re-yields chunks from the stream without parsing tool tags or calling any callback
+async function* processStreamAsText(
+ stream: AsyncGenerator | ReadableStream,
+ toolCallbacks: Record,
+ onToolError: (toolName: string, error: string) => void,
+ onResponseChunk: (chunk: string | PrintModeEvent) => void,
+ context: {
+ userId: string | undefined
+ model: string | string[]
+ agentName: string
+ },
+): AsyncGenerator {
+ // Placeholder: toolCallbacks, onToolError, onResponseChunk and context are accepted but never read below.
+ // The async-iterable branch drops non-string chunks; the reader branch yields every value unfiltered.
+
+ if (Symbol.asyncIterator in stream) {
+ for await (const chunk of stream as AsyncGenerator) {
+ if (typeof chunk === 'string') {
+ yield chunk
+ }
+ }
+ } else {
+ const reader = (stream as ReadableStream).getReader()
+ try {
+ while (true) {
+ const { done, value } = await reader.read()
+ if (done) break
+ yield value
+ }
+ } finally {
+ reader.releaseLock() // release the reader even if the consumer stops early or read() throws
+ }
+ }
+}
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
new file mode 100644
index 000000000..73fd356c4
--- /dev/null
+++ b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -0,0 +1,527 @@
+import { endsAgentStepParam } from '@codebuff/common/tools/constants'
+import { renderToolResults } from '@codebuff/common/tools/utils'
+import { generateCompactId } from '@codebuff/common/util/string'
+import z from 'zod/v4'
+import { convertJsonSchemaToZod } from 'zod-from-json-schema'
+
+import { asSystemMessage } from '../util/messages'
+import type { AgentRuntimeEnvironment } from '../runtime/interfaces'
+
+import type { AgentTemplate } from '@codebuff/common/types/agent-template'
+import type { ToolName } from '@codebuff/common/tools/constants'
+import type {
+ ClientToolCall,
+ ClientToolName,
+ CodebuffToolCall,
+} from '@codebuff/common/tools/list'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { ToolResult } from '@codebuff/common/types/session-state'
+import type {
+ customToolDefinitionsSchema,
+ ProjectFileContext,
+} from '@codebuff/common/util/file'
+import type { ToolCallPart } from 'ai'
+
+// Tool definitions and handlers are injected through the environment
+// The backend will provide these through the runtime environment
+
+export type CustomToolCall = { // Shape returned by parseRawCustomToolCall on success
+ toolName: string
+ input: Record // NOTE(review): Record's type arguments are not visible here — confirm
+} & Omit // NOTE(review): Omit's type arguments are not visible here — confirm
+
+export type ToolCallError = { // Shape returned by the parseRaw* functions on failure; callers detect it via `'error' in toolCall`
+ toolName?: string
+ input: Record // NOTE(review): Record's type arguments are not visible here — confirm
+ error: string
+} & Pick // NOTE(review): Pick's type arguments are not visible here — confirm
+
+export function parseRawToolCall( // Validates a raw built-in tool call against toolDefs; returns the parsed call or a ToolCallError
+ rawToolCall: {
+ toolName: T
+ toolCallId: string
+ input: Record
+ },
+ toolDefs: Record,
+ autoInsertEndStepParam: boolean = false,
+): CodebuffToolCall | ToolCallError {
+ const toolName = rawToolCall.toolName
+
+ if (!(toolName in toolDefs)) { // unknown tool: report instead of throwing
+ return {
+ toolName,
+ toolCallId: rawToolCall.toolCallId,
+ input: rawToolCall.input,
+ error: `Tool ${toolName} not found`,
+ }
+ }
+ const validName = toolName as T
+
+ const processedParameters: Record = {} // shallow copy so the caller's input object is not mutated below
+ for (const [param, val] of Object.entries(rawToolCall.input ?? {})) {
+ processedParameters[param] = val
+ }
+
+ // When requested, fill in the end-step parameter with this tool's own endsAgentStep value
+ if (autoInsertEndStepParam) {
+ processedParameters[endsAgentStepParam] =
+ toolDefs[validName].endsAgentStep
+ }
+
+ const paramsSchema = toolDefs[validName].endsAgentStep // step-ending tools must also carry the end-step literal
+ ? (
+ toolDefs[validName]
+ .parameters satisfies z.ZodObject as z.ZodObject
+ ).extend({
+ [endsAgentStepParam]: z.literal(
+ toolDefs[validName].endsAgentStep,
+ ),
+ })
+ : toolDefs[validName].parameters
+ const result = paramsSchema.safeParse(processedParameters)
+
+ if (!result.success) { // schema violation: return the issues as pretty-printed JSON in the error text
+ return {
+ toolName: validName,
+ toolCallId: rawToolCall.toolCallId,
+ input: rawToolCall.input,
+ error: `Invalid parameters for ${validName}: ${JSON.stringify(
+ result.error.issues,
+ null,
+ 2,
+ )}`,
+ }
+ }
+
+ if (endsAgentStepParam in result.data) { // the end-step marker is stripped from the returned input
+ delete result.data[endsAgentStepParam]
+ }
+
+ return {
+ toolName: validName,
+ input: result.data,
+ toolCallId: rawToolCall.toolCallId,
+ } as CodebuffToolCall
+}
+
+export interface ExecuteToolCallParams { // Argument bag shared by executeToolCall and executeCustomToolCall
+ toolName: T // NOTE(review): T is not declared in the visible header — confirm the generic parameter
+ input: Record
+ toolCalls: (CodebuffToolCall | CustomToolCall)[] // appended to in place by the executors
+ toolResults: ToolResult[] // appended to in place by the executors
+ previousToolCallFinished: Promise // the executors chain their work after this promise
+ agentTemplate: AgentTemplate
+ fileContext: ProjectFileContext
+ agentStepId: string
+ clientSessionId: string
+ userInputId: string
+ fullResponse: string
+ onResponseChunk: (chunk: string | PrintModeEvent) => void
+ state: Record // the executors push tool-result messages onto state.messages
+ userId: string | undefined
+ autoInsertEndStepParam?: boolean // both executors default this to false
+ env: AgentRuntimeEnvironment
+}
+
+export function executeToolCall({ // Parses a built-in tool call, then runs it (server handler or client request) after previousToolCallFinished
+ toolName,
+ input,
+ toolCalls,
+ toolResults,
+ previousToolCallFinished,
+ agentTemplate,
+ fileContext,
+ agentStepId,
+ clientSessionId,
+ userInputId,
+ fullResponse,
+ onResponseChunk,
+ state,
+ userId,
+ autoInsertEndStepParam = false,
+ env,
+}: ExecuteToolCallParams): Promise {
+ const toolCall: CodebuffToolCall | ToolCallError = parseRawToolCall(
+ {
+ toolName,
+ toolCallId: generateCompactId(),
+ input,
+ },
+ env.tools.definitions,
+ autoInsertEndStepParam,
+ )
+ if ('error' in toolCall) { // parse/validation failure: record it as a tool result; no tool_call event is emitted
+ toolResults.push({
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text',
+ value: toolCall.error,
+ },
+ })
+ env.logger?.debug(
+ { toolCall, error: toolCall.error },
+ `${toolName} error: ${toolCall.error}`,
+ )
+ return previousToolCallFinished
+ }
+
+ onResponseChunk({ // announced before the availability and input-gate checks below
+ type: 'tool_call',
+ toolCallId: toolCall.toolCallId,
+ toolName,
+ input: toolCall.input,
+ })
+
+ toolCalls.push(toolCall)
+
+ // Reject tools that are not listed in this agent template's toolNames
+ if (!agentTemplate.toolNames.includes(toolCall.toolName)) {
+ toolResults.push({
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text',
+ value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
+ },
+ })
+ return previousToolCallFinished
+ }
+
+ // Ask the input gate whether this user input is still live; checked now, not when the queued work runs
+ if (!env.inputGate.check(userId, userInputId, clientSessionId)) {
+ toolResults.push({
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text',
+ value: 'User input cancelled',
+ },
+ })
+ return previousToolCallFinished
+ }
+
+ // A handler registered under this tool name in env.tools.handlers takes precedence over the client
+ const serverSideHandler = env.tools.handlers[toolCall.toolName]
+ if (serverSideHandler) {
+ return previousToolCallFinished.then(async () => {
+ try {
+ const handlerResult = serverSideHandler({
+ previousToolCallFinished: Promise.resolve(), // ordering is already enforced by the .then above
+ toolCall,
+ fileContext,
+ state,
+ clientSessionId,
+ userInputId,
+ })
+
+ // Two handler return shapes are accepted: an object with a `result` key, or a direct value/promise
+ let resultValue: string
+
+ if (handlerResult && typeof handlerResult === 'object' && 'result' in handlerResult) {
+ // Object shape: await its `result`, then apply its optional `state`
+ resultValue = await handlerResult.result
+ if (handlerResult.state) {
+ // Shallow merge via Object.assign: returned keys overwrite existing ones.
+ // No agentState-specific handling is performed in this body.
+ Object.assign(state, handlerResult.state)
+ }
+ } else {
+ // Direct shape: a string is used as-is, otherwise `.value`, falling back to 'Success' when falsy
+ const result = await handlerResult
+ resultValue = typeof result === 'string' ? result : (result?.value || 'Success')
+ }
+
+ const toolResult = {
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text' as const,
+ value: resultValue,
+ },
+ }
+
+ env.logger?.debug(
+ { input, toolResult },
+ `${toolName} server-side tool call & result (${toolResult.toolCallId})`,
+ )
+
+ onResponseChunk({
+ type: 'tool_result',
+ toolCallId: toolResult.toolCallId,
+ output: toolResult.output,
+ })
+
+ toolResults.push(toolResult)
+
+ state.messages.push({ // the rendered result is appended as a user-role message wrapped by asSystemMessage
+ role: 'user' as const,
+ content: asSystemMessage(renderToolResults([toolResult])),
+ })
+ } catch (error) { // handler failures become an error-text tool result rather than rejecting the chain
+ const errorMessage = `Server-side tool execution failed: ${error instanceof Error ? error.message : 'Unknown error'}`
+ const toolResult = {
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text' as const,
+ value: errorMessage,
+ },
+ }
+
+ env.logger?.error(
+ { input, error, toolResult },
+ `${toolName} server-side tool execution error`,
+ )
+
+ onResponseChunk({
+ type: 'tool_result',
+ toolCallId: toolResult.toolCallId,
+ output: toolResult.output,
+ })
+
+ toolResults.push(toolResult)
+
+ state.messages.push({
+ role: 'user' as const,
+ content: asSystemMessage(renderToolResults([toolResult])),
+ })
+ }
+ })
+ }
+
+ // No server-side handler: request execution through env.io.requestToolCall (no try/catch here, so a rejection propagates)
+ return previousToolCallFinished.then(async () => {
+ const clientToolResult = await env.io.requestToolCall(
+ userInputId,
+ toolCall.toolName,
+ toolCall.input,
+ )
+
+ const result = clientToolResult.error ?? // error text wins; a missing or non-text output becomes the literal string 'undefined'
+ (clientToolResult.output?.type === 'text'
+ ? clientToolResult.output.value
+ : 'undefined')
+
+ const toolResult = {
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text' as const,
+ value: result as string,
+ },
+ }
+
+ env.logger?.debug(
+ { input, toolResult },
+ `${toolName} client tool call & result (${toolResult.toolCallId})`,
+ )
+
+ if (result === undefined) { // only reachable when a text output carries an undefined value (see fallback above)
+ return
+ }
+
+ onResponseChunk({
+ type: 'tool_result',
+ toolCallId: toolResult.toolCallId,
+ output: toolResult.output,
+ })
+
+ toolResults.push(toolResult)
+
+ state.messages.push({
+ role: 'user' as const,
+ content: asSystemMessage(renderToolResults([toolResult])),
+ })
+ })
+}
+
+export function parseRawCustomToolCall( // Validates a raw custom tool call against its JSON schema; returns the call or a ToolCallError
+ customToolDefs: z.infer,
+ rawToolCall: {
+ toolName: string
+ toolCallId: string
+ input: Record
+ },
+ autoInsertEndStepParam: boolean = false,
+): CustomToolCall | ToolCallError {
+ const toolName = rawToolCall.toolName
+
+ if (!(toolName in customToolDefs)) { // unknown tool: report instead of throwing
+ return {
+ toolName,
+ toolCallId: rawToolCall.toolCallId,
+ input: rawToolCall.input,
+ error: `Tool ${toolName} not found`,
+ }
+ }
+
+ const processedParameters: Record = {} // shallow copy so the caller's input object is not mutated below
+ for (const [param, val] of Object.entries(rawToolCall.input ?? {})) {
+ processedParameters[param] = val
+ }
+
+ // When requested, fill in the end-step parameter with this tool's own endsAgentStep value
+ if (autoInsertEndStepParam) {
+ processedParameters[endsAgentStepParam] =
+ customToolDefs[toolName].endsAgentStep
+ }
+
+ const jsonSchema = JSON.parse( // JSON round-trip clone so the stored tool definition is not mutated below
+ JSON.stringify(customToolDefs[toolName].inputJsonSchema),
+ )
+ if (customToolDefs[toolName].endsAgentStep) { // step-ending tools must carry the end-step flag set to true
+ if (!jsonSchema.properties) {
+ jsonSchema.properties = {}
+ }
+ jsonSchema.properties[endsAgentStepParam] = {
+ const: true,
+ type: 'boolean',
+ description: 'Easp flag must be set to true',
+ }
+ if (!jsonSchema.required) {
+ jsonSchema.required = []
+ }
+ jsonSchema.required.push(endsAgentStepParam)
+ }
+ const paramsSchema = convertJsonSchemaToZod(jsonSchema)
+ const result = paramsSchema.safeParse(
+ processedParameters,
+ ) as z.ZodSafeParseResult
+
+ if (!result.success) { // schema violation: return the issues as pretty-printed JSON in the error text
+ return {
+ toolName: toolName,
+ toolCallId: rawToolCall.toolCallId,
+ input: rawToolCall.input,
+ error: `Invalid parameters for ${toolName}: ${JSON.stringify(
+ result.error.issues,
+ null,
+ 2,
+ )}`,
+ }
+ }
+
+ const input = JSON.parse(JSON.stringify(rawToolCall.input)) // returned input is a clone of the RAW input, not of result.data
+ if (endsAgentStepParam in input) { // the end-step marker is stripped from the returned input
+ delete input[endsAgentStepParam]
+ }
+ return {
+ toolName: toolName,
+ input,
+ toolCallId: rawToolCall.toolCallId,
+ }
+}
+
+export function executeCustomToolCall({ // Parses a custom tool call, then requests it through env.io.requestToolCall after previousToolCallFinished
+ toolName,
+ input,
+ toolCalls,
+ toolResults,
+ previousToolCallFinished,
+ agentTemplate,
+ fileContext,
+ clientSessionId,
+ userInputId,
+ onResponseChunk,
+ state,
+ userId,
+ autoInsertEndStepParam = false,
+ env,
+}: ExecuteToolCallParams): Promise {
+ const toolCall: CustomToolCall | ToolCallError = parseRawCustomToolCall(
+ fileContext.customToolDefinitions,
+ {
+ toolName,
+ toolCallId: generateCompactId(),
+ input,
+ },
+ autoInsertEndStepParam,
+ )
+ if ('error' in toolCall) { // parse/validation failure: record it as a tool result; no tool_call event is emitted
+ toolResults.push({
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text',
+ value: toolCall.error,
+ },
+ })
+ env.logger?.debug(
+ { toolCall, error: toolCall.error },
+ `${toolName} error: ${toolCall.error}`,
+ )
+ return previousToolCallFinished
+ }
+
+ onResponseChunk({ // announced before the availability check below
+ type: 'tool_call',
+ toolCallId: toolCall.toolCallId,
+ toolName,
+ input: toolCall.input,
+ })
+
+ toolCalls.push(toolCall)
+
+ // Reject tools that are not listed in this agent template's toolNames
+ if (!(agentTemplate.toolNames as string[]).includes(toolCall.toolName)) {
+ toolResults.push({
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text',
+ value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
+ },
+ })
+ return previousToolCallFinished
+ }
+
+ return previousToolCallFinished
+ .then(async () => {
+ if (!env.inputGate.check(userId, userInputId, clientSessionId)) { // unlike executeToolCall, the gate is checked when the queued work runs
+ return '' // an empty string is not undefined, so it is still recorded below as the tool result
+ }
+
+ const clientToolResult = await env.io.requestToolCall(
+ userInputId,
+ toolCall.toolName,
+ toolCall.input,
+ )
+ return (
+ clientToolResult.error ?? // error text wins; a missing or non-text output becomes the literal string 'undefined'
+ (clientToolResult.output?.type === 'text'
+ ? clientToolResult.output.value
+ : 'undefined')
+ )
+ })
+ .then((result) => {
+ const toolResult = {
+ toolName,
+ toolCallId: toolCall.toolCallId,
+ output: {
+ type: 'text' as const,
+ value: result as string,
+ },
+ }
+ env.logger?.debug(
+ { input, toolResult },
+ `${toolName} custom tool call & result (${toolResult.toolCallId})`,
+ )
+ if (result === undefined) { // only reachable when a text output carries an undefined value
+ return
+ }
+
+ onResponseChunk({
+ type: 'tool_result',
+ toolCallId: toolResult.toolCallId,
+ output: toolResult.output,
+ })
+
+ toolResults.push(toolResult)
+
+ state.messages.push({ // the rendered result is appended as a user-role message wrapped by asSystemMessage
+ role: 'user' as const,
+ content: asSystemMessage(renderToolResults([toolResult])),
+ })
+ })
+}
diff --git a/packages/agent-runtime/src/util/messages.ts b/packages/agent-runtime/src/util/messages.ts
new file mode 100644
index 000000000..c946df706
--- /dev/null
+++ b/packages/agent-runtime/src/util/messages.ts
@@ -0,0 +1,301 @@
+import { AssertionError } from 'assert'
+
+import { buildArray } from '@codebuff/common/util/array'
+import { closeXml } from '@codebuff/common/util/xml'
+
+import { simplifyTerminalCommandResults } from './simplify-tool-results'
+import { countTokensJson } from './token-counter'
+
+import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { LoggerEnvironment } from '../analytics/interfaces'
+
+// Console-backed fallback logger; note it prints the message (or '') first and the data object second
+const defaultLogger: LoggerEnvironment = {
+ debug: (data: any, message?: string) => console.debug(message || '', data),
+ info: (data: any, message?: string) => console.info(message || '', data),
+ warn: (data: any, message?: string) => console.warn(message || '', data),
+ error: (data: any, message?: string) => console.error(message || '', data),
+}
+
+export type System = string | Array<{ text: string }> // A system prompt: either one string or a list of text parts (joined by messagesWithSystem)
+
+export function messagesWithSystem( // Returns a new array: one system-role message followed by the given messages
+ messages: CodebuffMessage[],
+ system: System,
+): CodebuffMessage[] {
+ return [
+ {
+ role: 'system',
+ content:
+ typeof system === 'string'
+ ? system
+ : system.map((part) => part.text).join('\n\n'), // list form: parts are joined with a blank line between them
+ },
+ ...messages,
+ ]
+}
+
+export function asUserMessage(str: string): string { // Appends the user_message closing tag to str
+ return `${str}${closeXml('user_message')}` // NOTE(review): no opening user_message tag is prepended in this text — confirm it was not lost
+}
+export function parseUserMessage(str: string): string | undefined { // Returns the text before the first user_message closing tag, or undefined if there is none
+ const match = str.match(/(.*?)<\/user_message>/s) // NOTE(review): pattern has no opening tag, so capture starts at the beginning of str — confirm; /s lets . span newlines
+ return match ? match[1] : undefined
+}
+
+export function asSystemInstruction(str: string): string { // Appends the system_instructions closing tag to str
+ return `${str}${closeXml('system_instructions')}` // NOTE(review): no opening system_instructions tag is prepended in this text — confirm it was not lost
+}
+
+export function asSystemMessage(str: string): string { // Appends the system closing tag to str
+ return `${str}${closeXml('system')}` // NOTE(review): no opening system tag is prepended in this text — confirm it was not lost
+}
+
+export function isSystemInstruction(str: string): boolean { // True when str ends with the system_instructions closing tag
+ return (
+ str.startsWith('') && // NOTE(review): startsWith('') is always true — the intended opening tag looks missing; confirm
+ str.endsWith(closeXml('system_instructions'))
+ )
+}
+
+export function isSystemMessage(str: string): boolean { // True when str ends with the system closing tag
+ return str.startsWith('') && str.endsWith(closeXml('system')) // NOTE(review): startsWith('') is always true — the intended opening tag looks missing; confirm
+}
+
+export function castAssistantMessage( // Re-labels an assistant message as a user message tagged previous_assistant_message
+ message: CodebuffMessage,
+): CodebuffMessage | null {
+ if (message.role !== 'assistant') { // non-assistant messages are returned untouched (same object)
+ return message
+ }
+ if (typeof message.content === 'string') {
+ return {
+ content: `${message.content}${closeXml('previous_assistant_message')}`,
+ role: 'user' as const,
+ }
+ }
+ const content = buildArray( // array content: text parts get the closing tag appended, every other part maps to null
+ message.content.map((m) => {
+ if (m.type === 'text') {
+ return {
+ ...m,
+ text: `${m.text}${closeXml('previous_assistant_message')}`,
+ }
+ }
+ return null
+ }),
+ )
+ return content // NOTE(review): if buildArray always returns an array this is always truthy, even when empty — confirm the null branch is reachable
+ ? {
+ role: 'user' as const,
+ content,
+ }
+ : null
+}
+
+// How many simplifiable texts are kept in full before simplification starts being applied
+const numTerminalCommandsToKeep = 5
+
+/**
+ * Decides whether one text is kept verbatim or replaced by its simplified form.
+ * @param text - Text to potentially simplify
+ * @param numKept - Number of simplifiable texts already kept in full form
+ * @returns The chosen text plus the updated kept-count
+ */
+function simplifyTerminalHelper(
+ text: string,
+ numKept: number,
+): { result: string; numKept: number } {
+ const simplifiedText = simplifyTerminalCommandResults(text)
+
+ // Keep the original while under the quota; only texts that simplification would change consume quota
+ if (numKept < numTerminalCommandsToKeep && simplifiedText !== text) {
+ return { result: text, numKept: numKept + 1 }
+ }
+
+ return {
+ result: simplifiedText,
+ numKept,
+ }
+}
+
+// Used by trimMessagesToFitTokenLimit: removal target is (1 - factor) of the budget left after must-keep messages
+const shortenedMessageTokenFactor = 0.5
+const replacementMessage = { // stands in for each run of consecutive removed messages
+ role: 'user',
+ content: asSystemMessage('Previous message(s) omitted due to length'),
+} satisfies CodebuffMessage
+
+/**
+ * Trims messages from the beginning to fit within token limits while preserving
+ * messages flagged keepDuringTruncation. Also simplifies terminal command outputs.
+ *
+ * The function:
+ * 1. Returns the input array unchanged when it is already under the limit
+ * 2. Walks newest to oldest, simplifying user/assistant text after N texts are kept in full
+ * 3. Walks oldest to newest, replacing removable messages with a placeholder until enough is removed
+ *
+ * @param messages - Array of messages to trim
+ * @param systemTokens - Number of tokens used by system prompt
+ * @param maxTotalTokens - Maximum total tokens allowed, defaults to 190,000
+ * @returns The original array, or a new trimmed array with placeholders for removed runs
+ */
+export function trimMessagesToFitTokenLimit(
+ messages: CodebuffMessage[],
+ systemTokens: number,
+ maxTotalTokens: number = 190_000,
+): CodebuffMessage[] {
+ const maxMessageTokens = maxTotalTokens - systemTokens
+
+ // Fast path: nothing to do when the messages already fit
+ const initialTokens = countTokensJson(messages)
+
+ if (initialTokens < maxMessageTokens) {
+ return messages
+ }
+
+ const shortenedMessages: CodebuffMessage[] = []
+ let numKept = 0 // running count shared across all messages, so the newest texts get the keep-in-full quota
+
+ // Pass 1: process messages from newest to oldest (results are pushed in reverse and flipped afterwards)
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const m = messages[i]
+ let message: CodebuffMessage
+ if (m.role === 'tool' || m.role === 'system') { // tool and system messages pass through unmodified
+ message = messages[i]
+ } else if (m.role === 'user') {
+ let newContent: typeof m.content
+
+ // String content: run it through the simplifier as a whole
+ if (typeof m.content === 'string') {
+ const result = simplifyTerminalHelper(m.content, numKept)
+ message = { role: m.role, content: result.result } // note: only role and content are carried over in this branch
+ numKept = result.numKept
+ } else {
+ // Array content: simplify each text part individually
+ newContent = []
+ // Process content parts from newest to oldest
+ for (let j = m.content.length - 1; j >= 0; j--) {
+ const messagePart = m.content[j]
+ // Non-text parts are kept as they are
+ if (messagePart.type !== 'text') {
+ newContent.push(messagePart)
+ continue
+ }
+
+ const result = simplifyTerminalHelper(messagePart.text, numKept)
+ newContent.push({ ...messagePart, text: result.result })
+ numKept = result.numKept
+ }
+ newContent.reverse() // restore the original part order
+ message = { ...m, content: newContent }
+ }
+ } else if (m.role === 'assistant') {
+ let newContent: typeof m.content
+
+ // String content: same handling as the user branch above
+ if (typeof m.content === 'string') {
+ const result = simplifyTerminalHelper(m.content, numKept)
+ message = { role: m.role, content: result.result } // note: only role and content are carried over in this branch
+ numKept = result.numKept
+ } else {
+ // Array content: simplify each text part individually
+ newContent = []
+ // Process content parts from newest to oldest
+ for (let j = m.content.length - 1; j >= 0; j--) {
+ const messagePart = m.content[j]
+ // Non-text parts are kept as they are
+ if (messagePart.type !== 'text') {
+ newContent.push(messagePart)
+ continue
+ }
+
+ const result = simplifyTerminalHelper(messagePart.text, numKept)
+ newContent.push({ ...messagePart, text: result.result })
+ numKept = result.numKept
+ }
+ newContent.reverse() // restore the original part order
+ message = { ...m, content: newContent }
+ }
+ } else {
+ m satisfies never // compile-time exhaustiveness check over the role union
+ throw new AssertionError({ message: 'Not a valid role' })
+ }
+
+ shortenedMessages.push(message)
+ }
+ shortenedMessages.reverse() // back to oldest-first order
+
+ const requiredTokens = countTokensJson( // tokens of the messages that must survive truncation
+ shortenedMessages.filter((m) => m.keepDuringTruncation),
+ )
+ let removedTokens = 0
+ const tokensToRemove =
+ (maxMessageTokens - requiredTokens) * (1 - shortenedMessageTokenFactor)
+
+ const placeholder = 'deleted' // sentinel marking a run of removed messages
+ const filteredMessages: (CodebuffMessage | typeof placeholder)[] = []
+ for (const message of shortenedMessages) { // Pass 2: oldest first, so the earliest removable messages go first
+ if (removedTokens >= tokensToRemove || message.keepDuringTruncation) {
+ filteredMessages.push(message)
+ continue
+ }
+ removedTokens += countTokensJson(message)
+ if ( // insert one placeholder per run of consecutive removals
+ filteredMessages.length === 0 ||
+ filteredMessages[filteredMessages.length - 1] !== placeholder
+ ) {
+ filteredMessages.push(placeholder)
+ removedTokens -= countTokensJson(replacementMessage) // the placeholder itself costs tokens, so discount it
+ }
+ }
+
+ return filteredMessages.map((m) =>
+ m === placeholder ? replacementMessage : m,
+ )
+}
+
+export function getMessagesSubset( // Trims messages to the default token limit and strips provider cache-control options
+ messages: CodebuffMessage[],
+ otherTokens: number,
+ logger: LoggerEnvironment = defaultLogger,
+): CodebuffMessage[] {
+ const messagesSubset = trimMessagesToFitTokenLimit(messages, otherTokens)
+
+ // Remove cacheControl in place; the trimmer can return the caller's own objects, so inputs may be mutated
+ for (const message of messagesSubset) {
+ delete message.providerOptions?.anthropic?.cacheControl
+ delete message.providerOptions?.openrouter?.cacheControl
+ }
+
+ // Only a diagnostic remains here: log when the subset is empty (no cache marker is set by this body)
+ const lastMessage = messagesSubset[messagesSubset.length - 1]
+ if (!lastMessage) {
+ logger.debug(
+ {
+ messages,
+ messagesSubset,
+ otherTokens,
+ },
+ 'No last message found in messagesSubset!',
+ )
+ }
+
+ return messagesSubset
+}
+
+export function expireMessages( // Returns a new array without the messages whose timeToLive has elapsed at the given boundary
+ messages: CodebuffMessage[],
+ endOf: 'agentStep' | 'userPrompt',
+): CodebuffMessage[] {
+ return messages.filter((m) => {
+ // Messages without a timeToLive never expire
+ if (m.timeToLive === undefined) return true
+
+ // 'agentStep' messages are dropped at either boundary; 'userPrompt' ones only at end of user prompt
+ if (m.timeToLive === 'agentStep') return false
+ if (m.timeToLive === 'userPrompt' && endOf === 'userPrompt') return false
+
+ return true
+ })
+}
diff --git a/packages/agent-runtime/src/util/object.ts b/packages/agent-runtime/src/util/object.ts
new file mode 100644
index 000000000..8cb548671
--- /dev/null
+++ b/packages/agent-runtime/src/util/object.ts
@@ -0,0 +1,35 @@
+import { stripNullChars } from '@codebuff/common/util/string'
+
+/**
+ * Recursively traverses a value and removes null characters (\u0000) from
+ * every string it contains. Strings, arrays and objects are rebuilt; any
+ * other value (number, boolean, null, undefined, ...) is returned as is.
+ *
+ * @param input The string, array, object or primitive to sanitize.
+ * @returns A sanitized copy with the same shape as `input`.
+ */
+export function stripNullCharsFromObject<T>(input: T): T {
+  if (typeof input === 'string') {
+    // Cast back to T: T may be a string subtype the compiler cannot rebuild.
+    return stripNullChars(input) as T
+  }
+
+  if (Array.isArray(input)) {
+    // Wrap in a lambda so map's index/array arguments are not forwarded.
+    return input.map((item) => stripNullCharsFromObject(item)) as T
+  }
+
+  if (input !== null && typeof input === 'object') {
+    const sanitizedObject: Record<string, unknown> = {}
+    // Object.entries visits own enumerable string keys only.
+    for (const [key, value] of Object.entries(input)) {
+      sanitizedObject[key] = stripNullCharsFromObject(value)
+    }
+    return sanitizedObject as T
+  }
+
+  // Numbers, booleans, null, undefined, etc. carry no strings to clean.
+  return input
+}
diff --git a/packages/agent-runtime/src/util/parse-tool-call-xml.ts b/packages/agent-runtime/src/util/parse-tool-call-xml.ts
new file mode 100644
index 000000000..1c8a109ab
--- /dev/null
+++ b/packages/agent-runtime/src/util/parse-tool-call-xml.ts
@@ -0,0 +1,101 @@
+import { toContentString } from '@codebuff/common/util/messages'
+import { generateCompactId } from '@codebuff/common/util/string'
+import { closeXml } from '@codebuff/common/util/xml'
+
+import type { StringToolResultPart } from '@codebuff/common/tools/constants'
+import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+
+/**
+ * Parses XML content for a tool call into a structured object with only string values.
+ * Later occurrences of a repeated tag name overwrite earlier ones.
+ * Example input:
+ * <type>click</type>
+ * <selector>#button</selector>
+ * <timeout>5000</timeout>
+ * Result: { type: 'click', selector: '#button', timeout: '5000' }
+ */
+export function parseToolCallXml(xmlString: string): Record<string, string> {
+  if (!xmlString.trim()) return {}
+
+  const result: Record<string, string> = {}
+  // \1 forces the closing tag to repeat the opening tag's name.
+  const tagPattern = /<(\w+)>([\s\S]*?)<\/\1>/g
+  let match: RegExpExecArray | null
+
+  while ((match = tagPattern.exec(xmlString)) !== null) {
+    const [, key, rawValue] = match
+
+    // Remove leading/trailing whitespace but preserve internal whitespace
+    result[key] = rawValue.trim()
+  }
+
+  return result
+}
+
+/**
+ * Extracts each `<tool_result>` block that has both a `<tool>` name and a
+ * `<result>` body (blocks missing either are skipped). The result text is
+ * trimmed and every entry receives a freshly generated toolCallId.
+ */
+export const parseToolResults = (xmlString: string): StringToolResultPart[] => {
+  if (!xmlString.trim()) return []
+
+  const results: StringToolResultPart[] = []
+  const toolResultPattern = /<tool_result>([\s\S]*?)<\/tool_result>/g
+  let match: RegExpExecArray | null
+
+  while ((match = toolResultPattern.exec(xmlString)) !== null) {
+    const toolResultContent = match[1]
+    // One-shot matches: no `g` flag needed. `.` keeps the tool name on one line.
+    const toolMatch = /<tool>(.*?)<\/tool>/.exec(toolResultContent)
+    const resultMatch = /<result>([\s\S]*?)<\/result>/.exec(toolResultContent)
+    if (!toolMatch || !resultMatch) continue
+
+    results.push({
+      toolName: toolMatch[1],
+      toolCallId: generateCompactId(),
+      output: { type: 'text', value: resultMatch[1].trim() },
+    })
+  }
+
+  return results
+}
+
+/**
+ * Two-level lookup: file path -> token -> list of files that call that token.
+ * renderReadFilesResult indexes it with `file.path` to build each file's
+ * referenced_by section.
+ */
+export interface TokenCallerMap {
+ [filePath: string]: {
+ [token: string]: string[] // Array of files that call this token
+ }
+}
+
+/**
+ * Renders each file as a `<read_file>` block containing `<path>`, `<content>`
+ * and `<referenced_by>` sections; blocks are separated by a blank line.
+ * `referenced_by` holds one "token: caller, caller" line per token that has at
+ * least one caller, or the literal 'None' when there are no such tokens.
+ *
+ * @param files - Files to render, in output order
+ * @param tokenCallers - Caller lookup indexed by file path, then token
+ * @returns The concatenated XML-like text ('' for an empty `files` array)
+ */
+export function renderReadFilesResult(
+  files: { path: string; content: string }[],
+  tokenCallers: TokenCallerMap,
+) {
+  return files
+    .map(({ path, content }) => {
+      const referencedBy =
+        Object.entries(tokenCallers[path] ?? {})
+          .filter(([, callers]) => callers.length > 0)
+          .map(([token, callers]) => `${token}: ${callers.join(', ')}`)
+          .join('\n') || 'None'
+      return `<read_file>\n<path>${path}${closeXml('path')}\n<content>${content}${closeXml('content')}\n<referenced_by>${referencedBy}${closeXml('referenced_by')}\n${closeXml('read_file')}`
+    })
+    .join('\n\n')
+}
+
+/**
+ * Parses `<read_file>` blocks, each holding `<path>`, `<content>` and
+ * `<referenced_by>` sections in that order, into plain records.
+ * Paths are trimmed; content and referencedBy are returned exactly as captured.
+ * Blocks whose path is blank after trimming are skipped.
+ *
+ * @param xmlString - Text containing zero or more `<read_file>` blocks
+ * @returns One record per well-formed block, in document order
+ */
+export function parseReadFilesResult(
+  xmlString: string,
+): { path: string; content: string; referencedBy: string }[] {
+  const files: { path: string; content: string; referencedBy: string }[] = []
+  const filePattern =
+    /<read_file>\s*<path>([^<>]+)<\/path>\s*<content>([\s\S]*?)<\/content>\s*<referenced_by>([\s\S]*?)<\/referenced_by>\s*<\/read_file>/g
+  let match: RegExpExecArray | null
+
+  while ((match = filePattern.exec(xmlString)) !== null) {
+    const [, rawPath, content, referencedBy] = match
+    const path = rawPath.trim()
+    if (path) {
+      files.push({ path, content, referencedBy })
+    }
+  }
+
+  return files
+}
+
+export function isToolResult(message: CodebuffMessage): boolean {
+ return toContentString(message).includes(' ToolResult,
+): string {
+ const resultsStr =
+ typeof messageContent === 'string'
+ ? messageContent
+ : ((messageContent[messageContent.length - 1] as any)?.text as string) ??
+ ''
+ if (!resultsStr.includes(' result.toolName === toolName,
+ )
+
+ if (targetResults.length === 0) {
+ return resultsStr
+ }
+
+ // Keep non-target results unchanged
+ const otherResults = toolResults.filter(
+ (result) => result.toolName !== toolName,
+ )
+
+ // Create simplified results
+ const simplifiedResults = targetResults.map(simplifyFn)
+
+ // Combine both types of results
+ return renderToolResults([...simplifiedResults, ...otherResults])
+}
+
+/**
+ * Condenses the `read_files` tool results found in a message by delegating to
+ * `simplifyToolResults` with `simplifyReadFileToolResult` as the per-result
+ * transform, so each such result is reduced to its list of file paths.
+ * @param messageContent - The message content containing tool results
+ * @returns The message content with path-only read_files results
+ */
+export function simplifyReadFileResults(
+  messageContent: string | object[],
+): string {
+  const targetTool = 'read_files'
+  const simplified = simplifyToolResults(
+    messageContent,
+    targetTool,
+    simplifyReadFileToolResult,
+  )
+  return simplified
+}
+
+/**
+ * Condenses the `run_terminal_command` tool results found in a message by
+ * delegating to `simplifyToolResults` with `simplifyTerminalCommandToolResult`
+ * as the per-result transform.
+ * @param messageContent - The message content containing tool results
+ * @returns The message content with shortened terminal command results
+ */
+export function simplifyTerminalCommandResults(
+  messageContent: string | object[],
+): string {
+  const targetTool = 'run_terminal_command'
+  const simplified = simplifyToolResults(
+    messageContent,
+    targetTool,
+    simplifyTerminalCommandToolResult,
+  )
+  return simplified
+}
+
+/**
+ * Reduces one read_files tool result to the paths of the files it contained.
+ * The paths are newline-joined and appended directly after the
+ * "Read the following files: " prefix; the original toolCallId is kept.
+ * @param toolResult - The read_files tool result to simplify
+ * @returns A new tool result whose text output lists only the file paths
+ */
+export function simplifyReadFileToolResult(toolResult: ToolResult): ToolResult {
+  const { toolCallId, output } = toolResult
+  const pathList = parseReadFilesResult(output.value)
+    .map(({ path }) => path)
+    .join('\n')
+
+  return {
+    toolCallId,
+    toolName: 'read_files',
+    output: {
+      type: 'text',
+      value: `Read the following files: ${pathList}`,
+    },
+  }
+}
+
+/**
+ * Replaces the output of one terminal command tool result with the marker
+ * '[Output omitted]', but only when the marker is strictly shorter than the
+ * original output; otherwise the original result object is returned as is.
+ * @param toolResult - The terminal command tool result to simplify
+ * @returns A new shortened tool result, or `toolResult` itself when already short
+ */
+export function simplifyTerminalCommandToolResult(
+  toolResult: ToolResult,
+): ToolResult {
+  const omittedNotice = '[Output omitted]'
+
+  // Substituting would not save anything, so hand back the input untouched.
+  if (toolResult.output.value.length <= omittedNotice.length) {
+    return toolResult
+  }
+
+  return {
+    toolCallId: toolResult.toolCallId,
+    toolName: 'run_terminal_command',
+    output: {
+      type: 'text',
+      value: omittedNotice,
+    },
+  }
+}
diff --git a/packages/agent-runtime/src/util/token-counter.ts b/packages/agent-runtime/src/util/token-counter.ts
new file mode 100644
index 000000000..960a676cd
--- /dev/null
+++ b/packages/agent-runtime/src/util/token-counter.ts
@@ -0,0 +1,42 @@
+import { LRUCache } from '@codebuff/common/util/lru-cache'
+import { encode } from 'gpt-tokenizer/esm/model/gpt-4o'
+
+// Multiplier applied to the gpt-4o tokenizer's count in countTokens.
+const ANTHROPIC_TOKEN_FUDGE_FACTOR = 1.35
+
+// Memoizes countTokens results: input text -> estimated token count.
+const TOKEN_COUNT_CACHE = new LRUCache<string, number>(1000)
+
+/**
+ * Estimates the token count of `text`: the gpt-4o tokenizer's count scaled by
+ * ANTHROPIC_TOKEN_FUDGE_FACTOR and rounded down. Results for texts longer than
+ * 100 characters are memoized in TOKEN_COUNT_CACHE.
+ * If anything throws, the error is logged and `ceil(length / 3)` is returned.
+ */
+export function countTokens(text: string): number {
+  try {
+    const hit = TOKEN_COUNT_CACHE.get(text)
+    if (hit !== undefined) return hit
+
+    const rawTokens = encode(text, { allowedSpecial: 'all' }).length
+    const estimate = Math.floor(rawTokens * ANTHROPIC_TOKEN_FUDGE_FACTOR)
+
+    // Cache only if the text is long enough to be worth it.
+    if (text.length > 100) {
+      TOKEN_COUNT_CACHE.set(text, estimate)
+    }
+    return estimate
+  } catch (e) {
+    console.error('Error counting tokens', e)
+    // Fallback heuristic: one token per three characters, rounded up.
+    return Math.ceil(text.length / 3)
+  }
+}
+
+/**
+ * Counts the tokens of the JSON serialization of `text`. Strings are
+ * serialized too, so their quotes and escapes are included in the count.
+ */
+export function countTokensJson(text: string | object): number {
+  const serialized = JSON.stringify(text)
+  return countTokens(serialized)
+}
+
+/**
+ * Computes a token count for every entry of `files`.
+ *
+ * @param files - Map of file path to file content
+ * @returns Map of file path to `countTokens(content)`, or 0 for falsy content
+ */
+export function countTokensForFiles(
+  files: Record<string, string | null>,
+): Record<string, number> {
+  const tokenCounts: Record<string, number> = {}
+  for (const [filePath, content] of Object.entries(files)) {
+    // Falsy content (null or an empty string) counts as zero tokens.
+    tokenCounts[filePath] = content ? countTokens(content) : 0
+  }
+  return tokenCounts
+}
diff --git a/packages/agent-runtime/tsconfig.json b/packages/agent-runtime/tsconfig.json
new file mode 100644
index 000000000..3ef6f86b9
--- /dev/null
+++ b/packages/agent-runtime/tsconfig.json
@@ -0,0 +1,9 @@
+{
+ "extends": "../../tsconfig.base.json",
+ "compilerOptions": {
+ "types": ["bun", "node"],
+ "baseUrl": "."
+ },
+ "include": ["src/**/*.ts"],
+ "exclude": ["node_modules"]
+}
\ No newline at end of file
diff --git a/tsconfig.json b/tsconfig.json
index e571761e1..aa8fe9f06 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -12,7 +12,8 @@
"@codebuff/billing/*": ["./packages/billing/src/*"],
"@codebuff/bigquery/*": ["./packages/bigquery/src/*"],
"@codebuff/internal/*": ["./packages/internal/src/*"],
- "@codebuff/code-map/*": ["./packages/code-map/*"]
+ "@codebuff/code-map/*": ["./packages/code-map/*"],
+ "@codebuff/agent-runtime/*": ["./packages/agent-runtime/src/*"]
}
},
"files": [],
@@ -27,6 +28,7 @@
{ "path": "./packages/bigquery" },
{ "path": "./packages/internal" },
{ "path": "./packages/code-map" },
+ { "path": "./packages/agent-runtime" },
{ "path": "./scripts" }
]
}