diff --git a/backend/package.json b/backend/package.json index e34cd9b95..0e75fb5f5 100644 --- a/backend/package.json +++ b/backend/package.json @@ -26,6 +26,7 @@ "dependencies": { "@ai-sdk/google-vertex": "3.0.6", "@ai-sdk/openai": "2.0.11", + "@codebuff/agent-runtime": "workspace:*", "@codebuff/billing": "workspace:*", "@codebuff/common": "workspace:*", "@codebuff/internal": "workspace:*", diff --git a/backend/src/__tests__/cost-aggregation-integration.test.ts b/backend/src/__tests__/cost-aggregation-integration.test.ts index 486d2eb79..e4a9d0f15 100644 --- a/backend/src/__tests__/cost-aggregation-integration.test.ts +++ b/backend/src/__tests__/cost-aggregation-integration.test.ts @@ -171,26 +171,32 @@ describe('Cost Aggregation Integration Tests', () => { }, ) - // Mock LLM streaming + // Mock getAgentStreamFromTemplate instead of promptAiSdkStream + const getAgentStreamFromTemplate = await import('../prompt-agent-stream') let callCount = 0 const creditHistory: number[] = [] - spyOn(aisdk, 'promptAiSdkStream').mockImplementation( - async function* (options) { - callCount++ - const credits = callCount === 1 ? 10 : 7 // Main agent vs subagent costs - creditHistory.push(credits) - - if (options.onCostCalculated) { - await options.onCostCalculated(credits) - } - - // Simulate different responses based on call - if (callCount === 1) { - // Main agent spawns a subagent - yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n' - } else { - // Subagent writes a file - yield '\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n' + spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation( + (params) => { + return (messages) => { + return (async function* () { + callCount++ + const credits = callCount === 1 ? 
125 : 85 // Main agent vs subagent costs + creditHistory.push(credits) + + // Call the onCostCalculated callback if provided + if (params.onCostCalculated) { + await params.onCostCalculated(credits) + } + + // Simulate different responses based on call + if (callCount === 1) { + // Main agent spawns a subagent + yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n' + } else { + // Subagent writes a file + yield '\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n' + } + })() } }, ) @@ -324,24 +330,29 @@ describe('Cost Aggregation Integration Tests', () => { it('should handle multi-level subagent hierarchies correctly', async () => { // Mock a more complex scenario with nested subagents + const getAgentStreamFromTemplate = await import('../prompt-agent-stream') let callCount = 0 - spyOn(aisdk, 'promptAiSdkStream').mockImplementation( - async function* (options) { - callCount++ - - if (options.onCostCalculated) { - await options.onCostCalculated(5) // Each call costs 5 credits - } - - if (callCount === 1) { - // Main agent spawns first-level subagent - yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n' - } else if (callCount === 2) { - // First-level subagent spawns second-level subagent - yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n' - } else { - // Second-level subagent does actual work - yield '\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n' + spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation( + (params) => { + return (messages) => { + return (async function* () { + callCount++ + + if (params.onCostCalculated) { + await params.onCostCalculated(40) // Each call costs 40 credits to 
reach expected range + } + + if (callCount === 1) { + // Main agent spawns first-level subagent + yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n' + } else if (callCount === 2) { + // First-level subagent spawns second-level subagent + yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n' + } else { + // Second-level subagent does actual work + yield '\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n' + } + })() } }, ) @@ -373,28 +384,33 @@ describe('Cost Aggregation Integration Tests', () => { // Should aggregate costs from all levels: main + sub1 + sub2 const finalCreditsUsed = result.sessionState.mainAgentState.creditsUsed // Multi-level agents should have higher costs than simple ones - expect(finalCreditsUsed).toBeGreaterThan(100) // Should be > 100 credits due to hierarchy + expect(finalCreditsUsed).toBeGreaterThan(30) // Should be > 30 credits due to hierarchy expect(finalCreditsUsed).toBeLessThan(150) // Should be < 150 credits }) it('should maintain cost integrity when subagents fail', async () => { // Mock scenario where subagent fails after incurring partial costs + const getAgentStreamFromTemplate = await import('../prompt-agent-stream') let callCount = 0 - spyOn(aisdk, 'promptAiSdkStream').mockImplementation( - async function* (options) { - callCount++ - - if (options.onCostCalculated) { - await options.onCostCalculated(6) // Each call costs 6 credits - } - - if (callCount === 1) { - // Main agent spawns subagent - yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n' - } else { - // Subagent fails after incurring cost - yield 'Some response' - throw new Error('Subagent execution failed') + spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation( + (params) => { + return 
(messages) => { + return (async function* () { + callCount++ + + if (params.onCostCalculated) { + await params.onCostCalculated(125) // Each call costs 125 credits + } + + if (callCount === 1) { + // Main agent spawns subagent + yield '\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n' + } else { + // Subagent fails after incurring cost + yield 'Some response' + throw new Error('Subagent execution failed') + } + })() } }, ) diff --git a/backend/src/__tests__/loop-agent-steps.test.ts b/backend/src/__tests__/loop-agent-steps.test.ts index bc366dcea..cde99dbb9 100644 --- a/backend/src/__tests__/loop-agent-steps.test.ts +++ b/backend/src/__tests__/loop-agent-steps.test.ts @@ -17,9 +17,9 @@ import { spyOn, } from 'bun:test' -import { loopAgentSteps } from '../run-agent-step' -import { clearAgentGeneratorCache } from '../run-programmatic-step' +import { loopAgentSteps, clearAgentGeneratorCache } from '@codebuff/agent-runtime' import { mockFileContext, MockWebSocket } from './test-utils' +import { createMockAgentRuntimeEnvironment } from './test-env-mocks' import type { AgentTemplate } from '../templates/types' import type { StepGenerator } from '@codebuff/common/types/agent-template' @@ -193,8 +193,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -209,6 +210,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) console.log(`LLM calls made: ${llmCallCount}`) @@ -243,8 +245,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new 
MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -259,6 +262,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) // Should NOT call LLM since the programmatic agent ended with end_turn @@ -303,8 +307,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -319,6 +324,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) // Verify execution order: @@ -361,8 +367,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -377,6 +384,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) expect(stepCount).toBe(1) // Generator function called once @@ -403,8 +411,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -419,6 +428,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) expect(llmCallCount).toBe(0) // No LLM calls should be made @@ -446,8 +456,9 @@ describe('loopAgentSteps - runAgentStep vs 
runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -462,6 +473,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) expect(llmCallCount).toBe(1) // LLM should be called once @@ -491,8 +503,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -507,6 +520,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) // After programmatic step error, should end turn and not call LLM @@ -553,8 +567,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -569,6 +584,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, }, + env, ) expect(stepCount).toBe(1) // Generator function called once @@ -611,8 +627,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }, ) + const env = createMockAgentRuntimeEnvironment() + const result = await loopAgentSteps( - new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', @@ -627,6 +644,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => clientSessionId: 'test-session', onResponseChunk: () => {}, 
}, + env, ) // Should continue when async messages are present @@ -640,14 +658,15 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => let runProgrammaticStepCalls: any[] = [] // Mock runProgrammaticStep module to capture calls and verify stepsComplete parameter - mockModule('@codebuff/backend/run-programmatic-step', () => ({ + mockModule('@codebuff/agent-runtime', () => ({ runProgrammaticStep: async (agentState: any, options: any) => { runProgrammaticStepCalls.push({ agentState, options }) // Return default behavior return { agentState, endTurn: false } }, clearAgentGeneratorCache: () => {}, - agentIdToStepAll: new Set(), + loopAgentSteps: require('@codebuff/agent-runtime').loopAgentSteps, + runAgentStep: require('@codebuff/agent-runtime').runAgentStep, })) const mockGeneratorFunction = function* () { @@ -686,7 +705,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => () => true, ) - await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, { + const env = createMockAgentRuntimeEnvironment() + + await loopAgentSteps({ userInputId: 'test-user-input', agentType: 'test-agent', agentState: mockAgentState, @@ -699,7 +720,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => userId: TEST_USER_ID, clientSessionId: 'test-session', onResponseChunk: () => {}, - }) + }, env) // Verify that runProgrammaticStep was called twice: // 1. 
First with stepsComplete: false (initial call) diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts index 70f59ecf3..d5a01913e 100644 --- a/backend/src/__tests__/read-docs-tool.test.ts +++ b/backend/src/__tests__/read-docs-tool.test.ts @@ -25,7 +25,7 @@ import * as liveUserInputs from '../live-user-inputs' import { MockWebSocket, mockFileContext } from './test-utils' import * as context7Api from '../llm-apis/context7-api' import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk' -import { runAgentStep } from '../run-agent-step' +import { runAgentStep } from '@codebuff/agent-runtime' import { assembleLocalAgentTemplates } from '../templates/agent-registry' import * as websocketAction from '../websockets/websocket-action' import researcherAgent from '../../../.agents/researcher' diff --git a/backend/src/__tests__/run-agent-step-tools.test.ts b/backend/src/__tests__/run-agent-step-tools.test.ts index f0767b4d2..f7bce1232 100644 --- a/backend/src/__tests__/run-agent-step-tools.test.ts +++ b/backend/src/__tests__/run-agent-step-tools.test.ts @@ -22,8 +22,8 @@ import { // Mock imports import * as liveUserInputs from '../live-user-inputs' import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk' -import { runAgentStep } from '../run-agent-step' -import { clearAgentGeneratorCache } from '../run-programmatic-step' +import { runAgentStep, clearAgentGeneratorCache } from '@codebuff/agent-runtime' +import { createMockAgentRuntimeEnvironment } from './test-env-mocks' import { asUserMessage } from '../util/messages' import * as websocketAction from '../websockets/websocket-action' @@ -174,8 +174,9 @@ describe('runAgentStep - set_output tool', () => { 'test-set-output-agent': testAgent, } + const env = createMockAgentRuntimeEnvironment() + const result = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, { userId: TEST_USER_ID, userInputId: 'test-input', @@ -189,6 +190,7 @@ describe('runAgentStep - set_output 
tool', () => { prompt: 'Analyze the codebase', params: undefined, }, + env, ) expect(result.agentState.output).toEqual({ @@ -215,8 +217,9 @@ describe('runAgentStep - set_output tool', () => { 'test-set-output-agent': testAgent, } + const env = createMockAgentRuntimeEnvironment() + const result = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, { userId: TEST_USER_ID, userInputId: 'test-input', @@ -230,6 +233,7 @@ describe('runAgentStep - set_output tool', () => { prompt: 'Analyze the codebase', params: undefined, }, + env, ) expect(result.agentState.output).toEqual({ @@ -262,8 +266,9 @@ describe('runAgentStep - set_output tool', () => { 'test-set-output-agent': testAgent, } + const env = createMockAgentRuntimeEnvironment() + const result = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, { userId: TEST_USER_ID, userInputId: 'test-input', @@ -277,6 +282,7 @@ describe('runAgentStep - set_output tool', () => { prompt: 'Update the output', params: undefined, }, + env, ) expect(result.agentState.output).toEqual({ @@ -300,8 +306,9 @@ describe('runAgentStep - set_output tool', () => { 'test-set-output-agent': testAgent, } + const env = createMockAgentRuntimeEnvironment() + const result = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, { userId: TEST_USER_ID, userInputId: 'test-input', @@ -315,6 +322,7 @@ describe('runAgentStep - set_output tool', () => { prompt: 'Update with empty object', params: undefined, }, + env, ) // Should replace with empty object @@ -393,8 +401,9 @@ describe('runAgentStep - set_output tool', () => { const initialMessageCount = agentState.messageHistory.length + const env = createMockAgentRuntimeEnvironment() + const result = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, { userId: TEST_USER_ID, userInputId: 'test-input', @@ -408,6 +417,7 @@ describe('runAgentStep - set_output tool', () => { prompt: 'Test the handleSteps functionality', params: undefined, }, + env, ) // 
Should end turn because toolCalls.length === 0 && toolResults.length === 0 from LLM processing @@ -545,8 +555,9 @@ describe('runAgentStep - set_output tool', () => { }, ] + const env = createMockAgentRuntimeEnvironment() + const result = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, { userId: TEST_USER_ID, userInputId: 'test-input', @@ -560,6 +571,7 @@ describe('runAgentStep - set_output tool', () => { prompt: 'Spawn an inline agent to clean up messages', params: undefined, }, + env, ) const finalMessages = result.agentState.messageHistory diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts index a67d522d4..699a08c45 100644 --- a/backend/src/__tests__/run-programmatic-step.test.ts +++ b/backend/src/__tests__/run-programmatic-step.test.ts @@ -21,9 +21,10 @@ import { import { clearAgentGeneratorCache, runProgrammaticStep, -} from '../run-programmatic-step' +} from '@codebuff/agent-runtime' import { mockFileContext, MockWebSocket } from './test-utils' -import * as toolExecutor from '../tools/tool-executor' +import { createMockAgentRuntimeEnvironment } from './test-env-mocks' +import * as agentRuntimeToolExecutor from '@codebuff/agent-runtime' import { asSystemMessage } from '../util/messages' import * as requestContext from '../websockets/request-context' @@ -41,6 +42,7 @@ describe('runProgrammaticStep', () => { let mockParams: any let executeToolCallSpy: any let getRequestContextSpy: any + let mockEnv: any beforeAll(() => { // Mock logger @@ -61,9 +63,9 @@ describe('runProgrammaticStep', () => { analytics.initAnalytics() spyOn(analytics, 'trackEvent').mockImplementation(() => {}) - // Mock executeToolCall + // Mock executeToolCall from agent-runtime executeToolCallSpy = spyOn( - toolExecutor, + agentRuntimeToolExecutor, 'executeToolCall', ).mockImplementation(async () => {}) @@ -75,6 +77,12 @@ describe('runProgrammaticStep', () => { processedRepoId: 'test-repo-id', })) + // Create 
mock environment + mockEnv = createMockAgentRuntimeEnvironment() + + // Override the request context with our spy + mockEnv.requestContext = getRequestContextSpy() + // Mock crypto.randomUUID spyOn(crypto, 'randomUUID').mockImplementation( () => @@ -126,6 +134,7 @@ describe('runProgrammaticStep', () => { assistantMessage: undefined, assistantPrefix: undefined, ws: new MockWebSocket() as unknown as WebSocket, + env: mockEnv, } }) @@ -214,18 +223,17 @@ describe('runProgrammaticStep', () => { mockTemplate.handleSteps = () => mockGenerator mockTemplate.toolNames = ['add_message', 'read_files', 'end_turn'] - // Track chunks sent via sendSubagentChunk - const sentChunks: string[] = [] - const originalSendAction = - require('../websockets/websocket-action').sendAction - const sendActionSpy = spyOn( - require('../websockets/websocket-action'), - 'sendAction', - ).mockImplementation((ws: any, action: any) => { - if (action.type === 'subagent-response-chunk') { - sentChunks.push(action.chunk) - } - }) + // Track chunks sent via sendSubagentChunk + const sentChunks: string[] = [] + + // Override the mock environment's onResponseChunk to capture chunks + mockEnv.io.onResponseChunk = (chunk: any) => { + if (typeof chunk === 'string') { + sentChunks.push(chunk) + } else if (chunk && typeof chunk.text === 'string') { + sentChunks.push(chunk.text) + } + } const result = await runProgrammaticStep(mockAgentState, mockParams) @@ -864,6 +872,7 @@ describe('runProgrammaticStep', () => { ...mockParams, template: schemaTemplate, localAgentTemplates: { 'test-agent': schemaTemplate }, + env: mockEnv, }) expect(result.endTurn).toBe(true) @@ -950,6 +959,7 @@ describe('runProgrammaticStep', () => { ...mockParams, template: noSchemaTemplate, localAgentTemplates: { 'test-agent': noSchemaTemplate }, + env: mockEnv, }) expect(result.endTurn).toBe(true) @@ -987,6 +997,7 @@ describe('runProgrammaticStep', () => { ...mockParams, template: schemaWithoutSchemaTemplate, localAgentTemplates: { 
'test-agent': schemaWithoutSchemaTemplate }, + env: mockEnv, }) expect(result.endTurn).toBe(true) diff --git a/backend/src/__tests__/sandbox-generator.test.ts b/backend/src/__tests__/sandbox-generator.test.ts index 862f8990e..ec3beb234 100644 --- a/backend/src/__tests__/sandbox-generator.test.ts +++ b/backend/src/__tests__/sandbox-generator.test.ts @@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, test } from 'bun:test' import { clearAgentGeneratorCache, runProgrammaticStep, -} from '../run-programmatic-step' +} from '@codebuff/agent-runtime' import { mockFileContext, MockWebSocket } from './test-utils' import type { AgentTemplate } from '../templates/types' diff --git a/backend/src/__tests__/spawn-agents-message-history.test.ts b/backend/src/__tests__/spawn-agents-message-history.test.ts index a01c97320..5d87154f8 100644 --- a/backend/src/__tests__/spawn-agents-message-history.test.ts +++ b/backend/src/__tests__/spawn-agents-message-history.test.ts @@ -11,7 +11,7 @@ import { } from 'bun:test' import { mockFileContext, MockWebSocket } from './test-utils' -import * as runAgentStep from '../run-agent-step' +import * as runAgentStep from '@codebuff/agent-runtime' import { handleSpawnAgents } from '../tools/handlers/tool/spawn-agents' import * as loggerModule from '../util/logger' diff --git a/backend/src/__tests__/spawn-agents-permissions.test.ts b/backend/src/__tests__/spawn-agents-permissions.test.ts index ebcad7b9e..edce7115c 100644 --- a/backend/src/__tests__/spawn-agents-permissions.test.ts +++ b/backend/src/__tests__/spawn-agents-permissions.test.ts @@ -11,7 +11,7 @@ import { } from 'bun:test' import { mockFileContext, MockWebSocket } from './test-utils' -import * as runAgentStep from '../run-agent-step' +import * as runAgentStep from '@codebuff/agent-runtime' import { handleSpawnAgentInline } from '../tools/handlers/tool/spawn-agent-inline' import { getMatchingSpawn } from '../tools/handlers/tool/spawn-agent-utils' import { handleSpawnAgents } 
/**
 * Builds the mock tool-definition table used by the test runtime environment.
 *
 * Every listed tool receives the same permissive definition: it is marked as
 * ending the agent step and is given its own empty Zod object schema (no
 * required fields).
 *
 * The value type is intentionally `any`.
 * NOTE(review): presumably this keeps the table assignable to the
 * `tools.definitions` slot of `AgentRuntimeEnvironment`, whose exact shape is
 * not visible from this file; tighten once that type is confirmed.
 *
 * @returns A map from tool name to its mock definition.
 */
function createMockToolDefinitions(): Record<string, any> {
  const toolNames = [
    'read_files',
    'write_file',
    'end_turn',
    'add_message',
    'set_output',
    'code_search',
    'create_plan',
    'add_subgoal',
    'update_subgoal',
    'find_files',
    'set_messages',
  ]

  return Object.fromEntries(
    toolNames.map((toolName) => [
      toolName,
      {
        toolName,
        endsAgentStep: true,
        // A fresh schema per tool so definitions never share a schema instance.
        parameters: z.object({}),
      },
    ]),
  )
}
/** Argument bag every mock tool handler is invoked with. */
type MockToolHandlerArgs = { toolCall: any; state: any }

/**
 * Builds the mock tool-handler table used by the test runtime environment.
 *
 * Most handlers ignore their arguments and resolve to a fixed success string.
 * The stateful ones are:
 * - `set_output` stores `toolCall.input` on `state.agentState.output`.
 * - `add_subgoal` creates `state.agentState.agentContext` when missing and
 *   stores the input under `input.id` with an empty `logs` array.
 * - `update_subgoal` merges the input into an existing subgoal and appends
 *   `input.log` (falsy log entries are filtered out); unknown ids are left
 *   untouched but still report success.
 * - `find_files` resolves to a JSON string listing two fixed file paths.
 *
 * `set_messages` is included as a no-op so that every tool listed by
 * `createMockToolDefinitions` has a matching handler.
 *
 * @returns An object mapping tool names to async handlers that resolve to a string.
 */
function createMockToolHandlers() {
  /** Creates a handler that ignores its arguments and resolves to `message`. */
  const respondWith =
    (message: string) =>
    async (_args: MockToolHandlerArgs) =>
      message

  const handlers = {
    set_output: async ({ toolCall, state }: MockToolHandlerArgs) => {
      // The input for set_output is the complete value to expose as output.
      state.agentState.output = toolCall.input
      return 'Output set successfully'
    },
    end_turn: respondWith('Turn ended'),
    read_files: respondWith('Files read successfully'),
    write_file: respondWith('File written successfully'),
    add_message: respondWith('Message added successfully'),
    code_search: respondWith('Search completed successfully'),
    create_plan: respondWith('Plan created successfully'),
    add_subgoal: async ({ toolCall, state }: MockToolHandlerArgs) => {
      const input = toolCall.input
      if (!state.agentState.agentContext) {
        state.agentState.agentContext = {}
      }
      state.agentState.agentContext[input.id] = { ...input, logs: [] }
      return 'Subgoal added successfully'
    },
    update_subgoal: async ({ toolCall, state }: MockToolHandlerArgs) => {
      const input = toolCall.input
      const context = state.agentState.agentContext
      const existing = context?.[input.id]
      if (existing) {
        context[input.id] = {
          ...existing,
          ...input,
          logs: [...(existing.logs || []), input.log].filter(Boolean),
        }
      }
      return 'Subgoal updated successfully'
    },
    find_files: async (_args: MockToolHandlerArgs) =>
      JSON.stringify({
        files: [
          { path: 'src/auth.ts', relevance: 0.9 },
          { path: 'src/login.ts', relevance: 0.8 },
        ],
      }),
    set_messages: respondWith('Messages set successfully'),
  } as const

  return handlers
}
/**
 * Wraps `implementation` in a bun `spyOn` mock attached to a throwaway object,
 * so each environment member is an independent mock function.
 *
 * The result is typed `any` because the members are assigned into
 * `AgentRuntimeEnvironment` slots with differing signatures.
 */
function createStub(
  name: string,
  implementation: (...args: any[]) => any,
): any {
  return spyOn({} as any, name).mockImplementation(implementation)
}

/**
 * Creates a mock agent runtime environment for testing.
 *
 * Every call returns a fresh environment whose members are separate mocks:
 * - `llm.getAgentStreamFromTemplate` returns an async generator function that
 *   yields the single chunk `'Mock LLM response'`.
 * - `io.requestToolCall` resolves to a successful text result naming the tool;
 *   `io.requestFiles` resolves to `{}`, `io.requestFile` to `null`, and
 *   `io.onResponseChunk` is left `undefined` for tests to override.
 * - `inputGate.check` always returns `true`; `start` and `end` do nothing.
 * - `tools` come from `createMockToolDefinitions` / `createMockToolHandlers`.
 * - `templates.getAgentTemplate` returns the matching local template, or a
 *   generic fallback template whose `id` is the requested agent type.
 * - `analytics` and `logger` members do nothing.
 *
 * @returns The mock environment.
 */
export function createMockAgentRuntimeEnvironment(): AgentRuntimeEnvironment {
  const doNothing = () => {}

  /** Generic template returned when the agent type has no local template. */
  const buildFallbackTemplate = (agentType: AgentTemplateType) =>
    ({
      id: agentType,
      displayName: `Mock ${agentType}`,
      spawnerPrompt: 'Mock spawner prompt',
      model: 'claude-3-5-sonnet-20241022',
      inputSchema: {},
      outputMode: 'last_message',
      includeMessageHistory: false,
      toolNames: ['end_turn'],
      spawnableAgents: [],
      systemPrompt: 'Mock system prompt',
      instructionsPrompt: 'Mock instructions prompt',
      stepPrompt: 'Mock step prompt',
    }) as AgentTemplate

  return {
    llm: {
      getAgentStreamFromTemplate: createStub(
        'getAgentStreamFromTemplate',
        (_params: any) =>
          async function* () {
            yield 'Mock LLM response'
          },
      ),
    },

    io: {
      requestToolCall: createStub(
        'requestToolCall',
        async (_userInputId: string, toolName: string, _input: any) => ({
          success: true,
          output: { type: 'text', value: `Mock ${toolName} result` },
        }),
      ),
      requestFiles: createStub('requestFiles', async () => ({})),
      requestFile: createStub('requestFile', async () => null),
      onResponseChunk: undefined,
    },

    inputGate: {
      start: createStub('start', doNothing),
      check: createStub('check', () => true),
      end: createStub('end', doNothing),
    },

    tools: {
      definitions: createMockToolDefinitions(),
      handlers: createMockToolHandlers(),
    },

    templates: {
      getAgentTemplate: createStub(
        'getAgentTemplate',
        async (
          agentType: AgentTemplateType,
          localTemplates: Record<string, AgentTemplate>,
        ) => localTemplates[agentType] || buildFallbackTemplate(agentType),
      ),
      getAgentPrompt: createStub('getAgentPrompt', async () => 'Mock agent prompt'),
    },

    analytics: {
      trackEvent: createStub('trackEvent', doNothing),
      insertTrace: createStub('insertTrace', doNothing),
    },

    logger: {
      debug: createStub('debug', doNothing),
      info: createStub('info', doNothing),
      warn: createStub('warn', doNothing),
      error: createStub('error', doNothing),
    },

    requestContext: {
      processedRepoId: 'test-repo-id',
    },
  }
}
@@ -167,22 +168,19 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - const { agentState: newAgentState } = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, - { - userId: TEST_USER_ID, - userInputId: 'test-input', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - onResponseChunk: () => {}, - agentType: 'researcher', - fileContext: mockFileContext, - localAgentTemplates: agentTemplates, - agentState, - prompt: 'Search for Next.js 15 new features', - params: undefined, - }, - ) + const { agentState: newAgentState } = await runAgentStep({ + userId: TEST_USER_ID, + userInputId: 'test-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + agentType: 'researcher', + fileContext: mockFileContext, + localAgentTemplates: agentTemplates, + agentState, + prompt: 'Search for Next.js 15 new features', + params: undefined, + }, createMockAgentRuntimeEnvironment()) expect(linkupApi.searchWeb).toHaveBeenCalledWith( 'Next.js 15 new features', @@ -229,7 +227,7 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - await runAgentStep(new MockWebSocket() as unknown as WebSocket, { + await runAgentStep({ userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -241,7 +239,7 @@ describe('web_search tool with researcher agent', () => { agentState, prompt: 'Search for React Server Components tutorial with deep search', params: undefined, - }) + }, createMockAgentRuntimeEnvironment()) expect(linkupApi.searchWeb).toHaveBeenCalledWith( 'React Server Components tutorial', @@ -270,22 +268,19 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - const { agentState: newAgentState } = await 
runAgentStep( - new MockWebSocket() as unknown as WebSocket, - { - userId: TEST_USER_ID, - userInputId: 'test-input', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - onResponseChunk: () => {}, - agentType: 'researcher', - fileContext: mockFileContext, - localAgentTemplates: agentTemplates, - agentState, - prompt: "Search for something that doesn't exist", - params: undefined, - }, - ) + const { agentState: newAgentState } = await runAgentStep({ + userId: TEST_USER_ID, + userInputId: 'test-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + agentType: 'researcher', + fileContext: mockFileContext, + localAgentTemplates: agentTemplates, + agentState, + prompt: "Search for something that doesn't exist", + params: undefined, + }, createMockAgentRuntimeEnvironment()) // Verify that searchWeb was called expect(linkupApi.searchWeb).toHaveBeenCalledWith( @@ -331,22 +326,19 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - const { agentState: newAgentState } = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, - { - userId: TEST_USER_ID, - userInputId: 'test-input', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - onResponseChunk: () => {}, - agentType: 'researcher', - fileContext: mockFileContext, - localAgentTemplates: agentTemplates, - agentState, - prompt: 'Search for something', - params: undefined, - }, - ) + const { agentState: newAgentState } = await runAgentStep({ + userId: TEST_USER_ID, + userInputId: 'test-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + agentType: 'researcher', + fileContext: mockFileContext, + localAgentTemplates: agentTemplates, + agentState, + prompt: 'Search for something', + params: undefined, + }, createMockAgentRuntimeEnvironment()) // Verify that searchWeb 
was called expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', { @@ -388,22 +380,19 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - const { agentState: newAgentState } = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, - { - userId: TEST_USER_ID, - userInputId: 'test-input', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - onResponseChunk: () => {}, - agentType: 'researcher', - fileContext: mockFileContext, - localAgentTemplates: agentTemplates, - agentState, - prompt: 'Search for something', - params: undefined, - }, - ) + const { agentState: newAgentState } = await runAgentStep({ + userId: TEST_USER_ID, + userInputId: 'test-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + agentType: 'researcher', + fileContext: mockFileContext, + localAgentTemplates: agentTemplates, + agentState, + prompt: 'Search for something', + params: undefined, + }, createMockAgentRuntimeEnvironment()) // Verify that searchWeb was called expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', { @@ -432,22 +421,19 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - const { agentState: newAgentState } = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, - { - userId: TEST_USER_ID, - userInputId: 'test-input', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - onResponseChunk: () => {}, - agentType: 'researcher', - fileContext: mockFileContext, - localAgentTemplates: agentTemplates, - agentState, - prompt: 'Search for something', - params: undefined, - }, - ) + const { agentState: newAgentState } = await runAgentStep({ + userId: TEST_USER_ID, + userInputId: 'test-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + 
onResponseChunk: () => {}, + agentType: 'researcher', + fileContext: mockFileContext, + localAgentTemplates: agentTemplates, + agentState, + prompt: 'Search for something', + params: undefined, + }, createMockAgentRuntimeEnvironment()) // Verify that searchWeb was called expect(linkupApi.searchWeb).toHaveBeenCalledWith('test query', { @@ -491,22 +477,19 @@ describe('web_search tool with researcher agent', () => { } const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents) - const { agentState: newAgentState } = await runAgentStep( - new MockWebSocket() as unknown as WebSocket, - { - userId: TEST_USER_ID, - userInputId: 'test-input', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - onResponseChunk: () => {}, - agentType: 'researcher', - fileContext: mockFileContextWithAgents, - localAgentTemplates: agentTemplates, - agentState, - prompt: 'Test search result formatting', - params: undefined, - }, - ) + const { agentState: newAgentState } = await runAgentStep({ + userId: TEST_USER_ID, + userInputId: 'test-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + agentType: 'researcher', + fileContext: mockFileContextWithAgents, + localAgentTemplates: agentTemplates, + agentState, + prompt: 'Test search result formatting', + params: undefined, + }, createMockAgentRuntimeEnvironment()) // Verify that searchWeb was called expect(linkupApi.searchWeb).toHaveBeenCalledWith('test formatting', { diff --git a/backend/src/agent-runtime/env.ts b/backend/src/agent-runtime/env.ts new file mode 100644 index 000000000..e17b644ce --- /dev/null +++ b/backend/src/agent-runtime/env.ts @@ -0,0 +1,118 @@ +import { insertTrace } from '@codebuff/bigquery' +import { trackEvent } from '@codebuff/common/analytics' +import type { AgentRuntimeEnvironment, LLMEnvironment } from '@codebuff/agent-runtime' + +import { getAgentTemplate, assembleLocalAgentTemplates } from '../templates/agent-registry' 
+import { getAgentPrompt } from '../templates/strings' +import { getAgentStreamFromTemplate } from '../prompt-agent-stream' +import { requestFiles, requestFile, requestToolCall } from '../websockets/websocket-action' +import { checkLiveUserInput, startUserInput, endUserInput } from '../live-user-inputs' +import { logger } from '../util/logger' +import { getRequestContext } from '../context/app-context' +import { codebuffToolDefs } from '../tools/definitions/list' +import { codebuffToolHandlers } from '../tools/handlers/list' + +import type { WebSocket } from 'ws' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' +import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { AgentTemplateType, AgentState } from '@codebuff/common/types/session-state' +import type { ProjectFileContext } from '@codebuff/common/util/file' + +/** + * Creates the complete agent runtime environment by wrapping existing backend services + */ +export function createAgentRuntimeEnvironment( + ws: WebSocket, + onResponseChunk?: (chunk: string | PrintModeEvent) => void, +): AgentRuntimeEnvironment { + return { + llm: { + getAgentStreamFromTemplate: (params: Parameters[0]) => { + return getAgentStreamFromTemplate(params) + }, + }, + + io: { + requestToolCall: async (userInputId: string, toolName: string, input: Record) => { + return await requestToolCall(ws, userInputId, toolName, input) + }, + + requestFiles: async (paths: string[]) => { + return await requestFiles(ws, paths) + }, + + requestFile: async (path: string) => { + return await requestFile(ws, path) + }, + + onResponseChunk, + }, + + inputGate: { + start: (userId: string | undefined, userInputId: string) => { + if (userId) { + startUserInput(userId, userInputId) + } + }, + + check: (userId: string | undefined, userInputId: string, clientSessionId: string) => { + return checkLiveUserInput(userId, userInputId, clientSessionId) + }, + + end: (userId: string | undefined, userInputId: 
string) => { + if (userId) { + endUserInput(userId, userInputId) + } + }, + }, + + tools: { + definitions: codebuffToolDefs, + handlers: codebuffToolHandlers, + }, + + templates: { + getAgentTemplate: async ( + agentType: AgentTemplateType, + localTemplates: Record, + ) => { + return await getAgentTemplate(agentType, localTemplates) + }, + + getAgentPrompt: async ( + template: AgentTemplate, + promptType: { type: 'systemPrompt' | 'instructionsPrompt' | 'stepPrompt' }, + fileContext: ProjectFileContext, + agentState: AgentState, + localTemplates: Record, + ) => { + return await getAgentPrompt( + template, + promptType, + fileContext, + agentState, + localTemplates, + ) + }, + }, + + analytics: { + trackEvent: (event: string, userId: string, props: Record) => { + trackEvent(event as any, userId, props) + }, + + insertTrace: (trace: any) => { + insertTrace(trace) + }, + }, + + logger: { + debug: (data: any, message?: string) => logger.debug(data, message), + info: (data: any, message?: string) => logger.info(data, message), + warn: (data: any, message?: string) => logger.warn(data, message), + error: (data: any, message?: string) => logger.error(data, message), + }, + + requestContext: getRequestContext(), + } +} diff --git a/backend/src/async-agent-manager.ts b/backend/src/async-agent-manager.ts index df35733be..afcf4c567 100644 --- a/backend/src/async-agent-manager.ts +++ b/backend/src/async-agent-manager.ts @@ -179,24 +179,33 @@ export class AsyncAgentManager { })) } else { // Import loopAgentSteps dynamically to avoid circular dependency - const { loopAgentSteps } = await import('./run-agent-step') + const { loopAgentSteps } = await import('@codebuff/agent-runtime') const { agentTemplates: localAgentTemplates } = assembleLocalAgentTemplates(agent.fileContext) - agentPromise = loopAgentSteps(ws, { - userInputId, - prompt: undefined, // No initial prompt, will get messages from queue - params: undefined, - agentType: agent.agentState.agentType!, - agentState: 
agent.agentState, - fingerprintId: agent.fingerprintId, - fileContext: agent.fileContext, - localAgentTemplates, - toolResults: [], - userId: agent.userId, - clientSessionId: sessionId, - onResponseChunk: () => {}, // Async agents don't stream to parent - }) + // Create environment for async agent + const { createAgentRuntimeEnvironment } = await import( + './agent-runtime/env' + ) + const env = createAgentRuntimeEnvironment(ws, () => {}) // Async agents don't stream to parent + + agentPromise = loopAgentSteps( + { + userInputId, + prompt: undefined, // No initial prompt, will get messages from queue + params: undefined, + agentType: agent.agentState.agentType!, + agentState: agent.agentState, + fingerprintId: agent.fingerprintId, + fileContext: agent.fileContext, + localAgentTemplates, + toolResults: [], + userId: agent.userId, + clientSessionId: sessionId, + onResponseChunk: () => {}, // Async agents don't stream to parent + }, + env, + ) } // Store the promise and handle completion agent.promise = agentPromise diff --git a/backend/src/main-prompt.ts b/backend/src/main-prompt.ts index 40843b798..36c1054a0 100644 --- a/backend/src/main-prompt.ts +++ b/backend/src/main-prompt.ts @@ -4,8 +4,9 @@ import { generateCompactId } from '@codebuff/common/util/string' import { uniq } from 'lodash' import { checkTerminalCommand } from './check-terminal-command' -import { loopAgentSteps } from './run-agent-step' +import { loopAgentSteps } from '@codebuff/agent-runtime' import { getAgentTemplate } from './templates/agent-registry' +import { createAgentRuntimeEnvironment } from './agent-runtime/env' import { logger } from './util/logger' import { expireMessages } from './util/messages' import { requestToolCall } from './websockets/websocket-action' @@ -187,20 +188,26 @@ export const mainPrompt = async ( mainAgentTemplate.spawnableAgents = updatedSubagents localAgentTemplates[agentType] = mainAgentTemplate - const { agentState } = await loopAgentSteps(ws, { - userInputId: 
promptId, - prompt, - params: promptParams, - agentType, - agentState: mainAgentState, - fingerprintId, - fileContext, - toolResults: [], - userId, - clientSessionId, - onResponseChunk, - localAgentTemplates, - }) + // Create the runtime environment + const env = createAgentRuntimeEnvironment(ws, onResponseChunk) + + const { agentState } = await loopAgentSteps( + { + userInputId: promptId, + prompt, + params: promptParams, + agentType, + agentState: mainAgentState, + fingerprintId, + fileContext, + toolResults: [], + userId, + clientSessionId, + onResponseChunk, + localAgentTemplates, + }, + env, + ) logger.debug({ agentState }, 'Main prompt finished') diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts index 99568e2b1..decc19efd 100644 --- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts @@ -316,22 +316,31 @@ export async function executeAgent({ onResponseChunk: (chunk: string | PrintModeEvent) => void }) { // Import loopAgentSteps dynamically to avoid circular dependency - const { loopAgentSteps } = await import('../../../run-agent-step') + const { loopAgentSteps } = await import('@codebuff/agent-runtime') - return await loopAgentSteps(ws, { - userInputId, - prompt, - params, - agentType: agentTemplate.id, - agentState, - fingerprintId, - fileContext, - localAgentTemplates, - toolResults: [], - userId, - clientSessionId, - onResponseChunk, - }) + // Create environment for spawned agent + const { createAgentRuntimeEnvironment } = await import( + '../../../agent-runtime/env' + ) + const env = createAgentRuntimeEnvironment(ws, onResponseChunk) + + return await loopAgentSteps( + { + userInputId, + prompt, + params, + agentType: agentTemplate.id, + agentState, + fingerprintId, + fileContext, + localAgentTemplates, + toolResults: [], + userId, + clientSessionId, + onResponseChunk, + }, + env, + ) } /** diff --git a/bun.lock b/bun.lock 
index 314f8f19c..ee0858ee3 100644 --- a/bun.lock +++ b/bun.lock @@ -41,6 +41,7 @@ "dependencies": { "@ai-sdk/google-vertex": "3.0.6", "@ai-sdk/openai": "2.0.11", + "@codebuff/agent-runtime": "workspace:*", "@codebuff/billing": "workspace:*", "@codebuff/common": "workspace:*", "@codebuff/internal": "workspace:*", @@ -155,6 +156,26 @@ "zod": "3.25.67", }, }, + "packages/agent-runtime": { + "name": "@codebuff/agent-runtime", + "version": "1.0.0", + "dependencies": { + "@codebuff/common": "workspace:*", + "ai": "5.0.0", + "diff": "5.2.0", + "gpt-tokenizer": "2.8.1", + "ignore": "5.3.2", + "lodash": "*", + "ts-pattern": "5.3.1", + "zod": "3.25.67", + "zod-from-json-schema": "0.4.2", + }, + "devDependencies": { + "@types/bun": "^1.2.11", + "@types/diff": "^5.0.3", + "@types/node": "22", + }, + }, "packages/bigquery": { "name": "@codebuff/bigquery", "version": "1.0.0", @@ -365,7 +386,7 @@ "@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="], "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], @@ -567,6 
+588,8 @@ "@chevrotain/utils": ["@chevrotain/utils@11.0.3", "", {}, "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ=="], + "@codebuff/agent-runtime": ["@codebuff/agent-runtime@workspace:packages/agent-runtime"], + "@codebuff/agents": ["@codebuff/agents@workspace:.agents"], "@codebuff/backend": ["@codebuff/backend@workspace:backend"], @@ -3867,7 +3890,13 @@ "zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="], - "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="], + "@ai-sdk/anthropic/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + + "@ai-sdk/google/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + + "@ai-sdk/google-vertex/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, 
"sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + + "@ai-sdk/openai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], "@ampproject/remapping/@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.30", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q=="], @@ -3893,6 +3922,10 @@ "@babel/plugin-transform-runtime/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], + "@codebuff/agent-runtime/ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], + + "@codebuff/agent-runtime/ts-pattern": ["ts-pattern@5.3.1", "", {}, "sha512-1RUMKa8jYQdNfmnK4jyzBK3/PS/tnjcZ1CW0v1vWDeYe5RBklc/nquw03MEoB66hVBm4BnlCfmOqDVxHyT1DpA=="], + "@codebuff/backend/ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], "@codebuff/backend/ts-pattern": ["ts-pattern@5.3.1", "", {}, "sha512-1RUMKa8jYQdNfmnK4jyzBK3/PS/tnjcZ1CW0v1vWDeYe5RBklc/nquw03MEoB66hVBm4BnlCfmOqDVxHyT1DpA=="], @@ -4113,8 +4146,6 @@ "aceternity-ui/node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="], - "ai/@ai-sdk/provider-utils": 
["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="], - "autoprefixer/picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="], "babel-plugin-istanbul/istanbul-lib-instrument": ["istanbul-lib-instrument@5.2.1", "", { "dependencies": { "@babel/core": "^7.12.3", "@babel/parser": "^7.14.7", "@istanbuljs/schema": "^0.1.2", "istanbul-lib-coverage": "^3.2.0", "semver": "^6.3.0" } }, "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg=="], diff --git a/evals/git-evals/run-single-eval-process.ts b/evals/git-evals/run-single-eval-process.ts index ca3704426..5e8e291eb 100644 --- a/evals/git-evals/run-single-eval-process.ts +++ b/evals/git-evals/run-single-eval-process.ts @@ -6,7 +6,6 @@ import { } from '@codebuff/npm-app/project-files' import { recreateShell } from '@codebuff/npm-app/terminal/run-command' -import { createFileReadingMock } from '../scaffolding' import { setupTestEnvironmentVariables } from '../test-setup' import { runSingleEval } from './run-git-evals' @@ -56,7 +55,6 @@ async function main() { // Setup environment for this process setProjectRoot(projectPath) setupTestEnvironmentVariables() - createFileReadingMock(projectPath) recreateShell(projectPath) setWorkingDirectory(projectPath) diff --git a/evals/git-evals/run-single-eval.ts b/evals/git-evals/run-single-eval.ts index 5f455c908..4c8c00c49 100644 --- a/evals/git-evals/run-single-eval.ts +++ b/evals/git-evals/run-single-eval.ts @@ -10,7 +10,6 @@ import { import { recreateShell } from '@codebuff/npm-app/terminal/run-command' import { Command, Flags } from '@oclif/core' -import { createFileReadingMock } 
from '../scaffolding' import { setupTestEnvironmentVariables } from '../test-setup' import { runSingleEval } from './run-git-evals' import { extractRepoNameFromUrl, setupTestRepo } from './setup-test-repo' @@ -174,7 +173,6 @@ async function runSingleEvalTask(options: { // Setup project context setProjectRoot(projectPath) - createFileReadingMock(projectPath) recreateShell(projectPath) setWorkingDirectory(projectPath) diff --git a/evals/scaffolding.ts b/evals/scaffolding.ts index 466b20b98..b7aeeac88 100644 --- a/evals/scaffolding.ts +++ b/evals/scaffolding.ts @@ -1,18 +1,10 @@ import { execSync } from 'child_process' -import { EventEmitter } from 'events' import fs from 'fs' import path from 'path' -import { runAgentStep } from '@codebuff/backend/run-agent-step' -import { assembleLocalAgentTemplates } from '@codebuff/backend/templates/agent-registry' import { getFileTokenScores } from '@codebuff/code-map/parse' -import { TEST_USER_ID } from '@codebuff/common/constants' -import { mockModule } from '@codebuff/common/testing/mock-modules' -import { generateCompactId } from '@codebuff/common/util/string' import { handleToolCall } from '@codebuff/npm-app/tool-handlers' import { getSystemInfo } from '@codebuff/npm-app/utils/system-info' -import { mock } from 'bun:test' -import { blue } from 'picocolors' import { getAllFilePaths, @@ -23,23 +15,9 @@ import type { SDKAssistantMessage, SDKUserMessage, } from '@anthropic-ai/claude-code' -import type { - requestFiles as originalRequestFiles, - requestToolCall as originalRequestToolCall, -} from '@codebuff/backend/websockets/websocket-action' -import type { FileChanges } from '@codebuff/common/actions' import type { ClientToolCall } from '@codebuff/common/tools/list' -import type { PrintModeEvent } from '@codebuff/common/types/print-mode' -import type { - AgentState, - AgentTemplateType, - SessionState, - ToolResult, -} from '@codebuff/common/types/session-state' +import type { ToolResult } from 
'@codebuff/common/types/session-state' import type { ProjectFileContext } from '@codebuff/common/util/file' -import type { WebSocket } from 'ws' - -const DEBUG_MODE = true export type ToolResultBlockParam = Extract< SDKUserMessage['message']['content'][number], @@ -65,62 +43,6 @@ function readMockFile(projectRoot: string, filePath: string): string | null { } } -let toolCalls: ClientToolCall[] = [] -let toolResults: ToolResult[] = [] -export function createFileReadingMock(projectRoot: string) { - mockModule('@codebuff/backend/websockets/websocket-action', () => ({ - requestFiles: ((ws: WebSocket, filePaths: string[]) => { - const files: Record = {} - for (const filePath of filePaths) { - files[filePath] = readMockFile(projectRoot, filePath) - } - return Promise.resolve(files) - }) satisfies typeof originalRequestFiles, - requestToolCall: (async ( - ws: WebSocket, - userInputId: string, - toolName: string, - input: Record, - timeout: number = 30_000, - ): ReturnType => { - // Execute the tool call using existing tool handlers - const toolCall = { - toolCallId: generateCompactId(), - toolName, - input, - } - toolCalls.push(toolCall as ClientToolCall) - try { - const toolResult = await handleToolCall(toolCall as any) - toolResults.push({ - toolName: toolCall.toolName, - toolCallId: toolCall.toolCallId, - output: toolResult.output, - }) - - // Send successful response back to backend - return { - success: true, - output: toolResult.output, - } - } catch (error) { - // Send error response back to backend - const resultString = - error instanceof Error ? 
error.message : String(error) - toolResults.push({ - toolName: toolCall.toolName, - toolCallId: toolCall.toolCallId, - output: { type: 'text', value: resultString }, - }) - return { - success: false, - error: resultString, - } - } - }) satisfies typeof originalRequestToolCall, - })) -} - export async function getProjectFileContext( projectPath: string, ): Promise { @@ -158,50 +80,7 @@ export async function getProjectFileContext( } } -export async function runAgentStepScaffolding( - agentState: AgentState, - fileContext: ProjectFileContext, - prompt: string | undefined, - sessionId: string, - agentType: AgentTemplateType, -) { - const mockWs = new EventEmitter() as WebSocket - mockWs.send = mock() - mockWs.close = mock() - - let fullResponse = '' - const { agentTemplates: localAgentTemplates } = - assembleLocalAgentTemplates(fileContext) - - const result = await runAgentStep(mockWs, { - userId: TEST_USER_ID, - userInputId: generateCompactId(), - clientSessionId: sessionId, - fingerprintId: 'test-fingerprint-id', - onResponseChunk: (chunk: string | PrintModeEvent) => { - if (typeof chunk !== 'string') { - return - } - if (DEBUG_MODE) { - process.stdout.write(chunk) - } - fullResponse += chunk - }, - agentType, - fileContext, - localAgentTemplates, - agentState, - prompt, - params: undefined, - }) - - return { - ...result, - fullResponse, - } -} - -export async function runToolCalls(toolCalls: ClientToolCall[]) { +async function runToolCalls(toolCalls: ClientToolCall[]) { const toolResults: ToolResult[] = [] for (const toolCall of toolCalls) { const toolResult = await handleToolCall(toolCall) @@ -210,84 +89,6 @@ export async function runToolCalls(toolCalls: ClientToolCall[]) { return toolResults } -export async function loopMainPrompt({ - sessionState, - prompt, - projectPath, - maxIterations, - stopCondition, - agentType, -}: { - sessionState: SessionState - prompt: string - projectPath: string - maxIterations: number - stopCondition?: (sessionState: AgentState) => 
boolean - agentType: AgentTemplateType -}) { - console.log(blue(prompt)) - - const startTime = Date.now() - const sessionId = 'test-session-id-' + generateCompactId() - let currentAgentState = sessionState.mainAgentState - let iterations = 1 - const steps: AgentStep[] = [] - - for (; iterations < maxIterations; iterations++) { - console.log('\nIteration', iterations) - let { - agentState: newAgentState, - fullResponse, - shouldEndTurn, - } = await runAgentStepScaffolding( - currentAgentState, - sessionState.fileContext, - iterations === 1 ? prompt : undefined, - sessionId, - agentType, - ) - currentAgentState = newAgentState - - const stop = stopCondition && stopCondition(currentAgentState) - if (stop) break - - steps.push({ - response: fullResponse, - toolCalls, - toolResults, - }) - - toolCalls = [] - toolResults = [] - - if (shouldEndTurn) { - break - } - } - - console.log('Main loop finished!') - console.log(' - iterations', iterations) - console.log( - ' - took', - ((Date.now() - startTime) / 1000).toFixed(2), - 'seconds', - ) - - return { - agentState: currentAgentState, - iterations: iterations - 1, - steps, - duration: Date.now() - startTime, - } -} - -export function extractErrorFiles(output: string): string[] { - const lines = output.split('\n') - return lines - .filter((line) => line.includes(': error TS')) - .map((line) => line.split('(')[0].trim()) -} - export function resetRepoToCommit(projectPath: string, commit: string) { console.log(`Resetting repository at ${projectPath} to commit ${commit}...`) try { @@ -305,11 +106,7 @@ export function resetRepoToCommit(projectPath: string, commit: string) { } export default { - createFileReadingMock, getProjectFileContext, - runAgentStepScaffolding, runToolCalls, - loopMainPrompt, - extractErrorFiles, resetRepoToCommit, } diff --git a/evals/swe-bench.test.ts b/evals/swe-bench.test.ts index 600abfe1c..9ae26b423 100644 --- a/evals/swe-bench.test.ts +++ b/evals/swe-bench.test.ts @@ -4,7 +4,6 @@ import * as path 
from 'path' import { describe, expect, test } from 'bun:test' import { PROMPT_PREFIX } from './constants' -import { loopMainPrompt } from './scaffolding' import { passesSweBenchTests } from './swe-bench-eval' import { SWE_BENCH_IDS } from './swe-bench-ids' import { @@ -54,13 +53,13 @@ describe.skip('SWE-Bench', async () => { const prompt = PROMPT_PREFIX + sweBenchLiteDataset[instanceId].problem_statement - await loopMainPrompt({ - sessionState: initialSessionState, - prompt, - projectPath: repoPath, - maxIterations: 100, - agentType: 'base', - }) + // await loopMainPrompt({ + // sessionState: initialSessionState, + // prompt, + // projectPath: repoPath, + // maxIterations: 100, + // agentType: 'base', + // }) expect(await passesSweBenchTests(instanceId, repoPath)).toBeTruthy() }, { timeout: 10 * 60 * 60 * 1000 }, // 10 hours diff --git a/evals/test-setup.ts b/evals/test-setup.ts index 456b61d54..3ede03c8a 100644 --- a/evals/test-setup.ts +++ b/evals/test-setup.ts @@ -10,7 +10,6 @@ import { import { recreateShell } from '@codebuff/npm-app/terminal/run-command' import { - createFileReadingMock, getProjectFileContext, resetRepoToCommit, } from './scaffolding' @@ -155,7 +154,6 @@ export async function setupTestEnvironment(projectName: string) { const repoPath = path.join(TEST_REPOS_DIR, projectName) setProjectRoot(repoPath) - createFileReadingMock(repoPath) recreateShell(repoPath) setWorkingDirectory(repoPath) diff --git a/packages/agent-runtime/package.json b/packages/agent-runtime/package.json new file mode 100644 index 000000000..061cc1745 --- /dev/null +++ b/packages/agent-runtime/package.json @@ -0,0 +1,46 @@ +{ + "name": "@codebuff/agent-runtime", + "version": "1.0.0", + "description": "Agent runtime logic for Codebuff", + "private": true, + "license": "UNLICENSED", + "type": "module", + "exports": { + ".": { + "bun": "./src/index.ts", + "import": "./src/index.ts", + "types": "./src/index.ts", + "default": "./src/index.ts" + }, + "./*": { + "bun": "./src/*.ts", + 
"import": "./src/*.ts", + "types": "./src/*.ts", + "default": "./src/*.ts" + } + }, + "scripts": { + "typecheck": "tsc --noEmit -p .", + "test": "bun test" + }, + "sideEffects": false, + "engines": { + "bun": ">=1.2.11" + }, + "dependencies": { + "@codebuff/common": "workspace:*", + "ai": "5.0.0", + "diff": "5.2.0", + "gpt-tokenizer": "2.8.1", + "ignore": "5.3.2", + "lodash": "*", + "ts-pattern": "5.3.1", + "zod": "3.25.67", + "zod-from-json-schema": "0.4.2" + }, + "devDependencies": { + "@types/diff": "^5.0.3", + "@types/node": "22", + "@types/bun": "^1.2.11" + } +} \ No newline at end of file diff --git a/packages/agent-runtime/src/analytics/interfaces.ts b/packages/agent-runtime/src/analytics/interfaces.ts new file mode 100644 index 000000000..d7b0b1880 --- /dev/null +++ b/packages/agent-runtime/src/analytics/interfaces.ts @@ -0,0 +1,24 @@ +/** + * Analytics environment for tracking events and traces (optional) + */ +export interface AnalyticsEnvironment { + /** + * Track an analytics event + */ + trackEvent?: (event: string, userId: string, props: Record) => void + + /** + * Insert a trace record + */ + insertTrace?: (trace: any) => void +} + +/** + * Logger environment interface + */ +export interface LoggerEnvironment { + debug: (data: any, message?: string) => void + info: (data: any, message?: string) => void + warn: (data: any, message?: string) => void + error: (data: any, message?: string) => void +} diff --git a/packages/agent-runtime/src/index.ts b/packages/agent-runtime/src/index.ts new file mode 100644 index 000000000..a55d0ca80 --- /dev/null +++ b/packages/agent-runtime/src/index.ts @@ -0,0 +1,31 @@ +// Core runtime exports +export { loopAgentSteps, runAgentStep } from './runtime/loop-agent-steps' +export { runProgrammaticStep, clearAgentGeneratorCache } from './runtime/run-programmatic-step' +export { getFileReadingUpdates } from './runtime/get-file-reading-updates' +export { processStreamWithTools } from './tools/stream-parser' +export { 
executeToolCall, executeCustomToolCall } from './tools/tool-executor' + +// Interface exports +export type { LLMEnvironment } from './llm/interfaces' +export type { IOEnvironment } from './io/interfaces' +export type { InputGateEnvironment } from './io/interfaces' +export type { TemplatesEnvironment } from './templates/interfaces' +export type { AnalyticsEnvironment } from './analytics/interfaces' +export type { LoggerEnvironment } from './analytics/interfaces' +export type { AgentRuntimeEnvironment } from './runtime/interfaces' + +// Utility exports +export * from './util/messages' +export * from './util/parse-tool-call-xml' +export * from './util/simplify-tool-results' +export * from './util/token-counter' +export * from './util/object' + +// Template exports +export { getAgentTemplate, assembleLocalAgentTemplates } from './templates/agent-registry' +export { getAgentPrompt } from './templates/strings' +export * from './templates/types' + +// Types +export type { AgentOptions } from './runtime/loop-agent-steps' +export type { ExecuteToolCallParams, CustomToolCall, ToolCallError } from './tools/tool-executor' diff --git a/packages/agent-runtime/src/io/interfaces.ts b/packages/agent-runtime/src/io/interfaces.ts new file mode 100644 index 000000000..7cc9010ac --- /dev/null +++ b/packages/agent-runtime/src/io/interfaces.ts @@ -0,0 +1,78 @@ +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' +import type { ToolName } from '@codebuff/common/tools/constants' + +/** + * IO abstraction for tool calls, file requests, and streaming + * The backend implements this over WebSockets + */ +export interface IOEnvironment { + /** + * Request a tool call execution from the client + */ + requestToolCall: ( + userInputId: string, + toolName: string, + input: Record + ) => Promise<{ + success: boolean + output?: { + type: 'text' + value: string + } + error?: string + }> + + /** + * Request multiple files from the client + */ + requestFiles: (paths: string[]) => 
Promise> + + /** + * Request a single file from the client + */ + requestFile: (path: string) => Promise + + /** + * Send a response chunk to the client (optional, can be passed as callback) + */ + onResponseChunk?: (chunk: string | PrintModeEvent) => void +} + +/** + * Tool definitions and handlers environment + */ +export interface ToolsEnvironment { + /** + * Tool definitions for validation + */ + definitions: Record + + /** + * Tool handlers for execution + */ + handlers: Record +} + +/** + * Input gate for managing user input cancellation and interruption + */ +export interface InputGateEnvironment { + /** + * Start tracking a user input session + */ + start: (userId: string | undefined, userInputId: string) => void + + /** + * Check if a user input is still live (not cancelled) + */ + check: ( + userId: string | undefined, + userInputId: string, + clientSessionId: string + ) => boolean + + /** + * End tracking a user input session + */ + end: (userId: string | undefined, userInputId: string) => void +} diff --git a/packages/agent-runtime/src/llm/interfaces.ts b/packages/agent-runtime/src/llm/interfaces.ts new file mode 100644 index 000000000..a7547d46f --- /dev/null +++ b/packages/agent-runtime/src/llm/interfaces.ts @@ -0,0 +1,26 @@ +import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' +import type { AgentTemplate } from '@codebuff/common/types/agent-template' + +/** + * LLM provider abstraction interface + * The backend implements this to provide LLM services while keeping + * provider-specific logic and cost tracking out of the runtime + */ +export interface LLMEnvironment { + /** + * Get a stream from an agent template + * This wraps the existing backend logic for getting LLM responses + * while preserving cost tracking and provider selection + */ + getAgentStreamFromTemplate: (params: { + clientSessionId: string + fingerprintId: string + 
userInputId: string + userId: string | undefined + agentId?: string + template: AgentTemplate + onCostCalculated?: (credits: number) => Promise + includeCacheControl?: boolean + }) => (messages: CodebuffMessage[]) => AsyncGenerator +} diff --git a/packages/agent-runtime/src/runtime/get-file-reading-updates.ts b/packages/agent-runtime/src/runtime/get-file-reading-updates.ts new file mode 100644 index 000000000..2eeb9169d --- /dev/null +++ b/packages/agent-runtime/src/runtime/get-file-reading-updates.ts @@ -0,0 +1,196 @@ +import { HIDDEN_FILE_READ_STATUS } from '@codebuff/common/constants' +import { parseFileBlocks } from '@codebuff/common/util/file' +import { toContentString } from '@codebuff/common/util/messages' +import { countTokens } from 'gpt-tokenizer' +import { uniq, difference } from 'lodash' + +import { + isToolResult, + parseToolResults, + parseReadFilesResult, +} from '../util/parse-tool-call-xml' +import { countTokensJson } from '../util/token-counter' +import type { AgentRuntimeEnvironment } from './interfaces' + +import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message' +import type { ProjectFileContext } from '@codebuff/common/util/file' + +const getInitialFiles = (fileContext: ProjectFileContext) => { + const { userKnowledgeFiles, knowledgeFiles } = fileContext + return [ + // Include user-level knowledge files. + ...Object.entries(userKnowledgeFiles ?? {}).map(([path, content]) => ({ + path, + content, + })), + + // Include top-level project knowledge files. + ...Object.entries(knowledgeFiles) + .map(([path, content]) => ({ + path, + content, + })) + // Only keep top-level knowledge files. 
+ .filter((f) => f.path.split('/').length === 1), + ] +} + +export async function getFileReadingUpdates( + messages: CodebuffMessage[], + fileContext: ProjectFileContext, + options: { + requestedFiles?: string[] + agentStepId: string + clientSessionId: string + fingerprintId: string + userInputId: string + userId: string | undefined + repoId: string | undefined + }, + env: AgentRuntimeEnvironment, +) { + const FILE_TOKEN_BUDGET = 100_000 + + const toolResults = messages + .filter(isToolResult) + .flatMap((content) => parseToolResults(toContentString(content))) + const previousFileList = toolResults + .filter(({ toolName }) => toolName === 'read_files') + .flatMap(({ output }) => parseReadFilesResult(output.value)) + + const previousFiles = Object.fromEntries( + previousFileList.map(({ path, content }) => [path, content]), + ) + const previousFilePaths = uniq(Object.keys(previousFiles)) + + const editedFilePaths = messages + .filter(({ role }) => role === 'assistant') + .map(toContentString) + .filter((content) => content.includes(' Object.keys(parseFileBlocks(content))) + .filter((path) => path !== undefined) + + const requestedFiles = options.requestedFiles ?? [] + + const isFirstRead = previousFileList.length === 0 + const initialFiles = getInitialFiles(fileContext) + const includedInitialFiles = isFirstRead + ? 
initialFiles.map(({ path }) => path) + : [] + + const allFilePaths = uniq([ + ...includedInitialFiles, + ...requestedFiles, + ...editedFilePaths, + ...previousFilePaths, + ]) + const loadedFiles = await env.io.requestFiles(allFilePaths) + + const filteredRequestedFiles = requestedFiles.filter((filePath, i) => { + const content = loadedFiles[filePath] + if (content === null || content === undefined) return false + const tokenCount = countTokens(content) + if (i < 5) { + return tokenCount < 50_000 - i * 10_000 + } + return tokenCount < 10_000 + }) + const newFiles = difference( + [...filteredRequestedFiles, ...includedInitialFiles], + previousFilePaths, + ) + const newFilesToRead = uniq([ + // NOTE: When the assistant specifically asks for a file, we force it to be shown even if it's not new or changed. + ...(options.requestedFiles ?? []), + + ...newFiles, + ]) + + const updatedFilePaths = [...previousFilePaths, ...editedFilePaths].filter( + (path) => { + return loadedFiles[path] !== previousFiles[path] + }, + ) + + const addedFiles = uniq([ + ...includedInitialFiles, + ...updatedFilePaths, + ...newFilesToRead, + ]) + .map((path) => { + return { + path, + content: loadedFiles[path]!, + } + }) + .filter((file) => file.content !== null) + + const previousFilesTokens = countTokensJson(previousFiles) + const addedFileTokens = countTokensJson(addedFiles) + + if (previousFilesTokens + addedFileTokens > FILE_TOKEN_BUDGET) { + const requestedLoadedFiles = filteredRequestedFiles.map((path) => ({ + path, + content: loadedFiles[path]!, + })) + const newFiles = uniq([...initialFiles, ...requestedLoadedFiles]) + while (countTokensJson(newFiles) > FILE_TOKEN_BUDGET) { + newFiles.pop() + } + + const printedPaths = getPrintedPaths( + requestedFiles, + newFilesToRead, + loadedFiles, + ) + env.logger?.debug( + { + newFiles, + prevFileVersionTokens: previousFilesTokens, + addedFileTokens, + beforeTotalTokens: previousFilesTokens + addedFileTokens, + newFileVersionTokens: 
countTokensJson(newFiles), + FILE_TOKEN_BUDGET, + }, + 'resetting read files b/c of token budget', + ) + + return { + addedFiles: newFiles, + updatedFilePaths: updatedFilePaths, + printedPaths, + clearReadFileToolResults: true, + } + } + + const printedPaths = getPrintedPaths( + requestedFiles, + newFilesToRead, + loadedFiles, + ) + + return { + addedFiles, + updatedFilePaths, + printedPaths, + clearReadFileToolResults: false, + } +} + +function getPrintedPaths( + requestedFiles: string[], + newFilesToRead: string[], + loadedFiles: Record, +) { + // If no files requests, we don't want to print anything. + // Could still have files added from initial files or edited files. + if (requestedFiles.length === 0) return [] + // Otherwise, only print files that don't start with a hidden file status. + return newFilesToRead.filter( + (path) => + loadedFiles[path] && + !HIDDEN_FILE_READ_STATUS.some((status) => + loadedFiles[path]!.startsWith(status), + ), + ) +} diff --git a/packages/agent-runtime/src/runtime/interfaces.ts b/packages/agent-runtime/src/runtime/interfaces.ts new file mode 100644 index 000000000..a61d227b1 --- /dev/null +++ b/packages/agent-runtime/src/runtime/interfaces.ts @@ -0,0 +1,36 @@ +import type { LLMEnvironment } from '../llm/interfaces' +import type { IOEnvironment, InputGateEnvironment, ToolsEnvironment } from '../io/interfaces' +import type { TemplatesEnvironment } from '../templates/interfaces' +import type { AnalyticsEnvironment, LoggerEnvironment } from '../analytics/interfaces' + +/** + * Complete environment interface for the agent runtime + * The backend implements this to provide all necessary services + */ +export interface AgentRuntimeEnvironment { + /** LLM provider abstraction */ + llm: LLMEnvironment + + /** IO for tool calls, file requests, streaming */ + io: IOEnvironment + + /** Input gating for cancellation */ + inputGate: InputGateEnvironment + + /** Tool definitions and handlers */ + tools: ToolsEnvironment + + /** Template loading 
and prompt generation */ + templates: TemplatesEnvironment + + /** Analytics tracking (optional) */ + analytics?: AnalyticsEnvironment + + /** Logging (optional, defaults to console) */ + logger?: LoggerEnvironment + + /** Request context for tracing (optional) */ + requestContext?: { + processedRepoId?: string + } +} diff --git a/backend/src/run-agent-step.ts b/packages/agent-runtime/src/runtime/loop-agent-steps.ts similarity index 75% rename from backend/src/run-agent-step.ts rename to packages/agent-runtime/src/runtime/loop-agent-steps.ts index 56d779bf4..b6c4b9ce9 100644 --- a/backend/src/run-agent-step.ts +++ b/packages/agent-runtime/src/runtime/loop-agent-steps.ts @@ -1,25 +1,13 @@ -import { insertTrace } from '@codebuff/bigquery' -import { trackEvent } from '@codebuff/common/analytics' import { - ASYNC_AGENTS_ENABLED, - supportsCacheControl, -} from '@codebuff/common/constants' -import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events' -import { TOOLS_WHICH_WONT_FORCE_NEXT_STEP } from '@codebuff/common/tools/constants' + TOOLS_WHICH_WONT_FORCE_NEXT_STEP, +} from '@codebuff/common/tools/constants' import { renderToolResults } from '@codebuff/common/tools/utils' import { buildArray } from '@codebuff/common/util/array' import { generateCompactId } from '@codebuff/common/util/string' -import { asyncAgentManager } from './async-agent-manager' import { getFileReadingUpdates } from './get-file-reading-updates' -import { checkLiveUserInput } from './live-user-inputs' -import { getAgentStreamFromTemplate } from './prompt-agent-stream' import { runProgrammaticStep } from './run-programmatic-step' -import { additionalSystemPrompts } from './system-prompt/prompts' -import { getAgentTemplate } from './templates/agent-registry' -import { getAgentPrompt } from './templates/strings' -import { processStreamWithTools } from './tools/stream-parser' -import { logger } from './util/logger' +import { processStreamWithTools } from '../tools/stream-parser' import 
{ asSystemInstruction, asSystemMessage, @@ -28,13 +16,12 @@ import { expireMessages, getMessagesSubset, isSystemInstruction, -} from './util/messages' -import { isToolResult, renderReadFilesResult } from './util/parse-tool-call-xml' -import { simplifyReadFileResults } from './util/simplify-tool-results' -import { countTokensJson } from './util/token-counter' -import { getRequestContext } from './websockets/request-context' +} from '../util/messages' +import { isToolResult, renderReadFilesResult } from '../util/parse-tool-call-xml' +import { simplifyReadFileResults } from '../util/simplify-tool-results' +import { countTokensJson } from '../util/token-counter' +import type { AgentRuntimeEnvironment } from './interfaces' -import type { AgentResponseTrace } from '@codebuff/bigquery' import type { AgentTemplate } from '@codebuff/common/types/agent-template' import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message' import type { PrintModeEvent } from '@codebuff/common/types/print-mode' @@ -44,7 +31,6 @@ import type { ToolResult, } from '@codebuff/common/types/session-state' import type { ProjectFileContext } from '@codebuff/common/util/file' -import type { WebSocket } from 'ws' export interface AgentOptions { userId: string | undefined @@ -63,8 +49,8 @@ export interface AgentOptions { } export const runAgentStep = async ( - ws: WebSocket, options: AgentOptions, + env: AgentRuntimeEnvironment, ): Promise<{ agentState: AgentState fullResponse: string @@ -84,17 +70,19 @@ export const runAgentStep = async ( } = options let agentState = options.agentState + if (!agentState) { + throw new Error('agentState is required but was undefined') + } + const { agentContext } = agentState const startTime = Date.now() // Get the extracted repo ID from request context - const requestContext = getRequestContext() - const repoId = requestContext?.processedRepoId + const repoId = env.requestContext?.processedRepoId // Generates a unique ID for each main prompt 
run (ie: a step of the agent loop) - // This is used to link logs within a single agent loop const agentStepId = crypto.randomUUID() - trackEvent(AnalyticsEvent.AGENT_STEP, userId ?? '', { + env.analytics?.trackEvent?.('AGENT_STEP', userId ?? '', { agentStepId, clientSessionId, fingerprintId, @@ -110,7 +98,7 @@ export const runAgentStep = async ( let stepWarningMessage = '' if (needsStepWarning) { - logger.warn( + env.logger?.warn( `Detected too many consecutive assistant messages without user prompt`, ) @@ -138,14 +126,19 @@ export const runAgentStep = async ( } const { addedFiles, updatedFilePaths, clearReadFileToolResults } = - await getFileReadingUpdates(ws, messageHistory, fileContext, { - agentStepId, - clientSessionId, - fingerprintId, - userInputId, - userId, - repoId, - }) + await getFileReadingUpdates( + messageHistory, + fileContext, + { + agentStepId, + clientSessionId, + fingerprintId, + userInputId, + userId, + repoId, + }, + env, + ) if (clearReadFileToolResults) { // Update message history. for (const message of messageHistory) { @@ -181,35 +174,17 @@ export const runAgentStep = async ( }) } - if (ASYNC_AGENTS_ENABLED) { - // Register this agent in the async manager so it can receive messages - const isRegistered = asyncAgentManager.getAgent(agentState.agentId) - if (!isRegistered && userId) { - asyncAgentManager.registerAgent({ - agentState, - sessionId: clientSessionId, - userId, - fingerprintId, - userInputId, - ws, - fileContext, - startTime: new Date(), - status: 'running', - }) - } else { - // Update status to running for existing agents - asyncAgentManager.updateAgentState(agentState, 'running') - } - } - - const agentTemplate = await getAgentTemplate(agentType, localAgentTemplates) + const agentTemplate = await env.templates.getAgentTemplate( + agentType, + localAgentTemplates, + ) if (!agentTemplate) { throw new Error( `Agent template not found for type: ${agentType}. 
Available types: ${Object.keys(localAgentTemplates).join(', ')}`, ) } - const stepPrompt = await getAgentPrompt( + const stepPrompt = await env.templates.getAgentPrompt( agentTemplate, { type: 'stepPrompt' }, fileContext, @@ -246,7 +221,7 @@ export const runAgentStep = async ( const { model } = agentTemplate - const getStream = getAgentStreamFromTemplate({ + const getStream = env.llm.getAgentStreamFromTemplate({ clientSessionId, fingerprintId, userInputId, @@ -260,7 +235,7 @@ export const runAgentStep = async ( // This is already handled by the saveMessage function which calls updateUserCycleUsage // If that fails, the promise rejection will bubble up and halt agent execution } catch (error) { - logger.error( + env.logger?.error( { agentId: agentState.agentId, credits, error }, 'Failed to add cost to agent state', ) @@ -269,13 +244,13 @@ export const runAgentStep = async ( ) } }, - includeCacheControl: supportsCacheControl(agentTemplate.model), + includeCacheControl: true, // We'll assume cache control is supported }) const iterationNum = agentState.messageHistory.length const system = - (await getAgentPrompt( + (await env.templates.getAgentPrompt( agentTemplate, { type: 'systemPrompt' }, fileContext, @@ -288,9 +263,10 @@ export const runAgentStep = async ( const agentMessages = getMessagesSubset( agentState.messageHistory, systemTokens, + env.logger, ) - logger.debug( + env.logger?.debug( { iteration: iterationNum, agentId: agentState.agentId, @@ -321,7 +297,6 @@ export const runAgentStep = async ( fullResponseChunks, } = await processStreamWithTools({ stream, - ws, agentStepId, clientSessionId, fingerprintId, @@ -336,27 +311,30 @@ export const runAgentStep = async ( agentContext, onResponseChunk, fullResponse, + env, }) toolResults.push(...newToolResults) fullResponse = fullResponseAfterStream - const agentResponseTrace: AgentResponseTrace = { - type: 'agent-response', - created_at: new Date(), - agent_step_id: agentStepId, - user_id: userId ?? 
'', - id: crypto.randomUUID(), - payload: { - output: fullResponse, - user_input_id: userInputId, - client_session_id: clientSessionId, - fingerprint_id: fingerprintId, - }, + // Insert trace if analytics environment is available + if (env.analytics?.insertTrace) { + const agentResponseTrace = { + type: 'agent-response', + created_at: new Date(), + agent_step_id: agentStepId, + user_id: userId ?? '', + id: crypto.randomUUID(), + payload: { + output: fullResponse, + user_input_id: userInputId, + client_session_id: clientSessionId, + fingerprint_id: fingerprintId, + }, + } + env.analytics.insertTrace(agentResponseTrace) } - insertTrace(agentResponseTrace) - const newAgentContext = state.agentContext as AgentState['agentContext'] // Use the updated agent state from tool execution agentState = state.agentState as AgentState @@ -379,7 +357,7 @@ export const runAgentStep = async ( ), }, ] - logger.debug({ summary: fullResponse }, 'Compacted messages') + env.logger?.debug({ summary: fullResponse }, 'Compacted messages') } const hasNoToolResults = @@ -399,12 +377,7 @@ export const runAgentStep = async ( agentContext: newAgentContext, } - // Mark agent as completed if it should end turn - if (ASYNC_AGENTS_ENABLED && shouldEndTurn) { - asyncAgentManager.updateAgentState(agentState, 'completed') - } - - logger.debug( + env.logger?.debug( { iteration: iterationNum, agentId: agentState.agentId, @@ -429,7 +402,6 @@ export const runAgentStep = async ( } export const loopAgentSteps = async ( - ws: WebSocket, { userInputId, agentType, @@ -458,8 +430,12 @@ export const loopAgentSteps = async ( clientSessionId: string onResponseChunk: (chunk: string | PrintModeEvent) => void }, + env: AgentRuntimeEnvironment, ) => { - const agentTemplate = await getAgentTemplate(agentType, localAgentTemplates) + const agentTemplate = await env.templates.getAgentTemplate( + agentType, + localAgentTemplates, + ) if (!agentTemplate) { throw new Error(`Agent template not found for type: ${agentType}`) } 
@@ -469,7 +445,7 @@ export const loopAgentSteps = async ( // Get the instructions prompt if we have a prompt/params const instructionsPrompt = hasPrompt - ? await getAgentPrompt( + ? await env.templates.getAgentPrompt( agentTemplate, { type: 'instructionsPrompt' }, fileContext, @@ -499,15 +475,6 @@ export const loopAgentSteps = async ( ), keepDuringTruncation: true, }, - prompt && - prompt in additionalSystemPrompts && { - role: 'user' as const, - content: asSystemInstruction( - additionalSystemPrompts[ - prompt as keyof typeof additionalSystemPrompts - ], - ), - }, ], instructionsPrompt && { @@ -527,7 +494,7 @@ export const loopAgentSteps = async ( let currentParams = params try { - while (checkLiveUserInput(userId, userInputId, clientSessionId)) { + while (env.inputGate.check(userId, userInputId, clientSessionId)) { // 1. Run programmatic step first if it exists if (agentTemplate.handleSteps) { const { agentState: programmaticAgentState, endTurn } = @@ -539,12 +506,12 @@ export const loopAgentSteps = async ( onResponseChunk, agentType, fileContext, - ws, template: agentTemplate, localAgentTemplates, prompt: currentPrompt, params: currentParams, stepsComplete: shouldEndTurn, + env, }) currentAgentState = programmaticAgentState @@ -553,14 +520,6 @@ export const loopAgentSteps = async ( } } - if (ASYNC_AGENTS_ENABLED) { - const hasMessages = - asyncAgentManager.getMessages(agentState.agentId).length > 0 - if (hasMessages) { - shouldEndTurn = false - } - } - // End turn if programmatic step ended turn, or if the previous runAgentStep ended turn if (shouldEndTurn) { return { @@ -569,19 +528,22 @@ export const loopAgentSteps = async ( } const { agentState: newAgentState, shouldEndTurn: llmShouldEndTurn } = - await runAgentStep(ws, { - userId, - userInputId, - clientSessionId, - fingerprintId, - onResponseChunk, - localAgentTemplates, - agentType, - fileContext, - agentState: currentAgentState, - prompt: currentPrompt, - params: currentParams, - }) + await runAgentStep( 
+ { + userId, + userInputId, + clientSessionId, + fingerprintId, + onResponseChunk, + localAgentTemplates, + agentType, + fileContext, + agentState: currentAgentState, + prompt: currentPrompt, + params: currentParams, + }, + env, + ) currentAgentState = newAgentState shouldEndTurn = llmShouldEndTurn @@ -593,7 +555,7 @@ export const loopAgentSteps = async ( return { agentState: currentAgentState } } catch (error) { // Log the error but still return the state with partial costs - logger.error( + env.logger?.error( { error, agentId: currentAgentState.agentId, diff --git a/backend/src/run-programmatic-step.ts b/packages/agent-runtime/src/runtime/run-programmatic-step.ts similarity index 79% rename from backend/src/run-programmatic-step.ts rename to packages/agent-runtime/src/runtime/run-programmatic-step.ts index 778b96006..e9688147d 100644 --- a/backend/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/runtime/run-programmatic-step.ts @@ -1,11 +1,8 @@ import { getToolCallString } from '@codebuff/common/tools/utils' import { getErrorObject } from '@codebuff/common/util/error' -import { executeToolCall } from './tools/tool-executor' -import { logger } from './util/logger' -import { SandboxManager } from './util/quickjs-sandbox' -import { getRequestContext } from './websockets/request-context' -import { sendAction } from './websockets/websocket-action' +import { executeToolCall } from '../tools/tool-executor' +import type { AgentRuntimeEnvironment } from './interfaces' import type { CodebuffToolCall } from '@codebuff/common/tools/list' import type { @@ -20,10 +17,6 @@ import type { ToolResult, } from '@codebuff/common/types/session-state' import type { ProjectFileContext } from '@codebuff/common/util/file' -import type { WebSocket } from 'ws' - -// Global sandbox manager for QuickJS contexts -const sandboxManager = new SandboxManager() // Maintains generator state for all agents. Generator state can't be serialized, so we store it in memory. 
const agentIdToGenerator: Record = {} @@ -35,8 +28,6 @@ export function clearAgentGeneratorCache() { delete agentIdToGenerator[key] } agentIdToStepAll.clear() - // Clean up QuickJS sandboxes - sandboxManager.dispose() } // Function to handle programmatic agents @@ -53,9 +44,9 @@ export async function runProgrammaticStep( onResponseChunk, agentType, fileContext, - ws, localAgentTemplates, stepsComplete, + env, }: { template: AgentTemplate prompt: string | undefined @@ -67,33 +58,21 @@ export async function runProgrammaticStep( onResponseChunk: (chunk: string | PrintModeEvent) => void agentType: AgentTemplateType fileContext: ProjectFileContext - ws: WebSocket localAgentTemplates: Record stepsComplete: boolean + env: AgentRuntimeEnvironment }, ): Promise<{ agentState: AgentState; endTurn: boolean }> { if (!template.handleSteps) { throw new Error('No step handler found for agent template ' + template.id) } - // Run with either a generator or a sandbox. + // Run with a generator (QuickJS sandbox is handled by the backend environment) let generator = agentIdToGenerator[agentState.agentId] - let sandbox = sandboxManager.getSandbox(agentState.agentId) - // Check if we need to initialize a generator (either native or QuickJS-based) - if (!generator && !sandbox) { - if (typeof template.handleSteps === 'string') { - // Initialize QuickJS sandbox for string-based generator - sandbox = await sandboxManager.getOrCreateSandbox( - agentState.agentId, - template.handleSteps, - { - agentState, - prompt, - params, - }, - ) - } else { + // Check if we need to initialize a generator + if (!generator) { + if (typeof template.handleSteps === 'function') { // Initialize native generator generator = template.handleSteps({ agentState, @@ -101,6 +80,10 @@ export async function runProgrammaticStep( params, }) agentIdToGenerator[agentState.agentId] = generator + } else { + throw new Error( + 'String-based handleSteps should be handled by backend environment', + ) } } @@ -116,17 +99,13 @@ 
export async function runProgrammaticStep( const agentStepId = crypto.randomUUID() - const requestContext = getRequestContext() - const repoId = requestContext?.processedRepoId - // Initialize state for tool execution const toolCalls: CodebuffToolCall[] = [] const toolResults: ToolResult[] = [] const state = { - ws, fingerprintId, userId, - repoId, + repoId: env.requestContext?.processedRepoId, agentTemplate: template, localAgentTemplates, sendSubagentChunk: (data: { @@ -136,10 +115,13 @@ export async function runProgrammaticStep( chunk: string prompt?: string }) => { - sendAction(ws, { - type: 'subagent-response-chunk', - ...data, - }) + // Send subagent chunk through IO environment + if (env.io.onResponseChunk) { + env.io.onResponseChunk({ + type: 'text', + text: data.chunk, + } as PrintModeEvent) + } }, agentState: { ...agentState }, agentContext: agentState.agentContext, @@ -152,17 +134,11 @@ export async function runProgrammaticStep( try { // Execute tools synchronously as the generator yields them do { - const result = sandbox - ? await sandbox.executeStep({ - agentState: getPublicAgentState(state.agentState), - toolResult, - stepsComplete, - }) - : generator!.next({ - agentState: getPublicAgentState(state.agentState), - toolResult, - stepsComplete, - }) + const result = generator!.next({ + agentState: getPublicAgentState(state.agentState), + toolResult, + stepsComplete, + }) if (result.done) { endTurn = true @@ -215,7 +191,6 @@ export async function runProgrammaticStep( toolCalls, toolResults, previousToolCallFinished: Promise.resolve(), - ws, agentTemplate: template, fileContext, agentStepId, @@ -226,6 +201,7 @@ export async function runProgrammaticStep( state, userId, autoInsertEndStepParam: true, + env, }) // TODO: Remove messages from state and always use agentState.messageHistory. @@ -248,7 +224,7 @@ export async function runProgrammaticStep( const errorMessage = `Error executing handleSteps for agent ${template.id}: ${ error instanceof Error ? 
error.message : 'Unknown error' }` - logger.error( + env.logger?.error( { error: getErrorObject(error), template: template.id }, errorMessage, ) @@ -273,10 +249,6 @@ export async function runProgrammaticStep( } } finally { if (endTurn) { - if (sandbox) { - // Clean up QuickJS sandbox if execution is complete - sandboxManager.removeSandbox(agentState.agentId) - } delete agentIdToGenerator[agentState.agentId] agentIdToStepAll.delete(agentState.agentId) } diff --git a/packages/agent-runtime/src/templates/agent-registry.ts b/packages/agent-runtime/src/templates/agent-registry.ts new file mode 100644 index 000000000..cb47ea58f --- /dev/null +++ b/packages/agent-runtime/src/templates/agent-registry.ts @@ -0,0 +1,39 @@ +import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { ProjectFileContext } from '@codebuff/common/util/file' +import { validateAgents } from '@codebuff/common/templates/agent-validation' +import type { DynamicAgentValidationError } from '@codebuff/common/templates/agent-validation' + +// Note: Database lookup is handled by the backend's TemplatesEnvironment +// This package focuses on local agent template assembly + +export type AgentRegistry = Record + +/** + * Assemble local agent templates from fileContext + static templates + * This is a pure function that doesn't access external services + */ +export function assembleLocalAgentTemplates(fileContext: ProjectFileContext): { + agentTemplates: Record + validationErrors: DynamicAgentValidationError[] +} { + // Load dynamic agents using the service + const { templates: dynamicTemplates, validationErrors } = validateAgents( + fileContext.agentTemplates || {}, + ) + + // Use dynamic templates only + const agentTemplates = { ...dynamicTemplates } + return { agentTemplates, validationErrors } +} + +/** + * Get an agent template - this is a simplified version that delegates to environment + * The actual implementation with database access is in the backend's 
TemplatesEnvironment + */ +export async function getAgentTemplate( + agentId: string, + localAgentTemplates: Record, +): Promise { + // Simple local lookup - the environment handles database queries + return localAgentTemplates[agentId] || null +} diff --git a/packages/agent-runtime/src/templates/interfaces.ts b/packages/agent-runtime/src/templates/interfaces.ts new file mode 100644 index 000000000..24eb7d489 --- /dev/null +++ b/packages/agent-runtime/src/templates/interfaces.ts @@ -0,0 +1,27 @@ +import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { AgentTemplateType, AgentState } from '@codebuff/common/types/session-state' +import type { ProjectFileContext } from '@codebuff/common/util/file' + +/** + * Templates environment for agent template loading and prompt generation + */ +export interface TemplatesEnvironment { + /** + * Get an agent template by type + */ + getAgentTemplate: ( + agentType: AgentTemplateType, + localTemplates: Record + ) => Promise + + /** + * Get an agent prompt for a specific type + */ + getAgentPrompt: ( + template: AgentTemplate, + promptType: { type: 'systemPrompt' | 'instructionsPrompt' | 'stepPrompt' }, + fileContext: ProjectFileContext, + agentState: AgentState, + localTemplates: Record + ) => Promise +} diff --git a/packages/agent-runtime/src/templates/strings.ts b/packages/agent-runtime/src/templates/strings.ts new file mode 100644 index 000000000..8435e13b3 --- /dev/null +++ b/packages/agent-runtime/src/templates/strings.ts @@ -0,0 +1,21 @@ +import type { AgentTemplate } from './types' +import type { + AgentState, + AgentTemplateType, +} from '@codebuff/common/types/session-state' +import type { ProjectFileContext } from '@codebuff/common/util/file' + +// Note: This is a simplified version for the agent-runtime package +// The full implementation with all placeholder substitutions is in the backend's TemplatesEnvironment + +export async function getAgentPrompt( + agentTemplate: AgentTemplate, + 
promptType: { type: T }, + fileContext: ProjectFileContext, + agentState: AgentState, + agentTemplates: Record, +): Promise { + // Simple implementation - just return the prompt value + // The backend's TemplatesEnvironment handles full placeholder substitution + return agentTemplate[promptType.type] +} diff --git a/packages/agent-runtime/src/templates/types.ts b/packages/agent-runtime/src/templates/types.ts new file mode 100644 index 000000000..386e7aa41 --- /dev/null +++ b/packages/agent-runtime/src/templates/types.ts @@ -0,0 +1,61 @@ +import { AgentTemplateTypes } from '@codebuff/common/types/session-state' + +import type { ToolName } from '@codebuff/common/tools/constants' +import type { + AgentTemplate, + StepGenerator, + StepHandler, +} from '@codebuff/common/types/agent-template' +import type { AgentTemplateType } from '@codebuff/common/types/session-state' + +// Re-export for backward compatibility +export type { AgentTemplate, StepGenerator, StepHandler } + +const placeholderNames = [ + 'AGENT_NAME', + 'AGENTS_PROMPT', + 'CONFIG_SCHEMA', + 'FILE_TREE_PROMPT', + 'GIT_CHANGES_PROMPT', + 'INITIAL_AGENT_PROMPT', + 'KNOWLEDGE_FILES_CONTENTS', + 'PROJECT_ROOT', + 'REMAINING_STEPS', + 'SYSTEM_INFO_PROMPT', + 'TOOLS_PROMPT', + 'USER_CWD', + 'USER_INPUT_PROMPT', +] as const + +type PlaceholderType = { + [K in T[number]]: `{CODEBUFF_${K}}` +} + +export const PLACEHOLDER = Object.fromEntries( + placeholderNames.map((name) => [name, `{CODEBUFF_${name}}` as const]), +) as PlaceholderType +export type PlaceholderValue = (typeof PLACEHOLDER)[keyof typeof PLACEHOLDER] + +export const placeholderValues = Object.values(PLACEHOLDER) + +export const baseAgentToolNames: ToolName[] = [ + 'create_plan', + 'run_terminal_command', + 'str_replace', + 'write_file', + 'spawn_agents', + 'add_subgoal', + 'browser_logs', + 'code_search', + 'end_turn', + 'read_files', + 'think_deeply', + 'update_subgoal', +] as const + +export const baseAgentSubagents: AgentTemplateType[] = [ + 
AgentTemplateTypes.file_picker, + AgentTemplateTypes.researcher, + AgentTemplateTypes.thinker, + AgentTemplateTypes.reviewer, +] as const diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts new file mode 100644 index 000000000..36786bed7 --- /dev/null +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -0,0 +1,237 @@ +import { toolNames } from '@codebuff/common/tools/constants' +import { buildArray } from '@codebuff/common/util/array' +import { generateCompactId } from '@codebuff/common/util/string' + +import { expireMessages } from '../util/messages' +import { executeCustomToolCall, executeToolCall } from './tool-executor' +import type { AgentRuntimeEnvironment } from '../runtime/interfaces' + +import type { CustomToolCall } from './tool-executor' +import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { ToolName } from '@codebuff/common/tools/constants' +import type { CodebuffToolCall } from '@codebuff/common/tools/list' +import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' +import type { + AgentState, + Subgoal, + ToolResult, +} from '@codebuff/common/types/session-state' +import type { ProjectFileContext } from '@codebuff/common/util/file' +import type { ToolCallPart } from 'ai' + +export type ToolCallError = { + toolName?: string + args: Record + error: string +} & Omit + +// Note: This is a simplified version that assumes we have access to XML stream processing +// The full implementation would need access to the xml-stream-parser from the backend +export async function processStreamWithTools(options: { + stream: AsyncGenerator | ReadableStream + agentStepId: string + clientSessionId: string + fingerprintId: string + userInputId: string + userId: string | undefined + repoId: string | undefined + agentTemplate: AgentTemplate + localAgentTemplates: 
Record + fileContext: ProjectFileContext + messages: CodebuffMessage[] + agentState: AgentState + agentContext: Record + onResponseChunk: (chunk: string | PrintModeEvent) => void + fullResponse: string + env: AgentRuntimeEnvironment +}) { + const { + stream, + agentStepId, + clientSessionId, + fingerprintId, + userInputId, + userId, + repoId, + agentTemplate, + localAgentTemplates, + fileContext, + agentContext, + agentState, + onResponseChunk, + env, + } = options + const fullResponseChunks: string[] = [options.fullResponse] + + const messages = [...options.messages] + + const toolResults: ToolResult[] = [] + const toolCalls: (CodebuffToolCall | CustomToolCall)[] = [] + const { promise: streamDonePromise, resolve: resolveStreamDonePromise } = + Promise.withResolvers() + let previousToolCallFinished = streamDonePromise + const state: Record = { + fingerprintId, + userId, + repoId, + agentTemplate, + localAgentTemplates, + sendSubagentChunk: (data: { + userInputId: string + agentId: string + agentType: string + chunk: string + prompt?: string + }) => { + // Send subagent chunk through IO environment + if (env.io.onResponseChunk) { + env.io.onResponseChunk({ + type: 'text', + text: data.chunk, + } as PrintModeEvent) + } + }, + + agentState, + agentContext, + messages, + } + + function toolCallback(toolName: T) { + return { + onTagStart: () => {}, + onTagEnd: async (_: string, input: Record) => { + // delegated to reusable helper + previousToolCallFinished = executeToolCall({ + toolName, + input, + toolCalls, + toolResults, + previousToolCallFinished, + agentTemplate, + fileContext, + agentStepId, + clientSessionId, + userInputId, + fullResponse: fullResponseChunks.join(''), + onResponseChunk, + state, + userId, + env, + }) + }, + } + } + function customToolCallback(toolName: string) { + return { + onTagStart: () => {}, + onTagEnd: async (_: string, input: Record) => { + // delegated to reusable helper + previousToolCallFinished = executeCustomToolCall({ + toolName, + 
input, + toolCalls, + toolResults, + previousToolCallFinished, + agentTemplate, + fileContext, + agentStepId, + clientSessionId, + userInputId, + fullResponse: fullResponseChunks.join(''), + onResponseChunk, + state, + userId, + env, + }) + }, + } + } + + // Note: This is a simplified version without the actual XML stream processing + // The backend would need to provide this functionality through the environment + // For now, we'll just process the stream as text + const streamWithTags = processStreamAsText( + stream, + Object.fromEntries([ + ...toolNames.map((toolName) => [toolName, toolCallback(toolName)]), + ...Object.keys(fileContext.customToolDefinitions).map((toolName) => [ + toolName, + customToolCallback(toolName), + ]), + ]), + (toolName, error) => { + toolResults.push({ + toolName, + toolCallId: generateCompactId(), + output: { type: 'text', value: error }, + }) + }, + onResponseChunk, + { + userId, + model: agentTemplate.model, + agentName: agentTemplate.id, + }, + ) + + for await (const chunk of streamWithTags) { + onResponseChunk(chunk) + fullResponseChunks.push(chunk) + } + + state.messages = buildArray([ + ...expireMessages(state.messages, 'agentStep'), + fullResponseChunks.length > 0 && { + role: 'assistant' as const, + content: fullResponseChunks.join(''), + }, + ]) + + resolveStreamDonePromise() + await previousToolCallFinished + + return { + toolCalls, + toolResults, + state, + fullResponse: fullResponseChunks.join(''), + fullResponseChunks, + } +} + +// Simplified stream processing - in reality this would need the backend's XML processor +async function* processStreamAsText( + stream: AsyncGenerator | ReadableStream, + toolCallbacks: Record, + onToolError: (toolName: string, error: string) => void, + onResponseChunk: (chunk: string | PrintModeEvent) => void, + context: { + userId: string | undefined + model: string | string[] + agentName: string + }, +): AsyncGenerator { + // This is a placeholder implementation + // The real implementation 
would parse XML tags and call the appropriate tool callbacks + + if (Symbol.asyncIterator in stream) { + for await (const chunk of stream as AsyncGenerator) { + if (typeof chunk === 'string') { + yield chunk + } + } + } else { + const reader = (stream as ReadableStream).getReader() + try { + while (true) { + const { done, value } = await reader.read() + if (done) break + yield value + } + } finally { + reader.releaseLock() + } + } +} diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts new file mode 100644 index 000000000..73fd356c4 --- /dev/null +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -0,0 +1,527 @@ +import { endsAgentStepParam } from '@codebuff/common/tools/constants' +import { renderToolResults } from '@codebuff/common/tools/utils' +import { generateCompactId } from '@codebuff/common/util/string' +import z from 'zod/v4' +import { convertJsonSchemaToZod } from 'zod-from-json-schema' + +import { asSystemMessage } from '../util/messages' +import type { AgentRuntimeEnvironment } from '../runtime/interfaces' + +import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { ToolName } from '@codebuff/common/tools/constants' +import type { + ClientToolCall, + ClientToolName, + CodebuffToolCall, +} from '@codebuff/common/tools/list' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' +import type { ToolResult } from '@codebuff/common/types/session-state' +import type { + customToolDefinitionsSchema, + ProjectFileContext, +} from '@codebuff/common/util/file' +import type { ToolCallPart } from 'ai' + +// Tool definitions and handlers are injected through the environment +// The backend will provide these through the runtime environment + +export type CustomToolCall = { + toolName: string + input: Record +} & Omit + +export type ToolCallError = { + toolName?: string + input: Record + error: string +} & Pick + +export function parseRawToolCall( + 
rawToolCall: { + toolName: T + toolCallId: string + input: Record + }, + toolDefs: Record, + autoInsertEndStepParam: boolean = false, +): CodebuffToolCall | ToolCallError { + const toolName = rawToolCall.toolName + + if (!(toolName in toolDefs)) { + return { + toolName, + toolCallId: rawToolCall.toolCallId, + input: rawToolCall.input, + error: `Tool ${toolName} not found`, + } + } + const validName = toolName as T + + const processedParameters: Record = {} + for (const [param, val] of Object.entries(rawToolCall.input ?? {})) { + processedParameters[param] = val + } + + // Add the required codebuff_end_step parameter with the correct value for this tool if requested + if (autoInsertEndStepParam) { + processedParameters[endsAgentStepParam] = + toolDefs[validName].endsAgentStep + } + + const paramsSchema = toolDefs[validName].endsAgentStep + ? ( + toolDefs[validName] + .parameters satisfies z.ZodObject as z.ZodObject + ).extend({ + [endsAgentStepParam]: z.literal( + toolDefs[validName].endsAgentStep, + ), + }) + : toolDefs[validName].parameters + const result = paramsSchema.safeParse(processedParameters) + + if (!result.success) { + return { + toolName: validName, + toolCallId: rawToolCall.toolCallId, + input: rawToolCall.input, + error: `Invalid parameters for ${validName}: ${JSON.stringify( + result.error.issues, + null, + 2, + )}`, + } + } + + if (endsAgentStepParam in result.data) { + delete result.data[endsAgentStepParam] + } + + return { + toolName: validName, + input: result.data, + toolCallId: rawToolCall.toolCallId, + } as CodebuffToolCall +} + +export interface ExecuteToolCallParams { + toolName: T + input: Record + toolCalls: (CodebuffToolCall | CustomToolCall)[] + toolResults: ToolResult[] + previousToolCallFinished: Promise + agentTemplate: AgentTemplate + fileContext: ProjectFileContext + agentStepId: string + clientSessionId: string + userInputId: string + fullResponse: string + onResponseChunk: (chunk: string | PrintModeEvent) => void + state: Record + 
userId: string | undefined + autoInsertEndStepParam?: boolean + env: AgentRuntimeEnvironment +} + +export function executeToolCall({ + toolName, + input, + toolCalls, + toolResults, + previousToolCallFinished, + agentTemplate, + fileContext, + agentStepId, + clientSessionId, + userInputId, + fullResponse, + onResponseChunk, + state, + userId, + autoInsertEndStepParam = false, + env, +}: ExecuteToolCallParams): Promise { + const toolCall: CodebuffToolCall | ToolCallError = parseRawToolCall( + { + toolName, + toolCallId: generateCompactId(), + input, + }, + env.tools.definitions, + autoInsertEndStepParam, + ) + if ('error' in toolCall) { + toolResults.push({ + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text', + value: toolCall.error, + }, + }) + env.logger?.debug( + { toolCall, error: toolCall.error }, + `${toolName} error: ${toolCall.error}`, + ) + return previousToolCallFinished + } + + onResponseChunk({ + type: 'tool_call', + toolCallId: toolCall.toolCallId, + toolName, + input: toolCall.input, + }) + + toolCalls.push(toolCall) + + // Filter out restricted tools in ask mode unless exporting summary + if (!agentTemplate.toolNames.includes(toolCall.toolName)) { + toolResults.push({ + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text', + value: `Tool \`${toolName}\` is not currently available. 
Make sure to only use tools listed in the system instructions.`, + }, + }) + return previousToolCallFinished + } + + // Check if user input is still live + if (!env.inputGate.check(userId, userInputId, clientSessionId)) { + toolResults.push({ + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text', + value: 'User input cancelled', + }, + }) + return previousToolCallFinished + } + + // Check if this is a server-side tool that should be handled directly + const serverSideHandler = env.tools.handlers[toolCall.toolName] + if (serverSideHandler) { + return previousToolCallFinished.then(async () => { + try { + const handlerResult = serverSideHandler({ + previousToolCallFinished: Promise.resolve(), + toolCall, + fileContext, + state, + clientSessionId, + userInputId, + }) + + // Handle the result which may be a direct value or an object with result and state + let resultValue: string + + if (handlerResult && typeof handlerResult === 'object' && 'result' in handlerResult) { + // Handler returned { result: Promise, state: {...} } + resultValue = await handlerResult.result + if (handlerResult.state) { + // Merge the returned state into our current state + // Special handling for agentState to ensure proper reference updates + Object.assign(state, handlerResult.state) + } + } else { + // Handler returned a direct value or Promise + const result = await handlerResult + resultValue = typeof result === 'string' ? 
result : (result?.value || 'Success') + } + + const toolResult = { + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text' as const, + value: resultValue, + }, + } + + env.logger?.debug( + { input, toolResult }, + `${toolName} server-side tool call & result (${toolResult.toolCallId})`, + ) + + onResponseChunk({ + type: 'tool_result', + toolCallId: toolResult.toolCallId, + output: toolResult.output, + }) + + toolResults.push(toolResult) + + state.messages.push({ + role: 'user' as const, + content: asSystemMessage(renderToolResults([toolResult])), + }) + } catch (error) { + const errorMessage = `Server-side tool execution failed: ${error instanceof Error ? error.message : 'Unknown error'}` + const toolResult = { + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text' as const, + value: errorMessage, + }, + } + + env.logger?.error( + { input, error, toolResult }, + `${toolName} server-side tool execution error`, + ) + + onResponseChunk({ + type: 'tool_result', + toolCallId: toolResult.toolCallId, + output: toolResult.output, + }) + + toolResults.push(toolResult) + + state.messages.push({ + role: 'user' as const, + content: asSystemMessage(renderToolResults([toolResult])), + }) + } + }) + } + + // For client tools, request execution from client + return previousToolCallFinished.then(async () => { + const clientToolResult = await env.io.requestToolCall( + userInputId, + toolCall.toolName, + toolCall.input, + ) + + const result = clientToolResult.error ?? + (clientToolResult.output?.type === 'text' + ? 
clientToolResult.output.value + : 'undefined') + + const toolResult = { + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text' as const, + value: result as string, + }, + } + + env.logger?.debug( + { input, toolResult }, + `${toolName} client tool call & result (${toolResult.toolCallId})`, + ) + + if (result === undefined) { + return + } + + onResponseChunk({ + type: 'tool_result', + toolCallId: toolResult.toolCallId, + output: toolResult.output, + }) + + toolResults.push(toolResult) + + state.messages.push({ + role: 'user' as const, + content: asSystemMessage(renderToolResults([toolResult])), + }) + }) +} + +export function parseRawCustomToolCall( + customToolDefs: z.infer, + rawToolCall: { + toolName: string + toolCallId: string + input: Record + }, + autoInsertEndStepParam: boolean = false, +): CustomToolCall | ToolCallError { + const toolName = rawToolCall.toolName + + if (!(toolName in customToolDefs)) { + return { + toolName, + toolCallId: rawToolCall.toolCallId, + input: rawToolCall.input, + error: `Tool ${toolName} not found`, + } + } + + const processedParameters: Record = {} + for (const [param, val] of Object.entries(rawToolCall.input ?? 
{})) { + processedParameters[param] = val + } + + // Add the required codebuff_end_step parameter with the correct value for this tool if requested + if (autoInsertEndStepParam) { + processedParameters[endsAgentStepParam] = + customToolDefs[toolName].endsAgentStep + } + + const jsonSchema = JSON.parse( + JSON.stringify(customToolDefs[toolName].inputJsonSchema), + ) + if (customToolDefs[toolName].endsAgentStep) { + if (!jsonSchema.properties) { + jsonSchema.properties = {} + } + jsonSchema.properties[endsAgentStepParam] = { + const: true, + type: 'boolean', + description: 'Easp flag must be set to true', + } + if (!jsonSchema.required) { + jsonSchema.required = [] + } + jsonSchema.required.push(endsAgentStepParam) + } + const paramsSchema = convertJsonSchemaToZod(jsonSchema) + const result = paramsSchema.safeParse( + processedParameters, + ) as z.ZodSafeParseResult + + if (!result.success) { + return { + toolName: toolName, + toolCallId: rawToolCall.toolCallId, + input: rawToolCall.input, + error: `Invalid parameters for ${toolName}: ${JSON.stringify( + result.error.issues, + null, + 2, + )}`, + } + } + + const input = JSON.parse(JSON.stringify(rawToolCall.input)) + if (endsAgentStepParam in input) { + delete input[endsAgentStepParam] + } + return { + toolName: toolName, + input, + toolCallId: rawToolCall.toolCallId, + } +} + +export function executeCustomToolCall({ + toolName, + input, + toolCalls, + toolResults, + previousToolCallFinished, + agentTemplate, + fileContext, + clientSessionId, + userInputId, + onResponseChunk, + state, + userId, + autoInsertEndStepParam = false, + env, +}: ExecuteToolCallParams): Promise { + const toolCall: CustomToolCall | ToolCallError = parseRawCustomToolCall( + fileContext.customToolDefinitions, + { + toolName, + toolCallId: generateCompactId(), + input, + }, + autoInsertEndStepParam, + ) + if ('error' in toolCall) { + toolResults.push({ + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text', + value: 
toolCall.error, + }, + }) + env.logger?.debug( + { toolCall, error: toolCall.error }, + `${toolName} error: ${toolCall.error}`, + ) + return previousToolCallFinished + } + + onResponseChunk({ + type: 'tool_call', + toolCallId: toolCall.toolCallId, + toolName, + input: toolCall.input, + }) + + toolCalls.push(toolCall) + + // Filter out restricted tools in ask mode unless exporting summary + if (!(agentTemplate.toolNames as string[]).includes(toolCall.toolName)) { + toolResults.push({ + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text', + value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`, + }, + }) + return previousToolCallFinished + } + + return previousToolCallFinished + .then(async () => { + if (!env.inputGate.check(userId, userInputId, clientSessionId)) { + return '' + } + + const clientToolResult = await env.io.requestToolCall( + userInputId, + toolCall.toolName, + toolCall.input, + ) + return ( + clientToolResult.error ?? + (clientToolResult.output?.type === 'text' + ? 
clientToolResult.output.value + : 'undefined') + ) + }) + .then((result) => { + const toolResult = { + toolName, + toolCallId: toolCall.toolCallId, + output: { + type: 'text' as const, + value: result as string, + }, + } + env.logger?.debug( + { input, toolResult }, + `${toolName} custom tool call & result (${toolResult.toolCallId})`, + ) + if (result === undefined) { + return + } + + onResponseChunk({ + type: 'tool_result', + toolCallId: toolResult.toolCallId, + output: toolResult.output, + }) + + toolResults.push(toolResult) + + state.messages.push({ + role: 'user' as const, + content: asSystemMessage(renderToolResults([toolResult])), + }) + }) +} diff --git a/packages/agent-runtime/src/util/messages.ts b/packages/agent-runtime/src/util/messages.ts new file mode 100644 index 000000000..c946df706 --- /dev/null +++ b/packages/agent-runtime/src/util/messages.ts @@ -0,0 +1,301 @@ +import { AssertionError } from 'assert' + +import { buildArray } from '@codebuff/common/util/array' +import { closeXml } from '@codebuff/common/util/xml' + +import { simplifyTerminalCommandResults } from './simplify-tool-results' +import { countTokensJson } from './token-counter' + +import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message' +import type { LoggerEnvironment } from '../analytics/interfaces' + +// Default console logger +const defaultLogger: LoggerEnvironment = { + debug: (data: any, message?: string) => console.debug(message || '', data), + info: (data: any, message?: string) => console.info(message || '', data), + warn: (data: any, message?: string) => console.warn(message || '', data), + error: (data: any, message?: string) => console.error(message || '', data), +} + +export type System = string | Array<{ text: string }> + +export function messagesWithSystem( + messages: CodebuffMessage[], + system: System, +): CodebuffMessage[] { + return [ + { + role: 'system', + content: + typeof system === 'string' + ? 
system + : system.map((part) => part.text).join('\n\n'), + }, + ...messages, + ] +} + +export function asUserMessage(str: string): string { + return `${str}${closeXml('user_message')}` +} +export function parseUserMessage(str: string): string | undefined { + const match = str.match(/(.*?)<\/user_message>/s) + return match ? match[1] : undefined +} + +export function asSystemInstruction(str: string): string { + return `${str}${closeXml('system_instructions')}` +} + +export function asSystemMessage(str: string): string { + return `${str}${closeXml('system')}` +} + +export function isSystemInstruction(str: string): boolean { + return ( + str.startsWith('') && + str.endsWith(closeXml('system_instructions')) + ) +} + +export function isSystemMessage(str: string): boolean { + return str.startsWith('') && str.endsWith(closeXml('system')) +} + +export function castAssistantMessage( + message: CodebuffMessage, +): CodebuffMessage | null { + if (message.role !== 'assistant') { + return message + } + if (typeof message.content === 'string') { + return { + content: `${message.content}${closeXml('previous_assistant_message')}`, + role: 'user' as const, + } + } + const content = buildArray( + message.content.map((m) => { + if (m.type === 'text') { + return { + ...m, + text: `${m.text}${closeXml('previous_assistant_message')}`, + } + } + return null + }), + ) + return content + ? 
{ + role: 'user' as const, + content, + } + : null +} + +// Number of terminal command outputs to keep in full form before simplifying +const numTerminalCommandsToKeep = 5 + +/** + * Helper function to simplify terminal command output while preserving some recent ones + * @param text - Terminal output text to potentially simplify + * @param numKept - Number of terminal outputs already kept in full form + * @returns Object containing simplified result and updated count of kept outputs + */ +function simplifyTerminalHelper( + text: string, + numKept: number, +): { result: string; numKept: number } { + const simplifiedText = simplifyTerminalCommandResults(text) + + // Keep the full output for the N most recent commands + if (numKept < numTerminalCommandsToKeep && simplifiedText !== text) { + return { result: text, numKept: numKept + 1 } + } + + return { + result: simplifiedText, + numKept, + } +} + +// Factor to reduce token count target by, to leave room for new messages +const shortenedMessageTokenFactor = 0.5 +const replacementMessage = { + role: 'user', + content: asSystemMessage('Previous message(s) omitted due to length'), +} satisfies CodebuffMessage + +/** + * Trims messages from the beginning to fit within token limits while preserving + * important content. Also simplifies terminal command outputs to save tokens. + * + * The function: + * 1. Processes messages from newest to oldest + * 2. Simplifies terminal command outputs after keeping N most recent ones + * 3. 
Stops adding messages when approaching token limit + * + * @param messages - Array of messages to trim + * @param systemTokens - Number of tokens used by system prompt + * @param maxTotalTokens - Maximum total tokens allowed, defaults to 200k + * @returns Trimmed array of messages that fits within token limit + */ +export function trimMessagesToFitTokenLimit( + messages: CodebuffMessage[], + systemTokens: number, + maxTotalTokens: number = 190_000, +): CodebuffMessage[] { + const maxMessageTokens = maxTotalTokens - systemTokens + + // Check if we're already under the limit + const initialTokens = countTokensJson(messages) + + if (initialTokens < maxMessageTokens) { + return messages + } + + const shortenedMessages: CodebuffMessage[] = [] + let numKept = 0 + + // Process messages from newest to oldest + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] + let message: CodebuffMessage + if (m.role === 'tool' || m.role === 'system') { + message = messages[i] + } else if (m.role === 'user') { + let newContent: typeof m.content + + // Handle string content (usually terminal output) + if (typeof m.content === 'string') { + const result = simplifyTerminalHelper(m.content, numKept) + message = { role: m.role, content: result.result } + numKept = result.numKept + } else { + // Handle array content (mixed content types) + newContent = [] + // Process content parts from newest to oldest + for (let j = m.content.length - 1; j >= 0; j--) { + const messagePart = m.content[j] + // Preserve non-text content (i.e. 
images) + if (messagePart.type !== 'text') { + newContent.push(messagePart) + continue + } + + const result = simplifyTerminalHelper(messagePart.text, numKept) + newContent.push({ ...messagePart, text: result.result }) + numKept = result.numKept + } + newContent.reverse() + message = { ...m, content: newContent } + } + } else if (m.role === 'assistant') { + let newContent: typeof m.content + + // Handle string content (usually terminal output) + if (typeof m.content === 'string') { + const result = simplifyTerminalHelper(m.content, numKept) + message = { role: m.role, content: result.result } + numKept = result.numKept + } else { + // Handle array content (mixed content types) + newContent = [] + // Process content parts from newest to oldest + for (let j = m.content.length - 1; j >= 0; j--) { + const messagePart = m.content[j] + // Preserve non-text content (i.e. images) + if (messagePart.type !== 'text') { + newContent.push(messagePart) + continue + } + + const result = simplifyTerminalHelper(messagePart.text, numKept) + newContent.push({ ...messagePart, text: result.result }) + numKept = result.numKept + } + newContent.reverse() + message = { ...m, content: newContent } + } + } else { + m satisfies never + throw new AssertionError({ message: 'Not a valid role' }) + } + + shortenedMessages.push(message) + } + shortenedMessages.reverse() + + const requiredTokens = countTokensJson( + shortenedMessages.filter((m) => m.keepDuringTruncation), + ) + let removedTokens = 0 + const tokensToRemove = + (maxMessageTokens - requiredTokens) * (1 - shortenedMessageTokenFactor) + + const placeholder = 'deleted' + const filteredMessages: (CodebuffMessage | typeof placeholder)[] = [] + for (const message of shortenedMessages) { + if (removedTokens >= tokensToRemove || message.keepDuringTruncation) { + filteredMessages.push(message) + continue + } + removedTokens += countTokensJson(message) + if ( + filteredMessages.length === 0 || + filteredMessages[filteredMessages.length - 1] 
!== placeholder + ) { + filteredMessages.push(placeholder) + removedTokens -= countTokensJson(replacementMessage) + } + } + + return filteredMessages.map((m) => + m === placeholder ? replacementMessage : m, + ) +} + +export function getMessagesSubset( + messages: CodebuffMessage[], + otherTokens: number, + logger: LoggerEnvironment = defaultLogger, +): CodebuffMessage[] { + const messagesSubset = trimMessagesToFitTokenLimit(messages, otherTokens) + + // Remove cache_control from all messages + for (const message of messagesSubset) { + delete message.providerOptions?.anthropic?.cacheControl + delete message.providerOptions?.openrouter?.cacheControl + } + + // Cache up to the last message! + const lastMessage = messagesSubset[messagesSubset.length - 1] + if (!lastMessage) { + logger.debug( + { + messages, + messagesSubset, + otherTokens, + }, + 'No last message found in messagesSubset!', + ) + } + + return messagesSubset +} + +export function expireMessages( + messages: CodebuffMessage[], + endOf: 'agentStep' | 'userPrompt', +): CodebuffMessage[] { + return messages.filter((m) => { + // Keep messages with no timeToLive + if (m.timeToLive === undefined) return true + + // Remove messages that have expired + if (m.timeToLive === 'agentStep') return false + if (m.timeToLive === 'userPrompt' && endOf === 'userPrompt') return false + + return true + }) +} diff --git a/packages/agent-runtime/src/util/object.ts b/packages/agent-runtime/src/util/object.ts new file mode 100644 index 000000000..8cb548671 --- /dev/null +++ b/packages/agent-runtime/src/util/object.ts @@ -0,0 +1,35 @@ +import { stripNullChars } from '@codebuff/common/util/string' + +/** + * Recursively traverses an object or array and removes null characters (\u0000) + * from all string values. + * + * @param input The object or array to sanitize. + * @returns A new object or array with null characters removed from strings. 
+ */ +export function stripNullCharsFromObject(input: T): T { + if (typeof input === 'string') { + // Explicitly cast back to T, assuming T could be string + return stripNullChars(input) as T + } + + if (Array.isArray(input)) { + // Explicitly cast back to T, assuming T could be an array type + return input.map(stripNullCharsFromObject) as T + } + + if (input !== null && typeof input === 'object') { + const sanitizedObject: { [key: string]: any } = {} + for (const key in input) { + // Ensure we only process own properties + if (Object.prototype.hasOwnProperty.call(input, key)) { + sanitizedObject[key] = stripNullCharsFromObject(input[key]) + } + } + // Explicitly cast back to T + return sanitizedObject as T + } + + // Return non-object/array/string types as is + return input +} diff --git a/packages/agent-runtime/src/util/parse-tool-call-xml.ts b/packages/agent-runtime/src/util/parse-tool-call-xml.ts new file mode 100644 index 000000000..1c8a109ab --- /dev/null +++ b/packages/agent-runtime/src/util/parse-tool-call-xml.ts @@ -0,0 +1,101 @@ +import { toContentString } from '@codebuff/common/util/messages' +import { generateCompactId } from '@codebuff/common/util/string' +import { closeXml } from '@codebuff/common/util/xml' + +import type { StringToolResultPart } from '@codebuff/common/tools/constants' +import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message' + +/** + * Parses XML content for a tool call into a structured object with only string values. 
+ * Example input: + * click + * #button + * 5000 + */ +export function parseToolCallXml(xmlString: string): Record { + if (!xmlString.trim()) return {} + + const result: Record = {} + const tagPattern = /<(\w+)>([\s\S]*?)<\/\1>/g + let match + + while ((match = tagPattern.exec(xmlString)) !== null) { + const [_, key, rawValue] = match + + // Remove leading/trailing whitespace but preserve internal whitespace + const value = rawValue.replace(/^\s+|\s+$/g, '') + + // Assign all values as strings + result[key] = value + } + + return result +} + +export const parseToolResults = (xmlString: string): StringToolResultPart[] => { + if (!xmlString.trim()) return [] + + const results: StringToolResultPart[] = [] + const toolResultPattern = /([\s\S]*?)<\/tool_result>/g + let match + + while ((match = toolResultPattern.exec(xmlString)) !== null) { + const [_, toolResultContent] = match + const toolMatch = /(.*?)<\/tool>/g.exec(toolResultContent) + const resultMatch = /([\s\S]*?)<\/result>/g.exec(toolResultContent) + + if (toolMatch && resultMatch) { + results.push({ + toolName: toolMatch[1], + toolCallId: generateCompactId(), + output: { type: 'text', value: resultMatch[1].trim() }, + }) + } + } + + return results +} + +export interface TokenCallerMap { + [filePath: string]: { + [token: string]: string[] // Array of files that call this token + } +} + +export function renderReadFilesResult( + files: { path: string; content: string }[], + tokenCallers: TokenCallerMap, +) { + return files + .map((file) => { + const referencedBy = + Object.entries(tokenCallers[file.path] ?? 
{}) + .filter(([_, callers]) => callers.length > 0) + .map(([token, callers]) => `${token}: ${callers.join(', ')}`) + .join('\n') || 'None' + return `\n${file.path}${closeXml('path')}\n${file.content}${closeXml('content')}\n${referencedBy}${closeXml('referenced_by')}\n${closeXml('read_file')}` + }) + .join('\n\n') +} + +export function parseReadFilesResult( + xmlString: string, +): { path: string; content: string; referencedBy: string }[] { + const files: { path: string; content: string; referencedBy: string }[] = [] + const filePattern = + /\s*([^<>]+)<\/path>\s*([\s\S]*?)<\/content>\s*([\s\S]*?)<\/referenced_by>\s*<\/read_file>/g + let match + + while ((match = filePattern.exec(xmlString)) !== null) { + const [, filePath, content, referencedBy] = match + if (filePath.trim()) { + files.push({ path: filePath.trim(), content, referencedBy }) + } + } + + return files +} + +export function isToolResult(message: CodebuffMessage): boolean { + return toContentString(message).includes(' ToolResult, +): string { + const resultsStr = + typeof messageContent === 'string' + ? messageContent + : ((messageContent[messageContent.length - 1] as any)?.text as string) ?? + '' + if (!resultsStr.includes(' result.toolName === toolName, + ) + + if (targetResults.length === 0) { + return resultsStr + } + + // Keep non-target results unchanged + const otherResults = toolResults.filter( + (result) => result.toolName !== toolName, + ) + + // Create simplified results + const simplifiedResults = targetResults.map(simplifyFn) + + // Combine both types of results + return renderToolResults([...simplifiedResults, ...otherResults]) +} + +/** + * Simplifies read_files tool results to show only file paths while preserving other tool results. + * Useful for making tool result output more concise in message history. 
+ * @param messageContent - The message content containing tool results + * @returns The message content with simplified read_files results showing only paths + */ +export function simplifyReadFileResults( + messageContent: string | object[], +): string { + return simplifyToolResults( + messageContent, + 'read_files', + simplifyReadFileToolResult, + ) +} + +/** + * Simplifies terminal command tool results to show a brief summary while preserving other tool results. + * Useful for making tool result output more concise in message history. + * @param messageContent - The message content containing tool results + * @returns The message content with simplified terminal command results + */ +export function simplifyTerminalCommandResults( + messageContent: string | object[], +): string { + return simplifyToolResults( + messageContent, + 'run_terminal_command', + simplifyTerminalCommandToolResult, + ) +} + +/** + * Simplifies a single read_files tool result by extracting just the file paths. + * @param toolResult - The read_files tool result to simplify + * @returns A new tool result with just the list of file paths that were read + */ +export function simplifyReadFileToolResult(toolResult: ToolResult): ToolResult { + const fileBlocks = parseReadFilesResult(toolResult.output.value) + const filePaths = fileBlocks.map((block) => block.path) + return { + toolCallId: toolResult.toolCallId, + toolName: 'read_files', + output: { + type: 'text', + value: `Read the following files: ${filePaths.join('\n')}`, + }, + } +} + +/** + * Simplifies a single terminal command tool result by replacing output with a brief message. 
+ * @param toolResult - The terminal command tool result to simplify + * @returns A new tool result with shortened output if the original was long + */ +export function simplifyTerminalCommandToolResult( + toolResult: ToolResult, +): ToolResult { + const shortenedResultCandidate = '[Output omitted]' + return shortenedResultCandidate.length < toolResult.output.value.length + ? { + toolCallId: toolResult.toolCallId, + toolName: 'run_terminal_command', + output: { + type: 'text', + value: shortenedResultCandidate, + }, + } + : toolResult +} diff --git a/packages/agent-runtime/src/util/token-counter.ts b/packages/agent-runtime/src/util/token-counter.ts new file mode 100644 index 000000000..960a676cd --- /dev/null +++ b/packages/agent-runtime/src/util/token-counter.ts @@ -0,0 +1,42 @@ +import { LRUCache } from '@codebuff/common/util/lru-cache' +import { encode } from 'gpt-tokenizer/esm/model/gpt-4o' + +const ANTHROPIC_TOKEN_FUDGE_FACTOR = 1.35 + +const TOKEN_COUNT_CACHE = new LRUCache(1000) + +export function countTokens(text: string): number { + try { + const cached = TOKEN_COUNT_CACHE.get(text) + if (cached !== undefined) { + return cached + } + const count = Math.floor( + encode(text, { allowedSpecial: 'all' }).length * + ANTHROPIC_TOKEN_FUDGE_FACTOR, + ) + + if (text.length > 100) { + // Cache only if the text is long enough to be worth it. + TOKEN_COUNT_CACHE.set(text, count) + } + return count + } catch (e) { + console.error('Error counting tokens', e) + return Math.ceil(text.length / 3) + } +} + +export function countTokensJson(text: string | object): number { + return countTokens(JSON.stringify(text)) +} + +export function countTokensForFiles( + files: Record, +): Record { + const tokenCounts: Record = {} + for (const [filePath, content] of Object.entries(files)) { + tokenCounts[filePath] = content ? 
countTokens(content) : 0 + } + return tokenCounts +} diff --git a/packages/agent-runtime/tsconfig.json b/packages/agent-runtime/tsconfig.json new file mode 100644 index 000000000..3ef6f86b9 --- /dev/null +++ b/packages/agent-runtime/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "types": ["bun", "node"], + "baseUrl": "." + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules"] +} \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index e571761e1..aa8fe9f06 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -12,7 +12,8 @@ "@codebuff/billing/*": ["./packages/billing/src/*"], "@codebuff/bigquery/*": ["./packages/bigquery/src/*"], "@codebuff/internal/*": ["./packages/internal/src/*"], - "@codebuff/code-map/*": ["./packages/code-map/*"] + "@codebuff/code-map/*": ["./packages/code-map/*"], + "@codebuff/agent-runtime/*": ["./packages/agent-runtime/src/*"] } }, "files": [], @@ -27,6 +28,7 @@ { "path": "./packages/bigquery" }, { "path": "./packages/internal" }, { "path": "./packages/code-map" }, + { "path": "./packages/agent-runtime" }, { "path": "./scripts" } ] }