Prevent long sessions from stalling and expose the requested internal command surface

The runtime now auto-compacts completed conversations once cumulative input usage
crosses a configurable threshold, preserving recent context while surfacing an
explicit user notice. The CLI also publishes the requested ant-only slash
commands through the shared commands crate and main dispatch, using meaningful
local implementations for commit/PR/issue/teleport/debug workflows.

Constraint: Reuse the existing Rust compaction pipeline instead of introducing a new summarization stack
Constraint: No new dependencies or broad command-framework rewrite
Rejected: Implement API-driven compaction inside ConversationRuntime now | too much new plumbing for this delivery
Rejected: Expose new commands as parse-only stubs | would not satisfy the requested command availability
Confidence: medium
Scope-risk: moderate
Reversibility: clean
Directive: If runtime later gains true API-backed compaction, preserve the TurnSummary auto-compaction metadata shape so CLI call sites stay stable
Tested: cargo test; cargo build --release; cargo fmt --all; git diff --check; LSP diagnostics directory check
Not-tested: Live Anthropic-backed specialist command flows; gh-authenticated PR/issue creation in a real repo
This commit is contained in:
Yeachan-Heo
2026-04-01 03:48:50 +00:00
parent a94ef61b01
commit 992681c4fd
4 changed files with 733 additions and 9 deletions

View File

@@ -8,6 +8,9 @@ use crate::permissions::{PermissionOutcome, PermissionPolicy, PermissionPrompter
use crate::session::{ContentBlock, ConversationMessage, Session};
use crate::usage::{TokenUsage, UsageTracker};
/// Cumulative input-token threshold used when the environment supplies no
/// usable override (unset, unparsable, or zero).
const DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD: u32 = 100_000;
/// Name of the environment variable read by `auto_compaction_threshold_from_env`
/// to override the default threshold.
const AUTO_COMPACTION_THRESHOLD_ENV_VAR: &str = "CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS";
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApiRequest {
pub system_prompt: Vec<String>,
@@ -84,6 +87,12 @@ pub struct TurnSummary {
pub tool_results: Vec<ConversationMessage>,
pub iterations: usize,
pub usage: TokenUsage,
pub auto_compaction: Option<AutoCompactionEvent>,
}
/// Describes an automatic compaction that ran at the end of a turn.
///
/// Surfaced to callers through `TurnSummary::auto_compaction`, which is `None`
/// when no compaction took place.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AutoCompactionEvent {
/// Number of messages the compaction removed from the session.
pub removed_message_count: usize,
}
pub struct ConversationRuntime<C, T> {
@@ -94,6 +103,7 @@ pub struct ConversationRuntime<C, T> {
system_prompt: Vec<String>,
max_iterations: usize,
usage_tracker: UsageTracker,
auto_compaction_input_tokens_threshold: u32,
}
impl<C, T> ConversationRuntime<C, T>
@@ -118,6 +128,7 @@ where
system_prompt,
max_iterations: usize::MAX,
usage_tracker,
auto_compaction_input_tokens_threshold: auto_compaction_threshold_from_env(),
}
}
@@ -127,6 +138,12 @@ where
self
}
/// Overrides the cumulative input-token threshold that triggers auto-compaction.
///
/// The constructor seeds this value from `auto_compaction_threshold_from_env`;
/// this builder replaces it and returns the runtime for further chaining.
///
/// No validation is applied: unlike the environment parser, `0` is accepted
/// here. With a threshold of `0` the "below threshold" check in
/// `maybe_auto_compact` can never be true, so compaction is attempted after
/// every turn.
#[must_use]
pub fn with_auto_compaction_input_tokens_threshold(mut self, threshold: u32) -> Self {
self.auto_compaction_input_tokens_threshold = threshold;
self
}
pub fn run_turn(
&mut self,
user_input: impl Into<String>,
@@ -209,11 +226,14 @@ where
}
}
let auto_compaction = self.maybe_auto_compact();
Ok(TurnSummary {
assistant_messages,
tool_results,
iterations,
usage: self.usage_tracker.cumulative_usage(),
auto_compaction,
})
}
@@ -241,6 +261,48 @@ where
pub fn into_session(self) -> Session {
self.session
}
/// Compacts the session once cumulative input-token usage has reached the
/// configured threshold.
///
/// Returns `None` when usage is still below the threshold or when compaction
/// removed nothing. Otherwise the session is replaced by the compacted one and
/// the number of removed messages is reported.
fn maybe_auto_compact(&mut self) -> Option<AutoCompactionEvent> {
    // NOTE(review): this reads *cumulative* usage and nothing in this method
    // resets it, so once the threshold is crossed this guard presumably stays
    // open on every later turn -- confirm that is intended.
    let consumed_input_tokens = self.usage_tracker.cumulative_usage().input_tokens;
    if consumed_input_tokens < self.auto_compaction_input_tokens_threshold {
        return None;
    }

    // A zero token budget presumably forces `compact_session` to compact
    // regardless of the session's estimated size -- TODO confirm against
    // `compact_session`. All other settings keep their defaults.
    let config = CompactionConfig {
        max_estimated_tokens: 0,
        ..CompactionConfig::default()
    };
    let outcome = compact_session(&self.session, config);

    let removed_message_count = outcome.removed_message_count;
    if removed_message_count == 0 {
        // Nothing was dropped, so keep the current session untouched.
        return None;
    }

    self.session = outcome.compacted_session;
    Some(AutoCompactionEvent {
        removed_message_count,
    })
}
}
/// Resolves the auto-compaction threshold from the process environment.
///
/// Reads the variable named by `AUTO_COMPACTION_THRESHOLD_ENV_VAR` and hands
/// the raw text (or `None` when the variable cannot be read) to
/// `parse_auto_compaction_threshold`, which supplies the fallback.
#[must_use]
pub fn auto_compaction_threshold_from_env() -> u32 {
    // Any read failure is treated the same as "not set".
    let raw_setting = std::env::var(AUTO_COMPACTION_THRESHOLD_ENV_VAR).ok();
    parse_auto_compaction_threshold(raw_setting.as_deref())
}
/// Turns an optional raw setting into a usable threshold.
///
/// The text is trimmed and parsed as a `u32`. A missing value, a value that
/// does not parse, or a parsed value of `0` all yield
/// `DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD`.
#[must_use]
fn parse_auto_compaction_threshold(value: Option<&str>) -> u32 {
    match value.map(|raw| raw.trim().parse::<u32>()) {
        // Only strictly positive thresholds are honoured.
        Some(Ok(threshold)) if threshold > 0 => threshold,
        _ => DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD,
    }
}
fn build_assistant_message(
@@ -325,8 +387,9 @@ impl ToolExecutor for StaticToolExecutor {
#[cfg(test)]
mod tests {
use super::{
ApiClient, ApiRequest, AssistantEvent, ConversationRuntime, RuntimeError,
StaticToolExecutor,
parse_auto_compaction_threshold, ApiClient, ApiRequest, AssistantEvent,
AutoCompactionEvent, ConversationRuntime, RuntimeError, StaticToolExecutor,
DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD,
};
use crate::compact::CompactionConfig;
use crate::permissions::{
@@ -436,6 +499,7 @@ mod tests {
assert_eq!(summary.tool_results.len(), 1);
assert_eq!(runtime.session().messages.len(), 4);
assert_eq!(summary.usage.output_tokens, 10);
assert_eq!(summary.auto_compaction, None);
assert!(matches!(
runtime.session().messages[1].blocks[1],
ContentBlock::ToolUse { .. }
@@ -581,4 +645,111 @@ mod tests {
MessageRole::System
);
}
/// A turn whose reported input usage exceeds the threshold must compact the
/// session and report the number of removed messages.
#[test]
fn auto_compacts_when_cumulative_input_threshold_is_crossed() {
    use crate::session::ConversationMessage;

    // Fake client: one short reply that reports 120_000 input tokens, which is
    // above the 100_000 threshold configured below.
    struct SimpleApi;
    impl ApiClient for SimpleApi {
        fn stream(
            &mut self,
            _request: ApiRequest,
        ) -> Result<Vec<AssistantEvent>, RuntimeError> {
            let reported_usage = TokenUsage {
                input_tokens: 120_000,
                output_tokens: 4,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
            };
            let events = vec![
                AssistantEvent::TextDelta("done".to_string()),
                AssistantEvent::Usage(reported_usage),
                AssistantEvent::MessageStop,
            ];
            Ok(events)
        }
    }

    // Seed the session with two prior user/assistant exchanges.
    let assistant_text = |text: &str| {
        ConversationMessage::assistant(vec![ContentBlock::Text {
            text: text.to_string(),
        }])
    };
    let seeded_history = vec![
        ConversationMessage::user_text("one"),
        assistant_text("two"),
        ConversationMessage::user_text("three"),
        assistant_text("four"),
    ];
    let session = Session {
        version: 1,
        messages: seeded_history,
    };

    let policy = PermissionPolicy::new(PermissionMode::DangerFullAccess);
    let mut runtime = ConversationRuntime::new(
        session,
        SimpleApi,
        StaticToolExecutor::new(),
        policy,
        vec!["system".to_string()],
    )
    .with_auto_compaction_input_tokens_threshold(100_000);

    let summary = runtime
        .run_turn("trigger", None)
        .expect("turn should succeed");

    // Two messages are expected to be dropped, and the compacted session is
    // expected to start with a System-role message.
    let expected_event = AutoCompactionEvent {
        removed_message_count: 2,
    };
    assert_eq!(summary.auto_compaction, Some(expected_event));
    assert_eq!(runtime.session().messages[0].role, MessageRole::System);
}
/// A turn whose reported input usage stays one token under the threshold must
/// leave the session uncompacted.
#[test]
fn skips_auto_compaction_below_threshold() {
    // Fake client: reports 99_999 input tokens, just under the 100_000
    // threshold configured below.
    struct SimpleApi;
    impl ApiClient for SimpleApi {
        fn stream(
            &mut self,
            _request: ApiRequest,
        ) -> Result<Vec<AssistantEvent>, RuntimeError> {
            let reported_usage = TokenUsage {
                input_tokens: 99_999,
                output_tokens: 4,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
            };
            let events = vec![
                AssistantEvent::TextDelta("done".to_string()),
                AssistantEvent::Usage(reported_usage),
                AssistantEvent::MessageStop,
            ];
            Ok(events)
        }
    }

    let policy = PermissionPolicy::new(PermissionMode::DangerFullAccess);
    let mut runtime = ConversationRuntime::new(
        Session::new(),
        SimpleApi,
        StaticToolExecutor::new(),
        policy,
        vec!["system".to_string()],
    )
    .with_auto_compaction_input_tokens_threshold(100_000);

    let summary = runtime
        .run_turn("trigger", None)
        .expect("turn should succeed");

    // No compaction event, and the session holds exactly the two messages
    // produced by this turn.
    assert_eq!(summary.auto_compaction, None);
    assert_eq!(runtime.session().messages.len(), 2);
}
/// Threshold parsing accepts positive integers (ignoring surrounding
/// whitespace) and falls back to the default for everything else.
#[test]
fn auto_compaction_threshold_defaults_and_parses_values() {
    let fallback = DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD;

    // Missing or malformed input falls back to the default.
    assert_eq!(parse_auto_compaction_threshold(None), fallback);
    assert_eq!(
        parse_auto_compaction_threshold(Some("not-a-number")),
        fallback
    );
    assert_eq!(parse_auto_compaction_threshold(Some("")), fallback);

    // Well-formed positive values are honoured, with or without padding.
    assert_eq!(parse_auto_compaction_threshold(Some("4321")), 4321);
    assert_eq!(parse_auto_compaction_threshold(Some("  4321\n")), 4321);

    // Zero is explicitly filtered out by the parser, so it must not disable
    // the threshold; negative and overflowing values fail `u32` parsing.
    assert_eq!(parse_auto_compaction_threshold(Some("0")), fallback);
    assert_eq!(parse_auto_compaction_threshold(Some("-1")), fallback);
    assert_eq!(
        parse_auto_compaction_threshold(Some("4294967296")),
        fallback
    );
}
}

View File

@@ -30,8 +30,8 @@ pub use config::{
CLAUDE_CODE_SETTINGS_SCHEMA_NAME,
};
pub use conversation::{
ApiClient, ApiRequest, AssistantEvent, ConversationRuntime, RuntimeError, StaticToolExecutor,
ToolError, ToolExecutor, TurnSummary,
auto_compaction_threshold_from_env, ApiClient, ApiRequest, AssistantEvent, AutoCompactionEvent,
ConversationRuntime, RuntimeError, StaticToolExecutor, ToolError, ToolExecutor, TurnSummary,
};
pub use file_ops::{
edit_file, glob_search, grep_search, read_file, write_file, EditFileOutput, GlobSearchOutput,