Expose structured thinking without polluting normal assistant output

Extended thinking needed to travel end-to-end through the API,
runtime, and CLI so the client can request a thinking budget,
preserve streamed reasoning blocks, and present them in a
collapsed text-first form. The implementation keeps thinking
strictly opt-in, adds a session-local toggle, and reuses the
existing flag/slash-command/reporting surfaces instead of
introducing a new UI layer.
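For orientation, a minimal sketch of the ContentBlock shape the summarizer changes below assume. Only the variants and fields that the diff pattern-matches are taken from this commit; the ".." rest patterns in those matches indicate additional fields that are not reproduced here.

// Sketch only; mirrors the patterns matched in the hunks below.
pub enum ContentBlock {
    Text { text: String },
    // New variant carrying reasoning text, kept separate from answer text.
    Thinking { text: String },
    ToolUse { name: String, input: String },
    ToolResult { tool_name: String, output: String },
}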

Constraint: Existing non-thinking text/tool flows had to remain backward compatible by default
Constraint: Terminal UX needed a lightweight collapsed representation rather than an interactive TUI widget (a rough sketch follows these notes)
Rejected: Heuristic CLI-only parsing of reasoning text | brittle against structured stream payloads
Rejected: Expanded raw thinking output by default | too noisy for normal assistant responses
Confidence: medium
Scope-risk: moderate
Reversibility: clean
Directive: Keep thinking blocks structurally separate from answer text unless the upstream API contract changes
Tested: cargo fmt --all; cargo clippy --workspace --all-targets -- -D warnings; cargo test -q
Not-tested: Live upstream thinking payloads against the production API contract
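The collapsed representation mentioned in the constraints is roughly the following shape. The helper name and exact output format are hypothetical and not part of this commit; only the "thinking:" prefix echoes the summarize_block change below.

// Hypothetical sketch of a collapsed, text-first rendering for the terminal:
// show the first non-empty line of the reasoning and how much is hidden.
fn render_thinking_collapsed(text: &str) -> String {
    let mut lines = text.lines().map(str::trim).filter(|line| !line.is_empty());
    let first = lines.next().unwrap_or("");
    let hidden = lines.count();
    if hidden > 0 {
        format!("thinking: {first} (+{hidden} more lines)")
    } else {
        format!("thinking: {first}")
    }
}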
Yeachan-Heo
2026-04-01 01:08:18 +00:00
parent d6341d54c1
commit c14196c730
9 changed files with 353 additions and 31 deletions


@@ -130,7 +130,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
         .filter_map(|block| match block {
             ContentBlock::ToolUse { name, .. } => Some(name.as_str()),
             ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()),
-            ContentBlock::Text { .. } => None,
+            ContentBlock::Text { .. } | ContentBlock::Thinking { .. } => None,
         })
         .collect::<Vec<_>>();
     tool_names.sort_unstable();
@@ -200,6 +200,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
 fn summarize_block(block: &ContentBlock) -> String {
     let raw = match block {
         ContentBlock::Text { text } => text.clone(),
+        ContentBlock::Thinking { text, .. } => format!("thinking: {text}"),
         ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"),
         ContentBlock::ToolResult {
             tool_name,
@@ -258,7 +259,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec<String> {
         .iter()
         .flat_map(|message| message.blocks.iter())
         .map(|block| match block {
-            ContentBlock::Text { text } => text.as_str(),
+            ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.as_str(),
             ContentBlock::ToolUse { input, .. } => input.as_str(),
             ContentBlock::ToolResult { output, .. } => output.as_str(),
         })
@@ -280,10 +281,15 @@ fn infer_current_work(messages: &[ConversationMessage]) -> Option<String> {
 fn first_text_block(message: &ConversationMessage) -> Option<&str> {
     message.blocks.iter().find_map(|block| match block {
-        ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()),
+        ContentBlock::Text { text } | ContentBlock::Thinking { text, .. }
+            if !text.trim().is_empty() =>
+        {
+            Some(text.as_str())
+        }
         ContentBlock::ToolUse { .. }
         | ContentBlock::ToolResult { .. }
-        | ContentBlock::Text { .. } => None,
+        | ContentBlock::Text { .. }
+        | ContentBlock::Thinking { .. } => None,
     })
 }
@@ -328,7 +334,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize {
         .blocks
         .iter()
         .map(|block| match block {
-            ContentBlock::Text { text } => text.len() / 4 + 1,
+            ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.len() / 4 + 1,
             ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
             ContentBlock::ToolResult {
                 tool_name, output, ..