Expose structured thinking without polluting normal assistant output

Extended thinking needed to travel end-to-end through the API,
runtime, and CLI so the client can request a thinking budget,
preserve streamed reasoning blocks, and present them in a
collapsed text-first form. The implementation keeps thinking
strictly opt-in, adds a session-local toggle, and reuses the
existing flag/slash-command/reporting surfaces instead of
introducing a new UI layer.
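For orientation, a minimal sketch of the ContentBlock shape the summarizer changes below assume. Only the variants and fields that the diff pattern-matches are taken from this commit; the ".." rest patterns in those matches indicate additional fields that are not reproduced here.

// Sketch only; mirrors the patterns matched in the hunks below.
pub enum ContentBlock {
    Text { text: String },
    // New variant carrying reasoning text, kept separate from answer text.
    Thinking { text: String },
    ToolUse { name: String, input: String },
    ToolResult { tool_name: String, output: String },
}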

Constraint: Existing non-thinking text/tool flows had to remain backward compatible by default
Constraint: Terminal UX needed a lightweight collapsed representation rather than an interactive TUI widget (a rough sketch follows these notes)
Rejected: Heuristic CLI-only parsing of reasoning text | brittle against structured stream payloads
Rejected: Expanded raw thinking output by default | too noisy for normal assistant responses
Confidence: medium
Scope-risk: moderate
Reversibility: clean
Directive: Keep thinking blocks structurally separate from answer text unless the upstream API contract changes
Tested: cargo fmt --all; cargo clippy --workspace --all-targets -- -D warnings; cargo test -q
Not-tested: Live upstream thinking payloads against the production API contract
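The collapsed representation mentioned in the constraints is roughly the following shape. The helper name and exact output format are hypothetical and not part of this commit; only the "thinking:" prefix echoes the summarize_block change below.

// Hypothetical sketch of a collapsed, text-first rendering for the terminal:
// show the first non-empty line of the reasoning and how much is hidden.
fn render_thinking_collapsed(text: &str) -> String {
    let mut lines = text.lines().map(str::trim).filter(|line| !line.is_empty());
    let first = lines.next().unwrap_or("");
    let hidden = lines.count();
    if hidden > 0 {
        format!("thinking: {first} (+{hidden} more lines)")
    } else {
        format!("thinking: {first}")
    }
}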
Yeachan-Heo
2026-04-01 01:08:18 +00:00
parent d6341d54c1
commit c14196c730
9 changed files with 353 additions and 31 deletions


@@ -130,7 +130,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
         .filter_map(|block| match block {
             ContentBlock::ToolUse { name, .. } => Some(name.as_str()),
             ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()),
-            ContentBlock::Text { .. } => None,
+            ContentBlock::Text { .. } | ContentBlock::Thinking { .. } => None,
         })
         .collect::<Vec<_>>();
     tool_names.sort_unstable();
@@ -200,6 +200,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
 fn summarize_block(block: &ContentBlock) -> String {
     let raw = match block {
         ContentBlock::Text { text } => text.clone(),
+        ContentBlock::Thinking { text, .. } => format!("thinking: {text}"),
         ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"),
         ContentBlock::ToolResult {
             tool_name,
@@ -258,7 +259,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec<String> {
         .iter()
         .flat_map(|message| message.blocks.iter())
         .map(|block| match block {
-            ContentBlock::Text { text } => text.as_str(),
+            ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.as_str(),
             ContentBlock::ToolUse { input, .. } => input.as_str(),
             ContentBlock::ToolResult { output, .. } => output.as_str(),
         })
@@ -280,10 +281,15 @@ fn infer_current_work(messages: &[ConversationMessage]) -> Option<String> {
 fn first_text_block(message: &ConversationMessage) -> Option<&str> {
     message.blocks.iter().find_map(|block| match block {
-        ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()),
+        ContentBlock::Text { text } | ContentBlock::Thinking { text, .. }
+            if !text.trim().is_empty() =>
+        {
+            Some(text.as_str())
+        }
         ContentBlock::ToolUse { .. }
         | ContentBlock::ToolResult { .. }
-        | ContentBlock::Text { .. } => None,
+        | ContentBlock::Text { .. }
+        | ContentBlock::Thinking { .. } => None,
     })
 }
@@ -328,7 +334,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize {
         .blocks
         .iter()
         .map(|block| match block {
-            ContentBlock::Text { text } => text.len() / 4 + 1,
+            ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.len() / 4 + 1,
             ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
             ContentBlock::ToolResult {
                 tool_name, output, ..