Improve CLI visibility into runtime usage and compaction

This adds token and estimated cost reporting to runtime usage tracking and surfaces it in the CLI status and turn output. It also upgrades compaction summaries so users see a clearer resumable summary and the token savings after /compact.

The verification path required cleaning up existing clippy warnings and test friction in adjacent workspace crates so that cargo fmt, cargo clippy -D warnings, and cargo test all succeed from the Rust workspace root in this repo state.

Constraint: Keep the change incremental and user-visible without a large CLI rewrite
Constraint: Verification must pass with cargo fmt, cargo clippy --all-targets --all-features -- -D warnings, and cargo test
Rejected: Implement a full model-pricing table now | would add more surface area than needed for this first UX slice
Confidence: high
Scope-risk: moderate
Reversibility: clean
Directive: If pricing becomes model-specific later, keep the current estimate labeling explicit rather than implying exact billing
Tested: cargo fmt; cargo clippy --all-targets --all-features -- -D warnings; cargo test -q
Not-tested: Live Anthropic API interaction and real streaming terminal sessions
2026-04-02 22:21:52 +08:00 · 2026-03-31 19:18:42 +00:00
parent 4586764a0e
commit 4bae5ee132
8 changed files with 246 additions and 56 deletions
--- a/rust/crates/runtime/src/usage.rs
+++ b/rust/crates/runtime/src/usage.rs
@@ -1,5 +1,10 @@
 use crate::session::Session;

+const DEFAULT_INPUT_COST_PER_MILLION: f64 = 15.0;
+const DEFAULT_OUTPUT_COST_PER_MILLION: f64 = 75.0;
+const DEFAULT_CACHE_CREATION_COST_PER_MILLION: f64 = 18.75;
+const DEFAULT_CACHE_READ_COST_PER_MILLION: f64 = 1.5;
+
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
 pub struct TokenUsage {
    pub input_tokens: u32,
@@ -8,6 +13,24 @@ pub struct TokenUsage {
    pub cache_read_input_tokens: u32,
 }

+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct UsageCostEstimate {
+    pub input_cost_usd: f64,
+    pub output_cost_usd: f64,
+    pub cache_creation_cost_usd: f64,
+    pub cache_read_cost_usd: f64,
+}
+
+impl UsageCostEstimate {
+    #[must_use]
+    pub fn total_cost_usd(self) -> f64 {
+        self.input_cost_usd
+            + self.output_cost_usd
+            + self.cache_creation_cost_usd
+            + self.cache_read_cost_usd
+    }
+}
+
 impl TokenUsage {
    #[must_use]
    pub fn total_tokens(self) -> u32 {
@@ -16,6 +39,54 @@ impl TokenUsage {
            + self.cache_creation_input_tokens
            + self.cache_read_input_tokens
    }
+
+    #[must_use]
+    pub fn estimate_cost_usd(self) -> UsageCostEstimate {
+        UsageCostEstimate {
+            input_cost_usd: cost_for_tokens(self.input_tokens, DEFAULT_INPUT_COST_PER_MILLION),
+            output_cost_usd: cost_for_tokens(self.output_tokens, DEFAULT_OUTPUT_COST_PER_MILLION),
+            cache_creation_cost_usd: cost_for_tokens(
+                self.cache_creation_input_tokens,
+                DEFAULT_CACHE_CREATION_COST_PER_MILLION,
+            ),
+            cache_read_cost_usd: cost_for_tokens(
+                self.cache_read_input_tokens,
+                DEFAULT_CACHE_READ_COST_PER_MILLION,
+            ),
+        }
+    }
+
+    #[must_use]
+    pub fn summary_lines(self, label: &str) -> Vec<String> {
+        let cost = self.estimate_cost_usd();
+        vec![
+            format!(
+                "{label}: total_tokens={} input={} output={} cache_write={} cache_read={} estimated_cost={}",
+                self.total_tokens(),
+                self.input_tokens,
+                self.output_tokens,
+                self.cache_creation_input_tokens,
+                self.cache_read_input_tokens,
+                format_usd(cost.total_cost_usd()),
+            ),
+            format!(
+                "  cost breakdown: input={} output={} cache_write={} cache_read={}",
+                format_usd(cost.input_cost_usd),
+                format_usd(cost.output_cost_usd),
+                format_usd(cost.cache_creation_cost_usd),
+                format_usd(cost.cache_read_cost_usd),
+            ),
+        ]
+    }
+}
+
+fn cost_for_tokens(tokens: u32, usd_per_million_tokens: f64) -> f64 {
+    f64::from(tokens) / 1_000_000.0 * usd_per_million_tokens
+}
+
+#[must_use]
+pub fn format_usd(amount: f64) -> String {
+    format!("${amount:.4}")
 }

 #[derive(Debug, Clone, Default, PartialEq, Eq)]
@@ -69,7 +140,7 @@ impl UsageTracker {

 #[cfg(test)]
 mod tests {
-    use super::{TokenUsage, UsageTracker};
+    use super::{format_usd, TokenUsage, UsageTracker};
    use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};

    #[test]
@@ -96,6 +167,23 @@ mod tests {
        assert_eq!(tracker.cumulative_usage().total_tokens(), 48);
    }

+    #[test]
+    fn computes_cost_summary_lines() {
+        let usage = TokenUsage {
+            input_tokens: 1_000_000,
+            output_tokens: 500_000,
+            cache_creation_input_tokens: 100_000,
+            cache_read_input_tokens: 200_000,
+        };
+
+        let cost = usage.estimate_cost_usd();
+        assert_eq!(format_usd(cost.input_cost_usd), "$15.0000");
+        assert_eq!(format_usd(cost.output_cost_usd), "$37.5000");
+        let lines = usage.summary_lines("usage");
+        assert!(lines[0].contains("estimated_cost=$54.6750"));
+        assert!(lines[1].contains("cache_read=$0.3000"));
+    }
+
    #[test]
    fn reconstructs_usage_from_session_messages() {
        let session = Session {