mirror of
https://github.com/lWolvesl/claw-code.git
synced 2026-04-03 00:21:53 +08:00
Improve WebFetch title prompts for HTML pages
Make title-focused WebFetch prompts prefer the real HTML <title> value when present instead of always falling back to the first rendered text line. Keep the behavior narrow and preserve the existing summary path for non-title prompts.

Constraint: Must not touch unrelated dirty api files in this worktree
Constraint: Keep the change limited to rust/crates/tools
Rejected: Broader HTML parsing dependency | not needed for this small parity slice
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Preserve lightweight HTML handling unless parity requires a materially more robust parser
Tested: cargo test -p tools
Not-tested: malformed HTML with mixed-case or nested title edge cases
This commit is contained in:
@@ -639,7 +639,7 @@ fn execute_web_fetch(input: &WebFetchInput) -> Result<WebFetchOutput, String> {
|
||||
let body = response.text().map_err(|error| error.to_string())?;
|
||||
let bytes = body.len();
|
||||
let normalized = normalize_fetched_content(&body, &content_type);
|
||||
let result = summarize_web_fetch(&final_url, &input.prompt, &normalized);
|
||||
let result = summarize_web_fetch(&final_url, &input.prompt, &normalized, &body, &content_type);
|
||||
|
||||
Ok(WebFetchOutput {
|
||||
bytes,
|
||||
@@ -750,12 +750,18 @@ fn normalize_fetched_content(body: &str, content_type: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn summarize_web_fetch(url: &str, prompt: &str, content: &str) -> String {
|
||||
fn summarize_web_fetch(
|
||||
url: &str,
|
||||
prompt: &str,
|
||||
content: &str,
|
||||
raw_body: &str,
|
||||
content_type: &str,
|
||||
) -> String {
|
||||
let lower_prompt = prompt.to_lowercase();
|
||||
let compact = collapse_whitespace(content);
|
||||
|
||||
let detail = if lower_prompt.contains("title") {
|
||||
extract_title(content)
|
||||
extract_title(content, raw_body, content_type)
|
||||
.map(|title| format!("Title: {title}"))
|
||||
.unwrap_or_else(|| preview_text(&compact, 600))
|
||||
} else if lower_prompt.contains("summary") || lower_prompt.contains("summarize") {
|
||||
@@ -768,7 +774,21 @@ fn summarize_web_fetch(url: &str, prompt: &str, content: &str) -> String {
|
||||
format!("Fetched {url}\n{detail}")
|
||||
}
|
||||
|
||||
fn extract_title(content: &str) -> Option<String> {
|
||||
fn extract_title(content: &str, raw_body: &str, content_type: &str) -> Option<String> {
|
||||
if content_type.contains("html") {
|
||||
let lowered = raw_body.to_lowercase();
|
||||
if let Some(start) = lowered.find("<title>") {
|
||||
let after = start + "<title>".len();
|
||||
if let Some(end_rel) = lowered[after..].find("</title>") {
|
||||
let title =
|
||||
collapse_whitespace(&decode_html_entities(&raw_body[after..after + end_rel]));
|
||||
if !title.is_empty() {
|
||||
return Some(title);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for line in content.lines() {
|
||||
let trimmed = line.trim();
|
||||
if !trimmed.is_empty() {
|
||||
@@ -1798,6 +1818,18 @@ mod tests {
|
||||
assert!(summary.contains("Fetched"));
|
||||
assert!(summary.contains("Test Page"));
|
||||
assert!(summary.contains("Hello world from local server"));
|
||||
|
||||
let titled = execute_tool(
|
||||
"WebFetch",
|
||||
&json!({
|
||||
"url": format!("http://{}/page", server.addr()),
|
||||
"prompt": "What is the page title?"
|
||||
}),
|
||||
)
|
||||
.expect("WebFetch title query should succeed");
|
||||
let titled_output: serde_json::Value = serde_json::from_str(&titled).expect("valid json");
|
||||
let titled_summary = titled_output["result"].as_str().expect("result string");
|
||||
assert!(titled_summary.contains("Title: Ignored"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user