diff --git a/crates/html-extractor/src/lib.rs b/crates/html-extractor/src/lib.rs
index fe62748..1456e45 100644
--- a/crates/html-extractor/src/lib.rs
+++ b/crates/html-extractor/src/lib.rs
@@ -68,15 +68,54 @@ pub fn extract(html: &str, options: &ExtractOptions) -> Result= 200 && kept_text_len * 100 < body_text_len * 15;
let (final_root, quality, used_fallback) = if let Some(idx) = selected_root {
if kept_text_len < min_len {
+ // (b): too short to be useful — use the fallback chain, keeping the selected root only if the fallback finds none.
let (fb_root, q) = fallback::fallback(&tree, options);
(fb_root.or(Some(idx)), q.max(0.15), true)
+ } else if suspiciously_small {
+ // (c): try the fallback chain, but switch to it only when it yields more than
+ // twice the selected root's link-excluded text; otherwise keep the selection.
+ let (fb_root, fb_q) = fallback::fallback(&tree, options);
+ let fb_text = fb_root
+ .map(|i| tree.text_len_excluding_links(i))
+ .unwrap_or(0);
+ if fb_text > kept_text_len * 2 {
+ (fb_root, fb_q.max(0.2), true)
+ } else {
+ (
+ Some(idx),
+ confidence_from_score(score, kept_text_len),
+ false,
+ )
+ }
} else {
(
Some(idx),