verity-h-prototype / src /inference_detector.py
Sravanth18's picture
v0.3.1: inference detector (4-tier), contradiction question-fallback, improved batch prompt (154 tests pass)
1e4f1f7 verified
"""Deterministic inference detector β€” flags claims that are inferences, not facts.
v0.3.1: Based on CogniBench cognitive statement taxonomy (arxiv:2505.20767),
GME epistemic modality taxonomy (arxiv:2106.08037), and BioScope hedge cues.
Four-tier detection:
Tier 1: Claim-level β€” epistemic hedges, logical leap connectors, causal inference
Tier 2: Logical leap connectors β€” therefore, thus, based on these findings
Tier 3: Deontic/normative β€” should, recommended, indicated
Tier 4: Question-level β€” speculative/predictive/normative question detection
Only overrides SUPPORTED β†’ UNSUPPORTED. Never touches other labels.
Conservative: prefers missed inference over false positive on factual claims.
"""
from __future__ import annotations
import re
# ═══════════════════════════════════════════════════════════════════════
# Tier 1: Epistemic hedges β€” "state of knowledge" modals (GME taxonomy)
# High precision: these almost never appear in purely factual claims
# ═══════════════════════════════════════════════════════════════════════
_EPISTEMIC_HEDGES = re.compile(
r"\bmost likely\b|\bmost probable\b|\bprobably\b|\bpresumably\b|"
r"\bapparently\b|\bplausibly\b|"
# Epistemic modal constructions (NOT bare "may" β€” too ambiguous)
r"\bmay (?:have|be|represent|indicate|suggest)\b|"
r"\bmight (?:have|be|represent|indicate)\b|"
r"\bcould (?:be|represent|indicate|have)\b|"
# Evidential verbs β€” GME "state of knowledge" triggers
r"\bsuggests?\s+(?:that |a |an |the |\w+ing |\w+ence )|"
r"\bimplies?\b|\bpoints? to\b|"
r"\bwould suggest\b|\bwould indicate\b|"
r"\bappears? to (?:be|have|show|indicate)\b|"
r"\bseems? to (?:be|have|show|indicate)\b|"
# "supports" as epistemic β€” when followed by inference (not evidence citation)
r"\bsupports?\s+(?:a |an |the )?(?:diagnosis|conclusion|hypothesis|notion|idea|interpretation|finding)\b|"
r"\bsupports?\s+(?:a |an |the )?(?:acute|chronic|bacterial|viral|infectious|inflammatory)\b|"
# Clinical/diagnostic inference (BioScope cues)
r"\bconsistent with\b|\bin keeping with\b|\bcompatible with\b|"
r"\bconcerning for\b|"
r"\bcannot rule out\b|\bnot ruled out\b|"
# Causal inference
r"\blikely caused by\b|\bmost likely caused by\b|"
r"\bbest explains?\b|\bsecondary to\b",
re.IGNORECASE,
)
# ═══════════════════════════════════════════════════════════════════════
# Tier 2: Logical leap connectors β€” claim draws a conclusion from evidence
# ═══════════════════════════════════════════════════════════════════════
_LOGICAL_LEAP = re.compile(
r"\btherefore\b|\bthus\b|\bhence\b|\bconsequently\b|"
r"\bthis suggests?\b|\bthis indicates?\b|\bthis implies?\b|"
r"\bthis is consistent with\b|\bthis points to\b|"
r"\bwe can conclude\b|\bit can be concluded\b|"
r"\bbased on (?:these|this|the) (?:findings?|evidence|data|symptoms?)\b|"
r"\btaken together\b|\boverall\b.*\bsuggests?\b",
re.IGNORECASE,
)
# ═══════════════════════════════════════════════════════════════════════
# Tier 3: Predictive/speculative + Deontic/normative
# ═══════════════════════════════════════════════════════════════════════
_PREDICTIVE = re.compile(
r"\bwill likely\b|\bis likely to\b|\bis expected to\b|"
r"\bhas a (?:strong |good |high )?chance\b|"
r"\bpotential(?:ly)? to\b|\bprospects? (?:are|look|seem)\b|"
r"\bpoised to\b|\bon track to\b|"
r"\bshould (?:be able|succeed|continue|grow|improve)\b|"
r"\bforecast|predict",
re.IGNORECASE,
)
_DEONTIC = re.compile(
r"\bshould (?:be |consider |start |seek |get )\b|"
r"\brecommended?\b|\badvised?\b|\bwarranted\b|"
r"\bis indicated\b|\bare indicated\b",
re.IGNORECASE,
)
# ═══════════════════════════════════════════════════════════════════════
# Tier 4: Question-level β€” speculative question detection
# Source: CogniBench + Typed-RAG question taxonomy
# ═══════════════════════════════════════════════════════════════════════
_SPECULATIVE_QUESTION = re.compile(
# Normative: "Should we invest?" "Should the defendant..."
r"^should\s+(?:we|the|he|she|they|I)\b|"
# Predictive: "Will the product..." "Will it rain..."
r"^will\s+(?:the|this|it)\b|"
# Causal inference: "What caused..." "What explains..."
r"^what\s+(?:caused|is causing|explains|led to|is the (?:most )?likely (?:cause|diagnosis))\b|"
# Diagnostic: "Is the defendant guilty..."
r"^is\s+the\s+(?:defendant|patient|suspect)\s+\w+|"
# Evaluative: "Is it advisable..."
r"^is\s+(?:it|this)\s+(?:advisable|recommended|safe|a good)\b|"
# Why questions seeking causal explanation
r"^why\s+(?:did|does|would|is)\b",
re.IGNORECASE,
)
# ═══════════════════════════════════════════════════════════════════════
# Public API
# ═══════════════════════════════════════════════════════════════════════
def is_speculative_question(question: str) -> bool:
"""Return True if the question is speculative/predictive/normative."""
return bool(_SPECULATIVE_QUESTION.search(question.strip()))
def detect_inference(
claim_text: str,
question: str,
) -> tuple[bool, str]:
"""Detect if a SUPPORTED claim is actually an inference.
Returns: (is_inference, reason)
Only call this for claims with label == "SUPPORTED".
"""
ct = claim_text.strip()
# Tier 1: Epistemic hedges in the claim itself
m = _EPISTEMIC_HEDGES.search(ct)
if m:
return True, f"epistemic hedge: '{m.group().strip()}'"
# Tier 2: Logical leap connector
m = _LOGICAL_LEAP.search(ct)
if m:
return True, f"logical leap: '{m.group().strip()}'"
# Tier 3a: Predictive/speculative language
m = _PREDICTIVE.search(ct)
if m:
return True, f"predictive: '{m.group().strip()}'"
# Tier 3b: Deontic/normative
m = _DEONTIC.search(ct)
if m:
return True, f"deontic: '{m.group().strip()}'"
# Tier 4: Speculative question β†’ check if claim answers it (not just restates data)
if is_speculative_question(question):
if _is_answering_speculative_question(ct, question):
return True, "answers speculative question"
return False, ""
# ── Number detection for pure-data check ──────────────────────────────
_HAS_NUMBER = re.compile(r"[\$€£]\s*\d|\d[\d,.]*\s*(?:%|Β°|million|billion|mg|hPa|units?|employees?)")
def _is_answering_speculative_question(claim: str, question: str) -> bool:
"""Heuristic: is this claim trying to answer a speculative question?
We want to flag "The startup will succeed" but NOT flag
"The startup has $2M ARR" β€” both may appear as claims under
"Should we invest in this startup?"
Key insight: claims that contain specific numbers/measurements from
evidence are factual restatements. Claims with evaluative/conclusory
language without specific data are answering the speculative question.
"""
cl = claim.lower()
# Safe harbor: claims with specific numbers/measurements are data restatements
# e.g. "$2M ARR", "30% growth", "45,000 miles", "12,000/Β΅L"
if _HAS_NUMBER.search(claim):
# But still flag if it ALSO has strong evaluative framing
strong_eval = re.compile(
r"\bguilty\b|\binnocent\b|\bwill\s+(?:succeed|fail|rain)\b|"
r"\bdiagnosis\b|\bcause\s+(?:of|is)\b",
re.IGNORECASE,
)
if not strong_eval.search(cl):
return False
# Claims with evaluative/conclusory words β†’ answering the question
evaluative = re.compile(
r"\bsuccess\w*\b|\bfail\w*\b|\bguilty\b|\binnocent\b|"
r"\bcredib\w*\b|\bviable\b|\bprofitable\b|"
r"\bgood\s+(?:chance|position|sign)\b|"
r"\bbad\s+(?:sign|outlook)\b|"
r"\badds?\s+credibility\b|"
r"\binfection\b|\bdisease\b|\bdisorder\b|\bsyndrome\b|"
r"\bcause\s+(?:of|is)\b|\bcaused\s+by\b|"
r"\bdiagnosis\b|\betiology\b|"
r"\bwill\s+(?:rain|succeed|fail|grow|decline)\b",
re.IGNORECASE,
)
if evaluative.search(cl):
return True
# Claims that frame evidence in evaluative terms
framing = re.compile(
r"\bshows?\s+(?:strong|weak|positive|negative|promising)\b|"
r"\bdemonstrates?\s+(?:strong|weak|positive|negative)\b",
re.IGNORECASE,
)
if framing.search(cl):
return True
return False