verity-h-prototype / src /span_matcher.py
Sravanth18's picture
v0.3.1: inference detector (4-tier), contradiction question-fallback, improved batch prompt (154 tests pass)
1e4f1f7 verified
"""Deterministic span matcher β€” labels claims against evidence without LLM.
This is the model-independent backbone of the verifier. It handles:
1. Exact/fuzzy substring matching β†’ SUPPORTED
2. Absence/deferral detection β†’ NOT_IN_EVIDENCE
3. No-match β†’ UNSUPPORTED (needs LLM or stays as-is)
Used both as:
- Primary labeler for simple claims (works on any model size)
- Post-processor to fix LLM mislabeling
"""
from __future__ import annotations
import re
from .schemas import EvidencePointer, EvidenceSpan, VerifiedClaim
from .constants import STOP_WORDS
from .inference_detector import detect_inference
# ── Absence / deferral patterns ───────────────────────────────────────
# Claim text that says "evidence doesn't have X"
# Matched case-insensitively against claim text. The whole alternation is
# anchored with a leading \b so a phrase can only start at a word boundary:
# without it "This unknown author" matched "is unknown" and
# "casino data on ..." matched "no data on".
ABSENCE_IN_CLAIM = re.compile(
    r"\b(?:"
    r"evidence does not (?:specify|mention|include|contain|provide|state)|"
    r"not (?:provided|mentioned|specified|stated|included)(?: in the evidence)?|"
    r"is not provided|is not mentioned|is not specified|"
    r"no (?:information|data|mention) (?:about|regarding|on|of)|"
    r"cannot (?:determine|confirm|verify) from|"
    r"not enough information|"
    r"does not contain information|"
    r"no evidence (?:about|for|of|regarding)|"
    r"is (?:unknown|unavailable|missing|absent)"
    r")",
    re.IGNORECASE,
)
# Evidence text that confirms info is missing/pending
# Matched case-insensitively. "pending" is wrapped in word boundaries so that
# ordinary words containing it ("spending", "depending", "suspending") are
# not mistaken for a statement that information is pending.
DEFERRAL_IN_EVIDENCE = re.compile(
    r"has not been finalized|"
    r"not (?:been )?finalized|"
    r"still being collected|"
    r"data is (?:still )?being|"
    r"please contact|"
    r"contact(?:ed)? (?:the |directly )?.{0,30}(?:desk|office|department)|"
    r"should be contacted|"
    r"not (?:yet )?(?:available|determined|decided|released|announced)|"
    r"\bpending\b|"
    r"under review|"
    r"to be (?:determined|announced|confirmed|decided)",
    re.IGNORECASE,
)
# Module-private alias for the stop-word collection imported from .constants;
# _key_words subtracts it from the words it extracts.
_STOP_WORDS = STOP_WORDS
def label_claim_against_spans(
    claim_text: str,
    spans: list[EvidenceSpan],
) -> tuple[str, EvidencePointer | None, str]:
    """Deterministically label a claim against evidence spans.

    The checks run as a cascade and the first one that fires decides the
    result; spans are always tried in the order given.

    Returns: (label, pointer_or_None, notes)
    Label is one of: SUPPORTED, NOT_IN_EVIDENCE, or empty string "" meaning
    "I don't know — let the LLM decide".
    """
    claim = claim_text.strip()
    # Lower-cased, without trailing periods: used for substring and keyword checks.
    needle = claim.lower().rstrip(".")

    # Tiers 1 and 2: the claim itself talks about missing or deferred
    # information, so no span can be cited for it.
    phrase_checks = (
        (ABSENCE_IN_CLAIM, "claim states absence of evidence"),
        (DEFERRAL_IN_EVIDENCE, "claim describes pending/deferred info"),
    )
    for pattern, reason in phrase_checks:
        if pattern.search(claim):
            return "NOT_IN_EVIDENCE", None, reason

    # Tier 3: the claim (at least 10 characters) appears verbatim,
    # case-insensitively, inside a span.
    for candidate in spans:
        haystack = candidate.text.lower()
        if len(needle) < 10 or needle not in haystack:
            continue
        return "SUPPORTED", _make_pointer(candidate), "exact substring match"

    claim_numbers = _extract_comparable_nums(claim)
    claim_keywords = _key_words(needle)

    # Tier 3b: every number in the claim is present in the span and at least
    # one keyword is shared.
    if claim_numbers and claim_keywords:
        for candidate in spans:
            candidate_numbers = _extract_comparable_nums(candidate.text)
            if not claim_numbers.issubset(candidate_numbers):
                continue
            if claim_keywords & _key_words(candidate.text.lower()):
                return "SUPPORTED", _make_pointer(candidate), "number + keyword match"

    # Tier 4: fuzzy match — at least 80% of the claim's keywords occur in the
    # span. Only attempted when the claim has two or more keywords.
    if len(claim_keywords) >= 2:
        for candidate in spans:
            shared = claim_keywords & _key_words(candidate.text.lower())
            overlap = len(shared) / len(claim_keywords)
            if overlap < 0.80:
                continue
            # The first span that clears the threshold settles the outcome:
            # a number mismatch ends the search instead of trying later spans.
            if not _numbers_consistent(claim, candidate.text):
                return "", None, "fuzzy match but numbers inconsistent"
            pointer = _make_pointer(candidate)
            return "SUPPORTED", pointer, f"fuzzy match ({overlap:.0%} keyword overlap)"

    # Tier 5: nothing fired — leave the decision to the LLM.
    return "", None, ""
def relabel_claims(
    claims: list[VerifiedClaim],
    spans: list[EvidenceSpan],
    question: str = "",
) -> list[VerifiedClaim]:
    """Post-process LLM-labeled claims using deterministic checks.

    Each claim is labeled again with ``label_claim_against_spans`` and the
    LLM label is reconciled with that deterministic verdict.

    Rules (applied in order):
      1. SUPPORTED + absence/deferral text → downgrade to NOT_IN_EVIDENCE
      2. CONTRADICTS_EVIDENCE + absence/deferral text → downgrade to NOT_IN_EVIDENCE
      3. NOT_IN_EVIDENCE/UNSUPPORTED + span match → upgrade to SUPPORTED
      4. SUPPORTED + inference detected → downgrade to UNSUPPORTED

    Args:
        claims: Claims to adjust; each is modified in place (``label``,
            ``evidence_pointers`` and ``notes``).
        spans: Evidence spans passed to the deterministic labeler.
        question: Original question. Rule 4 is skipped when it is empty.

    Returns:
        The same ``claims`` list that was passed in.
    """
    for claim in claims:
        det_label, det_ptr, det_notes = label_claim_against_spans(claim.claim_text, spans)
        if claim.label == "SUPPORTED" and det_label == "NOT_IN_EVIDENCE":
            # LLM wrongly called an absence claim SUPPORTED
            claim.label = "NOT_IN_EVIDENCE"
            claim.evidence_pointers = []
            claim.notes = (claim.notes or "") + f" [det: {det_notes}]"
        elif claim.label == "CONTRADICTS_EVIDENCE" and det_label == "NOT_IN_EVIDENCE":
            # Saying the evidence lacks something is not a contradiction of it.
            claim.label = "NOT_IN_EVIDENCE"
            claim.evidence_pointers = []
            claim.notes = (claim.notes or "") + " [det: absence claim, not contradiction]"
        elif claim.label in ("NOT_IN_EVIDENCE", "UNSUPPORTED") and det_label == "SUPPORTED" and det_ptr:
            # The deterministic matcher found a span the LLM missed.
            claim.label = "SUPPORTED"
            claim.evidence_pointers = [det_ptr]
            claim.notes = (claim.notes or "") + f" [det: {det_notes}]"
        # Rule 4: inference detection — must come AFTER span match upgrades,
        # so that claims upgraded by rule 3 are screened as well.
        if claim.label == "SUPPORTED" and question:
            is_inf, inf_reason = detect_inference(claim.claim_text, question)
            if is_inf:
                claim.label = "UNSUPPORTED"
                claim.evidence_pointers = []
                claim.notes = (claim.notes or "") + f" [det: inference — {inf_reason}]"
    return claims
# ── Helpers ────────────────────────────────────────────────────────────
def _key_words(text: str) -> set[str]:
    """Return the distinct words of four or more characters in *text*, minus stop words.

    No case folding happens here; the callers in this module pass text that
    is already lower-cased.
    """
    long_tokens = set(re.findall(r"\b\w{4,}\b", text))
    return long_tokens - _STOP_WORDS
# ── Numeric/entity consistency ────────────────────────────────────────
# A numeric token: optional "$", digits with "," / "." allowed only BETWEEN
# digits, and an optional unit suffix.
_NUM_RE = re.compile(
    # Not glued to a preceding letter (skips identifiers such as "v2", "H2O").
    r"(?<![A-Za-z])"
    # Separators must be followed by a digit, so sentence punctuation after a
    # number ("1,200." / "50,") is never part of the match.
    r"\$?\d+(?:[.,]\d+)*"
    r"(?:\s*(?:%|degrees|°[CF]|million|billion|mg|hPa))?"
    # The match must end the whole token. Rejecting a following digit or
    # separator+digit stops the regex engine from backtracking to a fragment
    # of a number that is glued to letters ("23rd" used to yield "2").
    r"(?![A-Za-z0-9]|[.,]\d)"
)


def _numbers_consistent(claim: str, span: str) -> bool:
    """Check that every number in the claim also appears in the span.

    If the claim mentions a number that doesn't appear in the span — which
    includes the case where the span contains no numbers at all — the fuzzy
    match is unreliable: the value could be hallucinated.
    If the claim has no numbers, the check is skipped (text-only claim).

    Returns:
        True when the claim has no numbers or all of them occur in the span.
    """
    claim_nums = _extract_comparable_nums(claim)
    if not claim_nums:
        return True  # no numbers to check
    return claim_nums.issubset(_extract_comparable_nums(span))


def _extract_comparable_nums(text: str) -> set[str]:
    """Extract normalized number strings for comparison.

    Each match of ``_NUM_RE`` is normalized by removing "$", commas and
    whitespace and lower-casing, e.g. "$1,200" -> "1200", "5 million" ->
    "5million".
    """
    return {
        re.sub(r"[$,\s]", "", match.group()).lower()
        for match in _NUM_RE.finditer(text)
    }
def _make_pointer(span: EvidenceSpan) -> EvidencePointer:
    """Build an EvidencePointer for *span*.

    The span's id and character offsets are copied over, and the first 80
    characters of its text become the preview.
    """
    fields = {
        "span_id": span.span_id,
        "start_char": span.start_char,
        "end_char": span.end_char,
        "text_preview": span.text[:80],
    }
    return EvidencePointer(**fields)