Spaces:

pavankumarvk
/

Multi_Modal_Deepfake_Detection

Sleeping

App Files Files Community

pavankumarvk commited on Apr 20

Commit

a7e89c0

verified ·

1 Parent(s): a685dfd

Upload 2 files

Browse files

Files changed (2) hide show

text_detector_inference.py +131 -0
text_detector_model.py +203 -0

text_detector_inference.py ADDED Viewed

	@@ -0,0 +1,131 @@

+"""
+text_detector_inference.py
+==========================
+Inference wrapper for HybridAITextDetector.
+Designed to be imported by app.py (Gradio) in the Hugging Face Space.
+Usage
+-----
+from text_detector_inference import TextDetectorInference
+detector = TextDetectorInference(
+    checkpoint="best_text_detector.pt",
+    threshold=0.5
+)
+result = detector.predict("Some text here...")
+"""
+import os
+import torch
+from transformers import AutoTokenizer
+from text_detector_model import HybridAITextDetector, MODEL_NAME, MAX_LENGTH
+class TextDetectorInference:
+    """
+    Thin wrapper around HybridAITextDetector for single-text prediction.
+    Parameters
+    ----------
+    checkpoint : str
+        Path to the .pt state-dict file.
+    threshold  : float
+        Decision boundary for the sigmoid probability (default 0.5).
+        Set to the optimal F1 threshold found during evaluation.
+    device     : torch.device or None
+        Auto-detects CUDA if None.
+    """
+    def __init__(
+        self,
+        checkpoint: str  = "best_text_detector.pt",
+        threshold:  float = 0.5,
+        device: torch.device = None,
+    ):
+        self.threshold = threshold
+        self.device    = device or torch.device(
+            "cuda" if torch.cuda.is_available() else "cpu"
+        )
+        print(f"[TextDetector] Loading tokenizer from {MODEL_NAME}...")
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        if os.path.exists(checkpoint):
+            print(f"[TextDetector] Loading checkpoint: {checkpoint}")
+            self.model = HybridAITextDetector()
+            self.model.load_state_dict(
+                torch.load(checkpoint, map_location=self.device)
+            )
+            self.model.eval().to(self.device)
+            print("[TextDetector] ✅ Model ready")
+        else:
+            print(f"[TextDetector] ⚠️  Checkpoint '{checkpoint}' not found. "
+                  "Model NOT loaded — predictions will fail.")
+            self.model = None
+    # ------------------------------------------------------------------
+    def predict(self, text: str) -> dict:
+        """
+        Classify a single text string.
+        Returns
+        -------
+        dict with keys:
+            label      : "AI-Generated" or "Human-Written"
+            confidence : probability of the predicted class (0-1)
+            ai_prob    : raw P(AI-generated)
+            human_prob : 1 - ai_prob
+        """
+        if self.model is None:
+            return {"error": "Model not loaded — missing checkpoint file."}
+        text = text.strip()
+        if not text:
+            return {"error": "Input text is empty."}
+        enc = self.tokenizer(
+            text,
+            truncation=True,
+            padding="max_length",
+            max_length=MAX_LENGTH,
+            return_tensors="pt",
+        )
+        input_ids      = enc["input_ids"].to(self.device)
+        attention_mask = enc["attention_mask"].to(self.device)
+        token_type_ids = enc.get(
+            "token_type_ids",
+            torch.zeros_like(enc["input_ids"]),
+        ).to(self.device)
+        with torch.no_grad():
+            logit    = self.model(input_ids, attention_mask, token_type_ids)
+            ai_prob  = torch.sigmoid(logit).item()
+        human_prob = 1.0 - ai_prob
+        is_ai      = ai_prob >= self.threshold
+        label      = "AI-Generated" if is_ai else "Human-Written"
+        confidence = ai_prob if is_ai else human_prob
+        return {
+            "label":      label,
+            "confidence": round(confidence, 4),
+            "ai_prob":    round(ai_prob, 4),
+            "human_prob": round(human_prob, 4),
+        }
+    # ------------------------------------------------------------------
+    def predict_batch(self, texts: list[str]) -> list[dict]:
+        """Run predict() on a list of texts. Returns list of result dicts."""
+        return [self.predict(t) for t in texts]
+    # ------------------------------------------------------------------
+    def format_for_gradio(self, text: str) -> tuple[str, float, dict]:
+        """
+        Convenience wrapper that returns values in a Gradio-friendly format:
+            (label_string, confidence_float, full_result_dict)
+        """
+        result = self.predict(text)
+        if "error" in result:
+            return result["error"], 0.0, result
+        return result["label"], result["confidence"], result

text_detector_model.py ADDED Viewed

	@@ -0,0 +1,203 @@

+"""
+text_detector_model.py
+======================
+Standalone model definition for HybridAITextDetector.
+Import this in both training scripts and the Gradio app.
+Architecture:
+    DeBERTa-v3-small  →  [BiLSTM | CNN | Transformer]  →  CrossAttentionFusion  →  Classifier
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import AutoModel
+# ─── Constants ───────────────────────────────────────────────────────────────
+MODEL_NAME  = "microsoft/deberta-v3-small"
+MAX_LENGTH  = 128
+NUM_CLASSES = 1   # binary: sigmoid output
+# ─── Sub-modules ─────────────────────────────────────────────────────────────
+class AttentionPool(nn.Module):
+    """Soft attention pooling over a sequence of vectors."""
+    def __init__(self, dim: int):
+        super().__init__()
+        self.attn = nn.Linear(dim, 1)
+    def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
+        weights = self.attn(x)                              # (B, T, 1)
+        if mask is not None:
+            weights = weights.masked_fill(mask.unsqueeze(-1) == 0, float("-inf"))
+        weights = torch.softmax(weights, dim=1)             # (B, T, 1)
+        return (weights * x).sum(dim=1)                     # (B, dim)
+class BiLSTMBranch(nn.Module):
+    """2-layer Bidirectional LSTM with Attention Pooling."""
+    def __init__(self, input_dim: int, hidden_dim: int = 128):
+        super().__init__()
+        self.lstm = nn.LSTM(
+            input_dim, hidden_dim,
+            num_layers=2,
+            batch_first=True,
+            dropout=0.2,
+            bidirectional=True,
+        )
+        self.pool = AttentionPool(hidden_dim * 2)
+        self.proj = nn.Linear(hidden_dim * 2, 128)
+    def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
+        out, _ = self.lstm(x)                               # (B, T, 256)
+        pooled = self.pool(out, mask)                       # (B, 256)
+        return F.gelu(self.proj(pooled))                    # (B, 128)
+class CNNBranch(nn.Module):
+    """Multi-kernel 1D CNN with Global MaxPooling."""
+    def __init__(self, input_dim: int):
+        super().__init__()
+        self.conv3 = nn.Conv1d(input_dim, 64, kernel_size=3, padding=1)
+        self.conv5 = nn.Conv1d(input_dim, 64, kernel_size=5, padding=2)
+        self.conv7 = nn.Conv1d(input_dim, 64, kernel_size=7, padding=3)
+        self.bn3   = nn.BatchNorm1d(64)
+        self.bn5   = nn.BatchNorm1d(64)
+        self.bn7   = nn.BatchNorm1d(64)
+        self.proj  = nn.Linear(192, 128)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x_t = x.permute(0, 2, 1)                           # (B, D, T)
+        c3  = F.gelu(self.bn3(self.conv3(x_t)))
+        c5  = F.gelu(self.bn5(self.conv5(x_t)))
+        c7  = F.gelu(self.bn7(self.conv7(x_t)))
+        p3  = c3.max(dim=-1).values
+        p5  = c5.max(dim=-1).values
+        p7  = c7.max(dim=-1).values
+        cat = torch.cat([p3, p5, p7], dim=-1)              # (B, 192)
+        return F.gelu(self.proj(cat))                       # (B, 128)
+class TransformerBranch(nn.Module):
+    """Lightweight Transformer Encoder with Attention Pooling."""
+    def __init__(self, input_dim: int):
+        super().__init__()
+        self.proj_in = nn.Linear(input_dim, 128)
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=128, nhead=4,
+            dim_feedforward=256,
+            dropout=0.1,
+            batch_first=True,
+            norm_first=True,
+        )
+        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)
+        self.pool = AttentionPool(128)
+    def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
+        x   = F.gelu(self.proj_in(x))                      # (B, T, 128)
+        src_key_padding_mask = (mask == 0) if mask is not None else None
+        out = self.transformer(x, src_key_padding_mask=src_key_padding_mask)
+        return self.pool(out, mask)                         # (B, 128)
+class CrossAttentionFusion(nn.Module):
+    """Fuse 3 branch outputs via multi-head self-attention (3-token sequence)."""
+    def __init__(self, dim: int = 128):
+        super().__init__()
+        self.q     = nn.Linear(dim, dim)
+        self.k     = nn.Linear(dim, dim)
+        self.v     = nn.Linear(dim, dim)
+        self.scale = dim ** 0.5
+        self.proj  = nn.Linear(dim, dim)
+    def forward(
+        self,
+        lstm_out:  torch.Tensor,
+        cnn_out:   torch.Tensor,
+        trans_out: torch.Tensor,
+    ) -> torch.Tensor:
+        stacked = torch.stack([lstm_out, cnn_out, trans_out], dim=1)  # (B, 3, 128)
+        Q    = self.q(stacked)
+        K    = self.k(stacked)
+        V    = self.v(stacked)
+        attn = torch.softmax(torch.bmm(Q, K.transpose(1, 2)) / self.scale, dim=-1)
+        out  = torch.bmm(attn, V).mean(dim=1)              # (B, 128)
+        return F.gelu(self.proj(out))
+# ─── Main Model ──────────────────────────────────────────────────────────────
+class HybridAITextDetector(nn.Module):
+    """
+    Hybrid AI-generated text detector.
+    Inputs
+    ------
+    input_ids      : (B, T) long tensor
+    attention_mask : (B, T) long tensor  — 1 = real token, 0 = pad
+    token_type_ids : (B, T) long tensor
+    Output
+    ------
+    logits : (B, 1) float — apply sigmoid to get P(AI-generated)
+    """
+    def __init__(self):
+        super().__init__()
+        self.deberta = AutoModel.from_pretrained(MODEL_NAME)
+        # Freeze first 6 transformer layers
+        for name, param in self.deberta.named_parameters():
+            if any(f"layer.{i}." in name for i in range(6)):
+                param.requires_grad = False
+            else:
+                param.requires_grad = True
+        hidden = self.deberta.config.hidden_size   # 768 for deberta-v3-small
+        self.lstm_branch  = BiLSTMBranch(hidden)
+        self.cnn_branch   = CNNBranch(hidden)
+        self.trans_branch = TransformerBranch(hidden)
+        self.fusion       = CrossAttentionFusion(dim=128)
+        self.classifier = nn.Sequential(
+            nn.LayerNorm(128),
+            nn.Linear(128, 128),
+            nn.GELU(),
+            nn.Dropout(0.4),
+            nn.Linear(128, 64),
+            nn.GELU(),
+            nn.Dropout(0.3),
+            nn.Linear(64, 1),
+        )
+    def forward(
+        self,
+        input_ids:      torch.Tensor,
+        attention_mask: torch.Tensor,
+        token_type_ids: torch.Tensor,
+    ) -> torch.Tensor:
+        out    = self.deberta(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+        )
+        hidden    = out.last_hidden_state                   # (B, T, 768)
+        lstm_out  = self.lstm_branch(hidden, attention_mask)
+        cnn_out   = self.cnn_branch(hidden)
+        trans_out = self.trans_branch(hidden, attention_mask)
+        fused     = self.fusion(lstm_out, cnn_out, trans_out)
+        return self.classifier(fused)                       # (B, 1)
+# ─── Convenience inference helper ────────────────────────────────────────────
+def load_model(checkpoint_path: str, device: torch.device = None) -> HybridAITextDetector:
+    """Load a trained HybridAITextDetector from a .pt checkpoint."""
+    if device is None:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = HybridAITextDetector()
+    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
+    model.eval().to(device)
+    return model