import gradio as gr
import re
import nltk
from nltk.corpus import wordnet
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from textstat import textstat
import spacy
import torch

# Download necessary NLTK data
try:
    nltk.download('wordnet', quiet=True)
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)  # needed by newer NLTK releases for sent_tokenize
    nltk.download('averaged_perceptron_tagger', quiet=True)
except Exception:
    print("NLTK data download failed. Some features may be limited.")

# Load NER model for entity detection
try:
    nlp = spacy.load("en_core_web_sm")
except Exception:
    try:
        spacy.cli.download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")
    except Exception:
        print("spaCy model loading failed. Entity recognition will be limited.")
        nlp = None

# Load sentiment analysis pipeline
try:
    sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
except Exception:
    print("Sentiment analyzer loading failed. Sentiment analysis will be disabled.")
    sentiment_analyzer = None

# Load grammar correction model
try:
    grammar_model_name = "pszemraj/flan-t5-large-grammar-synthesis"
    grammar_tokenizer = AutoTokenizer.from_pretrained(grammar_model_name)
    grammar_model = AutoModelForSeq2SeqLM.from_pretrained(grammar_model_name)
except Exception:
    print("Grammar correction model loading failed. Grammar correction will be disabled.")
    grammar_model = None
    grammar_tokenizer = None

# Load text summarization model
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
except Exception:
    print("Summarization model loading failed. Summarization will be disabled.")
    summarizer = None


def get_synonyms(word):
    """Get synonyms for a word using WordNet"""
    synonyms = set()
    try:
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                synonyms.add(lemma.name().replace('_', ' '))
        return list(synonyms)[:5]  # Return up to 5 synonyms
    except Exception:
        return []


def correct_grammar_with_model(text, max_length=512):
    """Use a transformer model to correct grammar"""
    if not grammar_model or not grammar_tokenizer:
        return text

    # Split text into sentence chunks so each stays within the model's input limit
    chunks = []
    sentences = nltk.sent_tokenize(text)
    current_chunk = ""

    for sentence in sentences:
        # If adding this sentence would make the chunk too long, save the current chunk and start a new one
        if len(grammar_tokenizer.encode(current_chunk + " " + sentence)) > max_length:
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sentence
        else:
            if current_chunk:
                current_chunk += " " + sentence
            else:
                current_chunk = sentence

    # Add the last chunk if not empty
    if current_chunk:
        chunks.append(current_chunk)

    # Correct each chunk and reassemble the text
    corrected_chunks = []
    for chunk in chunks:
        # Skip empty chunks
        if not chunk.strip():
            continue
        inputs = grammar_tokenizer(f"grammar: {chunk}", return_tensors="pt",
                                   truncation=True, max_length=max_length)
        with torch.no_grad():
            outputs = grammar_model.generate(
                inputs.input_ids,
                max_length=max_length,
                num_beams=5,
                early_stopping=True
            )
        corrected = grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)
        corrected_chunks.append(corrected)

    return " ".join(corrected_chunks)


def find_grammar_issues(original_text, corrected_text):
    """Identify differences between original and corrected text"""
    issues = []

    # Use simple tokenization to compare texts
    original_sentences = nltk.sent_tokenize(original_text)
    corrected_sentences = nltk.sent_tokenize(corrected_text)

    # Match up sentences and find differences
    min_len = min(len(original_sentences), len(corrected_sentences))
    for i in range(min_len):
        if original_sentences[i] != corrected_sentences[i]:
issues.append({ "original": original_sentences[i], "corrected": corrected_sentences[i], "position": original_text.find(original_sentences[i]) }) return issues def calculate_readability_metrics(text): """Calculate various readability metrics""" if not text.strip(): return {} try: return { "flesch_reading_ease": textstat.flesch_reading_ease(text), "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text), "gunning_fog": textstat.gunning_fog(text), "smog_index": textstat.smog_index(text), "automated_readability_index": textstat.automated_readability_index(text), "coleman_liau_index": textstat.coleman_liau_index(text), "reading_time": f"{textstat.reading_time(text, ms_per_char=14):.1f} seconds" } except: return {"error": "Readability calculation failed"} def find_repeated_words(text): """Find repeated words in close proximity""" words = text.lower().split() repeated = [] for i in range(len(words) - 5): window = words[i:i+5] for word in set(window): if len(word) > 3 and window.count(word) > 1: # Only consider words longer than 3 chars repeated.append(word) return list(set(repeated)) def identify_passive_voice(text): """Identify potential passive voice usage""" # Simple pattern matching for common passive voice constructions passive_patterns = [ r'\b(?:am|is|are|was|were|be|being|been)\s+(\w+ed)\b', r'\b(?:am|is|are|was|were|be|being|been)\s+(\w+en)\b' ] passive_instances = [] for pattern in passive_patterns: matches = re.finditer(pattern, text, re.IGNORECASE) for match in matches: start = max(0, match.start() - 20) end = min(len(text), match.end() + 20) context = text[start:end] passive_instances.append({ "match": match.group(0), "context": context, "position": match.start() }) return passive_instances def analyze_sentiment(text): """Analyze sentiment of the text""" if not sentiment_analyzer or len(text.strip()) < 5: # Skip very short text return {"label": "N/A", "score": 0} try: result = sentiment_analyzer(text[:512])[0] # Limit text length for the model return result except: return {"label": "Error", "score": 0} def extract_entities(text): """Extract named entities from text""" if not nlp: return [] entities = [] try: # Process text in chunks if it's too long max_chars = 100000 # spaCy default max length if len(text) > max_chars: chunks = [text[i:i+max_chars] for i in range(0, len(text), max_chars)] else: chunks = [text] for chunk in chunks: doc = nlp(chunk) for ent in doc.ents: entities.append({ "text": ent.text, "label": ent.label_, "start": ent.start_char, "end": ent.end_char }) except: pass return entities def suggest_simpler_vocabulary(text): """Suggest simpler alternatives for complex words""" # This is a simplified implementation complex_words = { "utilize": "use", "implement": "use", "facilitate": "help", "leverage": "use", "optimize": "improve", "commence": "start", "terminate": "end", "endeavor": "try", "cognizant": "aware", "prioritize": "focus on", "ascertain": "find out", "subsequent": "later", "initiate": "start", "finalize": "finish", "abundant": "many", "adequate": "enough", "demonstrate": "show", "encounter": "meet", "generate": "create", "observe": "see", "obtain": "get", "require": "need", "sufficient": "enough", "utilize": "use", "endeavour": "try", "comprehend": "understand", "procure": "get", "inquire": "ask", "commence": "begin", "purchase": "buy", "assist": "help" } suggestions = {} for word, replacement in complex_words.items(): if re.search(r'\b' + word + r'\b', text, re.IGNORECASE): suggestions[word] = replacement return suggestions def summarize_text(text, 
                   max_length=150, min_length=40):
    """Summarize the text using a pre-trained model"""
    if not summarizer or len(text.split()) < 30:  # Don't summarize short text
        return "Text is too short for summarization"

    try:
        # Split into chunks if text is too long
        max_chunk_length = 1024  # Most summarization models have limits
        if len(text.split()) > max_chunk_length:
            sentences = nltk.sent_tokenize(text)
            chunks = []
            current_chunk = []
            current_length = 0

            for sentence in sentences:
                sentence_length = len(sentence.split())
                if current_length + sentence_length <= max_chunk_length:
                    current_chunk.append(sentence)
                    current_length += sentence_length
                else:
                    chunks.append(" ".join(current_chunk))
                    current_chunk = [sentence]
                    current_length = sentence_length

            if current_chunk:
                chunks.append(" ".join(current_chunk))

            # Summarize each chunk and combine
            summaries = []
            for chunk in chunks:
                summary = summarizer(chunk,
                                     max_length=min(max_length, len(chunk.split())),
                                     min_length=min(min_length, len(chunk.split()) // 2),
                                     do_sample=False)[0]['summary_text']
                summaries.append(summary)
            return " ".join(summaries)
        else:
            return summarizer(text, max_length=max_length, min_length=min_length,
                              do_sample=False)[0]['summary_text']
    except Exception as e:
        return f"Summarization failed: {str(e)}"


def analyze_formality(text):
    """Analyze the formality level of the text"""
    # Simple heuristics-based formality analysis
    formal_indicators = [
        r'\b(?:however|therefore|thus|consequently|furthermore|moreover|nevertheless)\b',
        r'\b(?:shall|ought|whom|whereby|herein|therein|wherein)\b',
        r'\b(?:Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.)\b',
        r'\b(?:would like to|I am writing to)\b'
    ]

    informal_indicators = [
        r'\b(?:yeah|nope|gonna|wanna|gotta|kinda|sorta)\b',
        r'(?:!{2,}|\?{2,})',
        r'\b(?:lol|omg|btw|imo|tbh)\b',
        r"(?:don't|won't|can't|shouldn't|wouldn't|isn't|aren't|haven't)",
        r'\b(?:awesome|cool|super|great|huge)\b'
    ]

    formal_count = 0
    for pattern in formal_indicators:
        formal_count += len(re.findall(pattern, text, re.IGNORECASE))

    informal_count = 0
    for pattern in informal_indicators:
        informal_count += len(re.findall(pattern, text, re.IGNORECASE))

    # Calculate formality score (simple version)
    word_count = len(text.split())
    if word_count == 0:
        return {"formality_level": "Unknown", "score": 0.5}

    formal_ratio = formal_count / max(1, word_count)
    informal_ratio = informal_count / max(1, word_count)

    # Determine formality level
    if formal_ratio > 0.05 and formal_ratio > informal_ratio * 2:
        formality = "Formal"
        score = min(0.9, 0.5 + formal_ratio * 5)
    elif informal_ratio > 0.05 and informal_ratio > formal_ratio * 2:
        formality = "Informal"
        score = max(0.1, 0.5 - informal_ratio * 5)
    else:
        formality = "Neutral"
        score = 0.5

    return {"formality_level": formality, "score": score}


def detect_tone(text):
    """Detect the overall tone of the text"""
    if not text.strip():
        return "Neutral"

    # Simple keyword-based tone detection
    tone_keywords = {
        "Professional": ["recommend", "inform", "request", "provide", "consider", "suggest",
                         "propose", "analyze", "evaluate", "conclude"],
        "Academic": ["research", "study", "analysis", "theory", "hypothesis", "methodology",
                     "findings", "literature", "experiment", "data"],
        "Friendly": ["thanks", "appreciate", "happy", "glad", "hope", "welcome", "please",
                     "enjoy", "share", "connect"],
        "Persuasive": ["should", "must", "need", "important", "crucial", "essential",
                       "significant", "consider", "believe", "argue"],
        "Urgent": ["immediately", "urgent", "asap", "quickly", "soon", "deadline", "critical",
                   "emergency", "promptly", "hurry"],
        "Cautious": ["perhaps", "might", "may",
"possible", "potentially", "suggest", "consider", "could", "seems", "appears"] } tone_scores = {tone: 0 for tone in tone_keywords} word_count = len(text.split()) # Count occurrences of tone keywords for tone, keywords in tone_keywords.items(): for keyword in keywords: tone_scores[tone] += len(re.findall(r'\b' + keyword + r'\b', text, re.IGNORECASE)) # Normalize by word count for tone in tone_scores: tone_scores[tone] = tone_scores[tone] / max(1, word_count) # Find the most dominant tone dominant_tone = max(tone_scores.items(), key=lambda x: x[1]) # Only return a specific tone if it's significantly present if dominant_tone[1] > 0.02: return dominant_tone[0] else: return "Neutral" def text_analysis(text): """Comprehensive text analysis""" if not text.strip(): return { "grammar_issues": [], "corrected_text": "", "readability": {}, "repeated_words": [], "passive_voice": [], "sentiment": {"label": "N/A", "score": 0}, "entities": [], "simpler_vocabulary": {}, "formality": {"formality_level": "Unknown", "score": 0.5}, "tone": "Neutral", "summary": "", "word_count": 0, "sentence_count": 0, "average_sentence_length": 0 } # Basic text stats word_count = len(text.split()) sentences = nltk.sent_tokenize(text) sentence_count = len(sentences) avg_sentence_length = word_count / max(sentence_count, 1) # Correct grammar with AI model corrected_text = correct_grammar_with_model(text) # Find grammar issues by comparing original and corrected text grammar_issues = find_grammar_issues(text, corrected_text) # Run all analysis functions readability = calculate_readability_metrics(text) repeated_words = find_repeated_words(text) passive_voice = identify_passive_voice(text) sentiment = analyze_sentiment(text) entities = extract_entities(text) simpler_words = suggest_simpler_vocabulary(text) formality = analyze_formality(text) tone = detect_tone(text) # Generate summary for longer text summary = "" if word_count > 50: summary = summarize_text(text) return { "grammar_issues": grammar_issues, "corrected_text": corrected_text, "readability": readability, "repeated_words": repeated_words, "passive_voice": passive_voice, "sentiment": sentiment, "entities": entities, "simpler_vocabulary": simpler_words, "formality": formality, "tone": tone, "summary": summary, "word_count": word_count, "sentence_count": sentence_count, "average_sentence_length": avg_sentence_length } def format_grammar_issues(issues): if not issues: return "No grammar issues found." result = "Grammar Issues Found:\n\n" for i, issue in enumerate(issues, 1): result += f"{i}. Original: \"{issue['original']}\"\n" result += f" Corrected: \"{issue['corrected']}\"\n\n" return result def format_readability(metrics): if not metrics: return "Readability metrics not available." 
if "error" in metrics: return f"Error: {metrics['error']}" # Define interpretations for Flesch Reading Ease def interpret_flesch(score): if score >= 90: return "Very Easy (5th grade)" elif score >= 80: return "Easy (6th grade)" elif score >= 70: return "Fairly Easy (7th grade)" elif score >= 60: return "Standard (8th-9th grade)" elif score >= 50: return "Fairly Difficult (10th-12th grade)" elif score >= 30: return "Difficult (College)" else: return "Very Difficult (College Graduate)" result = "Readability Analysis:\n\n" result += f"• Flesch Reading Ease: {metrics['flesch_reading_ease']:.1f} - {interpret_flesch(metrics['flesch_reading_ease'])}\n" result += f"• Flesch-Kincaid Grade Level: {metrics['flesch_kincaid_grade']:.1f}\n" result += f"• Gunning Fog Index: {metrics['gunning_fog']:.1f}\n" result += f"• SMOG Index: {metrics['smog_index']:.1f}\n" result += f"• Automated Readability Index: {metrics['automated_readability_index']:.1f}\n" result += f"• Coleman-Liau Index: {metrics['coleman_liau_index']:.1f}\n" result += f"• Estimated Reading Time: {metrics['reading_time']}" return result def format_passive_voice(passive_instances): if not passive_instances: return "No passive voice detected." result = f"Passive Voice Detected ({len(passive_instances)} instances):\n\n" for i, instance in enumerate(passive_instances[:5], 1): # Show up to 5 examples result += f"{i}. \"...{instance['context']}...\"\n" if len(passive_instances) > 5: result += f"\nand {len(passive_instances) - 5} more..." return result def format_entities(entities): if not entities: return "No named entities detected." # Group entities by type entity_groups = {} for entity in entities: if entity['label'] not in entity_groups: entity_groups[entity['label']] = [] entity_groups[entity['label']].append(entity['text']) result = "Named Entities Detected:\n\n" for label, items in entity_groups.items(): unique_items = list(set(items))[:5] # Show up to 5 unique entities per type result += f"• {label}: {', '.join(unique_items)}" if len(set(items)) > 5: result += f" and {len(set(items)) - 5} more" result += "\n" return result def format_vocabulary_suggestions(suggestions): if not suggestions: return "No vocabulary simplification suggestions." 
result = "Vocabulary Simplification Suggestions:\n\n" for complex_word, simple_word in suggestions.items(): result += f"• \"{complex_word}\" → \"{simple_word}\"\n" return result def build_interface(): with gr.Blocks(title="AI Grammar & Style Assistant", theme=gr.themes.Soft()) as app: gr.Markdown("# 📝 AI Grammar & Style Assistant") gr.Markdown("Powered by AI to help improve your writing with advanced grammar checking, style suggestions, and more!") with gr.Tab("Text Analysis"): with gr.Row(): with gr.Column(scale=3): input_text = gr.Textbox( label="Enter your text here", placeholder="Type or paste your text here for analysis...", lines=10 ) analyze_btn = gr.Button("Analyze Text", variant="primary") with gr.Column(scale=3): corrected_output = gr.Textbox(label="Corrected Text", lines=10) with gr.Row(): with gr.Column(): grammar_issues = gr.Textbox(label="Grammar Issues", lines=6) readability_metrics = gr.Textbox(label="Readability Analysis", lines=10) with gr.Column(): passive_voice_output = gr.Textbox(label="Passive Voice Detection", lines=6) vocab_suggestions = gr.Textbox(label="Vocabulary Suggestions", lines=6) with gr.Row(): with gr.Column(): entity_detection = gr.Textbox(label="Entity Detection", lines=6) with gr.Column(): formality_tone = gr.Textbox(label="Formality & Tone Analysis", lines=6) with gr.Row(): text_summary = gr.Textbox(label="Text Summary", lines=4) with gr.Row(): text_stats = gr.JSON(label="Text Statistics") with gr.Tab("Help & Information"): gr.Markdown(""" ## How to Use This Tool 1. Enter or paste your text in the input box 2. Click "Analyze Text" 3. Review the analysis results across all categories ## Features - **Grammar Correction**: AI-powered grammar correction using advanced language models - **Readability Analysis**: Multiple readability metrics including Flesch Reading Ease, Gunning Fog, and more - **Style Improvement**: Detects passive voice, repeated words, and complex vocabulary - **Named Entity Recognition**: Identifies people, organizations, locations, and more - **Sentiment Analysis**: Detects the emotional tone of your text - **Formality Analysis**: Determines if your text is formal, neutral, or informal - **Text Summarization**: Creates a concise summary of longer texts - **Tone Detection**: Identifies the overall tone (professional, academic, friendly, etc.) ## About This is an AI-powered writing assistant similar to Grammarly, built with Python, Gradio, and Hugging Face transformer models. 
""") def process_text(text): """Process the input text and return all analysis results""" if not text.strip(): return ("", "No text to analyze.", "No text to analyze.", "No text to analyze.", "No text to analyze.", "No text to analyze.", "No text to analyze.", "No text to analyze.", {}) # Perform comprehensive analysis results = text_analysis(text) # Format corrected text corrected = results["corrected_text"] if results["corrected_text"] else text # Format grammar issues grammar_output = format_grammar_issues(results["grammar_issues"]) # Format readability metrics readability_output = format_readability(results["readability"]) # Format passive voice detection passive_output = format_passive_voice(results["passive_voice"]) # Format entity detection entities_output = format_entities(results["entities"]) # Format vocabulary suggestions vocab_output = format_vocabulary_suggestions(results["simpler_vocabulary"]) # Format formality and tone formality_tone_output = f"Formality: {results['formality']['formality_level']} (Score: {results['formality']['score']:.2f})\nTone: {results['tone']}" # Format summary summary_output = results["summary"] if results["summary"] else "Summary not available for this text." # Format text statistics stats = { "Word Count": results["word_count"], "Sentence Count": results["sentence_count"], "Average Sentence Length": f"{results['average_sentence_length']:.1f} words", "Repeated Words": results["repeated_words"], "Sentiment": f"{results['sentiment']['label']} (Score: {results['sentiment']['score']:.2f})" } return (corrected, grammar_output, readability_output, passive_output, vocab_output, entities_output, formality_tone_output, summary_output, stats) analyze_btn.click( process_text, inputs=[input_text], outputs=[corrected_output, grammar_issues, readability_metrics, passive_voice_output, vocab_suggestions, entity_detection, formality_tone, text_summary, text_stats] ) return app # Create and launch the interface app = build_interface() # For Hugging Face Spaces deployment if __name__ == "__main__": app.launch()