import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import numpy as np
import threading
import textwrap
import statistics
import math

# --- IMPORT OPTIONAL LIBRARIES FOR ANALYSIS ---
# We try to import sophisticated NLP libraries. 
# If missing, we degrade to simpler algorithms gracefully.

# Human-readable name of the similarity backend. "Basic" is only a
# placeholder: each import outcome handled below replaces it.
ANALYSIS_MODE = "Basic"
try:
    # First choice: sentence embeddings from sentence-transformers.
    from sentence_transformers import SentenceTransformer, util
    ANALYSIS_MODE = "Transformer (Deep Semantic)"
    print("Log: sentence-transformers detected. Using Deep Semantic Analysis.")
except ImportError:
    try:
        # Second choice: scikit-learn TF-IDF vectors compared by cosine similarity.
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.metrics.pairwise import cosine_similarity
        ANALYSIS_MODE = "TF-IDF (Statistical)"
        print("Log: sentence-transformers not found. Using sklearn TF-IDF.")
    except ImportError:
        # Last resort: neither optional NLP library could be imported.
        ANALYSIS_MODE = "Jaccard (Lexical)"
        print("Log: sklearn not found. Using basic Jaccard similarity.")

class AnalysisEngine:
    """
    Handles the heavy lifting of text comparison.

    The backend is taken from the module-level ``ANALYSIS_MODE``: sentence
    embeddings ("Transformer"), per-pair TF-IDF vectors ("TF-IDF"), or plain
    word-set overlap (anything else).
    """
    def __init__(self):
        self.model = None
        self.mode = ANALYSIS_MODE
        # Background model loader; stays None when no load was started.
        self._loader_thread = None

        # Load model in a separate thread to not block UI startup if using SBERT
        if "Transformer" in self.mode:
            self._loader_thread = threading.Thread(
                target=self._load_transformer, daemon=True
            )
            self._loader_thread.start()

    def _load_transformer(self):
        """Load the embedding model; on failure, switch the engine to TF-IDF mode."""
        try:
            # 'all-MiniLM-L6-v2' is a great balance of speed and accuracy
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            print(f"Error loading model: {e}")
            self.mode = "TF-IDF (Statistical)"

    def _wait_for_model(self):
        """Block until the background model load (if any) has finished."""
        loader = self._loader_thread
        if loader is not None and loader is not threading.current_thread():
            loader.join()

    def compute_similarity(self, list_a, list_b):
        """
        Score each pair ``(list_a[i], list_b[i])`` for textual similarity.

        Missing values (as judged by ``pd.notna``) are treated as empty
        strings and every other value is converted with ``str``. When the
        lists differ in length, only the positions present in both are scored.

        Returns:
            A list with one score per pair. TF-IDF and Jaccard scores lie in
            0.0-1.0; transformer scores are cosine similarities and can be
            slightly negative for unrelated texts.
        """
        # Ensure inputs are strings and handle NaNs
        list_a = [str(x) if pd.notna(x) else "" for x in list_a]
        list_b = [str(x) if pd.notna(x) else "" for x in list_b]

        # Score the common prefix only, so all three backends agree on
        # mismatched lengths, and never hand an empty batch to a model.
        pair_count = min(len(list_a), len(list_b))
        if pair_count == 0:
            return []
        list_a = list_a[:pair_count]
        list_b = list_b[:pair_count]

        if "Transformer" in self.mode:
            # Without this wait, a request made while the model is still
            # loading would silently be answered by the Jaccard fallback
            # even though the engine reports Transformer mode. If the load
            # fails, self.mode has become "TF-IDF ..." once the join returns.
            self._wait_for_model()

        if "Transformer" in self.mode and self.model is not None:
            # Deep Semantic Analysis. With unit-length embeddings the
            # row-wise dot product is the cosine similarity of each pair,
            # which avoids building the full N x N similarity matrix just
            # to read its diagonal.
            embeddings1 = self.model.encode(
                list_a, convert_to_tensor=True, normalize_embeddings=True
            )
            embeddings2 = self.model.encode(
                list_b, convert_to_tensor=True, normalize_embeddings=True
            )
            return (embeddings1 * embeddings2).sum(dim=1).tolist()

        elif "TF-IDF" in self.mode:
            # Statistical Analysis. Imported here because the module-level
            # sklearn import is only attempted when sentence-transformers
            # is missing.
            from sklearn.feature_extraction.text import TfidfVectorizer
            from sklearn.metrics.pairwise import cosine_similarity

            scores = []
            vectorizer = TfidfVectorizer()
            for t1, t2 in zip(list_a, list_b):
                if not t1.strip() or not t2.strip():
                    # A blank side cannot be vectorised: identical texts
                    # count as a full match, anything else as no match.
                    scores.append(0.0 if t1 != t2 else 1.0)
                    continue
                try:
                    # The vocabulary is refitted on just this pair.
                    tfidf_matrix = vectorizer.fit_transform([t1, t2])
                    score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
                    scores.append(float(score))
                except ValueError:
                    # Occurs if vocabulary is empty (e.g. stop words only)
                    scores.append(0.0)
            return scores

        else:
            # Jaccard (Set overlap) Analysis on lower-cased,
            # whitespace-separated words.
            scores = []
            for t1, t2 in zip(list_a, list_b):
                set_a = set(t1.lower().split())
                set_b = set(t2.lower().split())
                union = len(set_a | set_b)
                # Two texts without any words score 0.0.
                scores.append(len(set_a & set_b) / union if union > 0 else 0.0)
            return scores

class SemanticApp:
    def __init__(self, root):
        """Set up the main window: engine, shared state, theme and both tabs."""
        self.root = root
        root.title(f"Semantic Comparative Analyzer - [{ANALYSIS_MODE}]")
        root.geometry("1200x800")

        # Similarity backend and the state shared between callbacks.
        self.engine = AnalysisEngine()
        self.df = None
        self.filename = ""
        self.current_scores = None  # scores of the most recent analysis
        self.current_col_gt = None
        self.current_col_comp = None

        # Theme and table row height.
        theme = ttk.Style()
        theme.theme_use('clam')
        theme.configure("Treeview", rowheight=25)

        # Outer frame that holds the tabbed notebook.
        outer = ttk.Frame(root)
        outer.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        self.notebook = ttk.Notebook(outer)
        self.notebook.pack(fill=tk.BOTH, expand=True)

        # Each tab: create the page, register it, then let its builder fill it.
        for attr, label, builder in (
            ("tab_analysis", "Analysis Dashboard", self._setup_analysis_tab),
            ("tab_help", "Help & Methodology", self._setup_help_tab),
        ):
            page = ttk.Frame(self.notebook)
            setattr(self, attr, page)
            self.notebook.add(page, text=label)
            builder()

    def _setup_analysis_tab(self):
        """Lay out the analysis tab: config strip, results table, detail pane, status bar."""
        tab = self.tab_analysis

        # --- Configuration strip (top) ---
        controls = ttk.LabelFrame(tab, text="Configuration", padding=10)
        controls.pack(fill=tk.X, pady=5)

        self.btn_load = ttk.Button(controls, text="Load CSV File", command=self.load_csv)
        self.btn_load.pack(side=tk.LEFT, padx=5)

        # One caption plus read-only dropdown per column role.
        for caption, var_attr, combo_attr in (
            ("Ground Truth Column:", "var_gt", "combo_gt"),
            ("Comparison Column:", "var_comp", "combo_comp"),
        ):
            ttk.Label(controls, text=caption).pack(side=tk.LEFT, padx=(20, 5))
            selection = tk.StringVar()
            setattr(self, var_attr, selection)
            dropdown = ttk.Combobox(controls, textvariable=selection, state="readonly", width=30)
            setattr(self, combo_attr, dropdown)
            dropdown.pack(side=tk.LEFT, padx=5)

        # Both action buttons start disabled; load_csv enables them.
        self.btn_run = ttk.Button(
            controls,
            text="Run Comparative Analysis",
            command=self.run_analysis,
            state=tk.DISABLED,
        )
        self.btn_run.pack(side=tk.LEFT, padx=(20, 5))

        self.btn_compare_all = ttk.Button(
            controls,
            text="Compare GT to All Columns",
            command=self.compare_to_all,
            state=tk.DISABLED,
        )
        self.btn_compare_all.pack(side=tk.LEFT, padx=5)

        # --- Main area: results table (left) and detail view (right) ---
        split = ttk.PanedWindow(tab, orient=tk.HORIZONTAL)
        split.pack(fill=tk.BOTH, expand=True, pady=5)

        table_pane = ttk.Frame(split)
        split.add(table_pane, weight=1)

        # (column id, heading text, layout options)
        column_specs = (
            ("row_idx", "Row #", {"width": 50, "anchor": tk.CENTER}),
            ("score", "Similarity Score", {"width": 100, "anchor": tk.CENTER}),
            ("preview", "Content Preview (Ground Truth)", {"width": 400}),
        )
        self.tree = ttk.Treeview(
            table_pane,
            columns=tuple(name for name, _, _ in column_specs),
            show="headings",
        )
        for name, title, layout in column_specs:
            self.tree.heading(name, text=title)
            self.tree.column(name, **layout)

        table_scroll = ttk.Scrollbar(table_pane, orient=tk.VERTICAL, command=self.tree.yview)
        self.tree.configure(yscroll=table_scroll.set)

        self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        table_scroll.pack(side=tk.RIGHT, fill=tk.Y)

        # Selecting a row fills the detail view.
        self.tree.bind("<<TreeviewSelect>>", self.on_row_select)

        detail_pane = ttk.LabelFrame(split, text="Detailed Comparison", padding=10)
        split.add(detail_pane, weight=1)

        # Per-row statistics sit at the top of the detail view.
        stats_box = ttk.LabelFrame(detail_pane, text="Row Statistics", padding=5)
        stats_box.pack(fill=tk.X, pady=(0, 10))

        self.lbl_row_stats = ttk.Label(
            stats_box,
            text="Select a row to view detailed statistics...",
            font=("Arial", 8),
        )
        self.lbl_row_stats.pack(anchor=tk.W)

        self.stats_grid = ttk.Frame(stats_box)
        self.stats_grid.pack(fill=tk.X)

        # (attribute, initial text, font, grid row, grid column)
        stat_cells = (
            ("lbl_similarity", "Similarity: --", ("Arial", 8, "bold"), 0, 0),
            ("lbl_length_gt", "GT Length: --", ("Arial", 8), 0, 1),
            ("lbl_length_comp", "Comp Length: --", ("Arial", 8), 1, 0),
            ("lbl_word_overlap", "Word Overlap: --", ("Arial", 8), 1, 1),
            ("lbl_rank", "Rank: --", ("Arial", 8), 2, 0),
            ("lbl_percentile", "Percentile: --", ("Arial", 8), 2, 1),
        )
        for attr, caption, font, grid_row, grid_col in stat_cells:
            cell = ttk.Label(self.stats_grid, text=caption, font=font)
            cell.grid(row=grid_row, column=grid_col, sticky=tk.W, padx=5, pady=2)
            setattr(self, attr, cell)

        # Full text of the ground truth cell.
        ttk.Label(
            detail_pane, text="Ground Truth (Reference):", font=("Arial", 9, "bold")
        ).pack(anchor=tk.W)
        self.txt_gt = tk.Text(detail_pane, height=8, width=40, wrap=tk.WORD, bg="#f0f8ff")
        self.txt_gt.pack(fill=tk.BOTH, expand=True, pady=(0, 10))

        # Full text of the comparison cell; on_row_select rewrites the title.
        self.lbl_comp_title = ttk.Label(
            detail_pane, text="Comparison Interpretation:", font=("Arial", 9, "bold")
        )
        self.lbl_comp_title.pack(anchor=tk.W)
        self.txt_comp = tk.Text(detail_pane, height=8, width=40, wrap=tk.WORD, bg="#fff0f0")
        self.txt_comp.pack(fill=tk.BOTH, expand=True)

        # --- Status bar with aggregate statistics (bottom) ---
        self.lbl_stats = ttk.Label(
            tab,
            text="Load a file to begin statistics...",
            relief=tk.SUNKEN,
            padding=5,
        )
        self.lbl_stats.pack(fill=tk.X, side=tk.BOTTOM)

        # Row background per similarity band: greenish, yellowish, reddish.
        for tag, colour in (
            ("high", "#d4edda"),
            ("med", "#fff3cd"),
            ("low", "#f8d7da"),
        ):
            self.tree.tag_configure(tag, background=colour)

    def _setup_help_tab(self):
        """Fill the help tab with a read-only, scrollable copy of the user guide."""
        # The guide text; the only interpolated value is the backend name.
        help_body = f"""
SEMANTIC COMPARATIVE ANALYSIS TOOL - COMPREHENSIVE GUIDE
========================================================

Current Analysis Mode: {ANALYSIS_MODE}

OVERVIEW
--------
This professional scientific tool performs sophisticated semantic analysis to compare different 
interpretations of the same research paper or document. Each column in your CSV represents a 
different interpretation (e.g., from different AI models, annotators, or analysis methods), and 
the tool quantifies how similar these interpretations are at both the aggregate and row-by-row level.

================================================================================
1. ANALYSIS METHODOLOGIES
================================================================================

The tool supports three analysis modes, automatically selected based on available libraries:

A. TRANSFORMER-BASED (Deep Semantic Analysis) - RECOMMENDED
   - Requires: sentence-transformers library
   - Model: all-MiniLM-L6-v2 (optimized for speed and accuracy)
   - How it works:
     * Converts text into high-dimensional vector embeddings (384 dimensions)
     * Uses neural network models trained on millions of text pairs
     * Captures semantic meaning, not just keyword overlap
     * Example: "The feline sat" and "The cat rested" score highly despite different words
   - Best for: Understanding conceptual similarity, handling synonyms, context-aware comparison
   - Performance: Slower initial load (~30 seconds), but very fast once loaded

B. TF-IDF (Term Frequency-Inverse Document Frequency) - STATISTICAL
   - Requires: scikit-learn library
   - How it works:
     * Identifies important words in each text (weights rare, meaningful terms)
     * Computes cosine similarity between word frequency vectors
     * Good for keyword-based matching
   - Best for: When semantic understanding is less critical, keyword-focused analysis
   - Performance: Fast, no model loading required

C. JACCARD SIMILARITY (Lexical Overlap) - BASIC FALLBACK
   - No external dependencies required
   - How it works:
     * Simple set intersection over union of words
     * Only considers exact word matches
   - Best for: Quick checks, when no advanced libraries are available
   - Performance: Very fast, but least sophisticated

================================================================================
2. SIMILARITY METRICS EXPLAINED
================================================================================

COSINE SIMILARITY SCORE (0.0 to 1.0)
-------------------------------------
The primary metric used is cosine similarity, which measures the angle between two text vectors 
in high-dimensional space. This captures semantic relatedness rather than exact word matching.

INTERPRETATION GUIDE:
  • 0.90 - 1.00: Near-perfect semantic match
     - Texts convey essentially the same meaning
     - Minor differences in phrasing or detail
     - High confidence in agreement
  
  • 0.75 - 0.90: Strong agreement
     - Core concepts align well
     - Some differences in emphasis or detail
     - Generally reliable interpretation match
  
  • 0.50 - 0.75: Moderate agreement
     - Partial overlap in concepts
     - Different perspectives on same topic
     - May require manual review
  
  • 0.30 - 0.50: Weak agreement
     - Limited conceptual overlap
     - Different aspects or interpretations
     - Significant discrepancies likely
  
  • 0.00 - 0.30: Low/no agreement
     - Minimal semantic relationship
     - Possibly discussing different topics
     - Requires careful examination

================================================================================
3. USER INTERFACE GUIDE
================================================================================

A. CONFIGURATION PANEL (Top)
   --------------------------
   • Load CSV File: Import your data file (must have header row)
   • Ground Truth Column: Select the reference interpretation (baseline for comparison)
   • Comparison Column: Select the interpretation to compare against ground truth
   • Run Comparative Analysis: Execute pairwise comparison
   • Compare GT to All Columns: Compare ground truth against ALL other columns simultaneously
     (Opens new window with comprehensive multi-column analysis)

B. RESULTS TABLE (Left Panel)
   ---------------------------
   Displays all rows with:
   • Row #: Sequential row number (1-indexed)
   • Similarity Score: Cosine similarity for that row
   • Content Preview: First 60 characters of ground truth text
   
   COLOR CODING:
   • Green background: High similarity (≥ 0.75) - Strong agreement
   • Yellow background: Medium similarity (0.50 - 0.75) - Moderate agreement
   • Red background: Low similarity (< 0.50) - Weak agreement, review needed
   
   INTERACTION:
   • Click any row to view detailed comparison in right panel
   • Rows are automatically sorted by similarity (highest first in multi-column view)

C. DETAILED COMPARISON PANEL (Right Panel)
   ----------------------------------------
   When a row is selected, displays:
   
   ROW STATISTICS SECTION:
   • Similarity: The exact cosine similarity score (color-coded)
   • GT Length: Character and word count for ground truth text
   • Comp Length: Character and word count for comparison text
   • Word Overlap: Number of shared words and percentage
   • Rank: Position of this row's score among all rows (1 = highest)
   • Percentile: Percentage of rows with lower similarity scores
   
   TEXT VIEWERS:
   • Ground Truth (Reference): Full text of the selected row from ground truth column
   • Comparison Interpretation: Full text of the selected row from comparison column
   • Both text areas are scrollable and wrap text for readability

D. BOTTOM STATISTICS BAR
   ----------------------
   Aggregate statistics for the entire comparison:
   • Mean: Average similarity across all rows
   • Median: Middle value (less affected by outliers)
   • Std Dev: Standard deviation (measure of consistency)
   • Min/Max: Lowest and highest similarity scores
   • High/Med/Low counts: Number of rows in each similarity category
   • Mode: Current analysis methodology being used

================================================================================
4. WORKFLOW RECOMMENDATIONS
================================================================================

STEP 1: DATA PREPARATION
   • Ensure CSV has header row with column names
   • Each column should represent a different interpretation
   • Rows should be aligned (row N in all columns refers to same question/item)
   • Handle missing values appropriately (empty cells treated as empty strings)

STEP 2: ESTABLISH GROUND TRUTH
   • Select the most authoritative or reference interpretation as Ground Truth
   • This becomes the baseline for all comparisons
   • Consider using expert annotations, gold standard, or most reliable source

STEP 3: INITIAL ANALYSIS
   • Start with pairwise comparison (Ground Truth vs one other column)
   • Review overall statistics to understand general agreement level
   • Identify rows with low similarity for detailed review

STEP 4: MULTI-COLUMN ANALYSIS
    • Use "Compare GT to All Columns" for comprehensive overview
    • Side-by-Side Comparison tab: View all column comparisons simultaneously in a single view
      - All similarity tables displayed horizontally for easy comparison
      - Scroll horizontally to see all columns
      - Each table shows row-by-row similarity scores sorted by similarity (highest first)
      - Color-coded rows for quick identification of agreement levels
      - Best for: Quick visual comparison across all interpretations at once
    • Summary Statistics tab: Aggregate statistics for each comparison
    • Individual column tabs: Detailed row-by-row inspection per column
    • Export results for further analysis or reporting

STEP 5: DETAILED REVIEW
   • Focus on rows with similarity < 0.5 (red highlighting)
   • Use row statistics to understand why scores are low
   • Check word overlap percentages for lexical differences
   • Review percentile rankings to identify outliers

STEP 6: INTERPRETATION
   • High mean similarity (>0.75): Interpretations are highly consistent
   • Medium mean similarity (0.5-0.75): Some variation, review low-scoring rows
   • Low mean similarity (<0.5): Significant discrepancies, systematic review needed
   • High variance: Inconsistent agreement across rows (some match well, others don't)
   • Low variance: Consistent agreement level across all rows

================================================================================
5. ADVANCED FEATURES
================================================================================

MULTI-COLUMN COMPARISON WINDOW
    When using "Compare GT to All Columns":
    
    • SIDE-BY-SIDE COMPARISON TAB (NEW - Recommended for Quick Overview):
      - Displays all column comparisons simultaneously in a horizontal layout
      - Each column gets its own table showing row-by-row similarity scores
      - Tables are arranged side-by-side for direct visual comparison
      - Horizontal scrolling allows viewing all columns even with many comparisons
      - All tables are synchronized (same row numbers align across columns)
      
      SORTING CONTROLS:
      - Sort By: Choose to sort by "Similarity" (score) or "Row #" (original order)
      - Sort Order: Choose "↑" (ascending) or "↓" (descending)
      - Changes apply to ALL tables simultaneously for consistent comparison
      - Default: Sorted by Similarity, Descending (highest scores first)
      
      SYNCHRONIZED SCROLLING:
      - Checkbox: "Synchronize Scrollbars" - When enabled, all vertical scrollbars move together
      - Useful for comparing the same rows across all columns
      - Mouse wheel scrolling also synchronized when enabled
      - When disabled, each table scrolls independently
      
      VISUAL FEATURES:
      - Color-coded rows: Green (high ≥0.75), Yellow (medium 0.5-0.75), Red (low <0.5)
      - Rows sorted by similarity score (highest first) by default
      - Best use case: Quickly identify which columns agree/disagree on which rows
      - Advantage: See patterns across all interpretations at once without switching tabs
    
    • Summary Statistics Tab: Overview table comparing all columns
      - Mean, median, standard deviation for each comparison
      - Counts of high/medium/low similarity rows
      - Color-coded by overall agreement level
    
    • Individual Column Tabs: Detailed row-by-row results for each comparison
      - Same interface as main analysis
      - Allows focused review of specific column pairs
      - Full-width preview text for better readability
    
    • Export Functionality: Save all results to CSV
      - Includes all similarity scores
      - Includes original text from all columns
      - Enables further statistical analysis in external tools

ROW-LEVEL STATISTICS
   For each selected row, the tool provides:
   • Exact similarity score with color coding
   • Text length metrics (characters and words)
   • Word overlap analysis (shared vocabulary)
   • Ranking and percentile (contextualizes score within dataset)
   
   These metrics help understand WHY a particular row scored as it did.

================================================================================
6. INTERPRETING RESULTS FOR RESEARCH
================================================================================

QUALITY ASSURANCE USE CASES:
   • Inter-annotator agreement: Compare annotations from multiple human coders
   • Model validation: Compare AI model outputs to expert annotations
   • Consistency checking: Verify interpretations remain consistent across items
   • Error detection: Identify rows where interpretations diverge unexpectedly

RESEARCH METHODOLOGY:
   • Establish acceptable similarity thresholds for your domain
   • Use percentile rankings to identify systematic patterns
   • Combine with qualitative review for low-scoring rows
   • Track improvements over time (e.g., model training iterations)

STATISTICAL CONSIDERATIONS:
   • Mean similarity: Overall agreement level
   • Standard deviation: Consistency of agreement
   • Distribution shape: Are most rows similar with a few outliers, or consistent variation?
   • Category distribution: Ratio of high/medium/low similarity rows

================================================================================
7. TECHNICAL DETAILS
================================================================================

PERFORMANCE:
   • Transformer mode: ~30s initial model load, then ~0.1-1s per comparison
   • TF-IDF mode: No loading, ~0.01-0.1s per comparison
   • Jaccard mode: Instant, but less accurate
   • Processing is done in background threads to keep UI responsive

DATA HANDLING:
   • Empty cells converted to empty strings
   • Text is normalized (whitespace preserved)
   • No automatic text preprocessing (preserves original formatting)
   • Supports Unicode and special characters

LIMITATIONS:
   • Best results with sentence-transformers (requires ~400MB download)
   • Very long texts (>512 tokens) may be truncated by transformer model
   • Similarity scores are relative, not absolute measures
   • Context-dependent: same score may mean different things in different domains

================================================================================
8. TROUBLESHOOTING
================================================================================

LOW SIMILARITY SCORES:
   • Check if texts are actually discussing the same topic
   • Review word overlap percentage (low overlap = different vocabulary)
   • Consider if one interpretation is much longer/shorter
   • Verify data alignment (correct row matching)

SLOW PERFORMANCE:
   • First run with transformer mode requires model download
   • Large datasets (>1000 rows) may take time
   • Consider using TF-IDF mode for faster analysis
   • Close other applications to free memory

UNEXPECTED RESULTS:
   • Verify CSV structure (header row, aligned columns)
   • Check for encoding issues (use UTF-8)
   • Review if missing values are handled correctly
   • Ensure ground truth column is appropriate reference

================================================================================
9. BEST PRACTICES
================================================================================

• Always establish a clear ground truth before analysis
• Review low-scoring rows qualitatively to understand discrepancies
• Use multi-column comparison for comprehensive overview
• Export results for documentation and further analysis
• Document your similarity thresholds and interpretation criteria
• Consider domain-specific factors (technical terms, jargon, abbreviations)
• Combine quantitative similarity with qualitative review
• Track changes over time if comparing multiple analysis iterations

================================================================================
10. CITATION AND METHODOLOGY
================================================================================

If using this tool in research, consider citing:
• Transformer embeddings: Reimers & Gurevych (2019) - Sentence-BERT
• TF-IDF: Salton & Buckley (1988) - Term weighting approaches
• Cosine similarity: Standard vector space model metric

For methodology documentation:
• Specify which analysis mode was used
• Report mean, median, and standard deviation of similarity scores
• Include distribution of similarity categories (high/med/low)
• Note any preprocessing or data handling steps

================================================================================

For questions, issues, or feature requests, refer to the tool's documentation
or contact the development team.

Version: 2.0 | Last Updated: 2024
        """

        # Container for the text widget and its scrollbar.
        holder = ttk.Frame(self.tab_help)
        holder.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        vbar = ttk.Scrollbar(holder)
        vbar.pack(side=tk.RIGHT, fill=tk.Y)

        guide = tk.Text(
            holder,
            wrap=tk.WORD,
            padx=20,
            pady=20,
            font=("Segoe UI", 10),
            yscrollcommand=vbar.set,
        )
        guide.pack(fill=tk.BOTH, expand=True)
        vbar.config(command=guide.yview)

        guide.insert(tk.END, help_body)
        # Disable the widget after inserting so the guide is read-only.
        guide.config(state=tk.DISABLED)

    def load_csv(self):
        """
        Ask the user for a CSV file and make it the active dataset.

        Columns that contain no data at all are dropped. On success the
        column dropdowns are refilled, results belonging to a previously
        loaded file are discarded, and both analysis buttons are enabled.
        If the file cannot be read or has no usable columns, an error dialog
        is shown and the previously loaded data (if any) stays active.
        """
        import os

        file_path = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
        if not file_path:
            return

        # Parse into a local first so a failed load never replaces self.df.
        try:
            df = pd.read_csv(file_path)
            # Filter out columns without any data (typically unnamed ones)
            df = df.dropna(axis=1, how='all')
        except Exception as e:
            messagebox.showerror("Error", f"Could not load CSV: {e}")
            return

        columns = list(df.columns)
        if not columns:
            # Enabling the analysis buttons with nothing to select would
            # only lead to errors later on.
            messagebox.showerror(
                "Error", "Could not load CSV: the file contains no columns with data."
            )
            return

        self.df = df
        self.filename = os.path.basename(file_path)

        # Table rows are keyed by positions in the old dataframe; keeping
        # them would make row clicks index into the new file.
        for item in self.tree.get_children():
            self.tree.delete(item)
        self.current_scores = None
        self.current_col_gt = None
        self.current_col_comp = None

        self.combo_gt['values'] = columns
        self.combo_comp['values'] = columns
        # Default to the first column as ground truth and the second (when
        # there is one) as the comparison.
        self.combo_gt.current(0)
        self.combo_comp.current(1 if len(columns) > 1 else 0)

        self.btn_run.config(state=tk.NORMAL)
        self.btn_compare_all.config(state=tk.NORMAL)
        messagebox.showinfo(
            "Success",
            f"Loaded {self.filename} with {len(self.df)} rows and {len(columns)} columns.",
        )

    def run_analysis(self):
        """
        Start a pairwise analysis of the two selected columns.

        Does nothing when no file is loaded or a column is not selected.
        Otherwise the run button is disabled, the results table is emptied
        and the computation is handed to a background thread.
        """
        col_gt = self.var_gt.get()
        col_comp = self.var_comp.get()

        if self.df is None or not col_gt or not col_comp:
            return

        # Disable the button here, before the worker exists: a second click
        # could otherwise start a parallel run whose rows collide with the
        # first one's row ids.
        self.btn_run.config(state=tk.DISABLED, text="Analyzing...")

        # Clear previous results
        for item in self.tree.get_children():
            self.tree.delete(item)

        # Run analysis in thread to prevent UI freeze. Daemon, like the
        # model loader, so an unfinished run does not keep the process
        # alive after the window is closed.
        threading.Thread(
            target=self._process_analysis,
            args=(col_gt, col_comp),
            daemon=True,
        ).start()

    def _process_analysis(self, col_gt, col_comp):
        """
        Compute the similarity scores for two columns and hand them to the UI.

        Called by ``run_analysis`` on a background thread. Results and
        errors are passed back through ``self.root.after``.

        Args:
            col_gt: Name of the ground truth column in ``self.df``.
            col_comp: Name of the column compared against it.
        """
        self.btn_run.config(state=tk.DISABLED, text="Analyzing...")

        try:
            # Pass the raw cell values: the engine turns missing cells into
            # empty strings itself, whereas astype(str) would turn them
            # into the literal word "nan" and score that as real text.
            # NOTE(review): _create_comparison_window still stringifies with
            # astype(str) - consider aligning it.
            list_a = self.df[col_gt].tolist()
            list_b = self.df[col_comp].tolist()

            scores = self.engine.compute_similarity(list_a, list_b)
        except Exception as e:
            # The callback runs after this except block has ended, when the
            # name `e` is already unbound, so the message is captured now.
            def report_failure(message=str(e)):
                messagebox.showerror("Analysis Error", message)
                self.btn_run.config(state=tk.NORMAL, text="Run Comparative Analysis")

            self.root.after(0, report_failure)
            return

        # Update UI on main thread
        self.root.after(0, lambda: self._update_results(scores, col_gt, col_comp))

    def _update_results(self, scores, col_gt, col_comp):
        """
        Show a finished analysis in the results table and the status bar.

        Args:
            scores: One similarity score per dataframe row, in row order.
            col_gt: Name of the ground truth column that was analysed.
            col_comp: Name of the comparison column that was analysed.
        """
        preview_len = 60

        self.btn_run.config(state=tk.NORMAL, text="Run Comparative Analysis")

        # Store current analysis data
        self.current_scores = scores
        self.current_col_gt = col_gt
        self.current_col_comp = col_comp

        # Row ids are dataframe positions, so rows left over from an earlier
        # (or overlapping) run would make the inserts below fail.
        for item in self.tree.get_children():
            self.tree.delete(item)

        if len(scores) == 0:
            # statistics.mean and min/max cannot handle an empty sequence.
            self.lbl_stats.config(
                text=f"STATS | No rows to compare | Mode: {self.engine.mode}"
            )
            return

        gt_column = self.df[col_gt]

        # Sort by score (descending) for better visibility
        ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

        # Populate Treeview
        for idx, score in ranked:
            if score >= 0.75:
                tag = 'high'
            elif score >= 0.5:
                tag = 'med'
            else:
                tag = 'low'

            # Single-line preview; the ellipsis is added only when the text
            # was actually cut.
            full_text = str(gt_column.iloc[idx]).replace("\n", " ")
            preview_text = full_text[:preview_len]
            if len(full_text) > preview_len:
                preview_text += "..."

            self.tree.insert(
                "", tk.END, iid=idx,
                values=(idx + 1, f"{score:.4f}", preview_text),
                tags=(tag,),
            )

        # Update Stats (the sample standard deviation needs two values)
        avg_score = statistics.mean(scores)
        std_dev = statistics.stdev(scores) if len(scores) > 1 else 0
        min_score = min(scores)
        max_score = max(scores)
        median_score = statistics.median(scores)

        # Count by category, with the same thresholds as the row colours
        high_count = sum(1 for s in scores if s >= 0.75)
        med_count = sum(1 for s in scores if 0.5 <= s < 0.75)
        low_count = sum(1 for s in scores if s < 0.5)

        stats_msg = (f"STATS | Mean: {avg_score:.4f} | Median: {median_score:.4f} | Std Dev: {std_dev:.4f} | "
                     f"Min: {min_score:.4f} | Max: {max_score:.4f} | "
                     f"High(≥0.75): {high_count} | Med(0.5-0.75): {med_count} | Low(<0.5): {low_count} | "
                     f"Mode: {self.engine.mode}")
        self.lbl_stats.config(text=stats_msg)

    def on_row_select(self, event):
        """
        Show the selected table row in the detail view.

        The texts come from the columns of the most recent analysis, i.e.
        the ones the displayed score refers to. The dropdowns may have been
        changed since that analysis ran, so they are only a fallback.

        Args:
            event: The Tk selection event (unused).
        """
        selected_items = self.tree.selection()
        if not selected_items or self.df is None:
            return

        row_idx = int(selected_items[0])
        col_gt = self.current_col_gt if self.current_col_gt is not None else self.var_gt.get()
        col_comp = self.current_col_comp if self.current_col_comp is not None else self.var_comp.get()

        # Ignore rows that no longer match the loaded data (for example a
        # table left over from a previously loaded file).
        if (col_gt not in self.df.columns or col_comp not in self.df.columns
                or row_idx >= len(self.df)):
            return

        val_gt = str(self.df[col_gt].iloc[row_idx])
        val_comp = str(self.df[col_comp].iloc[row_idx])

        self.txt_gt.delete("1.0", tk.END)
        self.txt_gt.insert(tk.END, val_gt)

        self.txt_comp.delete("1.0", tk.END)
        self.txt_comp.insert(tk.END, val_comp)

        self.lbl_comp_title.config(text=f"Comparison: {col_comp} (Row {row_idx+1})")

        # Update row-level statistics
        if self.current_scores is not None and row_idx < len(self.current_scores):
            self._update_row_stats(row_idx, val_gt, val_comp)
    
    def compare_to_all(self):
        """Open the multi-column window for the ground truth column versus every other column."""
        gt_name = self.var_gt.get()
        if self.df is None or not gt_name:
            return

        column_names = list(self.df.columns)
        if gt_name not in column_names:
            return

        # Every column except the ground truth itself is a comparison target.
        rivals = [name for name in column_names if name != gt_name]
        if rivals:
            # Results are shown in a separate window.
            self._create_comparison_window(gt_name, rivals)
        else:
            messagebox.showinfo("Info", "No other columns to compare against.")
    
    def _create_comparison_window(self, col_gt, other_columns):
        """Open a window comparing the ground-truth column to every other column.

        The window contains a notebook with a side-by-side tab, a summary
        statistics tab and one per-row tab for each compared column, plus a
        button that exports the scores to CSV.

        Args:
            col_gt: Label of the ground-truth column in ``self.df``.
            other_columns: Labels of the columns to score against ``col_gt``.
        """
        win = tk.Toplevel(self.root)
        win.title(f"Multi-Column Comparison: {col_gt} vs All")
        win.geometry("1000x700")

        # Notebook holding one tab per view of the results
        notebook = ttk.Notebook(win)
        notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Score every column before building any tab. The ground-truth texts
        # are identical for each comparison, so they are converted only once.
        gt_texts = self.df[col_gt].astype(str).tolist()
        all_results = {}
        for col_comp in other_columns:
            comp_texts = self.df[col_comp].astype(str).tolist()
            all_results[col_comp] = self.engine.compute_similarity(gt_texts, comp_texts)

        # Row colours shared by the summary table and the per-column tables
        tag_colors = (('high', '#d4edda'), ('med', '#fff3cd'), ('low', '#f8d7da'))

        # Side-by-side tab: all comparisons shown together
        sidebyside_frame = ttk.Frame(notebook)
        notebook.add(sidebyside_frame, text="Side-by-Side Comparison")
        self._create_sidebyside_view(sidebyside_frame, col_gt, other_columns, all_results)

        # Summary tab: one row of aggregate statistics per compared column
        summary_frame = ttk.Frame(notebook)
        notebook.add(summary_frame, text="Summary Statistics")

        summary_columns = ("column", "mean", "median", "std", "min", "max", "high", "med", "low")
        summary_headings = ("Column", "Mean", "Median", "Std Dev", "Min", "Max",
                            "High (≥0.75)", "Med (0.5-0.75)", "Low (<0.5)")
        summary_tree = ttk.Treeview(summary_frame, columns=summary_columns, show="headings")
        for col, heading in zip(summary_columns, summary_headings):
            summary_tree.heading(col, text=heading)
            summary_tree.column(col, width=90, anchor=tk.CENTER)

        scrollbar_summary = ttk.Scrollbar(summary_frame, orient=tk.VERTICAL, command=summary_tree.yview)
        summary_tree.configure(yscrollcommand=scrollbar_summary.set)

        summary_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar_summary.pack(side=tk.RIGHT, fill=tk.Y)

        for col_comp in other_columns:
            scores = all_results[col_comp]
            # Column labels are not guaranteed to be strings, so convert before slicing
            label = str(col_comp)[:20]

            if len(scores) == 0:
                # statistics.mean/median raise StatisticsError on empty data;
                # show a placeholder row instead of aborting the whole window.
                summary_tree.insert("", tk.END, values=(
                    label, "n/a", "n/a", "n/a", "n/a", "n/a", 0, 0, 0
                ))
                continue

            avg = statistics.mean(scores)
            median = statistics.median(scores)
            # Sample variance needs at least two data points
            std = math.sqrt(statistics.variance(scores)) if len(scores) > 1 else 0
            min_s = min(scores)
            max_s = max(scores)
            high = sum(1 for s in scores if s >= 0.75)
            med = sum(1 for s in scores if 0.5 <= s < 0.75)
            low = sum(1 for s in scores if s < 0.5)

            # The summary row is coloured by the column's mean score
            tag = 'high' if avg >= 0.75 else 'med' if avg >= 0.5 else 'low'

            summary_tree.insert("", tk.END, values=(
                label, f"{avg:.4f}", f"{median:.4f}", f"{std:.4f}",
                f"{min_s:.4f}", f"{max_s:.4f}", high, med, low
            ), tags=(tag,))

        for tag_name, color in tag_colors:
            summary_tree.tag_configure(tag_name, background=color)

        # Individual tabs: per-row scores for each compared column
        for col_comp in other_columns:
            col_frame = ttk.Frame(notebook)
            notebook.add(col_frame, text=str(col_comp)[:15])

            col_tree = ttk.Treeview(col_frame, columns=("row", "score", "preview"), show="headings")
            col_tree.heading("row", text="Row #")
            col_tree.heading("score", text="Similarity")
            col_tree.heading("preview", text="Preview")

            col_tree.column("row", width=60, anchor=tk.CENTER)
            col_tree.column("score", width=100, anchor=tk.CENTER)
            col_tree.column("preview", width=600)

            scrollbar_col = ttk.Scrollbar(col_frame, orient=tk.VERTICAL, command=col_tree.yview)
            col_tree.configure(yscrollcommand=scrollbar_col.set)

            col_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            scrollbar_col.pack(side=tk.RIGHT, fill=tk.Y)

            for idx, score in enumerate(all_results[col_comp]):
                tag = 'high' if score >= 0.75 else 'med' if score >= 0.5 else 'low'
                # Preview is the start of the ground-truth text, flattened to one line
                preview = str(self.df[col_gt].iloc[idx])[:80].replace("\n", " ") + "..."
                col_tree.insert("", tk.END, values=(idx + 1, f"{score:.4f}", preview), tags=(tag,))

            for tag_name, color in tag_colors:
                col_tree.tag_configure(tag_name, background=color)

        # Export button
        btn_export = ttk.Button(win, text="Export Results to CSV",
                               command=lambda: self._export_comparison_results(col_gt, all_results))
        btn_export.pack(pady=5)
    
    def _create_sidebyside_view(self, parent_frame, col_gt, other_columns, all_results):
        """Build the side-by-side tab: one score table per compared column.

        The tables sit next to each other inside a scrollable canvas. A header
        row offers sort controls (by similarity or row number, ascending or
        descending) and a checkbox that keeps the vertical scroll position of
        all tables in step.

        Args:
            parent_frame: Container widget the view is packed into.
            col_gt: Label of the ground-truth column in ``self.df``; used for
                the table titles and the preview text.
            other_columns: Labels of the compared columns, one table each,
                laid out left to right in this order.
            all_results: Mapping of compared column label to its sequence of
                per-row similarity scores.
        """
        # Header with instructions and controls
        header_frame = ttk.Frame(parent_frame)
        header_frame.pack(fill=tk.X, padx=10, pady=5)

        # Left side: Info label
        info_label = ttk.Label(header_frame,
                               text="All Column Comparisons - Scroll horizontally to view all columns",
                               font=("Arial", 10, "bold"))
        info_label.pack(side=tk.LEFT, padx=5)

        # Right side: Controls
        controls_frame = ttk.Frame(header_frame)
        controls_frame.pack(side=tk.RIGHT, padx=10)

        # Sort controls
        sort_frame = ttk.LabelFrame(controls_frame, text="Sort", padding=5)
        sort_frame.pack(side=tk.LEFT, padx=5)

        sort_by_var = tk.StringVar(value="similarity")
        # The view opens with the best matches first, i.e. descending similarity
        sort_order_var = tk.StringVar(value="desc")

        ttk.Label(sort_frame, text="By:").pack(side=tk.LEFT, padx=2)
        ttk.Radiobutton(sort_frame, text="Similarity", variable=sort_by_var, value="similarity").pack(side=tk.LEFT, padx=2)
        ttk.Radiobutton(sort_frame, text="Row #", variable=sort_by_var, value="row").pack(side=tk.LEFT, padx=2)

        ttk.Label(sort_frame, text="Order:").pack(side=tk.LEFT, padx=(10, 2))
        ttk.Radiobutton(sort_frame, text="↑", variable=sort_order_var, value="asc").pack(side=tk.LEFT, padx=2)
        ttk.Radiobutton(sort_frame, text="↓", variable=sort_order_var, value="desc").pack(side=tk.LEFT, padx=2)

        # Synchronize scroll checkbox
        sync_frame = ttk.Frame(controls_frame)
        sync_frame.pack(side=tk.LEFT, padx=5)

        sync_scroll_var = tk.BooleanVar(value=True)
        sync_checkbox = ttk.Checkbutton(sync_frame, text="Synchronize Scrollbars", variable=sync_scroll_var)
        sync_checkbox.pack()

        # Create horizontal scrollable frame using canvas
        canvas_frame = ttk.Frame(parent_frame)
        canvas_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        # Canvas with scrollbars
        canvas = tk.Canvas(canvas_frame, bg="white", highlightthickness=0)
        scrollbar_h = ttk.Scrollbar(canvas_frame, orient=tk.HORIZONTAL, command=canvas.xview)
        scrollbar_v = ttk.Scrollbar(canvas_frame, orient=tk.VERTICAL, command=canvas.yview)

        inner_frame = ttk.Frame(canvas)
        canvas_window = canvas.create_window((0, 0), window=inner_frame, anchor=tk.NW)

        canvas.configure(xscrollcommand=scrollbar_h.set, yscrollcommand=scrollbar_v.set)

        # Create horizontal container for all tables
        tables_container = ttk.Frame(inner_frame)
        tables_container.pack(fill=tk.BOTH, expand=True)

        # Per-column widgets, keyed by compared column label
        trees = {}
        scrollbars = {}
        frames = {}

        # Flag to prevent recursive scroll updates (one-element list so the
        # nested functions can mutate it)
        scroll_updating = [False]

        def make_sync_scroll_command(source_tree):
            """Create a scroll command that syncs all trees when a scrollbar is moved"""
            def sync_scroll_command(*args):
                if scroll_updating[0] or not sync_scroll_var.get():
                    # If sync is disabled or we're already updating, just scroll the source tree
                    source_tree.yview(*args)
                    return

                scroll_updating[0] = True
                try:
                    # Apply the same scroll command to all trees
                    for tree in trees.values():
                        tree.yview(*args)
                finally:
                    scroll_updating[0] = False
            return sync_scroll_command

        def make_scrollbar_set(scrollbar_widget, tree_widget):
            """Create a wrapper that updates scrollbar and syncs other trees if needed"""
            def scrollbar_set(*args):
                scrollbar_widget.set(*args)
                # If sync is enabled and we're not already updating, sync all trees
                if sync_scroll_var.get() and not scroll_updating[0]:
                    scroll_updating[0] = True
                    try:
                        # Get current position of this tree
                        view_info = tree_widget.yview()
                        current_pos = view_info[0]
                        # Apply to all other trees and update their scrollbars
                        for col, tree in trees.items():
                            if tree != tree_widget:
                                tree.yview_moveto(current_pos)
                                scrollbars[col].set(*args)
                    finally:
                        scroll_updating[0] = False
            return scrollbar_set

        for col_idx, col_comp in enumerate(other_columns):
            # Frame for this column's table. Column labels are not guaranteed
            # to be strings, so convert before slicing.
            col_frame = ttk.LabelFrame(tables_container, text=f"{col_gt} vs {str(col_comp)[:20]}", padding=5)
            col_frame.grid(row=0, column=col_idx, sticky="nsew", padx=5)
            frames[col_comp] = col_frame

            # Create table
            col_tree = ttk.Treeview(col_frame, columns=("row", "score", "preview"), show="headings", height=25)
            col_tree.heading("row", text="Row #")
            col_tree.heading("score", text="Similarity")
            col_tree.heading("preview", text="Preview")

            col_tree.column("row", width=50, anchor=tk.CENTER)
            col_tree.column("score", width=80, anchor=tk.CENTER)
            col_tree.column("preview", width=250)

            # Row colours never change, so configure them once per table
            col_tree.tag_configure('high', background='#d4edda')
            col_tree.tag_configure('med', background='#fff3cd')
            col_tree.tag_configure('low', background='#f8d7da')

            trees[col_comp] = col_tree

        # Create scrollbars; their commands are wired up by update_scroll_sync
        for col_comp in other_columns:
            col_tree = trees[col_comp]
            col_frame = frames[col_comp]

            scrollbar_col = ttk.Scrollbar(col_frame, orient=tk.VERTICAL)
            scrollbars[col_comp] = scrollbar_col

            col_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            scrollbar_col.pack(side=tk.RIGHT, fill=tk.Y)

        def update_scroll_sync():
            """Reconfigure every scrollbar/tree pair for the current sync setting."""
            sync_enabled = sync_scroll_var.get()

            if sync_enabled:
                # Configure all scrollbars to use synchronized scrolling
                for col_comp, col_tree in trees.items():
                    scrollbar_col = scrollbars[col_comp]
                    # Scrollbar controls all trees with sync
                    scrollbar_col.config(command=make_sync_scroll_command(col_tree))
                    # Tree updates scrollbar and triggers sync
                    col_tree.configure(yscrollcommand=make_scrollbar_set(scrollbar_col, col_tree))
            else:
                # Configure each scrollbar independently
                for col_comp, col_tree in trees.items():
                    scrollbar_col = scrollbars[col_comp]
                    scrollbar_col.config(command=col_tree.yview)
                    col_tree.configure(yscrollcommand=scrollbar_col.set)

        # Initial configuration
        update_scroll_sync()

        # Update when checkbox changes (trace_add replaces the deprecated trace('w', ...))
        sync_scroll_var.trace_add('write', lambda *args: update_scroll_sync())

        def wheel_units(event):
            """Translate a wheel event into a signed number of rows to scroll."""
            # X11 reports wheel motion as Button-4 (up) / Button-5 (down). These
            # are checked first because tkinter sets event.delta on every event
            # (0 for button presses), so testing for the attribute alone would
            # never reach the button cases.
            if event.num == 4:
                return -1
            if event.num == 5:
                return 1
            if not event.delta:
                return 0
            if abs(event.delta) >= 120:
                return int(-event.delta / 120)
            # Deltas smaller than one 120-unit notch still move one row
            return -1 if event.delta > 0 else 1

        def make_wheel_handler(tree_widget):
            """Create the wheel handler for one tree."""
            def on_mousewheel(event):
                units = wheel_units(event)
                if units:
                    if not sync_scroll_var.get():
                        # Normal scroll behavior
                        tree_widget.yview_scroll(units, "units")
                    elif not scroll_updating[0]:
                        # Synchronized scroll behavior: apply to all trees
                        scroll_updating[0] = True
                        try:
                            for tree in trees.values():
                                tree.yview_scroll(units, "units")
                        finally:
                            scroll_updating[0] = False
                # The event is fully handled here; "break" keeps the Treeview
                # class binding from scrolling this tree a second time.
                return "break"
            return on_mousewheel

        # Bind mouse wheel events for synchronized scrolling
        for col_tree in trees.values():
            wheel_handler = make_wheel_handler(col_tree)
            for sequence in ('<MouseWheel>', '<Button-4>', '<Button-5>'):
                col_tree.bind(sequence, wheel_handler)

        def populate_tables(sort_by="similarity", sort_order="desc"):
            """Populate all tables with specified sort order"""
            # "asc" means ascending for both keys, matching the ↑/↓ radio buttons
            descending = (sort_order == "desc")
            key_index = 1 if sort_by == "similarity" else 0

            for col_comp in other_columns:
                col_tree = trees[col_comp]
                # Clear existing items
                for item in col_tree.get_children():
                    col_tree.delete(item)

                # (row position, score) pairs so the row number survives sorting
                indexed_scores = list(enumerate(all_results[col_comp]))
                indexed_scores.sort(key=lambda pair: pair[key_index], reverse=descending)

                for idx, score in indexed_scores:
                    tag = 'high' if score >= 0.75 else 'med' if score >= 0.5 else 'low'
                    preview = str(self.df[col_gt].iloc[idx])[:50].replace("\n", " ") + "..."
                    col_tree.insert("", tk.END, values=(idx + 1, f"{score:.4f}", preview), tags=(tag,))

        def on_sort_change(*args):
            """Repopulate the tables whenever a sort control changes."""
            populate_tables(sort_by_var.get(), sort_order_var.get())

        sort_by_var.trace_add('write', on_sort_change)
        sort_order_var.trace_add('write', on_sort_change)

        # Initial population uses whatever the sort controls currently show
        populate_tables(sort_by_var.get(), sort_order_var.get())

        # Configure grid weights: fixed-width columns so the canvas scrolls horizontally
        for col_idx in range(len(other_columns)):
            tables_container.grid_columnconfigure(col_idx, weight=0, minsize=400)

        def update_scroll_region(event=None):
            """Resize the canvas scroll region and embedded window to fit the tables."""
            canvas.update_idletasks()
            inner_frame.update_idletasks()

            req_width = inner_frame.winfo_reqwidth()
            req_height = inner_frame.winfo_reqheight()
            canvas_width = canvas.winfo_width()
            canvas_height = canvas.winfo_height()

            # Set scroll region to content size
            if req_width > 1 and req_height > 1:
                canvas.config(scrollregion=(0, 0, req_width, req_height))
                # Set canvas window to match content width (not canvas width) for proper horizontal scrolling
                canvas.itemconfig(canvas_window, width=max(req_width, canvas_width), height=max(req_height, canvas_height))

        inner_frame.bind('<Configure>', update_scroll_region)
        canvas.bind('<Configure>', update_scroll_region)

        # Pack horizontal scrollbar first at bottom to fill width
        scrollbar_h.pack(side=tk.BOTTOM, fill=tk.X)

        # Pack canvas and vertical scrollbar side by side
        canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar_v.pack(side=tk.RIGHT, fill=tk.Y)

        # Initial update
        update_scroll_region()
    
    def _export_comparison_results(self, col_gt, all_results):
        """Write the multi-column comparison results to a CSV file chosen by the user.

        The file has a 1-based ``Row`` column, the ground-truth text, and for
        every compared column its scores followed by its text. Cancelling the
        save dialog does nothing; success or failure is reported in a
        message box.

        Args:
            col_gt: Label of the ground-truth column in ``self.df``.
            all_results: Mapping of compared column label to its scores.
        """
        target = filedialog.asksaveasfilename(
            defaultextension=".csv",
            filetypes=[("CSV Files", "*.csv"), ("All Files", "*.*")]
        )
        if not target:
            return

        try:
            # Column order in the file follows insertion order of this dict
            table = {
                "Row": range(1, len(self.df) + 1),
                f"GT_{col_gt}": self.df[col_gt].tolist(),
            }
            for name, column_scores in all_results.items():
                table[f"Score_{name}"] = column_scores
                table[f"Text_{name}"] = self.df[name].tolist()

            pd.DataFrame(table).to_csv(target, index=False)
            messagebox.showinfo("Success", f"Results exported to {target}")
        except Exception as e:
            messagebox.showerror("Error", f"Could not export: {e}")
    
    def _update_row_stats(self, row_idx, text_gt, text_comp):
        """Refresh the row-level statistics labels for the selected row.

        Updates the similarity, length, word-overlap, rank and percentile
        labels. Does nothing when no scores have been computed yet.

        Args:
            row_idx: 0-based position of the row in ``self.current_scores``.
            text_gt: Ground-truth text of the row.
            text_comp: Comparison text of the row.
        """
        scores = self.current_scores
        if scores is None:
            return

        score = scores[row_idx]
        total = len(scores)  # at least 1 here, since the lookup above succeeded

        # Text lengths in characters and whitespace-separated words
        len_gt = len(text_gt)
        len_comp = len(text_comp)
        word_count_gt = len(text_gt.split())
        word_count_comp = len(text_comp.split())

        # Case-insensitive overlap of the two vocabularies (intersection / union)
        words_gt = set(text_gt.lower().split())
        words_comp = set(text_comp.lower().split())
        overlap = len(words_gt & words_comp)
        union = len(words_gt | words_comp)
        word_overlap_pct = (overlap / union * 100) if union > 0 else 0

        # Rank 1 is the highest score; tied scores share the best rank. Counting
        # the strictly greater scores gives the same rank as sorting and
        # searching, without re-sorting the whole list on every selection.
        rank = 1 + sum(1 for other in scores if other > score)
        percentile = (1 - (rank - 1) / total) * 100

        if score >= 0.75:
            color = "green"
        elif score >= 0.5:
            color = "orange"
        else:
            color = "red"

        # Update labels
        self.lbl_similarity.config(text=f"Similarity: {score:.4f}", foreground=color)
        self.lbl_length_gt.config(text=f"GT: {len_gt} chars, {word_count_gt} words")
        self.lbl_length_comp.config(text=f"Comp: {len_comp} chars, {word_count_comp} words")
        self.lbl_word_overlap.config(text=f"Word Overlap: {overlap}/{union} ({word_overlap_pct:.1f}%)")
        self.lbl_rank.config(text=f"Rank: {rank}/{total}")
        self.lbl_percentile.config(text=f"Percentile: {percentile:.1f}%")

if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the application on
    # top of it, and hand control to the Tk event loop.
    root = tk.Tk()
    app = SemanticApp(root)
    root.mainloop()