| |
| import os |
| import re |
| import json |
| import base64 |
| import mimetypes |
| from pathlib import Path |
|
|
| |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| from PIL import Image |
| from dotenv import load_dotenv |
| from openai import OpenAI |
| from anthropic import Anthropic |
| from html import escape |
|
|
| |
# Load environment variables from a local .env file (no-op when absent).
load_dotenv()

# API keys; empty string when unset so plain truthiness checks work below.
openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()

# Keep a client as None when its key is missing instead of constructing it
# without credentials: OpenAI() raises at construction time when no key is
# available, which would crash the whole module on import.  Deferring lets
# each call site's explicit key guard raise a clear, actionable ValueError
# instead.  (Callers in this file always check the key before touching the
# client, so a None client is never dereferenced.)
openai_client = OpenAI(api_key=openai_api_key) if openai_api_key else None
anthropic_client = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
|
|
|
def get_response(model: str, prompt: str) -> str:
    """Return a text completion for *prompt* from the model named *model*.

    Routing: model names containing "claude" or "anthropic" go to the
    Anthropic Messages API; everything else goes to the OpenAI Responses API.

    Raises:
        ValueError: when the API key for the selected provider is not set.
    """
    if "claude" in model.lower() or "anthropic" in model.lower():
        if not anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")

        message = anthropic_client.messages.create(
            model=model,
            max_tokens=1000,
            messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        )
        # Concatenate every text block rather than assuming the first block
        # is text -- consistent with image_anthropic_call in this file, and
        # robust when the response carries multiple content blocks.
        return "".join(
            block.text
            for block in (message.content or [])
            if getattr(block, "type", None) == "text"
        )

    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")

    response = openai_client.responses.create(
        model=model,
        input=prompt,
    )
    return response.output_text
|
|
|
|
| |
def load_and_prepare_data(csv_path: str) -> pd.DataFrame:
    """Read *csv_path* into a DataFrame and derive date-part columns.

    When a "date" column is present it is parsed to datetime (unparseable
    values become NaT via errors="coerce") and "quarter", "month" and
    "year" columns are added; otherwise the frame is returned as loaded.
    """
    frame = pd.read_csv(csv_path)
    if "date" not in frame.columns:
        return frame
    frame["date"] = pd.to_datetime(frame["date"], errors="coerce")
    for part in ("quarter", "month", "year"):
        frame[part] = getattr(frame["date"].dt, part)
    return frame
|
|
|
|
| |
def make_schema_text(df: pd.DataFrame) -> str:
    """Describe *df*'s columns as one "- name: dtype" bullet per line."""
    bullets = [f"- {col}: {dtype}" for col, dtype in df.dtypes.items()]
    return "\n".join(bullets)
|
|
|
|
def ensure_execute_python_tags(text: str) -> str:
    """Wrap *text* in <execute_python> tags, stripping markdown fences first."""
    cleaned = text.strip()
    # Drop a leading ``` / ```python fence and/or a trailing ``` fence.
    cleaned = re.sub(r"^```(?:python)?\s*|\s*```$", "", cleaned).strip()
    if "<execute_python>" in cleaned:
        return cleaned
    return f"<execute_python>\n{cleaned}\n</execute_python>"
|
|
|
|
def encode_image_b64(path: str) -> tuple[str, str]:
    """Read the image file at *path* and return (media_type, base64_payload).

    The media type is guessed from the file extension, falling back to
    "image/png" when it cannot be determined.
    """
    guessed, _encoding = mimetypes.guess_type(path)
    raw = Path(path).read_bytes()
    payload = base64.b64encode(raw).decode("utf-8")
    return guessed or "image/png", payload
|
|
|
|
def image_anthropic_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send *prompt* plus a base64 image to a Claude model; return the reply text.

    A system message instructs the model to answer with strict JSON only.
    All "text" content blocks in the response are concatenated.

    Raises:
        ValueError: when ANTHROPIC_API_KEY is not set.
    """
    if not anthropic_api_key:
        raise ValueError("ANTHROPIC_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
    user_content = [
        {"type": "text", "text": prompt},
        {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": b64}},
    ]
    msg = anthropic_client.messages.create(
        model=model_name,
        max_tokens=2000,
        temperature=0,
        system=(
            "You are a careful assistant. Respond with a single valid JSON object only. "
            "Do not include markdown, code fences, or commentary outside JSON."
        ),
        messages=[{"role": "user", "content": user_content}],
    )
    # Collect every text block; non-text blocks (if any) are skipped.
    text_chunks = [
        block.text
        for block in (msg.content or [])
        if getattr(block, "type", None) == "text"
    ]
    return "".join(text_chunks).strip()
|
|
|
|
def image_openai_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send *prompt* plus a base64 image to an OpenAI model; return the reply text.

    Raises:
        ValueError: when OPENAI_API_KEY is not set.
    """
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
    image_part = {
        "type": "input_image",
        "image_url": f"data:{media_type};base64,{b64}",
    }
    resp = openai_client.responses.create(
        model=model_name,
        input=[
            {
                "role": "user",
                "content": [{"type": "input_text", "text": prompt}, image_part],
            }
        ],
    )
    return (resp.output_text or "").strip()
|
|