Step-3.7-Flash-dev

Running

File size: 3,639 Bytes

import os
import json
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from gradio import Server
from openai import OpenAI

# Application object. `Server` is imported from gradio above; the original
# author notes it is a FastAPI subclass, which is why FastAPI-style route
# decorators and `mount` are used on it further down.
app = Server()

# Ensure a "static" directory exists (no error if it is already there),
# presumably so the StaticFiles mount later in this file has a directory to serve.
# NOTE(review): the path is relative, so it is created in the current working
# directory, which is not necessarily the directory containing this file.
os.makedirs("static", exist_ok=True)

def _split_think_block(text: str) -> tuple:
    """Separate an inline ``<think>...</think>`` block from the visible answer.

    Returns a ``(reasoning, answer)`` pair. When *text* does not contain an
    opening tag followed by a closing tag, the pair is ``("", text)`` and the
    text is returned untouched.
    """
    before, open_tag, after_open = text.partition("<think>")
    if not open_tag:
        return "", text
    thought, close_tag, after_close = after_open.partition("</think>")
    if not close_tag:
        return "", text
    # Text surrounding the think block is user-visible answer, not reasoning.
    visible = "\n\n".join(
        part for part in (before.strip(), after_close.strip()) if part
    )
    return thought.strip(), visible


@app.api(name="chat_with_step")
def chat_with_step(
    messages_json: str,
    reasoning_effort: str = "medium",
    max_tokens: int = 2048,
    temperature: float = 0.7
) -> str:
    """
    Call the Step 3.7 Flash model through its OpenAI-compatible API.

    Args:
        messages_json: JSON-serialized, non-empty list of chat messages.
        reasoning_effort: Forwarded to the API only when it is one of
            "low", "medium" or "high"; any other value is ignored.
        max_tokens: Forwarded as the completion's ``max_tokens``.
        temperature: Forwarded as the sampling ``temperature``.

    Returns:
        A JSON string. On success:
        ``{"status": "success", "content": str, "reasoning_content": str}``.
        On any failure: ``{"status": "error", "message": str}``. The function
        never raises; every exception is converted into the error form.
    """
    try:
        # Load messages from JSON string
        messages = json.loads(messages_json)

        # Load key from server-side environment variable
        key = os.environ.get("STEP_API_KEY", "").strip()
        if not key:
            return json.dumps({
                "status": "error",
                "message": "STEP_API_KEY environment variable is not configured on the server."
            })

        # Reject payloads that are valid JSON but not a usable message list,
        # instead of forwarding them and surfacing an opaque upstream error.
        if not isinstance(messages, list) or not messages:
            return json.dumps({
                "status": "error",
                "message": "messages_json must be a non-empty JSON array of chat messages."
            })

        # Initialize OpenAI client configured for StepFun
        client = OpenAI(
            api_key=key,
            base_url="https://api.stepfun.com/v1",
        )

        # Prepare parameters for the API call
        params = {
            "model": "step-3.7-flash",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature
        }

        # Only forward recognised effort levels; anything else is dropped.
        if reasoning_effort in ("low", "medium", "high"):
            params["reasoning_effort"] = reasoning_effort

        # Perform completion request
        response = client.chat.completions.create(**params)

        if not response.choices:
            return json.dumps({
                "status": "error",
                "message": "The model returned no completion choices."
            })

        message = response.choices[0].message

        # The SDK types `content` as optional; normalise None to "" so the
        # client always receives a string.
        content = message.content or ""

        # Prefer a dedicated reasoning field when the API provides one.
        reasoning_content = getattr(message, "reasoning_content", "") or ""

        # Otherwise fall back to reasoning embedded in <think> tags.
        if not reasoning_content:
            reasoning_content, content = _split_think_block(content)

        return json.dumps({
            "status": "success",
            "content": content,
            "reasoning_content": reasoning_content
        })

    except Exception as e:
        # API boundary: report every failure as a JSON error payload rather
        # than letting it propagate to the caller.
        return json.dumps({
            "status": "error",
            "message": str(e)
        })

@app.get("/")
async def homepage():
    """Return static/index.html from beside this file, or a placeholder page if it is absent."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "static", "index.html")

    # Guard clause: no built front-end yet, so answer with the placeholder.
    if not os.path.exists(index_file):
        return HTMLResponse(
            content="<h1>Frontend is building. Please refresh in a few seconds...</h1>",
            status_code=200
        )

    with open(index_file, "r", encoding="utf-8") as handle:
        page = handle.read()
    return HTMLResponse(content=page, status_code=200)

# Serve the "static" directory under the /static URL prefix (CSS, JS, images).
# NOTE(review): "static" here is relative to the current working directory,
# while homepage() builds its path from this file's location; the two only
# refer to the same folder when the app is started from this file's directory.
app.mount("/static", StaticFiles(directory="static"), name="static")

if __name__ == "__main__":
    # Start the server only when this file is run as a script.
    # show_error=True is passed straight to launch(); the original author
    # notes the default port is 7860.
    app.launch(show_error=True)