| import gradio as gr |
| from transformers import AutoModel, AutoTokenizer |
| import numpy as np |
| import json |
|
|
| |
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "Supabase/gte-small"

# Both objects are created once at import time and reused by every request.
# The two loads are independent of each other; the tokenizer is fetched first.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
|
|
def _parse_texts(texts_json):
    """Decode *texts_json* and check that it is a JSON array of strings."""
    # Keep the try body to the single call that can raise JSONDecodeError.
    try:
        texts = json.loads(texts_json)
    except json.JSONDecodeError as err:
        raise ValueError("Invalid JSON format.") from err

    # The error message promises "array of strings", so enforce the element
    # type as well; otherwise numbers/null/nested lists reach the tokenizer
    # and fail there with an unrelated message.
    if not isinstance(texts, list) or not all(isinstance(t, str) for t in texts):
        raise ValueError("Input must be a JSON array of strings.")
    return texts


def text_to_vector(texts_json):
    """Embed a JSON array of sentences and return the vectors as JSON.

    Args:
        texts_json: A JSON document whose top-level value is an array of
            strings, e.g. '["first sentence", "second sentence"]'.

    Returns:
        A JSON string holding one list of floats per input sentence, in
        input order. An empty input array yields "[]".

    Raises:
        ValueError: If *texts_json* is not valid JSON, or if it does not
            decode to an array containing only strings.
    """
    texts = _parse_texts(texts_json)

    # Nothing to embed: answer directly instead of handing the tokenizer an
    # empty batch.
    if not texts:
        return json.dumps([])

    # Imported here because the file's import header does not bring in torch;
    # it is already required by `return_tensors="pt"`.
    import torch

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip autograd bookkeeping so no graph is kept per request.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): the vectors are taken from `pooler_output`; confirm this is
    # the pooling intended for this checkpoint (vs. pooling last_hidden_state).
    vectors = outputs.pooler_output.numpy().tolist()
    return json.dumps(vectors)
|
|
|
|
|
|
|
|
|
|
|
|
# Single-line text field where the user pastes the JSON array of sentences.
json_input = gr.Textbox(
    label="Enter JSON array",
    placeholder="Enter an array of sentences as a JSON string",
)

# Ten-line text area that displays the JSON string returned by the handler.
vector_output = gr.Textbox(label="Text Vectors (JSON)", lines=10)

# Wire the handler and the two widgets into one Gradio interface.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=json_input,
    outputs=vector_output,
    title="Batch Text to Vector",
    description="This demo converts an array of sentences to vectors and returns them as a JSON array.",
)

# Start the web app as soon as the module is executed.
demo.launch()
|
|