Spaces:
Runtime error
Runtime error
| from typing import Tuple | |
| import pandas as pd | |
| import numpy as np | |
| import time | |
| import asyncio | |
| from utils.s3_utils import write_to_s3 | |
| from utils.data_utils import generate_leaderboard, generate_data | |
# Module-level asyncio lock; submit_rating acquires it (`async with submit_lock`)
# around its load / update / write-back of the leaderboard and match log.
submit_lock = asyncio.Lock()
def update_ratings(R_win : int, R_lose : int, k : int = 32, scale : float = 480) -> Tuple[int, int]:
    """
    Update the ratings of two players after a match with an Elo-style rule.

    The winner's expected score is ``1 / (1 + 10 ** ((R_lose - R_win) / scale))``
    and the rating transfer is ``k * (1 - expected)``. The transfer is rounded
    to the nearest integer once and then added to the winner and subtracted
    from the loser, so for integer inputs the sum of both ratings is unchanged.

    Args:
        R_win (int): The rating of the winning player.
        R_lose (int): The rating of the losing player.
        k (int, optional): The k-factor, i.e. the largest possible rating
            change in a single match. Defaults to 32.
        scale (float, optional): Divisor applied to the rating difference in
            the expected-score formula. Defaults to 480.
    Returns:
        Tuple[int, int]: The updated ratings of the winning and losing players.
    """
    expected_win = 1 / (1 + 10 ** ((R_lose - R_win) / scale))
    # Round the transfer once and apply it symmetrically. Truncating each side
    # independently with int() rounds the winner's gain down and the loser's
    # loss up, which removes a rating point from the pool on most matches.
    transfer = int(round(float(k * (1 - expected_win))))
    return int(R_win + transfer), int(R_lose - transfer)
def generate_matchup(leaderboard : pd.DataFrame, beta : int) -> tuple[str, str]:
    """
    Generate a pseudo-random matchup between two distinct models.

    Models are drawn without replacement with probability proportional to
    ``exp(-Matches / beta)``, so models with fewer recorded matches are
    selected more often. If no matches have been recorded at all, the two
    models are drawn uniformly.

    Args:
        leaderboard (pd.DataFrame): The leaderboard of models. Its index holds
            the model identifiers and it must have a 'Matches' column.
        beta (int): Scale of the match-count penalty used for the sampling
            weights; larger values flatten the weights towards uniform.
    Returns:
        model1 (str): The first model.
        model2 (str): The second model.
    """
    if leaderboard['Matches'].sum() == 0:
        first, second = np.random.choice(leaderboard.index, 2, replace=False)
        return first, second

    scaled = -leaderboard['Matches'].to_numpy(dtype=float) / beta
    # Subtract the maximum before exponentiating: the normalised weights are
    # mathematically unchanged, but large match counts can no longer make
    # every exp() underflow to 0 (which produced NaN probabilities).
    weights = np.exp(scaled - scaled.max())
    weights = weights / weights.sum()  # Normalize weights

    selected = np.random.choice(leaderboard.index, 2, replace=False, p=weights)
    # Weighted sampling favours high-weight models in the first slot; shuffle
    # so the position of a model in the pair carries no bias.
    np.random.shuffle(selected)
    model1, model2 = selected
    return model1, model2
async def simulate(iter : int, beta : int, criteria : str) -> pd.DataFrame:
    """
    Play a number of simulated matches and persist the outcome.

    Every round draws a pair with generate_matchup, counts the first model of
    the pair as the winner, updates Elo, wins, matches and win rate on the
    leaderboard, and appends one row per participant to the match data.
    Afterwards the leaderboard and the match data are written via write_to_s3.

    Args:
        iter (int): The number of matches to simulate.
        beta (int): Forwarded to generate_matchup for weighting the draw.
        criteria (str): The criteria for the rating.
    Returns:
        leaderboard (pd.DataFrame): Updated leaderboard after simulation,
            sorted by Elo (descending) with 'Model' as a regular column.
    """
    data = await generate_data()
    leaderboard = await generate_leaderboard(criteria)
    leaderboard.set_index('Model', inplace=True)

    def log_result(model, opponent, won, played_at):
        # Append this participant's view of the match to the match data.
        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': model,
            'Opponent': opponent,
            'Won': won,
            'Elo': leaderboard.at[model, 'Elo'],
            'Win Rate': leaderboard.at[model, 'Win Rate'],
            'Matches': leaderboard.at[model, 'Matches'],
            'Timestamp': played_at,
            'UUID': None
        }

    for _ in range(iter):
        # The timestamp is taken before the pair is drawn.
        played_at = time.time()
        victor, defeated = generate_matchup(leaderboard, beta)
        pair = (victor, defeated)

        victor_elo, defeated_elo = update_ratings(
            leaderboard.at[victor, 'Elo'],
            leaderboard.at[defeated, 'Elo'],
        )

        # Leaderboard bookkeeping
        leaderboard.at[victor, 'Elo'] = victor_elo
        leaderboard.at[defeated, 'Elo'] = defeated_elo
        leaderboard.at[victor, 'Wins'] += 1
        for model in pair:
            leaderboard.at[model, 'Matches'] += 1
        for model in pair:
            leaderboard.at[model, 'Win Rate'] = np.round(
                leaderboard.at[model, 'Wins'] / leaderboard.at[model, 'Matches'], 2
            )

        # Match data: winner's row first, then the loser's
        log_result(victor, defeated, True, played_at)
        log_result(defeated, victor, False, played_at)

    ranked = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)
    await asyncio.gather(
        write_to_s3(f'leaderboard_{criteria}.csv', ranked),
        write_to_s3('data.csv', data)
    )
    return ranked
async def submit_rating(criteria : str, video : str, winner : str, loser : str, uuid : str) -> pd.DataFrame:
    """
    Submit a rating for a match.

    While holding submit_lock, loads the match data and the leaderboard for
    `criteria`, applies the Elo update for winner and loser, appends one row
    per participant to the match data and writes both back via write_to_s3.
    If `winner`, `loser` or `video` is None nothing is recorded or written.

    Args:
        criteria (str): The criteria for the rating.
        video (str): The video the rating was given for; stored in the
            'Video' field of the match rows.
        winner (str): The winning model.
        loser (str): The losing model.
        uuid (str): The UUID of the session.
    Returns:
        leaderboard (pd.DataFrame): The leaderboard sorted by Elo (descending)
            with 'Model' as a regular column, on both the normal and the
            nothing-to-record path.
    """
    async with submit_lock:
        data = await generate_data()
        leaderboard = await generate_leaderboard(criteria)
        leaderboard.set_index('Model', inplace=True)

        if winner is None or loser is None or video is None:
            # Incomplete submission: record nothing, but hand back the frame
            # in the same shape as the normal path instead of leaving 'Model'
            # as the index.
            return leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)

        timestamp = time.time()
        pair = [winner, loser]
        R_win, R_lose = leaderboard.at[winner, 'Elo'], leaderboard.at[loser, 'Elo']
        R_win_new, R_lose_new = update_ratings(R_win, R_lose)

        # Update leaderboard
        leaderboard.loc[pair, 'Elo'] = [R_win_new, R_lose_new]
        leaderboard.at[winner, 'Wins'] += 1
        leaderboard.loc[pair, 'Matches'] += [1, 1]
        leaderboard.loc[pair, 'Win Rate'] = (
            leaderboard.loc[pair, 'Wins'] / leaderboard.loc[pair, 'Matches']
        ).apply(lambda x: round(x, 2))

        # Save match data: one row from each participant's point of view,
        # winner first.
        for model, opponent, won in ((winner, loser, True), (loser, winner, False)):
            data.loc[len(data)] = {
                'Criteria': criteria,
                'Model': model,
                'Opponent': opponent,
                'Won': won,
                'Elo': leaderboard.at[model, 'Elo'],
                'Win Rate': leaderboard.at[model, 'Win Rate'],
                'Matches': leaderboard.at[model, 'Matches'],
                'Video': video,
                'Timestamp': timestamp,
                'UUID': uuid
            }

        leaderboard = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)
        await asyncio.gather(
            write_to_s3(f'leaderboard_{criteria}.csv', leaderboard),
            write_to_s3('data.csv', data)
        )
        return leaderboard