import glob
import json
import os
import random

import numpy as np
from flask import Flask, abort, render_template, request
|
|
app = Flask(__name__)

# Problem metadata. JSON is UTF-8 by specification, so decode it explicitly
# instead of relying on the platform's locale default.
with open("problems.json", encoding="utf-8") as f:
    problems = json.load(f)
problem_choices = [q["question_title"] for q in problems]

# Display order: a shuffled permutation of the problem indices. Position p in
# every derived per-problem list refers to problems[random_idxs[p]].
# NOTE(review): the shuffle is unseeded, so the order differs per process.
random_idxs = list(range(len(problems)))
random.shuffle(random_idxs)

# Model outputs, keyed by model name. Each value is indexed by problem index
# and each record is read below via its "pass1_list" entry.
with open("all_outputs.json", encoding="utf-8") as f:
    all_outputs = json.load(f)
all_models = list(all_outputs.keys())

num_questions_filtered = len(problems)

# Mean of "pass1_list" for each (problem index, model) pair.
all_correctness_by_problem = {
    idx: {
        model: np.mean(all_outputs[model][idx]["pass1_list"])
        for model in all_models
    }
    for idx in random_idxs
}
|
|
|
|
def calculate_color(performance: float) -> str:
    """Return the CSS ``rgba(...)`` color string for a pass rate.

    Args:
        performance: Pass rate to colorize; the thresholds assume a value
            on a 0..1 scale.

    Returns:
        ``rgba(0, 150, 0, 0.5)`` above 0.75; ``rgba(50, 150, 0, a)`` with
        ``a = performance`` above 0.5; ``rgba(150, 50, 0, a)`` with
        ``a = 1 - performance`` above 0.25; otherwise
        ``rgba(150, 0, 0, 0.5)``. NaN fails every comparison and therefore
        also yields the last color.
    """
    if performance > 0.75:
        return "rgba(0, 150, 0, 0.5)"
    if performance > 0.5:
        # Alpha grows with the score inside this band.
        return f"rgba(50, 150, 0, {performance})"
    if performance > 0.25:
        # Alpha grows as the score drops inside this band.
        return f"rgba(150, 50, 0, {1 - performance})"
    return "rgba(150, 0, 0, 0.5)"
|
|
|
|
# One (display position, per-model summary, difficulty) tuple per problem, in
# shuffled display order. Each per-model summary holds the pass rate formatted
# as a percentage with one decimal place and the matching color string.
all_evaluations_by_problem_colored = [
    (
        position,
        {
            name: {
                "correctness": f"{score*100:.1f}",
                "correctness_color": calculate_color(score),
            }
            for name, score in (
                (m, all_correctness_by_problem[source_idx][m]) for m in all_models
            )
        },
        problems[source_idx]["difficulty"],
    )
    for position, source_idx in enumerate(random_idxs)
]

# For each model: one list per problem (shuffled display order) pairing every
# entry of "code_list" with the entry of "pass1_list" at the same position.
all_data_for_view_formatted = {
    name: [
        [
            {"code": code, "pass1": passed}
            for code, passed in zip(
                responses[source_idx]["code_list"],
                responses[source_idx]["pass1_list"],
            )
        ]
        for source_idx in random_idxs
    ]
    for name, responses in all_outputs.items()
}
|
|
|
|
@app.route("/")
def home():
    """Render ``index.html`` with the model list and per-problem summaries."""
    # Debug-level log instead of an unconditional print on every request.
    app.logger.debug("Rendering index for models: %s", all_models)
    return render_template(
        "index.html", models=all_models, problems=all_evaluations_by_problem_colored
    )
|
|
|
|
@app.route("/problem/<int:problem_idx>")
def problem(problem_idx):
    """Render ``problem.html`` for the problem at a given display position.

    Args:
        problem_idx: Position in the shuffled display order (the first
            element of each ``all_evaluations_by_problem_colored`` tuple).

    Returns:
        The rendered template. Responds with 404 when ``problem_idx`` is
        outside the range of known problems.
    """
    # Unknown positions are a client error, not an IndexError / HTTP 500.
    if not 0 <= problem_idx < len(random_idxs):
        abort(404)

    # The per-problem view lists are stored in shuffled order, while
    # ``problems`` keeps its original order, so map the position back to the
    # source index before looking up the question itself.
    source_idx = random_idxs[problem_idx]

    data = {
        model: all_data_for_view_formatted[model][problem_idx] for model in all_models
    }
    evaluation = all_evaluations_by_problem_colored[problem_idx][1]
    question = problems[source_idx]

    return render_template(
        "problem.html",
        problem_idx=problem_idx,
        evaluation=evaluation,
        models=all_models,
        question=question,
        data=data,
    )
|
|
|
|
# Serve the app with Flask's built-in server when run directly as a script.
if __name__ == "__main__":
    app.run()
|
|