github-actions[bot] commited on
Commit ·
b598e06
1
Parent(s): e83942e
deploy: sync from GitHub 2026-05-13T22:41:47Z
Browse files- data/specs/2e756e9e0a2d417e/_manifest.json +40 -0
- data/specs/2e756e9e0a2d417e/itl_fit.json +96 -0
- data/specs/2e756e9e0a2d417e/logistic_fit.json +22 -0
- data/specs/2e756e9e0a2d417e/trace.csv +0 -0
- data/specs/4925acb216d43131/_manifest.json +40 -0
- data/specs/4925acb216d43131/itl_fit.json +96 -0
- data/specs/4925acb216d43131/logistic_fit.json +22 -0
- data/specs/4925acb216d43131/trace.csv +0 -0
- data/specs/73691f793f6be469/_manifest.json +44 -0
- data/specs/73691f793f6be469/itl_fit.json +132 -0
- data/specs/73691f793f6be469/logistic_fit.json +22 -0
- data/specs/73691f793f6be469/trace.csv +0 -0
- data/specs/860654eabd3dfebf/_manifest.json +40 -0
- data/specs/860654eabd3dfebf/itl_fit.json +96 -0
- data/specs/860654eabd3dfebf/logistic_fit.json +22 -0
- data/specs/860654eabd3dfebf/trace.csv +0 -0
- data/specs/bc0bde304544a603/_manifest.json +42 -0
- data/specs/bc0bde304544a603/itl_fit.json +114 -0
- data/specs/bc0bde304544a603/logistic_fit.json +22 -0
- data/specs/bc0bde304544a603/trace.csv +0 -0
- data/specs/traces_summary.csv +36 -0
- data/specs/training_trace.csv +0 -0
- examples/offline/config.json +37 -23
- requirements.txt +2 -0
- server.py +235 -77
data/specs/2e756e9e0a2d417e/_manifest.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mlenergy_data_version": "0.4.0",
|
| 3 |
+
"openg2g_version": "0.2.0.post1",
|
| 4 |
+
"schema_version": 3,
|
| 5 |
+
"spec": {
|
| 6 |
+
"batch_sizes": [
|
| 7 |
+
8,
|
| 8 |
+
16,
|
| 9 |
+
32,
|
| 10 |
+
64,
|
| 11 |
+
96,
|
| 12 |
+
128,
|
| 13 |
+
192,
|
| 14 |
+
256,
|
| 15 |
+
384,
|
| 16 |
+
512
|
| 17 |
+
],
|
| 18 |
+
"expert_parallel": 1,
|
| 19 |
+
"feasible_batch_sizes": [
|
| 20 |
+
8,
|
| 21 |
+
16,
|
| 22 |
+
32,
|
| 23 |
+
64,
|
| 24 |
+
128,
|
| 25 |
+
256,
|
| 26 |
+
512
|
| 27 |
+
],
|
| 28 |
+
"fit_exclude_batch_sizes": [],
|
| 29 |
+
"gpu_model": "H100",
|
| 30 |
+
"gpus_per_replica": 8,
|
| 31 |
+
"itl_deadline_s": 0.14,
|
| 32 |
+
"model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
| 33 |
+
"model_label": "Qwen3-235B-A22B",
|
| 34 |
+
"precision": "bfloat16",
|
| 35 |
+
"task": "gpqa",
|
| 36 |
+
"tensor_parallel": 8
|
| 37 |
+
},
|
| 38 |
+
"spec_hash": "2e756e9e0a2d417e",
|
| 39 |
+
"written_utc": "2026-05-13T19:50:49+00:00"
|
| 40 |
+
}
|
data/specs/2e756e9e0a2d417e/itl_fit.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_label": "Qwen3-235B-A22B",
|
| 3 |
+
"per_batch": {
|
| 4 |
+
"128": {
|
| 5 |
+
"loc": 0.013250339005364572,
|
| 6 |
+
"pi_stall": 0.9173336260689986,
|
| 7 |
+
"pi_steady": 0.0826663739310014,
|
| 8 |
+
"scale_stall": 0.026592548144528116,
|
| 9 |
+
"scale_steady": 0.010186920973940559,
|
| 10 |
+
"sigma_stall": 0.1192887538283944,
|
| 11 |
+
"sigma_steady": 1.4754674804724548
|
| 12 |
+
},
|
| 13 |
+
"16": {
|
| 14 |
+
"loc": 0.012917142001581146,
|
| 15 |
+
"pi_stall": 0.9679024411455602,
|
| 16 |
+
"pi_steady": 0.03209755885443977,
|
| 17 |
+
"scale_stall": 0.011944872620218289,
|
| 18 |
+
"scale_steady": 0.010055679342980215,
|
| 19 |
+
"sigma_stall": 0.06668499474004932,
|
| 20 |
+
"sigma_steady": 0.3364193391214592
|
| 21 |
+
},
|
| 22 |
+
"192": {
|
| 23 |
+
"loc": 0.013189563986427151,
|
| 24 |
+
"pi_stall": 0.9731451222965651,
|
| 25 |
+
"pi_steady": 0.026854877703434865,
|
| 26 |
+
"scale_stall": 0.02687676804506179,
|
| 27 |
+
"scale_steady": 0.018437463794844785,
|
| 28 |
+
"sigma_stall": 0.1420087680700662,
|
| 29 |
+
"sigma_steady": 2.5037616364005233
|
| 30 |
+
},
|
| 31 |
+
"256": {
|
| 32 |
+
"loc": 0.01325633499834314,
|
| 33 |
+
"pi_stall": 0.2520610216205986,
|
| 34 |
+
"pi_steady": 0.7479389783794014,
|
| 35 |
+
"scale_stall": 0.028301520442211254,
|
| 36 |
+
"scale_steady": 0.025729232489331608,
|
| 37 |
+
"sigma_stall": 0.5252417168242823,
|
| 38 |
+
"sigma_steady": 0.07938870914892968
|
| 39 |
+
},
|
| 40 |
+
"32": {
|
| 41 |
+
"loc": 0.013021193001233973,
|
| 42 |
+
"pi_stall": 0.9124702204373671,
|
| 43 |
+
"pi_steady": 0.08752977956263286,
|
| 44 |
+
"scale_stall": 0.02038829931099621,
|
| 45 |
+
"scale_steady": 0.012894545131554781,
|
| 46 |
+
"sigma_stall": 0.05215477595544453,
|
| 47 |
+
"sigma_steady": 0.3888129537458069
|
| 48 |
+
},
|
| 49 |
+
"384": {
|
| 50 |
+
"loc": 0.013478797999025323,
|
| 51 |
+
"pi_stall": 0.2220717676647871,
|
| 52 |
+
"pi_steady": 0.7779282323352129,
|
| 53 |
+
"scale_stall": 0.033227654127086725,
|
| 54 |
+
"scale_steady": 0.024930327834188436,
|
| 55 |
+
"sigma_stall": 0.7095586536822586,
|
| 56 |
+
"sigma_steady": 0.09695357279523584
|
| 57 |
+
},
|
| 58 |
+
"512": {
|
| 59 |
+
"loc": 0.013146029007086531,
|
| 60 |
+
"pi_stall": 0.17758013619796087,
|
| 61 |
+
"pi_steady": 0.8224198638020391,
|
| 62 |
+
"scale_stall": 0.03455616392329533,
|
| 63 |
+
"scale_steady": 0.024200569803794394,
|
| 64 |
+
"sigma_stall": 0.7204206877666427,
|
| 65 |
+
"sigma_steady": 0.09983052036261611
|
| 66 |
+
},
|
| 67 |
+
"64": {
|
| 68 |
+
"loc": 0.013067278996797745,
|
| 69 |
+
"pi_stall": 0.9183472954770104,
|
| 70 |
+
"pi_steady": 0.08165270452298956,
|
| 71 |
+
"scale_stall": 0.02541259423220897,
|
| 72 |
+
"scale_steady": 0.013607592085388696,
|
| 73 |
+
"sigma_stall": 0.08662948392900281,
|
| 74 |
+
"sigma_steady": 1.1879848722617707
|
| 75 |
+
},
|
| 76 |
+
"8": {
|
| 77 |
+
"loc": 0.013058345998044592,
|
| 78 |
+
"pi_stall": 0.9313422909338545,
|
| 79 |
+
"pi_steady": 0.06865770906614554,
|
| 80 |
+
"scale_stall": 0.007196610201069568,
|
| 81 |
+
"scale_steady": 0.00519407736027536,
|
| 82 |
+
"sigma_stall": 0.08145564694748231,
|
| 83 |
+
"sigma_steady": 0.2964711653438411
|
| 84 |
+
},
|
| 85 |
+
"96": {
|
| 86 |
+
"loc": 0.014793187001825777,
|
| 87 |
+
"pi_stall": 0.8434947366701049,
|
| 88 |
+
"pi_steady": 0.15650526332989512,
|
| 89 |
+
"scale_stall": 0.024908395671325943,
|
| 90 |
+
"scale_steady": 0.015913317708739556,
|
| 91 |
+
"sigma_stall": 0.08592049198306687,
|
| 92 |
+
"sigma_steady": 0.5909612329674692
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
"schema": "itl_fit.lognormal_mixture_2"
|
| 96 |
+
}
|
data/specs/2e756e9e0a2d417e/logistic_fit.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"latency": {
|
| 3 |
+
"L": 0.672333808351321,
|
| 4 |
+
"b0": 0.025327222690596844,
|
| 5 |
+
"k": 0.9249147277217336,
|
| 6 |
+
"x0": 10.0
|
| 7 |
+
},
|
| 8 |
+
"model_label": "Qwen3-235B-A22B",
|
| 9 |
+
"power": {
|
| 10 |
+
"L": 1130.4391558531665,
|
| 11 |
+
"b0": 2551.5754923889053,
|
| 12 |
+
"k": 1.2638482029342986,
|
| 13 |
+
"x0": 3.846153846153846
|
| 14 |
+
},
|
| 15 |
+
"schema": "logistic_v1",
|
| 16 |
+
"throughput": {
|
| 17 |
+
"L": 1400.9050260569725,
|
| 18 |
+
"b0": 102.09644698524227,
|
| 19 |
+
"k": 0.9249147277217336,
|
| 20 |
+
"x0": 4.461538461538462
|
| 21 |
+
}
|
| 22 |
+
}
|
data/specs/2e756e9e0a2d417e/trace.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/specs/4925acb216d43131/_manifest.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mlenergy_data_version": "0.4.0",
|
| 3 |
+
"openg2g_version": "0.2.0.post1",
|
| 4 |
+
"schema_version": 3,
|
| 5 |
+
"spec": {
|
| 6 |
+
"batch_sizes": [
|
| 7 |
+
8,
|
| 8 |
+
16,
|
| 9 |
+
32,
|
| 10 |
+
64,
|
| 11 |
+
96,
|
| 12 |
+
128,
|
| 13 |
+
192,
|
| 14 |
+
256,
|
| 15 |
+
384,
|
| 16 |
+
512
|
| 17 |
+
],
|
| 18 |
+
"expert_parallel": 1,
|
| 19 |
+
"feasible_batch_sizes": [
|
| 20 |
+
8,
|
| 21 |
+
16,
|
| 22 |
+
32,
|
| 23 |
+
64,
|
| 24 |
+
128,
|
| 25 |
+
256,
|
| 26 |
+
512
|
| 27 |
+
],
|
| 28 |
+
"fit_exclude_batch_sizes": [],
|
| 29 |
+
"gpu_model": "H100",
|
| 30 |
+
"gpus_per_replica": 8,
|
| 31 |
+
"itl_deadline_s": 0.12,
|
| 32 |
+
"model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
|
| 33 |
+
"model_label": "Llama-3.1-405B",
|
| 34 |
+
"precision": "fp8",
|
| 35 |
+
"task": "lm-arena-chat",
|
| 36 |
+
"tensor_parallel": 8
|
| 37 |
+
},
|
| 38 |
+
"spec_hash": "4925acb216d43131",
|
| 39 |
+
"written_utc": "2026-05-13T19:50:49+00:00"
|
| 40 |
+
}
|
data/specs/4925acb216d43131/itl_fit.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_label": "Llama-3.1-405B",
|
| 3 |
+
"per_batch": {
|
| 4 |
+
"128": {
|
| 5 |
+
"loc": 0.03008249612355232,
|
| 6 |
+
"pi_stall": 0.3839287952895576,
|
| 7 |
+
"pi_steady": 0.6160712047104424,
|
| 8 |
+
"scale_stall": 0.030214832869952784,
|
| 9 |
+
"scale_steady": 0.01777233734505344,
|
| 10 |
+
"sigma_stall": 1.2206296529153282,
|
| 11 |
+
"sigma_steady": 0.05
|
| 12 |
+
},
|
| 13 |
+
"16": {
|
| 14 |
+
"loc": 0.024713983999729155,
|
| 15 |
+
"pi_stall": 0.05360691147049179,
|
| 16 |
+
"pi_steady": 0.9463930885295082,
|
| 17 |
+
"scale_stall": 0.023448181502284848,
|
| 18 |
+
"scale_steady": 0.009322546786604182,
|
| 19 |
+
"sigma_stall": 0.995674214634189,
|
| 20 |
+
"sigma_steady": 0.05
|
| 21 |
+
},
|
| 22 |
+
"192": {
|
| 23 |
+
"loc": 0.030320093276143073,
|
| 24 |
+
"pi_stall": 0.3323523854706074,
|
| 25 |
+
"pi_steady": 0.6676476145293926,
|
| 26 |
+
"scale_stall": 0.041205692039902035,
|
| 27 |
+
"scale_steady": 0.031746846437682316,
|
| 28 |
+
"sigma_stall": 1.4276701505839031,
|
| 29 |
+
"sigma_steady": 0.05
|
| 30 |
+
},
|
| 31 |
+
"256": {
|
| 32 |
+
"loc": 0.030789543183684348,
|
| 33 |
+
"pi_stall": 0.6349499418714404,
|
| 34 |
+
"pi_steady": 0.3650500581285596,
|
| 35 |
+
"scale_stall": 0.04324939305958362,
|
| 36 |
+
"scale_steady": 0.03573730522680613,
|
| 37 |
+
"sigma_stall": 0.3721225887337235,
|
| 38 |
+
"sigma_steady": 1.5919966754869803
|
| 39 |
+
},
|
| 40 |
+
"32": {
|
| 41 |
+
"loc": 0.016498148681759833,
|
| 42 |
+
"pi_stall": 0.07390731357848579,
|
| 43 |
+
"pi_steady": 0.9260926864215142,
|
| 44 |
+
"scale_stall": 0.04555732131859944,
|
| 45 |
+
"scale_steady": 0.019842170525771235,
|
| 46 |
+
"sigma_stall": 0.7320543620724521,
|
| 47 |
+
"sigma_steady": 0.05
|
| 48 |
+
},
|
| 49 |
+
"384": {
|
| 50 |
+
"loc": 0.03019163595342636,
|
| 51 |
+
"pi_stall": 0.6593984486141276,
|
| 52 |
+
"pi_steady": 0.34060155138587245,
|
| 53 |
+
"scale_stall": 0.0597066517568152,
|
| 54 |
+
"scale_steady": 0.03991190875593972,
|
| 55 |
+
"sigma_stall": 0.4563966264262895,
|
| 56 |
+
"sigma_steady": 1.7170985862943504
|
| 57 |
+
},
|
| 58 |
+
"512": {
|
| 59 |
+
"loc": 0.030048940571188926,
|
| 60 |
+
"pi_stall": 0.3362132870451149,
|
| 61 |
+
"pi_steady": 0.6637867129548851,
|
| 62 |
+
"scale_stall": 0.04683803969863319,
|
| 63 |
+
"scale_steady": 0.044447301633350554,
|
| 64 |
+
"sigma_stall": 1.9213625679927164,
|
| 65 |
+
"sigma_steady": 0.4334724494446896
|
| 66 |
+
},
|
| 67 |
+
"64": {
|
| 68 |
+
"loc": 0.03152825334870815,
|
| 69 |
+
"pi_stall": 0.20700593967800063,
|
| 70 |
+
"pi_steady": 0.7929940603219994,
|
| 71 |
+
"scale_stall": 0.019287437563102458,
|
| 72 |
+
"scale_steady": 0.0076138475784196065,
|
| 73 |
+
"sigma_stall": 1.323371073182697,
|
| 74 |
+
"sigma_steady": 0.08253738945931417
|
| 75 |
+
},
|
| 76 |
+
"8": {
|
| 77 |
+
"loc": 0.029814763235092162,
|
| 78 |
+
"pi_stall": 0.04279942897407696,
|
| 79 |
+
"pi_steady": 0.957200571025923,
|
| 80 |
+
"scale_stall": 0.007195773794863316,
|
| 81 |
+
"scale_steady": 0.0033871174475517797,
|
| 82 |
+
"sigma_stall": 1.7354345964695308,
|
| 83 |
+
"sigma_steady": 0.05409894580485472
|
| 84 |
+
},
|
| 85 |
+
"96": {
|
| 86 |
+
"loc": 0.030120123418211936,
|
| 87 |
+
"pi_stall": 0.2798559198385162,
|
| 88 |
+
"pi_steady": 0.7201440801614838,
|
| 89 |
+
"scale_stall": 0.022833854494912858,
|
| 90 |
+
"scale_steady": 0.013914741048287108,
|
| 91 |
+
"sigma_stall": 1.2985027758578491,
|
| 92 |
+
"sigma_steady": 0.05
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
"schema": "itl_fit.lognormal_mixture_2"
|
| 96 |
+
}
|
data/specs/4925acb216d43131/logistic_fit.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"latency": {
|
| 3 |
+
"L": 0.11093873569263059,
|
| 4 |
+
"b0": 0.034847828201926605,
|
| 5 |
+
"k": 1.2638482029342986,
|
| 6 |
+
"x0": 7.743589743589744
|
| 7 |
+
},
|
| 8 |
+
"model_label": "Llama-3.1-405B",
|
| 9 |
+
"power": {
|
| 10 |
+
"L": 1824.2431634291524,
|
| 11 |
+
"b0": 3147.2107548161166,
|
| 12 |
+
"k": 1.0811807510766078,
|
| 13 |
+
"x0": 5.692307692307692
|
| 14 |
+
},
|
| 15 |
+
"schema": "logistic_v1",
|
| 16 |
+
"throughput": {
|
| 17 |
+
"L": 2118.1101549783216,
|
| 18 |
+
"b0": 231.54554435357053,
|
| 19 |
+
"k": 1.2638482029342986,
|
| 20 |
+
"x0": 5.897435897435898
|
| 21 |
+
}
|
| 22 |
+
}
|
data/specs/4925acb216d43131/trace.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/specs/73691f793f6be469/_manifest.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mlenergy_data_version": "0.4.0",
|
| 3 |
+
"openg2g_version": "0.2.0.post1",
|
| 4 |
+
"schema_version": 3,
|
| 5 |
+
"spec": {
|
| 6 |
+
"batch_sizes": [
|
| 7 |
+
8,
|
| 8 |
+
16,
|
| 9 |
+
32,
|
| 10 |
+
64,
|
| 11 |
+
96,
|
| 12 |
+
128,
|
| 13 |
+
192,
|
| 14 |
+
256,
|
| 15 |
+
384,
|
| 16 |
+
512,
|
| 17 |
+
768,
|
| 18 |
+
1024,
|
| 19 |
+
1536,
|
| 20 |
+
2048
|
| 21 |
+
],
|
| 22 |
+
"expert_parallel": 1,
|
| 23 |
+
"feasible_batch_sizes": [
|
| 24 |
+
8,
|
| 25 |
+
16,
|
| 26 |
+
32,
|
| 27 |
+
64,
|
| 28 |
+
128,
|
| 29 |
+
256,
|
| 30 |
+
512
|
| 31 |
+
],
|
| 32 |
+
"fit_exclude_batch_sizes": [],
|
| 33 |
+
"gpu_model": "H100",
|
| 34 |
+
"gpus_per_replica": 4,
|
| 35 |
+
"itl_deadline_s": 0.1,
|
| 36 |
+
"model_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 37 |
+
"model_label": "Llama-3.1-70B",
|
| 38 |
+
"precision": "bfloat16",
|
| 39 |
+
"task": "lm-arena-chat",
|
| 40 |
+
"tensor_parallel": 4
|
| 41 |
+
},
|
| 42 |
+
"spec_hash": "73691f793f6be469",
|
| 43 |
+
"written_utc": "2026-05-13T19:50:49+00:00"
|
| 44 |
+
}
|
data/specs/73691f793f6be469/itl_fit.json
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_label": "Llama-3.1-70B",
|
| 3 |
+
"per_batch": {
|
| 4 |
+
"1024": {
|
| 5 |
+
"loc": 0.016125269474625586,
|
| 6 |
+
"pi_stall": 0.6504738214326463,
|
| 7 |
+
"pi_steady": 0.3495261785673537,
|
| 8 |
+
"scale_stall": 0.09652171767308945,
|
| 9 |
+
"scale_steady": 0.041688588651955336,
|
| 10 |
+
"sigma_stall": 0.2462296773360523,
|
| 11 |
+
"sigma_steady": 1.5217750215970742
|
| 12 |
+
},
|
| 13 |
+
"128": {
|
| 14 |
+
"loc": 0.0022778047790527342,
|
| 15 |
+
"pi_stall": 0.22124884454952398,
|
| 16 |
+
"pi_steady": 0.778751155450476,
|
| 17 |
+
"scale_stall": 0.03383674745941713,
|
| 18 |
+
"scale_steady": 0.02568463145989505,
|
| 19 |
+
"sigma_stall": 0.5948150662563634,
|
| 20 |
+
"sigma_steady": 0.05
|
| 21 |
+
},
|
| 22 |
+
"1536": {
|
| 23 |
+
"loc": 0.016071272963356226,
|
| 24 |
+
"pi_stall": 0.8076530807526892,
|
| 25 |
+
"pi_steady": 0.19234691924731084,
|
| 26 |
+
"scale_stall": 0.089848886853969,
|
| 27 |
+
"scale_steady": 0.03420538011598045,
|
| 28 |
+
"sigma_stall": 0.46924205332030033,
|
| 29 |
+
"sigma_steady": 1.7596491528177638
|
| 30 |
+
},
|
| 31 |
+
"16": {
|
| 32 |
+
"loc": 0.015499197187066079,
|
| 33 |
+
"pi_stall": 0.08907607653965377,
|
| 34 |
+
"pi_steady": 0.9109239234603462,
|
| 35 |
+
"scale_stall": 0.004313043371105469,
|
| 36 |
+
"scale_steady": 0.0022230910334916296,
|
| 37 |
+
"sigma_stall": 1.2844234847005134,
|
| 38 |
+
"sigma_steady": 0.07913035461052097
|
| 39 |
+
},
|
| 40 |
+
"192": {
|
| 41 |
+
"loc": 0.016638526933431624,
|
| 42 |
+
"pi_stall": 0.38156228653101176,
|
| 43 |
+
"pi_steady": 0.6184377134689882,
|
| 44 |
+
"scale_stall": 0.018703032248183576,
|
| 45 |
+
"scale_steady": 0.01714548617582296,
|
| 46 |
+
"sigma_stall": 1.5189443495977526,
|
| 47 |
+
"sigma_steady": 0.10867393985639938
|
| 48 |
+
},
|
| 49 |
+
"2048": {
|
| 50 |
+
"loc": 0.016001691991161554,
|
| 51 |
+
"pi_stall": 0.8736696701311039,
|
| 52 |
+
"pi_steady": 0.12633032986889614,
|
| 53 |
+
"scale_stall": 0.08379097178728898,
|
| 54 |
+
"scale_steady": 0.03344713394228911,
|
| 55 |
+
"sigma_stall": 0.565330172781654,
|
| 56 |
+
"sigma_steady": 1.9824576270857919
|
| 57 |
+
},
|
| 58 |
+
"256": {
|
| 59 |
+
"loc": 0.016839679571032523,
|
| 60 |
+
"pi_stall": 0.6213219296982286,
|
| 61 |
+
"pi_steady": 0.37867807030177136,
|
| 62 |
+
"scale_stall": 0.023516103187706146,
|
| 63 |
+
"scale_steady": 0.02020604087391179,
|
| 64 |
+
"sigma_stall": 0.1354246512511205,
|
| 65 |
+
"sigma_steady": 1.4631774065297862
|
| 66 |
+
},
|
| 67 |
+
"32": {
|
| 68 |
+
"loc": 0.016572259606957435,
|
| 69 |
+
"pi_stall": 0.09635680905311539,
|
| 70 |
+
"pi_steady": 0.9036431909468846,
|
| 71 |
+
"scale_stall": 0.006175543416404281,
|
| 72 |
+
"scale_steady": 0.0034452728216791523,
|
| 73 |
+
"sigma_stall": 1.5074175831534309,
|
| 74 |
+
"sigma_steady": 0.10971052943864357
|
| 75 |
+
},
|
| 76 |
+
"384": {
|
| 77 |
+
"loc": 0.014811257140874863,
|
| 78 |
+
"pi_stall": 0.4531974533289852,
|
| 79 |
+
"pi_steady": 0.5468025466710148,
|
| 80 |
+
"scale_stall": 0.04197168592710718,
|
| 81 |
+
"scale_steady": 0.02986712173327284,
|
| 82 |
+
"sigma_stall": 0.11177738517749303,
|
| 83 |
+
"sigma_steady": 1.1391787357157586
|
| 84 |
+
},
|
| 85 |
+
"512": {
|
| 86 |
+
"loc": 0.01658029133284092,
|
| 87 |
+
"pi_stall": 0.6495871324327867,
|
| 88 |
+
"pi_steady": 0.35041286756721335,
|
| 89 |
+
"scale_stall": 0.05843082131488063,
|
| 90 |
+
"scale_steady": 0.03733948541353972,
|
| 91 |
+
"sigma_stall": 0.10431631509790713,
|
| 92 |
+
"sigma_steady": 1.3462214452446384
|
| 93 |
+
},
|
| 94 |
+
"64": {
|
| 95 |
+
"loc": 0.016720101477742194,
|
| 96 |
+
"pi_stall": 0.150178986901629,
|
| 97 |
+
"pi_steady": 0.849821013098371,
|
| 98 |
+
"scale_stall": 0.008828633434550627,
|
| 99 |
+
"scale_steady": 0.005697346737934002,
|
| 100 |
+
"sigma_stall": 1.3616972036550803,
|
| 101 |
+
"sigma_steady": 0.09826399163532819
|
| 102 |
+
},
|
| 103 |
+
"768": {
|
| 104 |
+
"loc": 0.016117640080094336,
|
| 105 |
+
"pi_stall": 0.645660707800124,
|
| 106 |
+
"pi_steady": 0.35433929219987603,
|
| 107 |
+
"scale_stall": 0.09211974240843952,
|
| 108 |
+
"scale_steady": 0.03127136906409066,
|
| 109 |
+
"sigma_stall": 0.14808465753722347,
|
| 110 |
+
"sigma_steady": 1.418245669226621
|
| 111 |
+
},
|
| 112 |
+
"8": {
|
| 113 |
+
"loc": 0.013241712244391442,
|
| 114 |
+
"pi_stall": 0.04095075080502819,
|
| 115 |
+
"pi_steady": 0.9590492491949718,
|
| 116 |
+
"scale_stall": 0.005297393800444009,
|
| 117 |
+
"scale_steady": 0.0033418650030394626,
|
| 118 |
+
"sigma_stall": 0.8751605195077952,
|
| 119 |
+
"sigma_steady": 0.05
|
| 120 |
+
},
|
| 121 |
+
"96": {
|
| 122 |
+
"loc": 1.0773779988288879e-05,
|
| 123 |
+
"pi_stall": 0.16115863500383798,
|
| 124 |
+
"pi_steady": 0.838841364996162,
|
| 125 |
+
"scale_stall": 0.034668825689171574,
|
| 126 |
+
"scale_steady": 0.025306298134214287,
|
| 127 |
+
"sigma_stall": 0.6442443219876751,
|
| 128 |
+
"sigma_steady": 0.05
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
"schema": "itl_fit.lognormal_mixture_2"
|
| 132 |
+
}
|
data/specs/73691f793f6be469/logistic_fit.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"latency": {
|
| 3 |
+
"L": 0.3337169923867402,
|
| 4 |
+
"b0": 0.01581299670243065,
|
| 5 |
+
"k": 0.6768750009458534,
|
| 6 |
+
"x0": 11.23076923076923
|
| 7 |
+
},
|
| 8 |
+
"model_label": "Llama-3.1-70B",
|
| 9 |
+
"power": {
|
| 10 |
+
"L": 1154.0021476589798,
|
| 11 |
+
"b0": 1646.7205316820875,
|
| 12 |
+
"k": 0.5790443980602487,
|
| 13 |
+
"x0": 7.384615384615384
|
| 14 |
+
},
|
| 15 |
+
"schema": "logistic_v1",
|
| 16 |
+
"throughput": {
|
| 17 |
+
"L": 7487.842476643308,
|
| 18 |
+
"b0": -115.86209784705026,
|
| 19 |
+
"k": 0.6768750009458534,
|
| 20 |
+
"x0": 6.871794871794871
|
| 21 |
+
}
|
| 22 |
+
}
|
data/specs/73691f793f6be469/trace.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/specs/860654eabd3dfebf/_manifest.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mlenergy_data_version": "0.4.0",
|
| 3 |
+
"openg2g_version": "0.2.0.post1",
|
| 4 |
+
"schema_version": 3,
|
| 5 |
+
"spec": {
|
| 6 |
+
"batch_sizes": [
|
| 7 |
+
8,
|
| 8 |
+
16,
|
| 9 |
+
32,
|
| 10 |
+
64,
|
| 11 |
+
96,
|
| 12 |
+
128,
|
| 13 |
+
192,
|
| 14 |
+
256,
|
| 15 |
+
384,
|
| 16 |
+
512
|
| 17 |
+
],
|
| 18 |
+
"expert_parallel": 1,
|
| 19 |
+
"feasible_batch_sizes": [
|
| 20 |
+
8,
|
| 21 |
+
16,
|
| 22 |
+
32,
|
| 23 |
+
64,
|
| 24 |
+
128,
|
| 25 |
+
256,
|
| 26 |
+
512
|
| 27 |
+
],
|
| 28 |
+
"fit_exclude_batch_sizes": [],
|
| 29 |
+
"gpu_model": "H100",
|
| 30 |
+
"gpus_per_replica": 2,
|
| 31 |
+
"itl_deadline_s": 0.06,
|
| 32 |
+
"model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
| 33 |
+
"model_label": "Qwen3-30B-A3B",
|
| 34 |
+
"precision": "bfloat16",
|
| 35 |
+
"task": "gpqa",
|
| 36 |
+
"tensor_parallel": 2
|
| 37 |
+
},
|
| 38 |
+
"spec_hash": "860654eabd3dfebf",
|
| 39 |
+
"written_utc": "2026-05-13T19:50:49+00:00"
|
| 40 |
+
}
|
data/specs/860654eabd3dfebf/itl_fit.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_label": "Qwen3-30B-A3B",
|
| 3 |
+
"per_batch": {
|
| 4 |
+
"128": {
|
| 5 |
+
"loc": 0.005779745297670364,
|
| 6 |
+
"pi_stall": 0.9016421299546657,
|
| 7 |
+
"pi_steady": 0.09835787004533425,
|
| 8 |
+
"scale_stall": 0.022204989839097976,
|
| 9 |
+
"scale_steady": 0.011952816505967263,
|
| 10 |
+
"sigma_stall": 0.17115589522976615,
|
| 11 |
+
"sigma_steady": 0.8638437216770689
|
| 12 |
+
},
|
| 13 |
+
"16": {
|
| 14 |
+
"loc": 0.006709285320878029,
|
| 15 |
+
"pi_stall": 0.9318898842600892,
|
| 16 |
+
"pi_steady": 0.06811011573991077,
|
| 17 |
+
"scale_stall": 0.005286084772734071,
|
| 18 |
+
"scale_steady": 0.003488072071334762,
|
| 19 |
+
"sigma_stall": 0.10968234279797877,
|
| 20 |
+
"sigma_steady": 0.7066423134806045
|
| 21 |
+
},
|
| 22 |
+
"192": {
|
| 23 |
+
"loc": 0.006146881969809532,
|
| 24 |
+
"pi_stall": 0.5496379486314646,
|
| 25 |
+
"pi_steady": 0.45036205136853535,
|
| 26 |
+
"scale_stall": 0.03517282452060233,
|
| 27 |
+
"scale_steady": 0.023996489386177662,
|
| 28 |
+
"sigma_stall": 0.09280191100817078,
|
| 29 |
+
"sigma_steady": 0.4411174802673816
|
| 30 |
+
},
|
| 31 |
+
"256": {
|
| 32 |
+
"loc": 0.005351871492505073,
|
| 33 |
+
"pi_stall": 0.5179735298893404,
|
| 34 |
+
"pi_steady": 0.4820264701106596,
|
| 35 |
+
"scale_stall": 0.0465574174374143,
|
| 36 |
+
"scale_steady": 0.02979761583658413,
|
| 37 |
+
"sigma_stall": 0.08247269205931418,
|
| 38 |
+
"sigma_steady": 0.46540172529241985
|
| 39 |
+
},
|
| 40 |
+
"32": {
|
| 41 |
+
"loc": 5.387010216712952e-06,
|
| 42 |
+
"pi_stall": 0.8987735970748053,
|
| 43 |
+
"pi_steady": 0.10122640292519469,
|
| 44 |
+
"scale_stall": 0.01640469159213389,
|
| 45 |
+
"scale_steady": 0.013000996744328759,
|
| 46 |
+
"sigma_stall": 0.05,
|
| 47 |
+
"sigma_steady": 0.33658931223823957
|
| 48 |
+
},
|
| 49 |
+
"384": {
|
| 50 |
+
"loc": 0.005363984273910522,
|
| 51 |
+
"pi_stall": 0.6442964861816537,
|
| 52 |
+
"pi_steady": 0.3557035138183463,
|
| 53 |
+
"scale_stall": 0.04511956412849174,
|
| 54 |
+
"scale_steady": 0.03479809929767838,
|
| 55 |
+
"sigma_stall": 0.05,
|
| 56 |
+
"sigma_steady": 0.4926395770591876
|
| 57 |
+
},
|
| 58 |
+
"512": {
|
| 59 |
+
"loc": 0.005344948040485382,
|
| 60 |
+
"pi_stall": 0.844032586498555,
|
| 61 |
+
"pi_steady": 0.15596741350144505,
|
| 62 |
+
"scale_stall": 0.04628929533783176,
|
| 63 |
+
"scale_steady": 0.031408287375895155,
|
| 64 |
+
"sigma_stall": 0.16501900831914335,
|
| 65 |
+
"sigma_steady": 0.7857801384710986
|
| 66 |
+
},
|
| 67 |
+
"64": {
|
| 68 |
+
"loc": 5.24731183052063e-06,
|
| 69 |
+
"pi_stall": 0.6213994676646826,
|
| 70 |
+
"pi_steady": 0.37860053233531743,
|
| 71 |
+
"scale_stall": 0.02199501617088761,
|
| 72 |
+
"scale_steady": 0.01839224430879353,
|
| 73 |
+
"sigma_stall": 0.05,
|
| 74 |
+
"sigma_steady": 0.1933315951912185
|
| 75 |
+
},
|
| 76 |
+
"8": {
|
| 77 |
+
"loc": 0.005612284185528755,
|
| 78 |
+
"pi_stall": 0.9754857241215052,
|
| 79 |
+
"pi_steady": 0.02451427587849475,
|
| 80 |
+
"scale_stall": 0.0037432124276108067,
|
| 81 |
+
"scale_steady": 0.0023369924600121004,
|
| 82 |
+
"sigma_stall": 0.09591646898132508,
|
| 83 |
+
"sigma_steady": 0.8401834713569618
|
| 84 |
+
},
|
| 85 |
+
"96": {
|
| 86 |
+
"loc": 0.005704086514353752,
|
| 87 |
+
"pi_stall": 0.8933240851637367,
|
| 88 |
+
"pi_steady": 0.10667591483626326,
|
| 89 |
+
"scale_stall": 0.020937595889766954,
|
| 90 |
+
"scale_steady": 0.009315065436423254,
|
| 91 |
+
"sigma_stall": 0.17041518988603524,
|
| 92 |
+
"sigma_steady": 0.9135642352221147
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
"schema": "itl_fit.lognormal_mixture_2"
|
| 96 |
+
}
|
data/specs/860654eabd3dfebf/logistic_fit.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"latency": {
|
| 3 |
+
"L": 0.22314992755762236,
|
| 4 |
+
"b0": 0.011393094262367034,
|
| 5 |
+
"k": 0.9249147277217336,
|
| 6 |
+
"x0": 9.58974358974359
|
| 7 |
+
},
|
| 8 |
+
"model_label": "Qwen3-30B-A3B",
|
| 9 |
+
"power": {
|
| 10 |
+
"L": 443.2622940538781,
|
| 11 |
+
"b0": 521.77175156347,
|
| 12 |
+
"k": 0.6768750009458534,
|
| 13 |
+
"x0": 3.435897435897436
|
| 14 |
+
},
|
| 15 |
+
"schema": "logistic_v1",
|
| 16 |
+
"throughput": {
|
| 17 |
+
"L": 16272.619545718631,
|
| 18 |
+
"b0": -798.0377199891329,
|
| 19 |
+
"k": 0.3101168926574778,
|
| 20 |
+
"x0": 10.0
|
| 21 |
+
}
|
| 22 |
+
}
|
data/specs/860654eabd3dfebf/trace.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/specs/bc0bde304544a603/_manifest.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mlenergy_data_version": "0.4.0",
|
| 3 |
+
"openg2g_version": "0.2.0.post1",
|
| 4 |
+
"schema_version": 3,
|
| 5 |
+
"spec": {
|
| 6 |
+
"batch_sizes": [
|
| 7 |
+
8,
|
| 8 |
+
16,
|
| 9 |
+
32,
|
| 10 |
+
64,
|
| 11 |
+
96,
|
| 12 |
+
128,
|
| 13 |
+
192,
|
| 14 |
+
256,
|
| 15 |
+
384,
|
| 16 |
+
512,
|
| 17 |
+
768,
|
| 18 |
+
1024
|
| 19 |
+
],
|
| 20 |
+
"expert_parallel": 1,
|
| 21 |
+
"feasible_batch_sizes": [
|
| 22 |
+
8,
|
| 23 |
+
16,
|
| 24 |
+
32,
|
| 25 |
+
64,
|
| 26 |
+
128,
|
| 27 |
+
256,
|
| 28 |
+
512
|
| 29 |
+
],
|
| 30 |
+
"fit_exclude_batch_sizes": [],
|
| 31 |
+
"gpu_model": "H100",
|
| 32 |
+
"gpus_per_replica": 1,
|
| 33 |
+
"itl_deadline_s": 0.08,
|
| 34 |
+
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 35 |
+
"model_label": "Llama-3.1-8B",
|
| 36 |
+
"precision": "bfloat16",
|
| 37 |
+
"task": "lm-arena-chat",
|
| 38 |
+
"tensor_parallel": 1
|
| 39 |
+
},
|
| 40 |
+
"spec_hash": "bc0bde304544a603",
|
| 41 |
+
"written_utc": "2026-05-13T19:50:49+00:00"
|
| 42 |
+
}
|
data/specs/bc0bde304544a603/itl_fit.json
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_label": "Llama-3.1-8B",
|
| 3 |
+
"per_batch": {
|
| 4 |
+
"1024": {
|
| 5 |
+
"loc": 0.007010523004621267,
|
| 6 |
+
"pi_stall": 0.7837426601266253,
|
| 7 |
+
"pi_steady": 0.2162573398733747,
|
| 8 |
+
"scale_stall": 0.05956751699666166,
|
| 9 |
+
"scale_steady": 0.023334748866848526,
|
| 10 |
+
"sigma_stall": 0.2225255796663595,
|
| 11 |
+
"sigma_steady": 1.5148319210986743
|
| 12 |
+
},
|
| 13 |
+
"128": {
|
| 14 |
+
"loc": 0.0026725998690128325,
|
| 15 |
+
"pi_stall": 0.670438895498799,
|
| 16 |
+
"pi_steady": 0.329561104501201,
|
| 17 |
+
"scale_stall": 0.01398383994948415,
|
| 18 |
+
"scale_steady": 0.01291166006566778,
|
| 19 |
+
"sigma_stall": 0.10190881519172769,
|
| 20 |
+
"sigma_steady": 0.6028124518856584
|
| 21 |
+
},
|
| 22 |
+
"16": {
|
| 23 |
+
"loc": 0.0018269608339071275,
|
| 24 |
+
"pi_stall": 0.10464355744071052,
|
| 25 |
+
"pi_steady": 0.8953564425592895,
|
| 26 |
+
"scale_stall": 0.0064746630446026925,
|
| 27 |
+
"scale_steady": 0.006130842910997042,
|
| 28 |
+
"sigma_stall": 0.787949891540804,
|
| 29 |
+
"sigma_steady": 0.05
|
| 30 |
+
},
|
| 31 |
+
"192": {
|
| 32 |
+
"loc": 0.004752941670060158,
|
| 33 |
+
"pi_stall": 0.7662774362890146,
|
| 34 |
+
"pi_steady": 0.23372256371098543,
|
| 35 |
+
"scale_stall": 0.01598165094645324,
|
| 36 |
+
"scale_steady": 0.0150463242114058,
|
| 37 |
+
"sigma_stall": 0.14182877630339646,
|
| 38 |
+
"sigma_steady": 0.7849802295588915
|
| 39 |
+
},
|
| 40 |
+
"256": {
|
| 41 |
+
"loc": 0.006535205008160323,
|
| 42 |
+
"pi_stall": 0.7935925278538705,
|
| 43 |
+
"pi_steady": 0.20640747214612953,
|
| 44 |
+
"scale_stall": 0.019609283330274195,
|
| 45 |
+
"scale_steady": 0.016315824423412995,
|
| 46 |
+
"sigma_stall": 0.15827383548087384,
|
| 47 |
+
"sigma_steady": 0.9154474758196223
|
| 48 |
+
},
|
| 49 |
+
"32": {
|
| 50 |
+
"loc": 0.0035353920032978057,
|
| 51 |
+
"pi_stall": 0.17403414005364848,
|
| 52 |
+
"pi_steady": 0.8259658599463515,
|
| 53 |
+
"scale_stall": 0.006034873438781453,
|
| 54 |
+
"scale_steady": 0.005399039804654786,
|
| 55 |
+
"sigma_stall": 0.6283723882974765,
|
| 56 |
+
"sigma_steady": 0.093251542298717
|
| 57 |
+
},
|
| 58 |
+
"384": {
|
| 59 |
+
"loc": 0.004538576999377459,
|
| 60 |
+
"pi_stall": 0.6944861095107936,
|
| 61 |
+
"pi_steady": 0.3055138904892064,
|
| 62 |
+
"scale_stall": 0.03365572663992157,
|
| 63 |
+
"scale_steady": 0.02542297917615243,
|
| 64 |
+
"sigma_stall": 0.10481746968936954,
|
| 65 |
+
"sigma_steady": 0.8129715507672286
|
| 66 |
+
},
|
| 67 |
+
"512": {
|
| 68 |
+
"loc": 0.006579698012840003,
|
| 69 |
+
"pi_stall": 0.7984203718644621,
|
| 70 |
+
"pi_steady": 0.20157962813553787,
|
| 71 |
+
"scale_stall": 0.04203774478880279,
|
| 72 |
+
"scale_steady": 0.017431896454495718,
|
| 73 |
+
"sigma_stall": 0.16222657200608384,
|
| 74 |
+
"sigma_steady": 1.3251956670044174
|
| 75 |
+
},
|
| 76 |
+
"64": {
|
| 77 |
+
"loc": 0.0047925396432876586,
|
| 78 |
+
"pi_stall": 0.8475854653968202,
|
| 79 |
+
"pi_steady": 0.15241453460317977,
|
| 80 |
+
"scale_stall": 0.0068440681404629946,
|
| 81 |
+
"scale_steady": 0.006554435848211792,
|
| 82 |
+
"sigma_stall": 0.1343829094213754,
|
| 83 |
+
"sigma_steady": 0.8077132495834084
|
| 84 |
+
},
|
| 85 |
+
"768": {
|
| 86 |
+
"loc": 0.00489947998027876,
|
| 87 |
+
"pi_stall": 0.5897706893853796,
|
| 88 |
+
"pi_steady": 0.4102293106146204,
|
| 89 |
+
"scale_stall": 0.058643939455719225,
|
| 90 |
+
"scale_steady": 0.045424901604616506,
|
| 91 |
+
"sigma_stall": 0.0958052090925679,
|
| 92 |
+
"sigma_steady": 0.8954582871522097
|
| 93 |
+
},
|
| 94 |
+
"8": {
|
| 95 |
+
"loc": 0.0017732551863193513,
|
| 96 |
+
"pi_stall": 0.04214661390101049,
|
| 97 |
+
"pi_steady": 0.9578533860989895,
|
| 98 |
+
"scale_stall": 0.0056649474765016194,
|
| 99 |
+
"scale_steady": 0.0055782406714952265,
|
| 100 |
+
"sigma_stall": 0.7319335730101633,
|
| 101 |
+
"sigma_steady": 0.05
|
| 102 |
+
},
|
| 103 |
+
"96": {
|
| 104 |
+
"loc": 0.0019363875111341477,
|
| 105 |
+
"pi_stall": 0.2619339890384934,
|
| 106 |
+
"pi_steady": 0.7380660109615066,
|
| 107 |
+
"scale_stall": 0.012201993050876735,
|
| 108 |
+
"scale_steady": 0.01218169825049524,
|
| 109 |
+
"sigma_stall": 0.632097274814945,
|
| 110 |
+
"sigma_steady": 0.08150463339106037
|
| 111 |
+
}
|
| 112 |
+
},
|
| 113 |
+
"schema": "itl_fit.lognormal_mixture_2"
|
| 114 |
+
}
|
data/specs/bc0bde304544a603/logistic_fit.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"latency": {
|
| 3 |
+
"L": 0.08748363159637874,
|
| 4 |
+
"b0": 0.008524586220155353,
|
| 5 |
+
"k": 1.0811807510766078,
|
| 6 |
+
"x0": 9.153846153846153
|
| 7 |
+
},
|
| 8 |
+
"model_label": "Llama-3.1-8B",
|
| 9 |
+
"power": {
|
| 10 |
+
"L": 210.91957273648842,
|
| 11 |
+
"b0": 480.39716041942955,
|
| 12 |
+
"k": 1.2638482029342986,
|
| 13 |
+
"x0": 5.461538461538462
|
| 14 |
+
},
|
| 15 |
+
"schema": "logistic_v1",
|
| 16 |
+
"throughput": {
|
| 17 |
+
"L": 9383.467277123862,
|
| 18 |
+
"b0": 771.2518469119908,
|
| 19 |
+
"k": 0.9249147277217336,
|
| 20 |
+
"x0": 6.153846153846154
|
| 21 |
+
}
|
| 22 |
+
}
|
data/specs/bc0bde304544a603/trace.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/specs/traces_summary.csv
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_label,num_gpus,max_num_seqs,trace_file
|
| 2 |
+
Llama-3.1-70B,4,8,73691f793f6be469/trace.csv
|
| 3 |
+
Llama-3.1-70B,4,16,73691f793f6be469/trace.csv
|
| 4 |
+
Llama-3.1-70B,4,32,73691f793f6be469/trace.csv
|
| 5 |
+
Llama-3.1-70B,4,64,73691f793f6be469/trace.csv
|
| 6 |
+
Llama-3.1-70B,4,128,73691f793f6be469/trace.csv
|
| 7 |
+
Llama-3.1-70B,4,256,73691f793f6be469/trace.csv
|
| 8 |
+
Llama-3.1-70B,4,512,73691f793f6be469/trace.csv
|
| 9 |
+
Qwen3-30B-A3B,2,8,860654eabd3dfebf/trace.csv
|
| 10 |
+
Qwen3-30B-A3B,2,16,860654eabd3dfebf/trace.csv
|
| 11 |
+
Qwen3-30B-A3B,2,32,860654eabd3dfebf/trace.csv
|
| 12 |
+
Qwen3-30B-A3B,2,64,860654eabd3dfebf/trace.csv
|
| 13 |
+
Qwen3-30B-A3B,2,128,860654eabd3dfebf/trace.csv
|
| 14 |
+
Qwen3-30B-A3B,2,256,860654eabd3dfebf/trace.csv
|
| 15 |
+
Qwen3-30B-A3B,2,512,860654eabd3dfebf/trace.csv
|
| 16 |
+
Llama-3.1-405B,8,8,4925acb216d43131/trace.csv
|
| 17 |
+
Llama-3.1-405B,8,16,4925acb216d43131/trace.csv
|
| 18 |
+
Llama-3.1-405B,8,32,4925acb216d43131/trace.csv
|
| 19 |
+
Llama-3.1-405B,8,64,4925acb216d43131/trace.csv
|
| 20 |
+
Llama-3.1-405B,8,128,4925acb216d43131/trace.csv
|
| 21 |
+
Llama-3.1-405B,8,256,4925acb216d43131/trace.csv
|
| 22 |
+
Llama-3.1-405B,8,512,4925acb216d43131/trace.csv
|
| 23 |
+
Qwen3-235B-A22B,8,8,2e756e9e0a2d417e/trace.csv
|
| 24 |
+
Qwen3-235B-A22B,8,16,2e756e9e0a2d417e/trace.csv
|
| 25 |
+
Qwen3-235B-A22B,8,32,2e756e9e0a2d417e/trace.csv
|
| 26 |
+
Qwen3-235B-A22B,8,64,2e756e9e0a2d417e/trace.csv
|
| 27 |
+
Qwen3-235B-A22B,8,128,2e756e9e0a2d417e/trace.csv
|
| 28 |
+
Qwen3-235B-A22B,8,256,2e756e9e0a2d417e/trace.csv
|
| 29 |
+
Qwen3-235B-A22B,8,512,2e756e9e0a2d417e/trace.csv
|
| 30 |
+
Llama-3.1-8B,1,8,bc0bde304544a603/trace.csv
|
| 31 |
+
Llama-3.1-8B,1,16,bc0bde304544a603/trace.csv
|
| 32 |
+
Llama-3.1-8B,1,32,bc0bde304544a603/trace.csv
|
| 33 |
+
Llama-3.1-8B,1,64,bc0bde304544a603/trace.csv
|
| 34 |
+
Llama-3.1-8B,1,128,bc0bde304544a603/trace.csv
|
| 35 |
+
Llama-3.1-8B,1,256,bc0bde304544a603/trace.csv
|
| 36 |
+
Llama-3.1-8B,1,512,bc0bde304544a603/trace.csv
|
data/specs/training_trace.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examples/offline/config.json
CHANGED
|
@@ -3,58 +3,72 @@
|
|
| 3 |
{
|
| 4 |
"model_label": "Llama-3.1-8B",
|
| 5 |
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
|
|
|
|
|
|
| 6 |
"gpus_per_replica": 1,
|
| 7 |
-
"
|
| 8 |
-
"initial_batch_size": 128,
|
| 9 |
"itl_deadline_s": 0.08,
|
| 10 |
-
"
|
|
|
|
|
|
|
|
|
|
| 11 |
},
|
| 12 |
{
|
| 13 |
"model_label": "Llama-3.1-70B",
|
| 14 |
"model_id": "meta-llama/Llama-3.1-70B-Instruct",
|
|
|
|
|
|
|
| 15 |
"gpus_per_replica": 4,
|
| 16 |
-
"
|
| 17 |
-
"initial_batch_size": 128,
|
| 18 |
"itl_deadline_s": 0.10,
|
| 19 |
-
"
|
|
|
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"model_label": "Llama-3.1-405B",
|
| 23 |
"model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
|
|
|
|
|
|
|
| 24 |
"gpus_per_replica": 8,
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
"itl_deadline_s": 0.12,
|
| 28 |
-
"
|
|
|
|
|
|
|
|
|
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"model_label": "Qwen3-30B-A3B",
|
| 32 |
"model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
|
|
|
|
|
|
| 33 |
"gpus_per_replica": 2,
|
| 34 |
-
"
|
| 35 |
-
"initial_batch_size": 128,
|
| 36 |
"itl_deadline_s": 0.06,
|
| 37 |
-
"
|
|
|
|
|
|
|
|
|
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"model_label": "Qwen3-235B-A22B",
|
| 41 |
"model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
|
|
|
|
|
|
| 42 |
"gpus_per_replica": 8,
|
| 43 |
-
"
|
| 44 |
-
"initial_batch_size": 128,
|
| 45 |
"itl_deadline_s": 0.14,
|
| 46 |
-
"
|
|
|
|
|
|
|
|
|
|
| 47 |
}
|
| 48 |
],
|
| 49 |
-
"data_sources": [
|
| 50 |
-
{"model_label": "Llama-3.1-8B", "task": "lm-arena-chat", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024]},
|
| 51 |
-
{"model_label": "Llama-3.1-70B", "task": "lm-arena-chat", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048]},
|
| 52 |
-
{"model_label": "Llama-3.1-405B", "task": "lm-arena-chat", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512]},
|
| 53 |
-
{"model_label": "Qwen3-30B-A3B", "task": "gpqa", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512]},
|
| 54 |
-
{"model_label": "Qwen3-235B-A22B", "task": "gpqa", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512]}
|
| 55 |
-
],
|
| 56 |
"training_trace_params": {},
|
| 57 |
"data_dir": null,
|
| 58 |
"ieee_case_dir": "examples/ieee13",
|
| 59 |
"mlenergy_data_dir": null
|
| 60 |
-
}
|
|
|
|
| 3 |
{
|
| 4 |
"model_label": "Llama-3.1-8B",
|
| 5 |
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 6 |
+
"gpu_model": "H100",
|
| 7 |
+
"task": "lm-arena-chat",
|
| 8 |
"gpus_per_replica": 1,
|
| 9 |
+
"tensor_parallel": 1,
|
|
|
|
| 10 |
"itl_deadline_s": 0.08,
|
| 11 |
+
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024],
|
| 12 |
+
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
|
| 13 |
+
"num_replicas": 720,
|
| 14 |
+
"initial_batch_size": 128
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"model_label": "Llama-3.1-70B",
|
| 18 |
"model_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 19 |
+
"gpu_model": "H100",
|
| 20 |
+
"task": "lm-arena-chat",
|
| 21 |
"gpus_per_replica": 4,
|
| 22 |
+
"tensor_parallel": 4,
|
|
|
|
| 23 |
"itl_deadline_s": 0.10,
|
| 24 |
+
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048],
|
| 25 |
+
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
|
| 26 |
+
"num_replicas": 180,
|
| 27 |
+
"initial_batch_size": 128
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"model_label": "Llama-3.1-405B",
|
| 31 |
"model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
|
| 32 |
+
"gpu_model": "H100",
|
| 33 |
+
"task": "lm-arena-chat",
|
| 34 |
"gpus_per_replica": 8,
|
| 35 |
+
"tensor_parallel": 8,
|
| 36 |
+
"precision": "fp8",
|
| 37 |
"itl_deadline_s": 0.12,
|
| 38 |
+
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
|
| 39 |
+
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
|
| 40 |
+
"num_replicas": 90,
|
| 41 |
+
"initial_batch_size": 128
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"model_label": "Qwen3-30B-A3B",
|
| 45 |
"model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
| 46 |
+
"gpu_model": "H100",
|
| 47 |
+
"task": "gpqa",
|
| 48 |
"gpus_per_replica": 2,
|
| 49 |
+
"tensor_parallel": 2,
|
|
|
|
| 50 |
"itl_deadline_s": 0.06,
|
| 51 |
+
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
|
| 52 |
+
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
|
| 53 |
+
"num_replicas": 480,
|
| 54 |
+
"initial_batch_size": 128
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"model_label": "Qwen3-235B-A22B",
|
| 58 |
"model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
| 59 |
+
"gpu_model": "H100",
|
| 60 |
+
"task": "gpqa",
|
| 61 |
"gpus_per_replica": 8,
|
| 62 |
+
"tensor_parallel": 8,
|
|
|
|
| 63 |
"itl_deadline_s": 0.14,
|
| 64 |
+
"batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
|
| 65 |
+
"feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
|
| 66 |
+
"num_replicas": 210,
|
| 67 |
+
"initial_batch_size": 128
|
| 68 |
}
|
| 69 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
"training_trace_params": {},
|
| 71 |
"data_dir": null,
|
| 72 |
"ieee_case_dir": "examples/ieee13",
|
| 73 |
"mlenergy_data_dir": null
|
| 74 |
+
}
|
requirements.txt
CHANGED
|
@@ -6,3 +6,5 @@ pandas
|
|
| 6 |
opendssdirect.py
|
| 7 |
matplotlib
|
| 8 |
scipy
|
|
|
|
|
|
|
|
|
| 6 |
opendssdirect.py
|
| 7 |
matplotlib
|
| 8 |
scipy
|
| 9 |
+
openg2g[opendss]==0.2.0.post1
|
| 10 |
+
websockets
|
server.py
CHANGED
|
@@ -7,7 +7,7 @@ Uses GPU power traces and workloads to model howAI inference/training affects g
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
|
| 10 |
-
|
| 11 |
from fractions import Fraction
|
| 12 |
from pathlib import Path
|
| 13 |
import subprocess, tempfile, os, uvicorn, threading, math, json, hashlib
|
|
@@ -17,16 +17,21 @@ from fastapi import FastAPI, HTTPException, Response
|
|
| 17 |
from fastapi.middleware.cors import CORSMiddleware
|
| 18 |
from pydantic import BaseModel
|
| 19 |
from typing import Optional
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
from openg2g.coordinator import Coordinator
|
| 23 |
|
| 24 |
-
from
|
| 25 |
-
DatacenterConfig,
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
| 27 |
)
|
|
|
|
| 28 |
from openg2g.datacenter.offline import OfflineDatacenter, OfflineWorkload
|
| 29 |
-
from
|
| 30 |
from openg2g.datacenter.workloads.training import TrainingTrace, TrainingTraceParams
|
| 31 |
from openg2g.grid.opendss import OpenDSSGrid
|
| 32 |
from openg2g.grid.config import TapPosition
|
|
@@ -72,17 +77,15 @@ BUSES_ORDERED = [BUS_INDEX_TO_NAME[i] for i in range(1, 14)]
|
|
| 72 |
#read files
|
| 73 |
_config_raw = json.loads(CONFIG_PATH.read_text())
|
| 74 |
_MODELS = tuple(InferenceModelSpec(**m) for m in _config_raw["models"])
|
| 75 |
-
_SOURCES = {s["model_label"]: MLEnergySource(**s) for s in _config_raw["data_sources"]}
|
| 76 |
_DC_CONFIG = DatacenterConfig(gpus_per_server=8, base_kw_per_phase=500.0)
|
| 77 |
|
|
|
|
| 78 |
if _config_raw.get("data_dir"):
|
| 79 |
_DATA_DIR = Path(_config_raw["data_dir"])
|
| 80 |
else:
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
_DATA_DIR = Path(__file__).parent / "data/offline" / hashlib.sha256(blob).hexdigest()[:16]
|
| 85 |
-
|
| 86 |
# Load traces_summary.csv once at startup so we can quickly look up trace files
|
| 87 |
_TRACES_SUMMARY_PATH = _DATA_DIR / "traces_summary.csv"
|
| 88 |
|
|
@@ -90,6 +93,28 @@ _TRACES_SUMMARY_PATH = _DATA_DIR / "traces_summary.csv"
|
|
| 90 |
_traces_df: pd.DataFrame | None = None
|
| 91 |
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
"""
|
| 94 |
Load trace index CSV and cache it.
|
| 95 |
"""
|
|
@@ -136,43 +161,64 @@ _load_traces_index() # load at startup
|
|
| 136 |
|
| 137 |
"""Datacenter workload (baseline)"""
|
| 138 |
def _build_dc(scale: float = 1.0, duration_s: int = 300) -> OfflineDatacenter:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
) for m in _MODELS
|
| 147 |
-
)
|
| 148 |
-
inference_data = InferenceData.ensure(_DATA_DIR, scaled_models, _SOURCES, dt_s=0.1)
|
| 149 |
training_trace = TrainingTrace.ensure(
|
| 150 |
-
_DATA_DIR / "training_trace.csv",
|
|
|
|
| 151 |
)
|
| 152 |
-
|
|
|
|
| 153 |
t1 = min(140.0, duration_s * 0.47)
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
workload = OfflineWorkload(
|
| 158 |
-
inference_data
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
return OfflineDatacenter(
|
| 169 |
-
_DC_CONFIG, workload, dt_s=Fraction(1, 10), seed=0,
|
| 170 |
-
power_augmentation=PowerAugmentationConfig(
|
| 171 |
-
amplitude_scale_range=(0.88, 1.12),
|
| 172 |
-
noise_fraction=0.04,
|
| 173 |
),
|
| 174 |
)
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
|
| 178 |
"""
|
|
@@ -190,52 +236,49 @@ def _build_dc_from_real_trace(
|
|
| 190 |
|
| 191 |
power_W = _get_trace_power(model_label, num_gpus, max_num_seqs, num_replicas)
|
| 192 |
|
| 193 |
-
# Trim or repeat trace to match requested duration at dt=0.1s
|
| 194 |
target_steps = int(duration_s / 0.1)
|
| 195 |
if len(power_W) < target_steps:
|
| 196 |
-
# Repeat trace to fill duration
|
| 197 |
repeats = math.ceil(target_steps / len(power_W))
|
| 198 |
power_W = (power_W * repeats)[:target_steps]
|
| 199 |
else:
|
| 200 |
power_W = power_W[:target_steps]
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
initial_batch_size = max_num_seqs,
|
| 208 |
-
itl_deadline_s = 0.08,
|
| 209 |
-
)
|
| 210 |
-
source = _SOURCES.get(model_label)
|
| 211 |
-
if source is None:
|
| 212 |
-
# Fall back to first available source if model not in config
|
| 213 |
-
source = next(iter(_SOURCES.values()))
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
| 217 |
)
|
| 218 |
|
| 219 |
-
|
|
|
|
|
|
|
| 220 |
|
| 221 |
dc = OfflineDatacenter(
|
| 222 |
_DC_CONFIG, workload, dt_s=Fraction(1, 10), seed=0,
|
|
|
|
|
|
|
| 223 |
power_augmentation=PowerAugmentationConfig(
|
| 224 |
-
amplitude_scale_range=(1.0, 1.0),
|
| 225 |
noise_fraction=0.0,
|
| 226 |
),
|
| 227 |
)
|
| 228 |
return dc, power_W
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
"""Create IEEE 13-bus grid with datacenter connection."""
|
| 233 |
def _build_grid(tap_pu: float, dc_bus: str) -> OpenDSSGrid:
|
| 234 |
return OpenDSSGrid(
|
| 235 |
-
dss_case_dir=str(DSS_DIR),
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
|
|
|
| 239 |
)
|
| 240 |
|
| 241 |
|
|
@@ -245,23 +288,25 @@ def _make_tap(v: float):
|
|
| 245 |
|
| 246 |
"""Run datacenter + grid simulation."""
|
| 247 |
def _run(dc, grid, tap_pu, dc_bus, duration_s):
|
|
|
|
| 248 |
coord = Coordinator(
|
| 249 |
-
|
|
|
|
| 250 |
controllers=[TapScheduleController(
|
| 251 |
-
schedule=
|
|
|
|
| 252 |
)],
|
| 253 |
total_duration_s=duration_s,
|
| 254 |
-
dc_bus=dc_bus,
|
| 255 |
)
|
| 256 |
return coord.run()
|
| 257 |
|
| 258 |
-
|
| 259 |
"""
|
| 260 |
Runs one full simulation job (datacenter + grid) in a worker process
|
| 261 |
and returns results for the API.
|
| 262 |
"""
|
| 263 |
def _run_full(req_dict: dict) -> dict:
|
| 264 |
|
|
|
|
| 265 |
dc_bus = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
|
| 266 |
replicas = max(1, req_dict["numReplicas"])
|
| 267 |
|
|
@@ -328,6 +373,7 @@ def _run_full(req_dict: dict) -> dict:
|
|
| 328 |
"""Get per-bus voltage (worst phase per bus)."""
|
| 329 |
def _voltages(gs) -> list[float]:
|
| 330 |
result = []
|
|
|
|
| 331 |
for name in BUSES_ORDERED:
|
| 332 |
try:
|
| 333 |
tp = gs.voltages[name]
|
|
@@ -337,11 +383,14 @@ def _voltages(gs) -> list[float]:
|
|
| 337 |
except Exception as e:
|
| 338 |
logger.debug(f"Bus {name} voltage unavailable: {e}")
|
| 339 |
result.append(None)
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
|
| 347 |
# ── FastAPI────────────────────────────────────────────────────────────────
|
|
@@ -349,11 +398,10 @@ def _voltages(gs) -> list[float]:
|
|
| 349 |
app = FastAPI()
|
| 350 |
app.add_middleware(
|
| 351 |
CORSMiddleware,
|
| 352 |
-
allow_origins=["
|
| 353 |
-
allow_credentials=
|
| 354 |
allow_methods=["*"],
|
| 355 |
allow_headers=["*"],
|
| 356 |
-
allow_origin_regex=".*",
|
| 357 |
)
|
| 358 |
|
| 359 |
|
|
@@ -471,6 +519,116 @@ async def heatmap(req: HeatmapRequest):
|
|
| 471 |
return Response(content=png, media_type="image/png")
|
| 472 |
|
| 473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
if __name__ == "__main__":
|
| 475 |
logger.info("=" * 70)
|
| 476 |
logger.info(f"Data dir: {_DATA_DIR} ready={_DATA_DIR.exists()}")
|
|
@@ -480,4 +638,4 @@ if __name__ == "__main__":
|
|
| 480 |
logger.info(f"Models: {models}")
|
| 481 |
logger.info(f"Traces: {len(df)} configurations")
|
| 482 |
logger.info("=" * 70)
|
| 483 |
-
uvicorn.run("server:app", host="0.0.0.0", port=8080, workers=1, log_level="info")
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
|
| 10 |
+
|
| 11 |
from fractions import Fraction
|
| 12 |
from pathlib import Path
|
| 13 |
import subprocess, tempfile, os, uvicorn, threading, math, json, hashlib
|
|
|
|
| 17 |
from fastapi.middleware.cors import CORSMiddleware
|
| 18 |
from pydantic import BaseModel
|
| 19 |
from typing import Optional
|
| 20 |
+
from fastapi import WebSocket, WebSocketDisconnect
|
| 21 |
|
| 22 |
|
| 23 |
from openg2g.coordinator import Coordinator
|
| 24 |
|
| 25 |
+
from openg2g.datacenter.config import (
|
| 26 |
+
DatacenterConfig,
|
| 27 |
+
InferenceModelSpec,
|
| 28 |
+
PowerAugmentationConfig,
|
| 29 |
+
TrainingRun,
|
| 30 |
+
ReplicaSchedule,
|
| 31 |
)
|
| 32 |
+
|
| 33 |
from openg2g.datacenter.offline import OfflineDatacenter, OfflineWorkload
|
| 34 |
+
from openg2g.datacenter.workloads.inference import InferenceData
|
| 35 |
from openg2g.datacenter.workloads.training import TrainingTrace, TrainingTraceParams
|
| 36 |
from openg2g.grid.opendss import OpenDSSGrid
|
| 37 |
from openg2g.grid.config import TapPosition
|
|
|
|
| 77 |
# Parse the JSON configuration once at import time and derive the model specs
# and the datacenter-level settings from it.
_config_raw = json.loads(CONFIG_PATH.read_text())
_MODELS = tuple(InferenceModelSpec(**spec) for spec in _config_raw["models"])
_DC_CONFIG = DatacenterConfig(gpus_per_server=8, base_kw_per_phase=500.0)

# A non-empty "data_dir" entry in the config wins; otherwise fall back to the
# "data/specs" directory that sits next to this module.
_DATA_DIR = (
    Path(_config_raw["data_dir"])
    if _config_raw.get("data_dir")
    else Path(__file__).parent / "data/specs"
)

# Location of the trace index (traces_summary.csv) used to look up trace files.
_TRACES_SUMMARY_PATH = _DATA_DIR / "traces_summary.csv"
|
| 91 |
|
|
|
|
| 93 |
_traces_df: pd.DataFrame | None = None
|
| 94 |
|
| 95 |
|
| 96 |
+
# Per-unit voltage change of one tap step.
# NOTE(review): presumably one step of a 32-step (+/-10 %) regulator — confirm.
TAP_STEP = 0.00625


def _tap_pu(steps: int) -> float:
    """Return the per-unit tap ratio that lies ``steps`` tap steps above 1.0."""
    return 1.0 + steps * TAP_STEP


# Tap positions (phases a/b/c) applied when the grid is created.
INITIAL_TAPS = TapPosition(a=_tap_pu(14), b=_tap_pu(6), c=_tap_pu(15))

# Tap changes applied during the run, rescaled to fit a 300 s window
# (the original schedule spans 3600 s).
# NOTE(review): the original comments say "12x compression", but
# 1500 s / 12 = 125 s and 3300 s / 12 = 275 s, not the 75 s / 200 s used
# here — confirm which switch times are intended.
TAP_CHANGE_SCHEDULE = (
    TapPosition(a=_tap_pu(16), b=_tap_pu(6), c=_tap_pu(17)).at(t=75)  # was 1500 s
    | TapPosition(a=_tap_pu(10), b=_tap_pu(6), c=_tap_pu(10)).at(t=200)  # was 3300 s
)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
"""
|
| 119 |
Load trace index CSV and cache it.
|
| 120 |
"""
|
|
|
|
| 161 |
|
| 162 |
"""Datacenter workload (baseline)"""
|
| 163 |
def _build_dc(scale: float = 1.0, duration_s: int = 300) -> OfflineDatacenter:
    """Build the baseline offline datacenter (inference replicas plus one training run).

    Args:
        scale: Multiplier for the workload size; it sets the initial replica
            count (``8 * scale``) and the training GPU count (``24 * scale``),
            each floored to an int and clamped to at least 1.
        duration_s: Simulation length in seconds; every event time below is
            capped both by an absolute value and by a fraction of this length.

    Returns:
        An ``OfflineDatacenter`` named ``"baseline"`` with a 0.1 s time step.
    """
    # Inference data is loaded only for the model named in the first row of
    # the trace index.
    trace_index = _load_traces_index()
    first_label = trace_index.iloc[0]["model_label"]
    first_model = tuple(spec for spec in _MODELS if spec.model_label == first_label)
    inference_data = InferenceData.load(_DATA_DIR, first_model)

    training_trace = TrainingTrace.ensure(
        _DATA_DIR / "training_trace.csv",
        TrainingTraceParams(),
    )

    # Training window.
    train_start = min(40.0, duration_s * 0.13)
    train_end = min(140.0, duration_s * 0.47)

    # Every model gets the same schedule: start at `initial_replicas`, then
    # ramp down to a quarter of that (never below one replica).
    initial_replicas = max(1, int(scale * 8))
    reduced_replicas = max(1, int(initial_replicas * 0.25))
    ramp_start = min(150.0, duration_s * 0.50)
    ramp_end = min(220.0, duration_s * 0.73)

    # NOTE(review): schedules are created for every entry of _MODELS although
    # inference data was loaded for a single model — confirm that
    # OfflineWorkload accepts schedules for models without inference data.
    replica_schedules = {
        spec.model_label: ReplicaSchedule(initial=initial_replicas).ramp_to(
            reduced_replicas,
            t_start=ramp_start,
            t_end=ramp_end,
        )
        for spec in _MODELS
    }

    training_run = TrainingRun(
        n_gpus=max(1, int(24 * scale)),
        trace=training_trace,
        target_peak_W_per_gpu=400.0,
    ).at(t_start=train_start, t_end=train_end)

    workload = OfflineWorkload(
        inference_data=inference_data,
        replica_schedules=replica_schedules,
        training=training_run,
    )

    augmentation = PowerAugmentationConfig(
        amplitude_scale_range=(0.88, 1.12),
        noise_fraction=0.04,
    )
    return OfflineDatacenter(
        _DC_CONFIG,
        workload,
        dt_s=Fraction(1, 10),
        seed=0,
        name="baseline",
        total_gpu_capacity=1000,
        power_augmentation=augmentation,
    )
|
| 222 |
|
| 223 |
|
| 224 |
"""
|
|
|
|
| 236 |
|
| 237 |
power_W = _get_trace_power(model_label, num_gpus, max_num_seqs, num_replicas)
|
| 238 |
|
|
|
|
| 239 |
target_steps = int(duration_s / 0.1)
|
| 240 |
if len(power_W) < target_steps:
|
|
|
|
| 241 |
repeats = math.ceil(target_steps / len(power_W))
|
| 242 |
power_W = (power_W * repeats)[:target_steps]
|
| 243 |
else:
|
| 244 |
power_W = power_W[:target_steps]
|
| 245 |
|
| 246 |
+
df = _load_traces_index()
|
| 247 |
+
row = df[df["model_label"] == model_label].iloc[0]
|
| 248 |
+
|
| 249 |
+
model_tuple = tuple(m for m in _MODELS if m.model_label == model_label)
|
| 250 |
+
inference_data = InferenceData.load(_DATA_DIR, model_tuple)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
+
workload = OfflineWorkload(
|
| 253 |
+
inference_data=inference_data,
|
| 254 |
+
replica_schedules={
|
| 255 |
+
model_label: ReplicaSchedule(initial=num_replicas)
|
| 256 |
+
},
|
| 257 |
)
|
| 258 |
|
| 259 |
+
# ← compute actual GPU count and add headroom
|
| 260 |
+
actual_gpu_count = num_replicas * num_gpus
|
| 261 |
+
gpu_capacity = max(1000, actual_gpu_count * 2)
|
| 262 |
|
| 263 |
dc = OfflineDatacenter(
|
| 264 |
_DC_CONFIG, workload, dt_s=Fraction(1, 10), seed=0,
|
| 265 |
+
name=model_label.replace(".", "-"),
|
| 266 |
+
total_gpu_capacity=gpu_capacity, # ← was hardcoded 1000
|
| 267 |
power_augmentation=PowerAugmentationConfig(
|
| 268 |
+
amplitude_scale_range=(1.0, 1.0),
|
| 269 |
noise_fraction=0.0,
|
| 270 |
),
|
| 271 |
)
|
| 272 |
return dc, power_W
|
| 273 |
|
|
|
|
|
|
|
| 274 |
"""Create IEEE 13-bus grid with datacenter connection."""
|
| 275 |
def _build_grid(tap_pu: float, dc_bus: str) -> OpenDSSGrid:
    """Construct the OpenDSS grid for the configured DSS case.

    Args:
        tap_pu: Per-unit source voltage, passed through as ``source_pu``.
        dc_bus: Not used here; the datacenter is attached to its bus by the
            caller after the grid has been built.

    Returns:
        An ``OpenDSSGrid`` with a 1 s time step that starts at ``INITIAL_TAPS``.
    """
    grid = OpenDSSGrid(
        dss_case_dir=str(DSS_DIR),
        dss_master_file=DSS_MASTER,
        dt_s=Fraction(1),
        source_pu=tap_pu,
        initial_tap_position=INITIAL_TAPS,
    )
    return grid
|
| 283 |
|
| 284 |
|
|
|
|
| 288 |
|
| 289 |
"""Run datacenter + grid simulation."""
|
| 290 |
def _run(dc, grid, tap_pu, dc_bus, duration_s):
    """Attach ``dc`` to ``grid`` at ``dc_bus`` and run the coupled simulation.

    Args:
        dc: Datacenter to simulate.
        grid: Grid the datacenter is attached to (wye connection, power factor
            taken from ``_DC_CONFIG``).
        tap_pu: Not used here; kept so existing callers keep working.
        dc_bus: Name of the bus the datacenter connects to.
        duration_s: Total simulated duration in seconds.

    Returns:
        Whatever ``Coordinator.run()`` returns for the completed simulation.
    """
    grid.attach_dc(
        dc,
        bus=dc_bus,
        connection_type="wye",
        power_factor=_DC_CONFIG.power_factor,
    )

    # The only controller replays the module-level tap-change schedule at 1 s steps.
    tap_controller = TapScheduleController(
        schedule=TAP_CHANGE_SCHEDULE,
        dt_s=Fraction(1),
    )
    coordinator = Coordinator(
        datacenters=[dc],
        grid=grid,
        controllers=[tap_controller],
        total_duration_s=duration_s,
    )
    return coordinator.run()
|
| 302 |
|
|
|
|
| 303 |
"""
|
| 304 |
Runs one full simulation job (datacenter + grid) in a worker process
|
| 305 |
and returns results for the API.
|
| 306 |
"""
|
| 307 |
def _run_full(req_dict: dict) -> dict:
|
| 308 |
|
| 309 |
+
|
| 310 |
dc_bus = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
|
| 311 |
replicas = max(1, req_dict["numReplicas"])
|
| 312 |
|
|
|
|
| 373 |
"""Get per-bus voltage (worst phase per bus)."""
|
| 374 |
def _voltages(gs) -> list[float]:
|
| 375 |
result = []
|
| 376 |
+
none_count = 0
|
| 377 |
for name in BUSES_ORDERED:
|
| 378 |
try:
|
| 379 |
tp = gs.voltages[name]
|
|
|
|
| 383 |
except Exception as e:
|
| 384 |
logger.debug(f"Bus {name} voltage unavailable: {e}")
|
| 385 |
result.append(None)
|
| 386 |
+
|
| 387 |
+
none_count = sum(1 for v in result if v is None)
|
| 388 |
+
if none_count > 3:
|
| 389 |
+
logger.warning(f"OpenDSS convergence failure: {none_count}/13 buses returned None")
|
| 390 |
+
|
| 391 |
+
known = [v for v in result if v is not None]
|
| 392 |
+
avg = sum(known) / len(known) if known else 1.0
|
| 393 |
+
return [v if v is not None else avg for v in result]
|
| 394 |
|
| 395 |
|
| 396 |
# ── FastAPI────────────────────────────────────────────────────────────────
|
|
|
|
| 398 |
# The FastAPI application object; route decorators in this module register on it.
app = FastAPI()
# CORS policy: requests from any origin, with any method and any header, are
# accepted, and credentialed cross-origin requests are disabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 406 |
|
| 407 |
|
|
|
|
| 519 |
return Response(content=png, media_type="image/png")
|
| 520 |
|
| 521 |
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
def _serialize_tick(tick, req_dict: dict, raw_power_W: list[float]) -> dict:
    """Serialize one TickOutput to a small JSON-safe dict for the frontend.

    Args:
        tick: Coordinator tick; the attributes read are ``t_s``,
            ``grid_state``, ``dc_states`` and ``sim_events``.
        req_dict: Request parameters; the keys read are ``targetBus``
            (1-based bus index), ``numReplicas`` and ``numGpus``.
        raw_power_W: Un-augmented power trace in watts, one sample per 0.1 s.
            May be empty, in which case the simulated power is reported as
            the raw power too.

    Returns:
        A dict with the time, total and raw power in kW, the requested GPU
        count, per-model batch sizes, bus voltages (``None`` for ticks where
        the grid did not produce a state) and the tick's simulation events.
        ``target_bus_voltage`` is ``None`` when ``targetBus`` does not address
        one of the reported buses.
    """
    trace_dt_s = 0.1  # sampling period of raw_power_W
    t = tick.t_s

    # Grid voltages (may be None if the grid didn't tick this step).
    voltages = min_v = max_v = target_v = None
    if tick.grid_state is not None:
        voltages = _voltages(tick.grid_state)
        min_v = min(voltages)
        max_v = max(voltages)
        # Bounds-check explicitly: a plain voltages[targetBus - 1] would wrap
        # to the last bus for targetBus <= 0 and raise for too-large indices.
        bus_idx = req_dict["targetBus"] - 1
        if 0 <= bus_idx < len(voltages):
            target_v = voltages[bus_idx]

    # Datacenter power summed over all sites and all three phases.
    kw = 0.0
    batch_by_model: dict[str, int] = {}
    for ds in tick.dc_states.values():
        pw = ds.power_w
        kw += float((pw.a + pw.b + pw.c) / 1000)
        # Not every state type carries batch sizes, and the attribute may be None.
        batch_by_model.update(getattr(ds, "batch_size_by_model", None) or {})
    if math.isnan(kw):
        kw = 0.0

    if raw_power_W:
        # The small epsilon keeps float error from truncating to the previous
        # sample (0.3 / 0.1 evaluates to 2.9999999999999996).
        trace_idx = int(float(t) / trace_dt_s + 1e-6)
        trace_idx = min(max(trace_idx, 0), len(raw_power_W) - 1)
        raw_kw = raw_power_W[trace_idx] / 1000.0
    else:
        raw_kw = kw

    events = [{"type": e.event_type, "data": e.data} for e in tick.sim_events]

    return {
        "time": float(t),
        "gpu_power_kW": kw,
        "gpu_power_raw_kW": raw_kw,
        # Derived from the request, not from the tick state.
        "active_gpus": req_dict["numReplicas"] * req_dict["numGpus"],
        "batch_by_model": batch_by_model,
        "voltages": voltages,
        "min_voltage": min_v,
        "max_voltage": max_v,
        "target_bus_voltage": target_v,
        "sim_events": events,
    }
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
def _run_streaming(req_dict: dict):
    """Generator: build the datacenter, grid and coordinator, then yield tick dicts.

    Args:
        req_dict: Request parameters; the keys read are ``targetBus``,
            ``numReplicas``, ``modelLabel``, ``numGpus``, ``maxNumSeqs``,
            ``durationS``, ``substationVoltage`` and ``sampleInterval``.

    Yields:
        The ``_serialize_tick`` dict of every ``sampleInterval``-th tick,
        starting with the first one.
    """
    # Unknown bus indices fall back to bus "671".
    bus_name = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
    replica_count = max(1, req_dict["numReplicas"])

    dc, raw_power_W = _build_dc_from_real_trace(
        model_label=req_dict["modelLabel"],
        num_gpus=req_dict["numGpus"],
        max_num_seqs=req_dict["maxNumSeqs"],
        num_replicas=replica_count,
        duration_s=req_dict["durationS"],
    )
    grid = _build_grid(req_dict["substationVoltage"], bus_name)
    grid.attach_dc(
        dc,
        bus=bus_name,
        connection_type="wye",
        power_factor=_DC_CONFIG.power_factor,
    )

    tap_controller = TapScheduleController(
        schedule=TAP_CHANGE_SCHEDULE,
        dt_s=Fraction(1),
    )
    coordinator = Coordinator(
        datacenters=[dc],
        grid=grid,
        controllers=[tap_controller],
        total_duration_s=req_dict["durationS"],
    )

    stride = max(1, req_dict["sampleInterval"])
    try:
        for tick_no, tick in enumerate(coordinator.run_iter()):
            if tick_no % stride != 0:
                continue  # only every `stride`-th tick is emitted
            yield _serialize_tick(tick, req_dict, raw_power_W)
    finally:
        # Also runs when the consumer abandons the generator early.
        coordinator.stop()
|
| 605 |
+
|
| 606 |
+
@app.websocket("/ws/sim-stream")
async def sim_stream(ws: WebSocket):
    """WebSocket endpoint that runs one simulation and sends its time series.

    Protocol: the client sends a single JSON message with the fields of
    ``LLMImpactRequest``. The server replies with one JSON message per entry
    of the result's ``timeSeries``, followed by ``{"done": true}``. On failure
    a single ``{"error": "<message>"}`` is sent on a best-effort basis.

    The whole simulation is computed before the first row is sent; rows are
    replayed from the finished result, not produced incrementally.
    """
    await ws.accept()
    try:
        req_dict = await ws.receive_json()
        req = LLMImpactRequest(**req_dict)
        logger.info(f"WS stream: {req.modelLabel} bus={req.targetBus}")

        # Run the full simulation in the process pool (separate process = safe
        # for OpenDSS). get_running_loop() is the recommended way to obtain
        # the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(_pool, _run_full, req.dict())

        # Replay the completed result row by row.
        for row in result["timeSeries"]:
            await ws.send_json(row)

        await ws.send_json({"done": True})

    except WebSocketDisconnect:
        logger.info("WS client disconnected")
    except Exception as e:
        logger.exception("WS stream failed")
        try:
            await ws.send_json({"error": str(e)})
        except Exception:
            # Best effort only: the socket may already be gone. Record the
            # failure instead of discarding it silently.
            logger.debug("Could not deliver error message to WS client", exc_info=True)
|
| 632 |
if __name__ == "__main__":
|
| 633 |
logger.info("=" * 70)
|
| 634 |
logger.info(f"Data dir: {_DATA_DIR} ready={_DATA_DIR.exists()}")
|
|
|
|
| 638 |
logger.info(f"Models: {models}")
|
| 639 |
logger.info(f"Traces: {len(df)} configurations")
|
| 640 |
logger.info("=" * 70)
|
| 641 |
+
uvicorn.run("server:app", host="0.0.0.0", port=8080, workers=1, log_level="info", ws_ping_interval=None)
|