github-actions[bot] committed on
Commit
b598e06
·
1 Parent(s): e83942e

deploy: sync from GitHub 2026-05-13T22:41:47Z

Browse files
data/specs/2e756e9e0a2d417e/_manifest.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mlenergy_data_version": "0.4.0",
3
+ "openg2g_version": "0.2.0.post1",
4
+ "schema_version": 3,
5
+ "spec": {
6
+ "batch_sizes": [
7
+ 8,
8
+ 16,
9
+ 32,
10
+ 64,
11
+ 96,
12
+ 128,
13
+ 192,
14
+ 256,
15
+ 384,
16
+ 512
17
+ ],
18
+ "expert_parallel": 1,
19
+ "feasible_batch_sizes": [
20
+ 8,
21
+ 16,
22
+ 32,
23
+ 64,
24
+ 128,
25
+ 256,
26
+ 512
27
+ ],
28
+ "fit_exclude_batch_sizes": [],
29
+ "gpu_model": "H100",
30
+ "gpus_per_replica": 8,
31
+ "itl_deadline_s": 0.14,
32
+ "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
33
+ "model_label": "Qwen3-235B-A22B",
34
+ "precision": "bfloat16",
35
+ "task": "gpqa",
36
+ "tensor_parallel": 8
37
+ },
38
+ "spec_hash": "2e756e9e0a2d417e",
39
+ "written_utc": "2026-05-13T19:50:49+00:00"
40
+ }
data/specs/2e756e9e0a2d417e/itl_fit.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_label": "Qwen3-235B-A22B",
3
+ "per_batch": {
4
+ "128": {
5
+ "loc": 0.013250339005364572,
6
+ "pi_stall": 0.9173336260689986,
7
+ "pi_steady": 0.0826663739310014,
8
+ "scale_stall": 0.026592548144528116,
9
+ "scale_steady": 0.010186920973940559,
10
+ "sigma_stall": 0.1192887538283944,
11
+ "sigma_steady": 1.4754674804724548
12
+ },
13
+ "16": {
14
+ "loc": 0.012917142001581146,
15
+ "pi_stall": 0.9679024411455602,
16
+ "pi_steady": 0.03209755885443977,
17
+ "scale_stall": 0.011944872620218289,
18
+ "scale_steady": 0.010055679342980215,
19
+ "sigma_stall": 0.06668499474004932,
20
+ "sigma_steady": 0.3364193391214592
21
+ },
22
+ "192": {
23
+ "loc": 0.013189563986427151,
24
+ "pi_stall": 0.9731451222965651,
25
+ "pi_steady": 0.026854877703434865,
26
+ "scale_stall": 0.02687676804506179,
27
+ "scale_steady": 0.018437463794844785,
28
+ "sigma_stall": 0.1420087680700662,
29
+ "sigma_steady": 2.5037616364005233
30
+ },
31
+ "256": {
32
+ "loc": 0.01325633499834314,
33
+ "pi_stall": 0.2520610216205986,
34
+ "pi_steady": 0.7479389783794014,
35
+ "scale_stall": 0.028301520442211254,
36
+ "scale_steady": 0.025729232489331608,
37
+ "sigma_stall": 0.5252417168242823,
38
+ "sigma_steady": 0.07938870914892968
39
+ },
40
+ "32": {
41
+ "loc": 0.013021193001233973,
42
+ "pi_stall": 0.9124702204373671,
43
+ "pi_steady": 0.08752977956263286,
44
+ "scale_stall": 0.02038829931099621,
45
+ "scale_steady": 0.012894545131554781,
46
+ "sigma_stall": 0.05215477595544453,
47
+ "sigma_steady": 0.3888129537458069
48
+ },
49
+ "384": {
50
+ "loc": 0.013478797999025323,
51
+ "pi_stall": 0.2220717676647871,
52
+ "pi_steady": 0.7779282323352129,
53
+ "scale_stall": 0.033227654127086725,
54
+ "scale_steady": 0.024930327834188436,
55
+ "sigma_stall": 0.7095586536822586,
56
+ "sigma_steady": 0.09695357279523584
57
+ },
58
+ "512": {
59
+ "loc": 0.013146029007086531,
60
+ "pi_stall": 0.17758013619796087,
61
+ "pi_steady": 0.8224198638020391,
62
+ "scale_stall": 0.03455616392329533,
63
+ "scale_steady": 0.024200569803794394,
64
+ "sigma_stall": 0.7204206877666427,
65
+ "sigma_steady": 0.09983052036261611
66
+ },
67
+ "64": {
68
+ "loc": 0.013067278996797745,
69
+ "pi_stall": 0.9183472954770104,
70
+ "pi_steady": 0.08165270452298956,
71
+ "scale_stall": 0.02541259423220897,
72
+ "scale_steady": 0.013607592085388696,
73
+ "sigma_stall": 0.08662948392900281,
74
+ "sigma_steady": 1.1879848722617707
75
+ },
76
+ "8": {
77
+ "loc": 0.013058345998044592,
78
+ "pi_stall": 0.9313422909338545,
79
+ "pi_steady": 0.06865770906614554,
80
+ "scale_stall": 0.007196610201069568,
81
+ "scale_steady": 0.00519407736027536,
82
+ "sigma_stall": 0.08145564694748231,
83
+ "sigma_steady": 0.2964711653438411
84
+ },
85
+ "96": {
86
+ "loc": 0.014793187001825777,
87
+ "pi_stall": 0.8434947366701049,
88
+ "pi_steady": 0.15650526332989512,
89
+ "scale_stall": 0.024908395671325943,
90
+ "scale_steady": 0.015913317708739556,
91
+ "sigma_stall": 0.08592049198306687,
92
+ "sigma_steady": 0.5909612329674692
93
+ }
94
+ },
95
+ "schema": "itl_fit.lognormal_mixture_2"
96
+ }
data/specs/2e756e9e0a2d417e/logistic_fit.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "latency": {
3
+ "L": 0.672333808351321,
4
+ "b0": 0.025327222690596844,
5
+ "k": 0.9249147277217336,
6
+ "x0": 10.0
7
+ },
8
+ "model_label": "Qwen3-235B-A22B",
9
+ "power": {
10
+ "L": 1130.4391558531665,
11
+ "b0": 2551.5754923889053,
12
+ "k": 1.2638482029342986,
13
+ "x0": 3.846153846153846
14
+ },
15
+ "schema": "logistic_v1",
16
+ "throughput": {
17
+ "L": 1400.9050260569725,
18
+ "b0": 102.09644698524227,
19
+ "k": 0.9249147277217336,
20
+ "x0": 4.461538461538462
21
+ }
22
+ }
data/specs/2e756e9e0a2d417e/trace.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/specs/4925acb216d43131/_manifest.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mlenergy_data_version": "0.4.0",
3
+ "openg2g_version": "0.2.0.post1",
4
+ "schema_version": 3,
5
+ "spec": {
6
+ "batch_sizes": [
7
+ 8,
8
+ 16,
9
+ 32,
10
+ 64,
11
+ 96,
12
+ 128,
13
+ 192,
14
+ 256,
15
+ 384,
16
+ 512
17
+ ],
18
+ "expert_parallel": 1,
19
+ "feasible_batch_sizes": [
20
+ 8,
21
+ 16,
22
+ 32,
23
+ 64,
24
+ 128,
25
+ 256,
26
+ 512
27
+ ],
28
+ "fit_exclude_batch_sizes": [],
29
+ "gpu_model": "H100",
30
+ "gpus_per_replica": 8,
31
+ "itl_deadline_s": 0.12,
32
+ "model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
33
+ "model_label": "Llama-3.1-405B",
34
+ "precision": "fp8",
35
+ "task": "lm-arena-chat",
36
+ "tensor_parallel": 8
37
+ },
38
+ "spec_hash": "4925acb216d43131",
39
+ "written_utc": "2026-05-13T19:50:49+00:00"
40
+ }
data/specs/4925acb216d43131/itl_fit.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_label": "Llama-3.1-405B",
3
+ "per_batch": {
4
+ "128": {
5
+ "loc": 0.03008249612355232,
6
+ "pi_stall": 0.3839287952895576,
7
+ "pi_steady": 0.6160712047104424,
8
+ "scale_stall": 0.030214832869952784,
9
+ "scale_steady": 0.01777233734505344,
10
+ "sigma_stall": 1.2206296529153282,
11
+ "sigma_steady": 0.05
12
+ },
13
+ "16": {
14
+ "loc": 0.024713983999729155,
15
+ "pi_stall": 0.05360691147049179,
16
+ "pi_steady": 0.9463930885295082,
17
+ "scale_stall": 0.023448181502284848,
18
+ "scale_steady": 0.009322546786604182,
19
+ "sigma_stall": 0.995674214634189,
20
+ "sigma_steady": 0.05
21
+ },
22
+ "192": {
23
+ "loc": 0.030320093276143073,
24
+ "pi_stall": 0.3323523854706074,
25
+ "pi_steady": 0.6676476145293926,
26
+ "scale_stall": 0.041205692039902035,
27
+ "scale_steady": 0.031746846437682316,
28
+ "sigma_stall": 1.4276701505839031,
29
+ "sigma_steady": 0.05
30
+ },
31
+ "256": {
32
+ "loc": 0.030789543183684348,
33
+ "pi_stall": 0.6349499418714404,
34
+ "pi_steady": 0.3650500581285596,
35
+ "scale_stall": 0.04324939305958362,
36
+ "scale_steady": 0.03573730522680613,
37
+ "sigma_stall": 0.3721225887337235,
38
+ "sigma_steady": 1.5919966754869803
39
+ },
40
+ "32": {
41
+ "loc": 0.016498148681759833,
42
+ "pi_stall": 0.07390731357848579,
43
+ "pi_steady": 0.9260926864215142,
44
+ "scale_stall": 0.04555732131859944,
45
+ "scale_steady": 0.019842170525771235,
46
+ "sigma_stall": 0.7320543620724521,
47
+ "sigma_steady": 0.05
48
+ },
49
+ "384": {
50
+ "loc": 0.03019163595342636,
51
+ "pi_stall": 0.6593984486141276,
52
+ "pi_steady": 0.34060155138587245,
53
+ "scale_stall": 0.0597066517568152,
54
+ "scale_steady": 0.03991190875593972,
55
+ "sigma_stall": 0.4563966264262895,
56
+ "sigma_steady": 1.7170985862943504
57
+ },
58
+ "512": {
59
+ "loc": 0.030048940571188926,
60
+ "pi_stall": 0.3362132870451149,
61
+ "pi_steady": 0.6637867129548851,
62
+ "scale_stall": 0.04683803969863319,
63
+ "scale_steady": 0.044447301633350554,
64
+ "sigma_stall": 1.9213625679927164,
65
+ "sigma_steady": 0.4334724494446896
66
+ },
67
+ "64": {
68
+ "loc": 0.03152825334870815,
69
+ "pi_stall": 0.20700593967800063,
70
+ "pi_steady": 0.7929940603219994,
71
+ "scale_stall": 0.019287437563102458,
72
+ "scale_steady": 0.0076138475784196065,
73
+ "sigma_stall": 1.323371073182697,
74
+ "sigma_steady": 0.08253738945931417
75
+ },
76
+ "8": {
77
+ "loc": 0.029814763235092162,
78
+ "pi_stall": 0.04279942897407696,
79
+ "pi_steady": 0.957200571025923,
80
+ "scale_stall": 0.007195773794863316,
81
+ "scale_steady": 0.0033871174475517797,
82
+ "sigma_stall": 1.7354345964695308,
83
+ "sigma_steady": 0.05409894580485472
84
+ },
85
+ "96": {
86
+ "loc": 0.030120123418211936,
87
+ "pi_stall": 0.2798559198385162,
88
+ "pi_steady": 0.7201440801614838,
89
+ "scale_stall": 0.022833854494912858,
90
+ "scale_steady": 0.013914741048287108,
91
+ "sigma_stall": 1.2985027758578491,
92
+ "sigma_steady": 0.05
93
+ }
94
+ },
95
+ "schema": "itl_fit.lognormal_mixture_2"
96
+ }
data/specs/4925acb216d43131/logistic_fit.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "latency": {
3
+ "L": 0.11093873569263059,
4
+ "b0": 0.034847828201926605,
5
+ "k": 1.2638482029342986,
6
+ "x0": 7.743589743589744
7
+ },
8
+ "model_label": "Llama-3.1-405B",
9
+ "power": {
10
+ "L": 1824.2431634291524,
11
+ "b0": 3147.2107548161166,
12
+ "k": 1.0811807510766078,
13
+ "x0": 5.692307692307692
14
+ },
15
+ "schema": "logistic_v1",
16
+ "throughput": {
17
+ "L": 2118.1101549783216,
18
+ "b0": 231.54554435357053,
19
+ "k": 1.2638482029342986,
20
+ "x0": 5.897435897435898
21
+ }
22
+ }
data/specs/4925acb216d43131/trace.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/specs/73691f793f6be469/_manifest.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mlenergy_data_version": "0.4.0",
3
+ "openg2g_version": "0.2.0.post1",
4
+ "schema_version": 3,
5
+ "spec": {
6
+ "batch_sizes": [
7
+ 8,
8
+ 16,
9
+ 32,
10
+ 64,
11
+ 96,
12
+ 128,
13
+ 192,
14
+ 256,
15
+ 384,
16
+ 512,
17
+ 768,
18
+ 1024,
19
+ 1536,
20
+ 2048
21
+ ],
22
+ "expert_parallel": 1,
23
+ "feasible_batch_sizes": [
24
+ 8,
25
+ 16,
26
+ 32,
27
+ 64,
28
+ 128,
29
+ 256,
30
+ 512
31
+ ],
32
+ "fit_exclude_batch_sizes": [],
33
+ "gpu_model": "H100",
34
+ "gpus_per_replica": 4,
35
+ "itl_deadline_s": 0.1,
36
+ "model_id": "meta-llama/Llama-3.1-70B-Instruct",
37
+ "model_label": "Llama-3.1-70B",
38
+ "precision": "bfloat16",
39
+ "task": "lm-arena-chat",
40
+ "tensor_parallel": 4
41
+ },
42
+ "spec_hash": "73691f793f6be469",
43
+ "written_utc": "2026-05-13T19:50:49+00:00"
44
+ }
data/specs/73691f793f6be469/itl_fit.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_label": "Llama-3.1-70B",
3
+ "per_batch": {
4
+ "1024": {
5
+ "loc": 0.016125269474625586,
6
+ "pi_stall": 0.6504738214326463,
7
+ "pi_steady": 0.3495261785673537,
8
+ "scale_stall": 0.09652171767308945,
9
+ "scale_steady": 0.041688588651955336,
10
+ "sigma_stall": 0.2462296773360523,
11
+ "sigma_steady": 1.5217750215970742
12
+ },
13
+ "128": {
14
+ "loc": 0.0022778047790527342,
15
+ "pi_stall": 0.22124884454952398,
16
+ "pi_steady": 0.778751155450476,
17
+ "scale_stall": 0.03383674745941713,
18
+ "scale_steady": 0.02568463145989505,
19
+ "sigma_stall": 0.5948150662563634,
20
+ "sigma_steady": 0.05
21
+ },
22
+ "1536": {
23
+ "loc": 0.016071272963356226,
24
+ "pi_stall": 0.8076530807526892,
25
+ "pi_steady": 0.19234691924731084,
26
+ "scale_stall": 0.089848886853969,
27
+ "scale_steady": 0.03420538011598045,
28
+ "sigma_stall": 0.46924205332030033,
29
+ "sigma_steady": 1.7596491528177638
30
+ },
31
+ "16": {
32
+ "loc": 0.015499197187066079,
33
+ "pi_stall": 0.08907607653965377,
34
+ "pi_steady": 0.9109239234603462,
35
+ "scale_stall": 0.004313043371105469,
36
+ "scale_steady": 0.0022230910334916296,
37
+ "sigma_stall": 1.2844234847005134,
38
+ "sigma_steady": 0.07913035461052097
39
+ },
40
+ "192": {
41
+ "loc": 0.016638526933431624,
42
+ "pi_stall": 0.38156228653101176,
43
+ "pi_steady": 0.6184377134689882,
44
+ "scale_stall": 0.018703032248183576,
45
+ "scale_steady": 0.01714548617582296,
46
+ "sigma_stall": 1.5189443495977526,
47
+ "sigma_steady": 0.10867393985639938
48
+ },
49
+ "2048": {
50
+ "loc": 0.016001691991161554,
51
+ "pi_stall": 0.8736696701311039,
52
+ "pi_steady": 0.12633032986889614,
53
+ "scale_stall": 0.08379097178728898,
54
+ "scale_steady": 0.03344713394228911,
55
+ "sigma_stall": 0.565330172781654,
56
+ "sigma_steady": 1.9824576270857919
57
+ },
58
+ "256": {
59
+ "loc": 0.016839679571032523,
60
+ "pi_stall": 0.6213219296982286,
61
+ "pi_steady": 0.37867807030177136,
62
+ "scale_stall": 0.023516103187706146,
63
+ "scale_steady": 0.02020604087391179,
64
+ "sigma_stall": 0.1354246512511205,
65
+ "sigma_steady": 1.4631774065297862
66
+ },
67
+ "32": {
68
+ "loc": 0.016572259606957435,
69
+ "pi_stall": 0.09635680905311539,
70
+ "pi_steady": 0.9036431909468846,
71
+ "scale_stall": 0.006175543416404281,
72
+ "scale_steady": 0.0034452728216791523,
73
+ "sigma_stall": 1.5074175831534309,
74
+ "sigma_steady": 0.10971052943864357
75
+ },
76
+ "384": {
77
+ "loc": 0.014811257140874863,
78
+ "pi_stall": 0.4531974533289852,
79
+ "pi_steady": 0.5468025466710148,
80
+ "scale_stall": 0.04197168592710718,
81
+ "scale_steady": 0.02986712173327284,
82
+ "sigma_stall": 0.11177738517749303,
83
+ "sigma_steady": 1.1391787357157586
84
+ },
85
+ "512": {
86
+ "loc": 0.01658029133284092,
87
+ "pi_stall": 0.6495871324327867,
88
+ "pi_steady": 0.35041286756721335,
89
+ "scale_stall": 0.05843082131488063,
90
+ "scale_steady": 0.03733948541353972,
91
+ "sigma_stall": 0.10431631509790713,
92
+ "sigma_steady": 1.3462214452446384
93
+ },
94
+ "64": {
95
+ "loc": 0.016720101477742194,
96
+ "pi_stall": 0.150178986901629,
97
+ "pi_steady": 0.849821013098371,
98
+ "scale_stall": 0.008828633434550627,
99
+ "scale_steady": 0.005697346737934002,
100
+ "sigma_stall": 1.3616972036550803,
101
+ "sigma_steady": 0.09826399163532819
102
+ },
103
+ "768": {
104
+ "loc": 0.016117640080094336,
105
+ "pi_stall": 0.645660707800124,
106
+ "pi_steady": 0.35433929219987603,
107
+ "scale_stall": 0.09211974240843952,
108
+ "scale_steady": 0.03127136906409066,
109
+ "sigma_stall": 0.14808465753722347,
110
+ "sigma_steady": 1.418245669226621
111
+ },
112
+ "8": {
113
+ "loc": 0.013241712244391442,
114
+ "pi_stall": 0.04095075080502819,
115
+ "pi_steady": 0.9590492491949718,
116
+ "scale_stall": 0.005297393800444009,
117
+ "scale_steady": 0.0033418650030394626,
118
+ "sigma_stall": 0.8751605195077952,
119
+ "sigma_steady": 0.05
120
+ },
121
+ "96": {
122
+ "loc": 1.0773779988288879e-05,
123
+ "pi_stall": 0.16115863500383798,
124
+ "pi_steady": 0.838841364996162,
125
+ "scale_stall": 0.034668825689171574,
126
+ "scale_steady": 0.025306298134214287,
127
+ "sigma_stall": 0.6442443219876751,
128
+ "sigma_steady": 0.05
129
+ }
130
+ },
131
+ "schema": "itl_fit.lognormal_mixture_2"
132
+ }
data/specs/73691f793f6be469/logistic_fit.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "latency": {
3
+ "L": 0.3337169923867402,
4
+ "b0": 0.01581299670243065,
5
+ "k": 0.6768750009458534,
6
+ "x0": 11.23076923076923
7
+ },
8
+ "model_label": "Llama-3.1-70B",
9
+ "power": {
10
+ "L": 1154.0021476589798,
11
+ "b0": 1646.7205316820875,
12
+ "k": 0.5790443980602487,
13
+ "x0": 7.384615384615384
14
+ },
15
+ "schema": "logistic_v1",
16
+ "throughput": {
17
+ "L": 7487.842476643308,
18
+ "b0": -115.86209784705026,
19
+ "k": 0.6768750009458534,
20
+ "x0": 6.871794871794871
21
+ }
22
+ }
data/specs/73691f793f6be469/trace.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/specs/860654eabd3dfebf/_manifest.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mlenergy_data_version": "0.4.0",
3
+ "openg2g_version": "0.2.0.post1",
4
+ "schema_version": 3,
5
+ "spec": {
6
+ "batch_sizes": [
7
+ 8,
8
+ 16,
9
+ 32,
10
+ 64,
11
+ 96,
12
+ 128,
13
+ 192,
14
+ 256,
15
+ 384,
16
+ 512
17
+ ],
18
+ "expert_parallel": 1,
19
+ "feasible_batch_sizes": [
20
+ 8,
21
+ 16,
22
+ 32,
23
+ 64,
24
+ 128,
25
+ 256,
26
+ 512
27
+ ],
28
+ "fit_exclude_batch_sizes": [],
29
+ "gpu_model": "H100",
30
+ "gpus_per_replica": 2,
31
+ "itl_deadline_s": 0.06,
32
+ "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
33
+ "model_label": "Qwen3-30B-A3B",
34
+ "precision": "bfloat16",
35
+ "task": "gpqa",
36
+ "tensor_parallel": 2
37
+ },
38
+ "spec_hash": "860654eabd3dfebf",
39
+ "written_utc": "2026-05-13T19:50:49+00:00"
40
+ }
data/specs/860654eabd3dfebf/itl_fit.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_label": "Qwen3-30B-A3B",
3
+ "per_batch": {
4
+ "128": {
5
+ "loc": 0.005779745297670364,
6
+ "pi_stall": 0.9016421299546657,
7
+ "pi_steady": 0.09835787004533425,
8
+ "scale_stall": 0.022204989839097976,
9
+ "scale_steady": 0.011952816505967263,
10
+ "sigma_stall": 0.17115589522976615,
11
+ "sigma_steady": 0.8638437216770689
12
+ },
13
+ "16": {
14
+ "loc": 0.006709285320878029,
15
+ "pi_stall": 0.9318898842600892,
16
+ "pi_steady": 0.06811011573991077,
17
+ "scale_stall": 0.005286084772734071,
18
+ "scale_steady": 0.003488072071334762,
19
+ "sigma_stall": 0.10968234279797877,
20
+ "sigma_steady": 0.7066423134806045
21
+ },
22
+ "192": {
23
+ "loc": 0.006146881969809532,
24
+ "pi_stall": 0.5496379486314646,
25
+ "pi_steady": 0.45036205136853535,
26
+ "scale_stall": 0.03517282452060233,
27
+ "scale_steady": 0.023996489386177662,
28
+ "sigma_stall": 0.09280191100817078,
29
+ "sigma_steady": 0.4411174802673816
30
+ },
31
+ "256": {
32
+ "loc": 0.005351871492505073,
33
+ "pi_stall": 0.5179735298893404,
34
+ "pi_steady": 0.4820264701106596,
35
+ "scale_stall": 0.0465574174374143,
36
+ "scale_steady": 0.02979761583658413,
37
+ "sigma_stall": 0.08247269205931418,
38
+ "sigma_steady": 0.46540172529241985
39
+ },
40
+ "32": {
41
+ "loc": 5.387010216712952e-06,
42
+ "pi_stall": 0.8987735970748053,
43
+ "pi_steady": 0.10122640292519469,
44
+ "scale_stall": 0.01640469159213389,
45
+ "scale_steady": 0.013000996744328759,
46
+ "sigma_stall": 0.05,
47
+ "sigma_steady": 0.33658931223823957
48
+ },
49
+ "384": {
50
+ "loc": 0.005363984273910522,
51
+ "pi_stall": 0.6442964861816537,
52
+ "pi_steady": 0.3557035138183463,
53
+ "scale_stall": 0.04511956412849174,
54
+ "scale_steady": 0.03479809929767838,
55
+ "sigma_stall": 0.05,
56
+ "sigma_steady": 0.4926395770591876
57
+ },
58
+ "512": {
59
+ "loc": 0.005344948040485382,
60
+ "pi_stall": 0.844032586498555,
61
+ "pi_steady": 0.15596741350144505,
62
+ "scale_stall": 0.04628929533783176,
63
+ "scale_steady": 0.031408287375895155,
64
+ "sigma_stall": 0.16501900831914335,
65
+ "sigma_steady": 0.7857801384710986
66
+ },
67
+ "64": {
68
+ "loc": 5.24731183052063e-06,
69
+ "pi_stall": 0.6213994676646826,
70
+ "pi_steady": 0.37860053233531743,
71
+ "scale_stall": 0.02199501617088761,
72
+ "scale_steady": 0.01839224430879353,
73
+ "sigma_stall": 0.05,
74
+ "sigma_steady": 0.1933315951912185
75
+ },
76
+ "8": {
77
+ "loc": 0.005612284185528755,
78
+ "pi_stall": 0.9754857241215052,
79
+ "pi_steady": 0.02451427587849475,
80
+ "scale_stall": 0.0037432124276108067,
81
+ "scale_steady": 0.0023369924600121004,
82
+ "sigma_stall": 0.09591646898132508,
83
+ "sigma_steady": 0.8401834713569618
84
+ },
85
+ "96": {
86
+ "loc": 0.005704086514353752,
87
+ "pi_stall": 0.8933240851637367,
88
+ "pi_steady": 0.10667591483626326,
89
+ "scale_stall": 0.020937595889766954,
90
+ "scale_steady": 0.009315065436423254,
91
+ "sigma_stall": 0.17041518988603524,
92
+ "sigma_steady": 0.9135642352221147
93
+ }
94
+ },
95
+ "schema": "itl_fit.lognormal_mixture_2"
96
+ }
data/specs/860654eabd3dfebf/logistic_fit.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "latency": {
3
+ "L": 0.22314992755762236,
4
+ "b0": 0.011393094262367034,
5
+ "k": 0.9249147277217336,
6
+ "x0": 9.58974358974359
7
+ },
8
+ "model_label": "Qwen3-30B-A3B",
9
+ "power": {
10
+ "L": 443.2622940538781,
11
+ "b0": 521.77175156347,
12
+ "k": 0.6768750009458534,
13
+ "x0": 3.435897435897436
14
+ },
15
+ "schema": "logistic_v1",
16
+ "throughput": {
17
+ "L": 16272.619545718631,
18
+ "b0": -798.0377199891329,
19
+ "k": 0.3101168926574778,
20
+ "x0": 10.0
21
+ }
22
+ }
data/specs/860654eabd3dfebf/trace.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/specs/bc0bde304544a603/_manifest.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mlenergy_data_version": "0.4.0",
3
+ "openg2g_version": "0.2.0.post1",
4
+ "schema_version": 3,
5
+ "spec": {
6
+ "batch_sizes": [
7
+ 8,
8
+ 16,
9
+ 32,
10
+ 64,
11
+ 96,
12
+ 128,
13
+ 192,
14
+ 256,
15
+ 384,
16
+ 512,
17
+ 768,
18
+ 1024
19
+ ],
20
+ "expert_parallel": 1,
21
+ "feasible_batch_sizes": [
22
+ 8,
23
+ 16,
24
+ 32,
25
+ 64,
26
+ 128,
27
+ 256,
28
+ 512
29
+ ],
30
+ "fit_exclude_batch_sizes": [],
31
+ "gpu_model": "H100",
32
+ "gpus_per_replica": 1,
33
+ "itl_deadline_s": 0.08,
34
+ "model_id": "meta-llama/Llama-3.1-8B-Instruct",
35
+ "model_label": "Llama-3.1-8B",
36
+ "precision": "bfloat16",
37
+ "task": "lm-arena-chat",
38
+ "tensor_parallel": 1
39
+ },
40
+ "spec_hash": "bc0bde304544a603",
41
+ "written_utc": "2026-05-13T19:50:49+00:00"
42
+ }
data/specs/bc0bde304544a603/itl_fit.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_label": "Llama-3.1-8B",
3
+ "per_batch": {
4
+ "1024": {
5
+ "loc": 0.007010523004621267,
6
+ "pi_stall": 0.7837426601266253,
7
+ "pi_steady": 0.2162573398733747,
8
+ "scale_stall": 0.05956751699666166,
9
+ "scale_steady": 0.023334748866848526,
10
+ "sigma_stall": 0.2225255796663595,
11
+ "sigma_steady": 1.5148319210986743
12
+ },
13
+ "128": {
14
+ "loc": 0.0026725998690128325,
15
+ "pi_stall": 0.670438895498799,
16
+ "pi_steady": 0.329561104501201,
17
+ "scale_stall": 0.01398383994948415,
18
+ "scale_steady": 0.01291166006566778,
19
+ "sigma_stall": 0.10190881519172769,
20
+ "sigma_steady": 0.6028124518856584
21
+ },
22
+ "16": {
23
+ "loc": 0.0018269608339071275,
24
+ "pi_stall": 0.10464355744071052,
25
+ "pi_steady": 0.8953564425592895,
26
+ "scale_stall": 0.0064746630446026925,
27
+ "scale_steady": 0.006130842910997042,
28
+ "sigma_stall": 0.787949891540804,
29
+ "sigma_steady": 0.05
30
+ },
31
+ "192": {
32
+ "loc": 0.004752941670060158,
33
+ "pi_stall": 0.7662774362890146,
34
+ "pi_steady": 0.23372256371098543,
35
+ "scale_stall": 0.01598165094645324,
36
+ "scale_steady": 0.0150463242114058,
37
+ "sigma_stall": 0.14182877630339646,
38
+ "sigma_steady": 0.7849802295588915
39
+ },
40
+ "256": {
41
+ "loc": 0.006535205008160323,
42
+ "pi_stall": 0.7935925278538705,
43
+ "pi_steady": 0.20640747214612953,
44
+ "scale_stall": 0.019609283330274195,
45
+ "scale_steady": 0.016315824423412995,
46
+ "sigma_stall": 0.15827383548087384,
47
+ "sigma_steady": 0.9154474758196223
48
+ },
49
+ "32": {
50
+ "loc": 0.0035353920032978057,
51
+ "pi_stall": 0.17403414005364848,
52
+ "pi_steady": 0.8259658599463515,
53
+ "scale_stall": 0.006034873438781453,
54
+ "scale_steady": 0.005399039804654786,
55
+ "sigma_stall": 0.6283723882974765,
56
+ "sigma_steady": 0.093251542298717
57
+ },
58
+ "384": {
59
+ "loc": 0.004538576999377459,
60
+ "pi_stall": 0.6944861095107936,
61
+ "pi_steady": 0.3055138904892064,
62
+ "scale_stall": 0.03365572663992157,
63
+ "scale_steady": 0.02542297917615243,
64
+ "sigma_stall": 0.10481746968936954,
65
+ "sigma_steady": 0.8129715507672286
66
+ },
67
+ "512": {
68
+ "loc": 0.006579698012840003,
69
+ "pi_stall": 0.7984203718644621,
70
+ "pi_steady": 0.20157962813553787,
71
+ "scale_stall": 0.04203774478880279,
72
+ "scale_steady": 0.017431896454495718,
73
+ "sigma_stall": 0.16222657200608384,
74
+ "sigma_steady": 1.3251956670044174
75
+ },
76
+ "64": {
77
+ "loc": 0.0047925396432876586,
78
+ "pi_stall": 0.8475854653968202,
79
+ "pi_steady": 0.15241453460317977,
80
+ "scale_stall": 0.0068440681404629946,
81
+ "scale_steady": 0.006554435848211792,
82
+ "sigma_stall": 0.1343829094213754,
83
+ "sigma_steady": 0.8077132495834084
84
+ },
85
+ "768": {
86
+ "loc": 0.00489947998027876,
87
+ "pi_stall": 0.5897706893853796,
88
+ "pi_steady": 0.4102293106146204,
89
+ "scale_stall": 0.058643939455719225,
90
+ "scale_steady": 0.045424901604616506,
91
+ "sigma_stall": 0.0958052090925679,
92
+ "sigma_steady": 0.8954582871522097
93
+ },
94
+ "8": {
95
+ "loc": 0.0017732551863193513,
96
+ "pi_stall": 0.04214661390101049,
97
+ "pi_steady": 0.9578533860989895,
98
+ "scale_stall": 0.0056649474765016194,
99
+ "scale_steady": 0.0055782406714952265,
100
+ "sigma_stall": 0.7319335730101633,
101
+ "sigma_steady": 0.05
102
+ },
103
+ "96": {
104
+ "loc": 0.0019363875111341477,
105
+ "pi_stall": 0.2619339890384934,
106
+ "pi_steady": 0.7380660109615066,
107
+ "scale_stall": 0.012201993050876735,
108
+ "scale_steady": 0.01218169825049524,
109
+ "sigma_stall": 0.632097274814945,
110
+ "sigma_steady": 0.08150463339106037
111
+ }
112
+ },
113
+ "schema": "itl_fit.lognormal_mixture_2"
114
+ }
data/specs/bc0bde304544a603/logistic_fit.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "latency": {
3
+ "L": 0.08748363159637874,
4
+ "b0": 0.008524586220155353,
5
+ "k": 1.0811807510766078,
6
+ "x0": 9.153846153846153
7
+ },
8
+ "model_label": "Llama-3.1-8B",
9
+ "power": {
10
+ "L": 210.91957273648842,
11
+ "b0": 480.39716041942955,
12
+ "k": 1.2638482029342986,
13
+ "x0": 5.461538461538462
14
+ },
15
+ "schema": "logistic_v1",
16
+ "throughput": {
17
+ "L": 9383.467277123862,
18
+ "b0": 771.2518469119908,
19
+ "k": 0.9249147277217336,
20
+ "x0": 6.153846153846154
21
+ }
22
+ }
data/specs/bc0bde304544a603/trace.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/specs/traces_summary.csv ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_label,num_gpus,max_num_seqs,trace_file
2
+ Llama-3.1-70B,4,8,73691f793f6be469/trace.csv
3
+ Llama-3.1-70B,4,16,73691f793f6be469/trace.csv
4
+ Llama-3.1-70B,4,32,73691f793f6be469/trace.csv
5
+ Llama-3.1-70B,4,64,73691f793f6be469/trace.csv
6
+ Llama-3.1-70B,4,128,73691f793f6be469/trace.csv
7
+ Llama-3.1-70B,4,256,73691f793f6be469/trace.csv
8
+ Llama-3.1-70B,4,512,73691f793f6be469/trace.csv
9
+ Qwen3-30B-A3B,2,8,860654eabd3dfebf/trace.csv
10
+ Qwen3-30B-A3B,2,16,860654eabd3dfebf/trace.csv
11
+ Qwen3-30B-A3B,2,32,860654eabd3dfebf/trace.csv
12
+ Qwen3-30B-A3B,2,64,860654eabd3dfebf/trace.csv
13
+ Qwen3-30B-A3B,2,128,860654eabd3dfebf/trace.csv
14
+ Qwen3-30B-A3B,2,256,860654eabd3dfebf/trace.csv
15
+ Qwen3-30B-A3B,2,512,860654eabd3dfebf/trace.csv
16
+ Llama-3.1-405B,8,8,4925acb216d43131/trace.csv
17
+ Llama-3.1-405B,8,16,4925acb216d43131/trace.csv
18
+ Llama-3.1-405B,8,32,4925acb216d43131/trace.csv
19
+ Llama-3.1-405B,8,64,4925acb216d43131/trace.csv
20
+ Llama-3.1-405B,8,128,4925acb216d43131/trace.csv
21
+ Llama-3.1-405B,8,256,4925acb216d43131/trace.csv
22
+ Llama-3.1-405B,8,512,4925acb216d43131/trace.csv
23
+ Qwen3-235B-A22B,8,8,2e756e9e0a2d417e/trace.csv
24
+ Qwen3-235B-A22B,8,16,2e756e9e0a2d417e/trace.csv
25
+ Qwen3-235B-A22B,8,32,2e756e9e0a2d417e/trace.csv
26
+ Qwen3-235B-A22B,8,64,2e756e9e0a2d417e/trace.csv
27
+ Qwen3-235B-A22B,8,128,2e756e9e0a2d417e/trace.csv
28
+ Qwen3-235B-A22B,8,256,2e756e9e0a2d417e/trace.csv
29
+ Qwen3-235B-A22B,8,512,2e756e9e0a2d417e/trace.csv
30
+ Llama-3.1-8B,1,8,bc0bde304544a603/trace.csv
31
+ Llama-3.1-8B,1,16,bc0bde304544a603/trace.csv
32
+ Llama-3.1-8B,1,32,bc0bde304544a603/trace.csv
33
+ Llama-3.1-8B,1,64,bc0bde304544a603/trace.csv
34
+ Llama-3.1-8B,1,128,bc0bde304544a603/trace.csv
35
+ Llama-3.1-8B,1,256,bc0bde304544a603/trace.csv
36
+ Llama-3.1-8B,1,512,bc0bde304544a603/trace.csv
data/specs/training_trace.csv ADDED
The diff for this file is too large to render. See raw diff
 
examples/offline/config.json CHANGED
@@ -3,58 +3,72 @@
3
  {
4
  "model_label": "Llama-3.1-8B",
5
  "model_id": "meta-llama/Llama-3.1-8B-Instruct",
 
 
6
  "gpus_per_replica": 1,
7
- "num_replicas": 720,
8
- "initial_batch_size": 128,
9
  "itl_deadline_s": 0.08,
10
- "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512]
 
 
 
11
  },
12
  {
13
  "model_label": "Llama-3.1-70B",
14
  "model_id": "meta-llama/Llama-3.1-70B-Instruct",
 
 
15
  "gpus_per_replica": 4,
16
- "num_replicas": 180,
17
- "initial_batch_size": 128,
18
  "itl_deadline_s": 0.10,
19
- "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512]
 
 
 
20
  },
21
  {
22
  "model_label": "Llama-3.1-405B",
23
  "model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
 
 
24
  "gpus_per_replica": 8,
25
- "num_replicas": 90,
26
- "initial_batch_size": 128,
27
  "itl_deadline_s": 0.12,
28
- "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512]
 
 
 
29
  },
30
  {
31
  "model_label": "Qwen3-30B-A3B",
32
  "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
 
 
33
  "gpus_per_replica": 2,
34
- "num_replicas": 480,
35
- "initial_batch_size": 128,
36
  "itl_deadline_s": 0.06,
37
- "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512]
 
 
 
38
  },
39
  {
40
  "model_label": "Qwen3-235B-A22B",
41
  "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
 
 
42
  "gpus_per_replica": 8,
43
- "num_replicas": 210,
44
- "initial_batch_size": 128,
45
  "itl_deadline_s": 0.14,
46
- "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512]
 
 
 
47
  }
48
  ],
49
- "data_sources": [
50
- {"model_label": "Llama-3.1-8B", "task": "lm-arena-chat", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024]},
51
- {"model_label": "Llama-3.1-70B", "task": "lm-arena-chat", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048]},
52
- {"model_label": "Llama-3.1-405B", "task": "lm-arena-chat", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512]},
53
- {"model_label": "Qwen3-30B-A3B", "task": "gpqa", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512]},
54
- {"model_label": "Qwen3-235B-A22B", "task": "gpqa", "gpu": "H100", "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512]}
55
- ],
56
  "training_trace_params": {},
57
  "data_dir": null,
58
  "ieee_case_dir": "examples/ieee13",
59
  "mlenergy_data_dir": null
60
- }
 
3
  {
4
  "model_label": "Llama-3.1-8B",
5
  "model_id": "meta-llama/Llama-3.1-8B-Instruct",
6
+ "gpu_model": "H100",
7
+ "task": "lm-arena-chat",
8
  "gpus_per_replica": 1,
9
+ "tensor_parallel": 1,
 
10
  "itl_deadline_s": 0.08,
11
+ "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024],
12
+ "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
13
+ "num_replicas": 720,
14
+ "initial_batch_size": 128
15
  },
16
  {
17
  "model_label": "Llama-3.1-70B",
18
  "model_id": "meta-llama/Llama-3.1-70B-Instruct",
19
+ "gpu_model": "H100",
20
+ "task": "lm-arena-chat",
21
  "gpus_per_replica": 4,
22
+ "tensor_parallel": 4,
 
23
  "itl_deadline_s": 0.10,
24
+ "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048],
25
+ "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
26
+ "num_replicas": 180,
27
+ "initial_batch_size": 128
28
  },
29
  {
30
  "model_label": "Llama-3.1-405B",
31
  "model_id": "meta-llama/Llama-3.1-405B-Instruct-FP8",
32
+ "gpu_model": "H100",
33
+ "task": "lm-arena-chat",
34
  "gpus_per_replica": 8,
35
+ "tensor_parallel": 8,
36
+ "precision": "fp8",
37
  "itl_deadline_s": 0.12,
38
+ "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
39
+ "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
40
+ "num_replicas": 90,
41
+ "initial_batch_size": 128
42
  },
43
  {
44
  "model_label": "Qwen3-30B-A3B",
45
  "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
46
+ "gpu_model": "H100",
47
+ "task": "gpqa",
48
  "gpus_per_replica": 2,
49
+ "tensor_parallel": 2,
 
50
  "itl_deadline_s": 0.06,
51
+ "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
52
+ "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
53
+ "num_replicas": 480,
54
+ "initial_batch_size": 128
55
  },
56
  {
57
  "model_label": "Qwen3-235B-A22B",
58
  "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
59
+ "gpu_model": "H100",
60
+ "task": "gpqa",
61
  "gpus_per_replica": 8,
62
+ "tensor_parallel": 8,
 
63
  "itl_deadline_s": 0.14,
64
+ "batch_sizes": [8, 16, 32, 64, 96, 128, 192, 256, 384, 512],
65
+ "feasible_batch_sizes": [8, 16, 32, 64, 128, 256, 512],
66
+ "num_replicas": 210,
67
+ "initial_batch_size": 128
68
  }
69
  ],
 
 
 
 
 
 
 
70
  "training_trace_params": {},
71
  "data_dir": null,
72
  "ieee_case_dir": "examples/ieee13",
73
  "mlenergy_data_dir": null
74
+ }
requirements.txt CHANGED
@@ -6,3 +6,5 @@ pandas
6
  opendssdirect.py
7
  matplotlib
8
  scipy
 
 
 
6
  opendssdirect.py
7
  matplotlib
8
  scipy
9
+ openg2g[opendss]==0.2.0.post1
10
+ websockets
server.py CHANGED
@@ -7,7 +7,7 @@ Uses GPU power traces and workloads to model howAI inference/training affects g
7
  """
8
 
9
 
10
- from __future__ import annotations
11
  from fractions import Fraction
12
  from pathlib import Path
13
  import subprocess, tempfile, os, uvicorn, threading, math, json, hashlib
@@ -17,16 +17,21 @@ from fastapi import FastAPI, HTTPException, Response
17
  from fastapi.middleware.cors import CORSMiddleware
18
  from pydantic import BaseModel
19
  from typing import Optional
 
20
 
21
 
22
  from openg2g.coordinator import Coordinator
23
 
24
- from openg2g.datacenter.config import (
25
- DatacenterConfig, InferenceModelSpec,
26
- PowerAugmentationConfig, InferenceRamp, TrainingRun,
 
 
 
27
  )
 
28
  from openg2g.datacenter.offline import OfflineDatacenter, OfflineWorkload
29
- from openg2g.datacenter.workloads.inference import InferenceData, MLEnergySource
30
  from openg2g.datacenter.workloads.training import TrainingTrace, TrainingTraceParams
31
  from openg2g.grid.opendss import OpenDSSGrid
32
  from openg2g.grid.config import TapPosition
@@ -72,17 +77,15 @@ BUSES_ORDERED = [BUS_INDEX_TO_NAME[i] for i in range(1, 14)]
72
  #read files
73
  _config_raw = json.loads(CONFIG_PATH.read_text())
74
  _MODELS = tuple(InferenceModelSpec(**m) for m in _config_raw["models"])
75
- _SOURCES = {s["model_label"]: MLEnergySource(**s) for s in _config_raw["data_sources"]}
76
  _DC_CONFIG = DatacenterConfig(gpus_per_server=8, base_kw_per_phase=500.0)
77
 
 
78
  if _config_raw.get("data_dir"):
79
  _DATA_DIR = Path(_config_raw["data_dir"])
80
  else:
81
- blob = json.dumps(sorted(_config_raw["data_sources"],
82
- key=lambda s: s["model_label"]),
83
- sort_keys=True).encode()
84
- _DATA_DIR = Path(__file__).parent / "data/offline" / hashlib.sha256(blob).hexdigest()[:16]
85
-
86
  # Load traces_summary.csv once at startup so we can quickly look up trace files
87
  _TRACES_SUMMARY_PATH = _DATA_DIR / "traces_summary.csv"
88
 
@@ -90,6 +93,28 @@ _TRACES_SUMMARY_PATH = _DATA_DIR / "traces_summary.csv"
90
  _traces_df: pd.DataFrame | None = None
91
 
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  """
94
  Load trace index CSV and cache it.
95
  """
@@ -136,43 +161,64 @@ _load_traces_index() # load at startup
136
 
137
  """Datacenter workload (baseline)"""
138
  def _build_dc(scale: float = 1.0, duration_s: int = 300) -> OfflineDatacenter:
139
- scaled_models = tuple(
140
- InferenceModelSpec(
141
- model_label = m.model_label,
142
- num_replicas = max(1, int(m.num_replicas * scale)),
143
- gpus_per_replica = m.gpus_per_replica,
144
- initial_batch_size = m.initial_batch_size,
145
- itl_deadline_s = m.itl_deadline_s,
146
- ) for m in _MODELS
147
- )
148
- inference_data = InferenceData.ensure(_DATA_DIR, scaled_models, _SOURCES, dt_s=0.1)
149
  training_trace = TrainingTrace.ensure(
150
- _DATA_DIR / "training_trace.csv", TrainingTraceParams()
 
151
  )
152
- t0 = min(40.0, duration_s * 0.13)
 
153
  t1 = min(140.0, duration_s * 0.47)
154
- t2 = min(150.0, duration_s * 0.50)
155
- t3 = min(220.0, duration_s * 0.73)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  workload = OfflineWorkload(
158
- inference_data = inference_data,
159
- training = TrainingRun(
160
- n_gpus = max(1, int(24 * scale)),
161
- trace = training_trace,
162
- target_peak_W_per_gpu= 400.0,
163
- ).at(t_start=t0, t_end=t1),
164
- inference_ramps = InferenceRamp(
165
- target=min(1.0, 0.25 * scale)
166
- ).at(t_start=t2, t_end=t3),
167
- )
168
- return OfflineDatacenter(
169
- _DC_CONFIG, workload, dt_s=Fraction(1, 10), seed=0,
170
- power_augmentation=PowerAugmentationConfig(
171
- amplitude_scale_range=(0.88, 1.12),
172
- noise_fraction=0.04,
173
  ),
174
  )
175
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
 
178
  """
@@ -190,52 +236,49 @@ def _build_dc_from_real_trace(
190
 
191
  power_W = _get_trace_power(model_label, num_gpus, max_num_seqs, num_replicas)
192
 
193
- # Trim or repeat trace to match requested duration at dt=0.1s
194
  target_steps = int(duration_s / 0.1)
195
  if len(power_W) < target_steps:
196
- # Repeat trace to fill duration
197
  repeats = math.ceil(target_steps / len(power_W))
198
  power_W = (power_W * repeats)[:target_steps]
199
  else:
200
  power_W = power_W[:target_steps]
201
 
202
- # Build InferenceData with a single model replica matching the trace GPUs
203
- model_spec = InferenceModelSpec(
204
- model_label = model_label,
205
- num_replicas = num_replicas,
206
- gpus_per_replica = num_gpus,
207
- initial_batch_size = max_num_seqs,
208
- itl_deadline_s = 0.08,
209
- )
210
- source = _SOURCES.get(model_label)
211
- if source is None:
212
- # Fall back to first available source if model not in config
213
- source = next(iter(_SOURCES.values()))
214
 
215
- inference_data = InferenceData.ensure(
216
- _DATA_DIR, (model_spec,), {model_label: source}, dt_s=0.1
 
 
 
217
  )
218
 
219
- workload = OfflineWorkload(inference_data=inference_data)
 
 
220
 
221
  dc = OfflineDatacenter(
222
  _DC_CONFIG, workload, dt_s=Fraction(1, 10), seed=0,
 
 
223
  power_augmentation=PowerAugmentationConfig(
224
- amplitude_scale_range=(1.0, 1.0), # no augmentation — use real trace as-is
225
  noise_fraction=0.0,
226
  ),
227
  )
228
  return dc, power_W
229
 
230
-
231
-
232
  """Create IEEE 13-bus grid with datacenter connection."""
233
  def _build_grid(tap_pu: float, dc_bus: str) -> OpenDSSGrid:
234
  return OpenDSSGrid(
235
- dss_case_dir=str(DSS_DIR), dss_master_file=DSS_MASTER,
236
- dc_bus=dc_bus, dc_bus_kv=4.16,
237
- power_factor=_DC_CONFIG.power_factor,
238
- dt_s=Fraction(1), connection_type="wye",
 
239
  )
240
 
241
 
@@ -245,23 +288,25 @@ def _make_tap(v: float):
245
 
246
  """Run datacenter + grid simulation."""
247
  def _run(dc, grid, tap_pu, dc_bus, duration_s):
 
248
  coord = Coordinator(
249
- datacenter=dc, grid=grid,
 
250
  controllers=[TapScheduleController(
251
- schedule=_make_tap(tap_pu), dt_s=Fraction(1)
 
252
  )],
253
  total_duration_s=duration_s,
254
- dc_bus=dc_bus,
255
  )
256
  return coord.run()
257
 
258
-
259
  """
260
  Runs one full simulation job (datacenter + grid) in a worker process
261
  and returns results for the API.
262
  """
263
  def _run_full(req_dict: dict) -> dict:
264
 
 
265
  dc_bus = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
266
  replicas = max(1, req_dict["numReplicas"])
267
 
@@ -328,6 +373,7 @@ def _run_full(req_dict: dict) -> dict:
328
  """Get per-bus voltage (worst phase per bus)."""
329
  def _voltages(gs) -> list[float]:
330
  result = []
 
331
  for name in BUSES_ORDERED:
332
  try:
333
  tp = gs.voltages[name]
@@ -337,11 +383,14 @@ def _voltages(gs) -> list[float]:
337
  except Exception as e:
338
  logger.debug(f"Bus {name} voltage unavailable: {e}")
339
  result.append(None)
340
- known = [v for v in result if v is not None]
341
- avg = sum(known) / len(known) if known else 1.0
342
- result = [v if v is not None else avg for v in result]
343
- logger.debug(f"Voltages: {[round(v,4) for v in result]}")
344
- return result
 
 
 
345
 
346
 
347
  # ── FastAPI────────────────────────────────────────────────────────────────
@@ -349,11 +398,10 @@ def _voltages(gs) -> list[float]:
349
  app = FastAPI()
350
  app.add_middleware(
351
  CORSMiddleware,
352
- allow_origins=["https://gpu2grid.io", "http://localhost:5173", "http://localhost:5174"],
353
- allow_credentials=True,
354
  allow_methods=["*"],
355
  allow_headers=["*"],
356
- allow_origin_regex=".*",
357
  )
358
 
359
 
@@ -471,6 +519,116 @@ async def heatmap(req: HeatmapRequest):
471
  return Response(content=png, media_type="image/png")
472
 
473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  if __name__ == "__main__":
475
  logger.info("=" * 70)
476
  logger.info(f"Data dir: {_DATA_DIR} ready={_DATA_DIR.exists()}")
@@ -480,4 +638,4 @@ if __name__ == "__main__":
480
  logger.info(f"Models: {models}")
481
  logger.info(f"Traces: {len(df)} configurations")
482
  logger.info("=" * 70)
483
- uvicorn.run("server:app", host="0.0.0.0", port=8080, workers=1, log_level="info")
 
7
  """
8
 
9
 
10
+
11
  from fractions import Fraction
12
  from pathlib import Path
13
  import subprocess, tempfile, os, uvicorn, threading, math, json, hashlib
 
17
  from fastapi.middleware.cors import CORSMiddleware
18
  from pydantic import BaseModel
19
  from typing import Optional
20
+ from fastapi import WebSocket, WebSocketDisconnect
21
 
22
 
23
  from openg2g.coordinator import Coordinator
24
 
25
+ from openg2g.datacenter.config import (
26
+ DatacenterConfig,
27
+ InferenceModelSpec,
28
+ PowerAugmentationConfig,
29
+ TrainingRun,
30
+ ReplicaSchedule,
31
  )
32
+
33
  from openg2g.datacenter.offline import OfflineDatacenter, OfflineWorkload
34
+ from openg2g.datacenter.workloads.inference import InferenceData
35
  from openg2g.datacenter.workloads.training import TrainingTrace, TrainingTraceParams
36
  from openg2g.grid.opendss import OpenDSSGrid
37
  from openg2g.grid.config import TapPosition
 
77
  #read files
78
  _config_raw = json.loads(CONFIG_PATH.read_text())
79
  _MODELS = tuple(InferenceModelSpec(**m) for m in _config_raw["models"])
 
80
  _DC_CONFIG = DatacenterConfig(gpus_per_server=8, base_kw_per_phase=500.0)
81
 
82
+
83
  if _config_raw.get("data_dir"):
84
  _DATA_DIR = Path(_config_raw["data_dir"])
85
  else:
86
+ _DATA_DIR = Path(__file__).parent / "data/specs"
87
+
88
+
 
 
89
  # Load traces_summary.csv once at startup so we can quickly look up trace files
90
  _TRACES_SUMMARY_PATH = _DATA_DIR / "traces_summary.csv"
91
 
 
93
  _traces_df: pd.DataFrame | None = None
94
 
95
 
96
# Size of one tap step; tap ratios below are expressed as 1.0 plus a whole
# number of these steps.
TAP_STEP = 0.00625


def _tap_pu(steps: int) -> float:
    """Return the tap ratio that sits ``steps`` tap steps above 1.0."""
    return 1.0 + steps * TAP_STEP


# Tap position handed to the grid at construction time (phases a / b / c).
INITIAL_TAPS = TapPosition(a=_tap_pu(14), b=_tap_pu(6), c=_tap_pu(15))

# Two scheduled tap changes, placed so that both fall inside a 300 s run.
# The reference schedule spans 3600 s with events at 1500 s and 3300 s.
# NOTE(review): the original comments described this as a "~12x compression",
# but 1500 s -> 75 s is 20x and 3300 s -> 200 s is 16.5x (a uniform 12x would
# give 125 s and 275 s). Confirm which timings are intended.
_TAPS_AT_75_S = TapPosition(a=_tap_pu(16), b=_tap_pu(6), c=_tap_pu(17))
_TAPS_AT_200_S = TapPosition(a=_tap_pu(10), b=_tap_pu(6), c=_tap_pu(10))

TAP_CHANGE_SCHEDULE = _TAPS_AT_75_S.at(t=75) | _TAPS_AT_200_S.at(t=200)
116
+
117
+
118
  """
119
  Load trace index CSV and cache it.
120
  """
 
161
 
162
  """Datacenter workload (baseline)"""
163
  def _build_dc(scale: float = 1.0, duration_s: int = 300) -> OfflineDatacenter:
164
+
165
+ df = _load_traces_index()
166
+ first_row = df.iloc[0]
167
+
168
+ first_model = tuple(m for m in _MODELS if m.model_label == first_row["model_label"])
169
+ inference_data = InferenceData.load(_DATA_DIR, first_model)
170
+
 
 
 
171
  training_trace = TrainingTrace.ensure(
172
+ _DATA_DIR / "training_trace.csv",
173
+ TrainingTraceParams(),
174
  )
175
+
176
+ t0 = min(40.0, duration_s * 0.13)
177
  t1 = min(140.0, duration_s * 0.47)
178
+
179
+ replica_schedules = {}
180
+
181
+ for m in _MODELS:
182
+
183
+ initial_replicas = max(1, int(scale * 8))
184
+
185
+ reduced_replicas = max(1, int(initial_replicas * 0.25))
186
+
187
+ replica_schedules[m.model_label] = (
188
+ ReplicaSchedule(initial=initial_replicas)
189
+ .ramp_to(
190
+ reduced_replicas,
191
+ t_start=min(150.0, duration_s * 0.50),
192
+ t_end=min(220.0, duration_s * 0.73),
193
+ )
194
+ )
195
 
196
  workload = OfflineWorkload(
197
+ inference_data=inference_data,
198
+ replica_schedules=replica_schedules,
199
+
200
+ training=TrainingRun(
201
+ n_gpus=max(1, int(24 * scale)),
202
+ trace=training_trace,
203
+ target_peak_W_per_gpu=400.0,
204
+ ).at(
205
+ t_start=t0,
206
+ t_end=t1,
 
 
 
 
 
207
  ),
208
  )
209
 
210
+ return OfflineDatacenter(
211
+ _DC_CONFIG,
212
+ workload,
213
+ dt_s=Fraction(1, 10),
214
+ seed=0,
215
+ name="baseline",
216
+ total_gpu_capacity=1000,
217
+ power_augmentation=PowerAugmentationConfig(
218
+ amplitude_scale_range=(0.88, 1.12),
219
+ noise_fraction=0.04,
220
+ ),
221
+ )
222
 
223
 
224
  """
 
236
 
237
  power_W = _get_trace_power(model_label, num_gpus, max_num_seqs, num_replicas)
238
 
 
239
  target_steps = int(duration_s / 0.1)
240
  if len(power_W) < target_steps:
 
241
  repeats = math.ceil(target_steps / len(power_W))
242
  power_W = (power_W * repeats)[:target_steps]
243
  else:
244
  power_W = power_W[:target_steps]
245
 
246
+ df = _load_traces_index()
247
+ row = df[df["model_label"] == model_label].iloc[0]
248
+
249
+ model_tuple = tuple(m for m in _MODELS if m.model_label == model_label)
250
+ inference_data = InferenceData.load(_DATA_DIR, model_tuple)
 
 
 
 
 
 
 
251
 
252
+ workload = OfflineWorkload(
253
+ inference_data=inference_data,
254
+ replica_schedules={
255
+ model_label: ReplicaSchedule(initial=num_replicas)
256
+ },
257
  )
258
 
259
+ # compute actual GPU count and add headroom
260
+ actual_gpu_count = num_replicas * num_gpus
261
+ gpu_capacity = max(1000, actual_gpu_count * 2)
262
 
263
  dc = OfflineDatacenter(
264
  _DC_CONFIG, workload, dt_s=Fraction(1, 10), seed=0,
265
+ name=model_label.replace(".", "-"),
266
+ total_gpu_capacity=gpu_capacity, # ← was hardcoded 1000
267
  power_augmentation=PowerAugmentationConfig(
268
+ amplitude_scale_range=(1.0, 1.0),
269
  noise_fraction=0.0,
270
  ),
271
  )
272
  return dc, power_W
273
 
 
 
274
  """Create IEEE 13-bus grid with datacenter connection."""
275
  def _build_grid(tap_pu: float, dc_bus: str) -> OpenDSSGrid:
276
  return OpenDSSGrid(
277
+ dss_case_dir=str(DSS_DIR),
278
+ dss_master_file=DSS_MASTER,
279
+ dt_s=Fraction(1),
280
+ source_pu=tap_pu,
281
+ initial_tap_position=INITIAL_TAPS,
282
  )
283
 
284
 
 
288
 
289
  """Run datacenter + grid simulation."""
290
  def _run(dc, grid, tap_pu, dc_bus, duration_s):
291
+ grid.attach_dc(dc, bus=dc_bus, connection_type="wye", power_factor=_DC_CONFIG.power_factor)
292
  coord = Coordinator(
293
+ datacenters=[dc],
294
+ grid=grid,
295
  controllers=[TapScheduleController(
296
+ schedule=TAP_CHANGE_SCHEDULE, # ← real schedule
297
+ dt_s=Fraction(1)
298
  )],
299
  total_duration_s=duration_s,
 
300
  )
301
  return coord.run()
302
 
 
303
  """
304
  Runs one full simulation job (datacenter + grid) in a worker process
305
  and returns results for the API.
306
  """
307
  def _run_full(req_dict: dict) -> dict:
308
 
309
+
310
  dc_bus = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
311
  replicas = max(1, req_dict["numReplicas"])
312
 
 
373
  """Get per-bus voltage (worst phase per bus)."""
374
  def _voltages(gs) -> list[float]:
375
  result = []
376
+ none_count = 0
377
  for name in BUSES_ORDERED:
378
  try:
379
  tp = gs.voltages[name]
 
383
  except Exception as e:
384
  logger.debug(f"Bus {name} voltage unavailable: {e}")
385
  result.append(None)
386
+
387
+ none_count = sum(1 for v in result if v is None)
388
+ if none_count > 3:
389
+ logger.warning(f"OpenDSS convergence failure: {none_count}/13 buses returned None")
390
+
391
+ known = [v for v in result if v is not None]
392
+ avg = sum(known) / len(known) if known else 1.0
393
+ return [v if v is not None else avg for v in result]
394
 
395
 
396
  # ── FastAPI────────────────────────────────────────────────────────────────
 
398
  app = FastAPI()
399
  app.add_middleware(
400
  CORSMiddleware,
401
+ allow_origins=["*"],
402
+ allow_credentials=False,
403
  allow_methods=["*"],
404
  allow_headers=["*"],
 
405
  )
406
 
407
 
 
519
  return Response(content=png, media_type="image/png")
520
 
521
 
522
+
523
+
524
+
525
+
526
+ def _serialize_tick(tick, req_dict: dict, raw_power_W: list[float]) -> dict:
527
+ """Serialize one TickOutput to a small JSON-safe dict for the frontend."""
528
+ t = tick.t_s
529
+
530
+ # Grid voltages (may be None if grid didn't tick this step)
531
+ voltages = None
532
+ min_v = max_v = target_v = None
533
+ if tick.grid_state is not None:
534
+ voltages = _voltages(tick.grid_state)
535
+ min_v = min(voltages)
536
+ max_v = max(voltages)
537
+ target_v = voltages[req_dict["targetBus"] - 1]
538
+
539
+ # DC power (sum all sites)
540
+ kw = 0.0
541
+ batch_by_model: dict[str, int] = {}
542
+ for dc_name, ds in tick.dc_states.items():
543
+ pw = ds.power_w
544
+ kw += float((pw.a + pw.b + pw.c) / 1000)
545
+ if hasattr(ds, "batch_size_by_model"):
546
+ batch_by_model.update(ds.batch_size_by_model)
547
+ if math.isnan(kw):
548
+ kw = 0.0
549
+
550
+ trace_idx = min(int(t / 0.1), len(raw_power_W) - 1) if raw_power_W else 0
551
+ raw_kw = raw_power_W[trace_idx] / 1000.0 if raw_power_W else kw
552
+
553
+ events = [{"type": e.event_type, "data": e.data} for e in tick.sim_events]
554
+
555
+ return {
556
+ "time": float(t),
557
+ "gpu_power_kW": kw,
558
+ "gpu_power_raw_kW": raw_kw,
559
+ "active_gpus": req_dict["numReplicas"] * req_dict["numGpus"],
560
+ "batch_by_model": batch_by_model,
561
+ "voltages": voltages,
562
+ "min_voltage": min_v,
563
+ "max_voltage": max_v,
564
+ "target_bus_voltage": target_v,
565
+ "sim_events": events,
566
+ }
567
+
568
+
569
def _run_streaming(req_dict: dict):
    """Generator that runs one simulation and yields serialized ticks.

    Builds the datacenter from the requested trace, attaches it to a fresh
    grid, then steps the coordinator. Only every ``sampleInterval``-th tick
    (at least every tick) is serialized and yielded. The coordinator is
    stopped when iteration ends, whether normally or through an exception.

    Args:
        req_dict: Request payload; reads ``targetBus``, ``numReplicas``,
            ``modelLabel``, ``numGpus``, ``maxNumSeqs``, ``durationS``,
            ``substationVoltage`` and ``sampleInterval``.

    Yields:
        One dict per sampled tick, as produced by ``_serialize_tick``.
    """
    # Unknown bus indices fall back to bus "671".
    bus_name = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
    replica_count = max(1, req_dict["numReplicas"])

    dc, raw_power_W = _build_dc_from_real_trace(
        model_label=req_dict["modelLabel"],
        num_gpus=req_dict["numGpus"],
        max_num_seqs=req_dict["maxNumSeqs"],
        num_replicas=replica_count,
        duration_s=req_dict["durationS"],
    )

    grid = _build_grid(req_dict["substationVoltage"], bus_name)
    grid.attach_dc(
        dc,
        bus=bus_name,
        connection_type="wye",
        power_factor=_DC_CONFIG.power_factor,
    )

    tap_controller = TapScheduleController(
        schedule=TAP_CHANGE_SCHEDULE,
        dt_s=Fraction(1),
    )
    coord = Coordinator(
        datacenters=[dc],
        grid=grid,
        controllers=[tap_controller],
        total_duration_s=req_dict["durationS"],
    )

    stride = max(1, req_dict["sampleInterval"])
    try:
        for tick_no, tick in enumerate(coord.run_iter()):
            if tick_no % stride != 0:
                continue
            yield _serialize_tick(tick, req_dict, raw_power_W)
    finally:
        coord.stop()
605
+
606
@app.websocket("/ws/sim-stream")
async def sim_stream(ws: WebSocket):
    """WebSocket endpoint: run one simulation and send its time series row by row.

    Protocol: the client sends a single JSON request that must validate as an
    ``LLMImpactRequest``. The server replies with one JSON message per entry
    of the result's ``timeSeries`` followed by ``{"done": true}``, or with
    ``{"error": "<message>"}`` if anything fails.

    The simulation runs to completion in the ``_pool`` executor before the
    first row is sent, so rows are replayed from a finished result rather
    than produced live.
    """
    # Imported locally so the handler does not depend on a module-level
    # ``import asyncio`` being present.
    import asyncio

    await ws.accept()
    try:
        req_dict = await ws.receive_json()
        req = LLMImpactRequest(**req_dict)
        logger.info(f"WS stream: {req.modelLabel} bus={req.targetBus}")

        # Off-load the blocking simulation so the event loop stays responsive.
        # get_running_loop() is the recommended call inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(_pool, _run_full, req.dict())

        # Replay the completed result one row at a time.
        for row in result["timeSeries"]:
            await ws.send_json(row)

        await ws.send_json({"done": True})

    except WebSocketDisconnect:
        logger.info("WS client disconnected")
    except Exception as e:
        logger.exception("WS stream failed")
        try:
            await ws.send_json({"error": str(e)})
        except Exception:
            # Best effort only: the socket may already be closed. Record the
            # failure instead of dropping it silently.
            logger.debug("WS error report could not be delivered", exc_info=True)
632
  if __name__ == "__main__":
633
  logger.info("=" * 70)
634
  logger.info(f"Data dir: {_DATA_DIR} ready={_DATA_DIR.exists()}")
 
638
  logger.info(f"Models: {models}")
639
  logger.info(f"Traces: {len(df)} configurations")
640
  logger.info("=" * 70)
641
+ uvicorn.run("server:app", host="0.0.0.0", port=8080, workers=1, log_level="info", ws_ping_interval=None)