github-actions[bot] commited on
Commit
817f4f7
·
1 Parent(s): 83aa768

deploy: sync from GitHub 2026-05-04T13:40:43Z

Browse files
Files changed (1) hide show
  1. server.py +172 -100
server.py CHANGED
@@ -33,8 +33,32 @@ from openg2g.grid.config import TapPosition
33
  from openg2g.controller.tap_schedule import TapScheduleController
34
  from openg2g.metrics.voltage import compute_allbus_voltage_stats
35
 
36
- #run one simulation at a time
37
- dss_lock = threading.Lock()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  DSS_DIR = Path(__file__).parent / "examples/ieee13"
40
  DSS_MASTER = "IEEE13Nodeckt.dss"
@@ -225,19 +249,85 @@ def _make_tap(v: float):
225
 
226
  """Run datacenter + grid simulation."""
227
  def _run(dc, grid, tap_pu, dc_bus, duration_s):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
- #run one simulation at time
230
 
231
- with dss_lock:
232
- coord = Coordinator(
233
- datacenter=dc, grid=grid,
234
- controllers=[TapScheduleController(
235
- schedule=_make_tap(tap_pu), dt_s=Fraction(1)
236
- )],
237
- total_duration_s=duration_s,
238
- dc_bus=dc_bus,
239
- )
240
- return coord.run()
241
 
242
  """Get per-bus voltage (worst phase per bus)."""
243
  def _voltages(gs, debug=False) -> list[float]:
@@ -263,10 +353,11 @@ def _voltages(gs, debug=False) -> list[float]:
263
  app = FastAPI()
264
  app.add_middleware(
265
  CORSMiddleware,
266
- allow_origins=["https://gpu2grid.io"],
267
  allow_credentials=True,
268
  allow_methods=["*"],
269
  allow_headers=["*"],
 
270
  )
271
 
272
 
@@ -297,9 +388,44 @@ def health():
297
  return {"status": "ok", "data_ready": _DATA_DIR.exists(),
298
  "message": "gpu2grid OpenDSS server"}
299
 
300
- @app.get("/health")
301
- def health():
302
- return "OK"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
 
305
  """Return available traces"""
@@ -312,7 +438,6 @@ def list_traces():
312
 
313
  traces = df[["model_label","num_gpus","max_num_seqs"]].to_dict("records")
314
 
315
- # Group by model for convenient frontend rendering
316
  models = []
317
  for model_label, group in df.groupby("model_label"):
318
  models.append({
@@ -334,13 +459,13 @@ def list_traces():
334
  """Baseline grid simulation, no workload"""
335
  @app.post("/api/powerflow")
336
  async def powerflow(req: PowerflowRequest):
337
- print(f"\n📊 Powerflow v={req.substationVoltage}")
338
  try:
339
  dc = _build_dc(scale=0.001, duration_s=5)
340
  grid = _build_grid(req.substationVoltage, "671")
341
  log = _run(dc, grid, req.substationVoltage, "671", 5)
342
  vs = _voltages(log.grid_states[-1], debug=True)
343
- print(f" min={min(vs):.4f} max={max(vs):.4f}")
344
  return {"buses": [{"id": i+1, "voltage": v, "activePower": 0.0,
345
  "reactivePower": 0.0} for i, v in enumerate(vs)],
346
  "lines": []}
@@ -353,89 +478,36 @@ async def powerflow(req: PowerflowRequest):
353
  """Simulate AI workload impact on grid using GPU traces."""
354
  @app.post("/api/llm-impact")
355
  async def llm_impact(req: LLMImpactRequest):
356
- # 1. Map target bus index to OpenDSS bus name
357
- dc_bus = BUS_INDEX_TO_NAME.get(req.targetBus, "671")
358
-
359
- # 2. Use the exact replica count from the frontend
360
- replicas = max(1, req.numReplicas)
361
 
362
- print(f"\n🤖 LLM Impact Simulation")
363
- print(f" Bus: {req.targetBus} ({dc_bus}) | Model: {req.modelLabel}")
364
- print(f" Config: {req.numGpus} GPUs/replica | {req.maxNumSeqs} Seq Len")
365
- print(f" Replicas: {replicas} | Substation V: {req.substationVoltage}")
 
366
 
367
- try:
368
- # 3. build dc from real trace
369
- dc, raw_power_W = _build_dc_from_real_trace(
370
- model_label = req.modelLabel,
371
- num_gpus = req.numGpus,
372
- max_num_seqs = req.maxNumSeqs,
373
- num_replicas = replicas,
374
- duration_s = req.durationS,
375
- )
 
 
 
 
 
 
 
 
 
 
 
376
 
377
- # 4. Run the grid simulation
378
- grid = _build_grid(req.substationVoltage, dc_bus)
379
- log = _run(dc, grid, req.substationVoltage, dc_bus, req.durationS)
380
-
381
- # 5. Process results for frontebd
382
- step = max(1, req.sampleInterval)
383
- gs_sampled = log.grid_states[::step]
384
- t_sampled = list(log.time_s[::step])
385
- dc_states = log.dc_states
386
-
387
- results = []
388
- for i, (t, gs) in enumerate(zip(t_sampled, gs_sampled)):
389
- vs = _voltages(gs, debug=(i == 0))
390
-
391
- # Match grid time to DC power state
392
- dc_i = min(range(len(dc_states)), key=lambda j: abs(dc_states[j].time_s - t))
393
- ds = dc_states[dc_i]
394
-
395
- # Sum power across phases A, B, C (convert Watts to kW)
396
- kw = float((ds.power_w.a + ds.power_w.b + ds.power_w.c) / 1000)
397
- if math.isnan(kw): kw = 0.0
398
-
399
- # Match with the raw trace index for display
400
- trace_idx = min(int(t / 0.1), len(raw_power_W) - 1) if raw_power_W else 0
401
- raw_kw = raw_power_W[trace_idx] / 1000.0 if raw_power_W else kw
402
-
403
- results.append({
404
- "time": float(t),
405
- "gpu_power_W": kw * 1000,
406
- "gpu_power_kW": kw,
407
- "gpu_power_raw_kW": raw_kw,
408
- "gpu_reactive_kVAR": kw * 0.329,
409
- "active_gpus": replicas * req.numGpus,
410
- "voltages": vs,
411
- "min_voltage": min(vs),
412
- "max_voltage": max(vs),
413
- "target_bus_voltage": vs[req.targetBus - 1],
414
- "total_load_kW": kw,
415
- })
416
-
417
- # 6. Return standard response
418
- return {
419
- "numSamples": len(results),
420
- "targetBus": req.targetBus,
421
- "modelLabel": req.modelLabel,
422
- "numGpus": req.numGpus,
423
- "maxNumSeqs": req.maxNumSeqs,
424
- "numReplicas": replicas,
425
- "duration": float(max(r["time"] for r in results) if results else 0),
426
- "minVoltage": float(min(r["min_voltage"] for r in results) if results else 1.0),
427
- "maxVoltage": float(max(r["max_voltage"] for r in results) if results else 1.0),
428
- "avgGpuPower": float(sum(r["gpu_power_W"] for r in results) / len(results) if results else 0),
429
- "peakGpuPower": float(max(r["gpu_power_W"] for r in results) if results else 0),
430
- "timeSeries": results,
431
- }
432
 
433
- except Exception as e:
434
- import traceback
435
- traceback.print_exc()
436
- # Very important: if the model_label doesn't match the CSV names,
437
- # _get_trace_power will raise a ValueError. This catch will show you why.
438
- raise HTTPException(status_code=500, detail=str(e))
439
 
440
  @app.post("/api/heatmap")
441
  async def heatmap(req: HeatmapRequest):
@@ -464,4 +536,4 @@ if __name__ == "__main__":
464
  print(f" Models: {models}")
465
  print(f" Traces: {len(df)} configurations")
466
  print("="*70 + "\n")
467
- uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info")
 
33
  from openg2g.controller.tap_schedule import TapScheduleController
34
  from openg2g.metrics.voltage import compute_allbus_voltage_stats
35
 
36
+ import asyncio, uuid, time
37
+ from concurrent.futures import ProcessPoolExecutor
38
+
39
+ import sqlite3, json
40
+
41
+ conn = sqlite3.connect("jobs.db", check_same_thread=False, timeout=30)
42
+ conn.execute("PRAGMA journal_mode=WAL;")
43
+
44
+
45
+ # create table to track background simulation jobs
46
+ conn.execute("""
47
+ CREATE TABLE IF NOT EXISTS jobs (
48
+ id TEXT PRIMARY KEY,
49
+ status TEXT,
50
+ result TEXT,
51
+ error TEXT
52
+ )
53
+ """)
54
+ conn.commit()
55
+
56
+ #currently set to 2 for free tier at hf
57
+ _pool = ProcessPoolExecutor(max_workers=2)
58
+ _jobs: dict = {}
59
+ _start_time = time.time()
60
+
61
+
62
 
63
  DSS_DIR = Path(__file__).parent / "examples/ieee13"
64
  DSS_MASTER = "IEEE13Nodeckt.dss"
 
249
 
250
  """Run datacenter + grid simulation."""
251
  def _run(dc, grid, tap_pu, dc_bus, duration_s):
252
+ coord = Coordinator(
253
+ datacenter=dc, grid=grid,
254
+ controllers=[TapScheduleController(
255
+ schedule=_make_tap(tap_pu), dt_s=Fraction(1)
256
+ )],
257
+ total_duration_s=duration_s,
258
+ dc_bus=dc_bus,
259
+ )
260
+ return coord.run()
261
+
262
+
263
+ """
264
+ Runs one full simulation job (datacenter + grid) in a worker process
265
+ and returns results for the API.
266
+ """
267
+ def _run_full(req_dict: dict) -> dict:
268
+
269
+ dc_bus = BUS_INDEX_TO_NAME.get(req_dict["targetBus"], "671")
270
+ replicas = max(1, req_dict["numReplicas"])
271
+
272
+ dc, raw_power_W = _build_dc_from_real_trace(
273
+ model_label = req_dict["modelLabel"],
274
+ num_gpus = req_dict["numGpus"],
275
+ max_num_seqs = req_dict["maxNumSeqs"],
276
+ num_replicas = replicas,
277
+ duration_s = req_dict["durationS"],
278
+ )
279
+ grid = _build_grid(req_dict["substationVoltage"], dc_bus)
280
+ log = _run(dc, grid, req_dict["substationVoltage"], dc_bus, req_dict["durationS"])
281
+
282
+ step = max(1, req_dict["sampleInterval"])
283
+ gs_sampled = log.grid_states[::step]
284
+ t_sampled = list(log.time_s[::step])
285
+ dc_states = log.dc_states
286
+
287
+ results = []
288
+ for i, (t, gs) in enumerate(zip(t_sampled, gs_sampled)):
289
+ vs = _voltages(gs)
290
+ dc_i = min(range(len(dc_states)), key=lambda j: abs(dc_states[j].time_s - t))
291
+ ds = dc_states[dc_i]
292
+ kw = float((ds.power_w.a + ds.power_w.b + ds.power_w.c) / 1000)
293
+
294
+
295
+ if math.isnan(kw): kw = 0.0
296
+ trace_idx = min(int(t / 0.1), len(raw_power_W) - 1) if raw_power_W else 0
297
+ raw_kw = raw_power_W[trace_idx] / 1000.0 if raw_power_W else kw
298
+ results.append({
299
+ "time": float(t),
300
+ "gpu_power_W": kw * 1000,
301
+ "gpu_power_kW": kw,
302
+ "gpu_power_raw_kW": raw_kw,
303
+ "gpu_reactive_kVAR": kw * 0.329,
304
+ "active_gpus": replicas * req_dict["numGpus"],
305
+ "voltages": vs,
306
+ "min_voltage": min(vs),
307
+ "max_voltage": max(vs),
308
+ "target_bus_voltage": vs[req_dict["targetBus"] - 1],
309
+ "total_load_kW": kw,
310
+ })
311
+
312
+ return {
313
+ "numSamples": len(results),
314
+ "targetBus": req_dict["targetBus"],
315
+ "modelLabel": req_dict["modelLabel"],
316
+ "numGpus": req_dict["numGpus"],
317
+
318
+
319
+ "maxNumSeqs": req_dict["maxNumSeqs"],
320
+ "numReplicas": replicas,
321
+ "duration": float(max(r["time"] for r in results) if results else 0),
322
+ "minVoltage": float(min(r["min_voltage"] for r in results) if results else 1.0),
323
+ "maxVoltage": float(max(r["max_voltage"] for r in results) if results else 1.0),
324
+ "avgGpuPower": float(sum(r["gpu_power_W"] for r in results) / len(results) if results else 0),
325
+ "peakGpuPower": float(max(r["gpu_power_W"] for r in results) if results else 0),
326
+ "timeSeries": results,
327
+ }
328
+
329
 
 
330
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  """Get per-bus voltage (worst phase per bus)."""
333
  def _voltages(gs, debug=False) -> list[float]:
 
353
  app = FastAPI()
354
  app.add_middleware(
355
  CORSMiddleware,
356
+ allow_origins=["https://gpu2grid.io", "http://localhost:5173", "http://localhost:5174"],
357
  allow_credentials=True,
358
  allow_methods=["*"],
359
  allow_headers=["*"],
360
+ allow_origin_regex=".*",
361
  )
362
 
363
 
 
388
  return {"status": "ok", "data_ready": _DATA_DIR.exists(),
389
  "message": "gpu2grid OpenDSS server"}
390
 
391
+
392
+
393
+ @app.get("/api/status")
394
+ def status():
395
+ active = conn.execute(
396
+ "SELECT COUNT(*) FROM jobs WHERE status='pending'"
397
+ ).fetchone()[0]
398
+
399
+ total = conn.execute(
400
+ "SELECT COUNT(*) FROM jobs"
401
+ ).fetchone()[0]
402
+
403
+ return {
404
+ "active_jobs": active,
405
+ "total_jobs": total,
406
+ "workers": _pool._max_workers,
407
+ }
408
+
409
+
410
+
411
+ @app.get("/api/job/{job_id}")
412
+ def get_job(job_id: str):
413
+ row = conn.execute(
414
+ "SELECT status, result, error FROM jobs WHERE id=?",
415
+ (job_id,)
416
+ ).fetchone()
417
+
418
+ if not row:
419
+ raise HTTPException(404, "Job not found")
420
+
421
+ status, result, error = row
422
+
423
+ if status == "done":
424
+ return {"status": status, "result": json.loads(result)}
425
+ elif status == "error":
426
+ return {"status": status, "detail": error}
427
+ else:
428
+ return {"status": status}
429
 
430
 
431
  """Return available traces"""
 
438
 
439
  traces = df[["model_label","num_gpus","max_num_seqs"]].to_dict("records")
440
 
 
441
  models = []
442
  for model_label, group in df.groupby("model_label"):
443
  models.append({
 
459
  """Baseline grid simulation, no workload"""
460
  @app.post("/api/powerflow")
461
  async def powerflow(req: PowerflowRequest):
462
+ print(f"\nPowerflow v={req.substationVoltage}")
463
  try:
464
  dc = _build_dc(scale=0.001, duration_s=5)
465
  grid = _build_grid(req.substationVoltage, "671")
466
  log = _run(dc, grid, req.substationVoltage, "671", 5)
467
  vs = _voltages(log.grid_states[-1], debug=True)
468
+ print(f" min={min(vs):.4f} max={max(vs):.4f}")
469
  return {"buses": [{"id": i+1, "voltage": v, "activePower": 0.0,
470
  "reactivePower": 0.0} for i, v in enumerate(vs)],
471
  "lines": []}
 
478
  """Simulate AI workload impact on grid using GPU traces."""
479
  @app.post("/api/llm-impact")
480
  async def llm_impact(req: LLMImpactRequest):
481
+ job_id = uuid.uuid4().hex
 
 
 
 
482
 
483
+ conn.execute(
484
+ "INSERT INTO jobs (id, status) VALUES (?, ?)",
485
+ (job_id, "pending")
486
+ )
487
+ conn.commit()
488
 
489
+ async def run_and_store():
490
+ try:
491
+ loop = asyncio.get_event_loop()
492
+ result = await loop.run_in_executor(_pool, _run_full, req.dict())
493
+
494
+ conn.execute(
495
+ "UPDATE jobs SET status=?, result=? WHERE id=?",
496
+ ("done", json.dumps(result), job_id)
497
+ )
498
+ conn.commit()
499
+
500
+ except Exception as e:
501
+ conn.execute(
502
+ "UPDATE jobs SET status=?, error=? WHERE id=?",
503
+ ("error", str(e), job_id)
504
+ )
505
+ conn.commit()
506
+
507
+ asyncio.create_task(run_and_store())
508
+ return {"job_id": job_id}
509
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
 
 
 
 
 
 
 
511
 
512
  @app.post("/api/heatmap")
513
  async def heatmap(req: HeatmapRequest):
 
536
  print(f" Models: {models}")
537
  print(f" Traces: {len(df)} configurations")
538
  print("="*70 + "\n")
539
+ uvicorn.run("server:app", host="0.0.0.0", port=8080, workers=1, log_level="info")