FeilongTang committed on
Commit
047a9df
·
1 Parent(s): 901e5ca

Add 'Target canvases (total per video)' slider, default 4

Browse files

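Decouples the canvas count from the GOP grouping. The total is split
across GOP groups as evenly as possible (the first `remainder` groups
get +1 each, with a floor of 1 per group so nothing is invisible; when
there are more groups than target canvases, the actual total therefore
equals the number of groups rather than the target).
Each group's frame range is then sliced into k consecutive sub-ranges,
one IPPP canvas per sub-range (k is capped at the group's frame count).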

Examples (16 sampled frames, target=4):
GOP=4 -> 4 groups × 1 canvas = 4
GOP=8 -> 2 groups × 2 canvases = 4
GOP=16 -> 1 group × 4 canvases = 4
Dynamic -> 4 groups × 1 canvas = 4

API
- pack_canvases_per_group() now returns (canvases, sub_ranges,
n_selected) and takes target_canvases.
- Caption switches from 'Group K/N' to 'Canvas K/N · group G · I@#s
+ p P-frames' so the sub-range origin is visible.
- Run info JSON 'canvases' entries get 'sub_range', and the params
block gains 'target_canvases'.
- DEMO_PRESET extended by one value.

Files changed (1) hide show
  1. app.py +121 -58
app.py CHANGED
@@ -58,6 +58,7 @@ DEMO_PRESET = (
58
  96.0, # bitcost_pct
59
  0.55, # fade_strength
60
  "dynamic", # gop
 
61
  )
62
 
63
 
@@ -447,68 +448,118 @@ def write_mp4(frames: List[np.ndarray], path: str, fps: float) -> None:
447
  proc.kill()
448
 
449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  def pack_canvases_per_group(
451
  frames: List[np.ndarray],
452
  masks: List[np.ndarray],
453
  groups: List[Tuple[int, int]],
454
  patch: int,
455
- ) -> Tuple[List[np.ndarray], int]:
456
- """One canvas per GOP group, structured as I-frame + P-frame patches
457
- (IPPP order, matching the codec convention).
458
-
459
- Within each group [s..e]:
460
- - frame s is the I-frame: its WHOLE image is laid down as the top
461
- of the canvas (the anchor / keyframe).
462
- - frames s+1..e are P-frames: only their selected patches go below
463
- the I-frame, packed in time-order, raster scan, in a wb-wide grid.
464
-
465
- The canvas width is locked to the frame's patch-grid width so the
466
- I-frame slots in cleanly and the P-grid below aligns block-for-block.
 
 
 
 
 
 
 
467
  """
468
  canvases: List[np.ndarray] = []
 
469
  total_selected = 0
470
- for (s, e) in groups:
471
- if s >= len(frames):
472
- continue
473
- i_frame = frames[s]
474
- h, w = i_frame.shape[:2]
475
- hb, wb = h // patch, w // patch
476
- canvas_w = wb * patch
477
- # I-frame block (already a multiple of patch from smart_resize).
478
- i_block = i_frame[: hb * patch, : canvas_w].copy()
479
- total_selected += hb * wb # I-frame counts as fully kept.
480
-
481
- # Collect selected patches from P-frames (s+1..e), time-major.
482
- p_patches: List[np.ndarray] = []
483
- for k in range(s + 1, e + 1):
484
- if k >= len(frames):
485
- break
486
- f, m = frames[k], masks[k]
487
- for i in range(m.shape[0]):
488
- for j in range(m.shape[1]):
489
- if m[i, j]:
490
- p_patches.append(
491
- f[i * patch:(i + 1) * patch, j * patch:(j + 1) * patch].copy()
492
- )
493
- total_selected += len(p_patches)
494
 
495
- if not p_patches:
496
- canvases.append(i_block)
497
- continue
498
 
499
- # Lay P-patches in a wb-wide grid below the I-frame.
500
- rows = (len(p_patches) + wb - 1) // wb
501
- p_grid = np.full((rows * patch, canvas_w, 3), 255, dtype=np.uint8)
502
- for idx, p in enumerate(p_patches):
503
- r, c = divmod(idx, wb)
504
- p_grid[r * patch:(r + 1) * patch, c * patch:(c + 1) * patch] = p
505
-
506
- canvas = np.vstack([i_block, p_grid])
507
- canvases.append(canvas)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
 
509
  if not canvases:
510
  canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
511
- return canvases, total_selected
 
512
 
513
 
514
  def make_charts(
@@ -630,6 +681,7 @@ def process(
630
  bitcost_pct: float = 99.0,
631
  fade_strength: float = 0.55,
632
  gop: str = "global",
 
633
  progress=gr.Progress(track_tqdm=False),
634
  ):
635
  if not video_path:
@@ -721,19 +773,20 @@ def process(
721
  vis_fps = max(2.0, min(8.0, (meta.get("fps") or 25.0) / 4.0))
722
  write_mp4(vis, vis_path, vis_fps)
723
 
724
- progress(0.85, desc="Packing canvases (one per GOP group)")
725
- canvases, n_selected = pack_canvases_per_group(
726
  resized, masks, groups, int(patch_size),
 
727
  )
728
  canvas_items: List[Tuple[str, str]] = []
729
  for idx, canv in enumerate(canvases):
730
  cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
731
  cv2.imwrite(cp, canv)
732
- s_idx, e_idx = groups[idx] if idx < len(groups) else (idx, idx)
733
- n_p = max(0, e_idx - s_idx) # number of P-frames in this group
734
  caption = (
735
- f"Group {idx + 1}/{len(canvases)} · I-frame @ sampled #{s_idx} "
736
- f"+ {n_p} P-frame{'s' if n_p != 1 else ''}"
737
  )
738
  canvas_items.append((cp, caption))
739
 
@@ -754,6 +807,7 @@ def process(
754
  "bitcost_pct": float(bitcost_pct),
755
  "fade_strength": float(fade_strength),
756
  "gop": gop_resolved,
 
757
  },
758
  "gop_groups": [
759
  {
@@ -787,7 +841,8 @@ def process(
787
  {
788
  "index": i,
789
  "size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
790
- "group": list(groups[i]) if i < len(groups) else None,
 
791
  "structure": "IPPP — first frame full (I), rest contribute "
792
  "only their selected patches (P).",
793
  }
@@ -1249,6 +1304,14 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1249
  "each. Dynamic mode mirrors codec_tools' readiness "
1250
  "grouping (equal-energy groups).",
1251
  )
 
 
 
 
 
 
 
 
1252
 
1253
  with gr.Accordion("Time window", open=False):
1254
  with gr.Row():
@@ -1355,7 +1418,7 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1355
  viz_mode, heatmap_alpha,
1356
  start_sec, end_sec,
1357
  saliency_signal, score_log_scale, bitcost_pct, fade_strength,
1358
- gop,
1359
  ],
1360
  outputs=[vis_out, canvas_out, info_out, chart_out],
1361
  )
@@ -1367,7 +1430,7 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1367
  video_in, sample_frames, patch_size, top_k, max_pixels,
1368
  viz_mode, heatmap_alpha, start_sec, end_sec,
1369
  saliency_signal, score_log_scale, bitcost_pct, fade_strength,
1370
- gop,
1371
  ],
1372
  )
1373
 
 
58
  96.0, # bitcost_pct
59
  0.55, # fade_strength
60
  "dynamic", # gop
61
+ 4, # target_canvases
62
  )
63
 
64
 
 
448
  proc.kill()
449
 
450
 
451
+ def _build_ippp_canvas(
452
+ frames: List[np.ndarray], masks: List[np.ndarray],
453
+ i_idx: int, p_range: range, patch: int,
454
+ ) -> np.ndarray:
455
+ """Build one IPPP canvas: full I-frame on top, P-frame selected patches
456
+ in a wb-wide raster grid below."""
457
+ i_frame = frames[i_idx]
458
+ h, w = i_frame.shape[:2]
459
+ hb, wb = h // patch, w // patch
460
+ canvas_w = wb * patch
461
+ i_block = i_frame[: hb * patch, : canvas_w].copy()
462
+
463
+ p_patches: List[np.ndarray] = []
464
+ for k in p_range:
465
+ if k >= len(frames):
466
+ break
467
+ f, m = frames[k], masks[k]
468
+ for i in range(m.shape[0]):
469
+ for j in range(m.shape[1]):
470
+ if m[i, j]:
471
+ p_patches.append(
472
+ f[i * patch:(i + 1) * patch, j * patch:(j + 1) * patch].copy()
473
+ )
474
+
475
+ if not p_patches:
476
+ return i_block
477
+
478
+ rows = (len(p_patches) + wb - 1) // wb
479
+ p_grid = np.full((rows * patch, canvas_w, 3), 255, dtype=np.uint8)
480
+ for idx, p in enumerate(p_patches):
481
+ r, c = divmod(idx, wb)
482
+ p_grid[r * patch:(r + 1) * patch, c * patch:(c + 1) * patch] = p
483
+ return np.vstack([i_block, p_grid])
484
+
485
+
486
+ def _allocate_canvases_per_group(
487
+ target_canvases: int, num_groups: int,
488
+ ) -> List[int]:
489
+ """Split a total target canvas count across N groups as evenly as
490
+ possible; the first `remainder` groups get +1 each."""
491
+ target = max(1, int(target_canvases))
492
+ n = max(1, int(num_groups))
493
+ base, rem = divmod(target, n)
494
+ out = [base + (1 if i < rem else 0) for i in range(n)]
495
+ # Floor to at least 1 canvas per group so no group is invisible.
496
+ return [max(1, x) for x in out]
497
+
498
+
499
  def pack_canvases_per_group(
500
  frames: List[np.ndarray],
501
  masks: List[np.ndarray],
502
  groups: List[Tuple[int, int]],
503
  patch: int,
504
+ target_canvases: int = 4,
505
+ ) -> Tuple[List[np.ndarray], List[Tuple[int, int, int]], int]:
506
+ """Pack exactly `target_canvases` IPPP canvases for the whole video,
507
+ distributing them across GOP groups as evenly as possible.
508
+
509
+ Each group's frame range [s..e] is split into K consecutive sub-ranges
510
+ (K = canvases allocated to that group). Each sub-range [ss..ee] becomes
511
+ one canvas:
512
+ - frame ss is the I-frame: its whole image goes to the canvas top.
513
+ - frames ss+1..ee are P-frames: only saliency-selected patches go
514
+ below the I-frame, packed time-major in a wb-wide raster grid.
515
+
516
+ Returns:
517
+ canvases — list of np.ndarray, length == target_canvases
518
+ (or fewer if some groups have only 1 frame).
519
+ sub_ranges — list of (group_idx, sub_start, sub_end) parallel to
520
+ canvases, for caption / debugging.
521
+ total_selected — I-frame patches (counted as full grid) + P-frame
522
+ selected patches across all canvases.
523
  """
524
  canvases: List[np.ndarray] = []
525
+ sub_ranges: List[Tuple[int, int, int]] = []
526
  total_selected = 0
527
+ if not groups or not frames:
528
+ return [np.full((patch, patch, 3), 255, dtype=np.uint8)], [(0, 0, 0)], 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
530
+ per_group_counts = _allocate_canvases_per_group(target_canvases, len(groups))
 
 
531
 
532
+ for g_idx, (s, e) in enumerate(groups):
533
+ if s >= len(frames):
534
+ continue
535
+ group_len = e - s + 1
536
+ k = max(1, min(per_group_counts[g_idx], group_len))
537
+ # Split [s..e] into k consecutive sub-ranges of (almost) equal size.
538
+ base, rem = divmod(group_len, k)
539
+ cursor = s
540
+ for sub_i in range(k):
541
+ sub_len = base + (1 if sub_i < rem else 0)
542
+ ss = cursor
543
+ ee = min(e, cursor + sub_len - 1)
544
+ cursor = ee + 1
545
+ canvas = _build_ippp_canvas(
546
+ frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
547
+ patch=patch,
548
+ )
549
+ canvases.append(canvas)
550
+ sub_ranges.append((g_idx, ss, ee))
551
+ # Accounting
552
+ i_h, i_w = canvas.shape[:2]
553
+ hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
554
+ total_selected += hb * wb # I-frame counts as fully kept.
555
+ for kk in range(ss + 1, ee + 1):
556
+ if kk < len(masks):
557
+ total_selected += int(masks[kk].sum())
558
 
559
  if not canvases:
560
  canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
561
+ sub_ranges = [(0, 0, 0)]
562
+ return canvases, sub_ranges, total_selected
563
 
564
 
565
  def make_charts(
 
681
  bitcost_pct: float = 99.0,
682
  fade_strength: float = 0.55,
683
  gop: str = "global",
684
+ target_canvases: int = 4,
685
  progress=gr.Progress(track_tqdm=False),
686
  ):
687
  if not video_path:
 
773
  vis_fps = max(2.0, min(8.0, (meta.get("fps") or 25.0) / 4.0))
774
  write_mp4(vis, vis_path, vis_fps)
775
 
776
+ progress(0.85, desc="Packing canvases (IPPP)")
777
+ canvases, sub_ranges, n_selected = pack_canvases_per_group(
778
  resized, masks, groups, int(patch_size),
779
+ target_canvases=int(target_canvases),
780
  )
781
  canvas_items: List[Tuple[str, str]] = []
782
  for idx, canv in enumerate(canvases):
783
  cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
784
  cv2.imwrite(cp, canv)
785
+ g_idx, ss, ee = sub_ranges[idx] if idx < len(sub_ranges) else (0, idx, idx)
786
+ n_p = max(0, ee - ss)
787
  caption = (
788
+ f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
789
+ f"I@#{ss} + {n_p} P-frame{'s' if n_p != 1 else ''}"
790
  )
791
  canvas_items.append((cp, caption))
792
 
 
807
  "bitcost_pct": float(bitcost_pct),
808
  "fade_strength": float(fade_strength),
809
  "gop": gop_resolved,
810
+ "target_canvases": int(target_canvases),
811
  },
812
  "gop_groups": [
813
  {
 
841
  {
842
  "index": i,
843
  "size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
844
+ "group": int(sub_ranges[i][0]) if i < len(sub_ranges) else None,
845
+ "sub_range": list(sub_ranges[i][1:3]) if i < len(sub_ranges) else None,
846
  "structure": "IPPP — first frame full (I), rest contribute "
847
  "only their selected patches (P).",
848
  }
 
1304
  "each. Dynamic mode mirrors codec_tools' readiness "
1305
  "grouping (equal-energy groups).",
1306
  )
1307
+ target_canvases = gr.Slider(
1308
+ 1, 16, value=4, step=1,
1309
+ label="Target canvases (total per video)",
1310
+ info="Fixed canvas count regardless of GOP. The budget is "
1311
+ "split across groups; each group is further sliced "
1312
+ "into sub-ranges of consecutive frames, one IPPP "
1313
+ "canvas per sub-range.",
1314
+ )
1315
 
1316
  with gr.Accordion("Time window", open=False):
1317
  with gr.Row():
 
1418
  viz_mode, heatmap_alpha,
1419
  start_sec, end_sec,
1420
  saliency_signal, score_log_scale, bitcost_pct, fade_strength,
1421
+ gop, target_canvases,
1422
  ],
1423
  outputs=[vis_out, canvas_out, info_out, chart_out],
1424
  )
 
1430
  video_in, sample_frames, patch_size, top_k, max_pixels,
1431
  viz_mode, heatmap_alpha, start_sec, end_sec,
1432
  saliency_signal, score_log_scale, bitcost_pct, fade_strength,
1433
+ gop, target_canvases,
1434
  ],
1435
  )
1436