Add 'Target canvases (total per video)' slider, default 4
Decouples the canvas count from the GOP grouping. The total is split
across GOP groups as evenly as possible (the first `remainder` groups
get +1 each, with a floor of 1 per group so that no group is invisible;
when there are more groups than target canvases, the total therefore
exceeds the target).
Each group's frame range is then sliced into k consecutive sub-ranges,
one IPPP canvas per sub-range.
Examples (16 sampled frames, target=4):
GOP=4 -> 4 groups × 1 canvas = 4
GOP=8 -> 2 groups × 2 canvases = 4
GOP=16 -> 1 group × 4 canvases = 4
Dynamic -> 4 groups × 1 canvas = 4
API
- pack_canvases_per_group() now returns (canvases, sub_ranges,
n_selected) and takes target_canvases.
- Caption switches from 'Group K/N' to 'Canvas K/N · group G · I@#s
+ p P-frames' so the sub-range origin is visible.
- Run info JSON 'canvases' entries get 'sub_range', and the params
block gains 'target_canvases'.
- DEMO_PRESET extended by one value.
|
@@ -58,6 +58,7 @@ DEMO_PRESET = (
|
|
| 58 |
96.0, # bitcost_pct
|
| 59 |
0.55, # fade_strength
|
| 60 |
"dynamic", # gop
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
|
|
@@ -447,68 +448,118 @@ def write_mp4(frames: List[np.ndarray], path: str, fps: float) -> None:
|
|
| 447 |
proc.kill()
|
| 448 |
|
| 449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
def pack_canvases_per_group(
|
| 451 |
frames: List[np.ndarray],
|
| 452 |
masks: List[np.ndarray],
|
| 453 |
groups: List[Tuple[int, int]],
|
| 454 |
patch: int,
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
"""
|
| 468 |
canvases: List[np.ndarray] = []
|
|
|
|
| 469 |
total_selected = 0
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
continue
|
| 473 |
-
i_frame = frames[s]
|
| 474 |
-
h, w = i_frame.shape[:2]
|
| 475 |
-
hb, wb = h // patch, w // patch
|
| 476 |
-
canvas_w = wb * patch
|
| 477 |
-
# I-frame block (already a multiple of patch from smart_resize).
|
| 478 |
-
i_block = i_frame[: hb * patch, : canvas_w].copy()
|
| 479 |
-
total_selected += hb * wb # I-frame counts as fully kept.
|
| 480 |
-
|
| 481 |
-
# Collect selected patches from P-frames (s+1..e), time-major.
|
| 482 |
-
p_patches: List[np.ndarray] = []
|
| 483 |
-
for k in range(s + 1, e + 1):
|
| 484 |
-
if k >= len(frames):
|
| 485 |
-
break
|
| 486 |
-
f, m = frames[k], masks[k]
|
| 487 |
-
for i in range(m.shape[0]):
|
| 488 |
-
for j in range(m.shape[1]):
|
| 489 |
-
if m[i, j]:
|
| 490 |
-
p_patches.append(
|
| 491 |
-
f[i * patch:(i + 1) * patch, j * patch:(j + 1) * patch].copy()
|
| 492 |
-
)
|
| 493 |
-
total_selected += len(p_patches)
|
| 494 |
|
| 495 |
-
|
| 496 |
-
canvases.append(i_block)
|
| 497 |
-
continue
|
| 498 |
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
|
| 509 |
if not canvases:
|
| 510 |
canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
|
| 511 |
-
|
|
|
|
| 512 |
|
| 513 |
|
| 514 |
def make_charts(
|
|
@@ -630,6 +681,7 @@ def process(
|
|
| 630 |
bitcost_pct: float = 99.0,
|
| 631 |
fade_strength: float = 0.55,
|
| 632 |
gop: str = "global",
|
|
|
|
| 633 |
progress=gr.Progress(track_tqdm=False),
|
| 634 |
):
|
| 635 |
if not video_path:
|
|
@@ -721,19 +773,20 @@ def process(
|
|
| 721 |
vis_fps = max(2.0, min(8.0, (meta.get("fps") or 25.0) / 4.0))
|
| 722 |
write_mp4(vis, vis_path, vis_fps)
|
| 723 |
|
| 724 |
-
progress(0.85, desc="Packing canvases (
|
| 725 |
-
canvases, n_selected = pack_canvases_per_group(
|
| 726 |
resized, masks, groups, int(patch_size),
|
|
|
|
| 727 |
)
|
| 728 |
canvas_items: List[Tuple[str, str]] = []
|
| 729 |
for idx, canv in enumerate(canvases):
|
| 730 |
cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
|
| 731 |
cv2.imwrite(cp, canv)
|
| 732 |
-
|
| 733 |
-
n_p = max(0,
|
| 734 |
caption = (
|
| 735 |
-
f"
|
| 736 |
-
f"+ {n_p} P-frame{'s' if n_p != 1 else ''}"
|
| 737 |
)
|
| 738 |
canvas_items.append((cp, caption))
|
| 739 |
|
|
@@ -754,6 +807,7 @@ def process(
|
|
| 754 |
"bitcost_pct": float(bitcost_pct),
|
| 755 |
"fade_strength": float(fade_strength),
|
| 756 |
"gop": gop_resolved,
|
|
|
|
| 757 |
},
|
| 758 |
"gop_groups": [
|
| 759 |
{
|
|
@@ -787,7 +841,8 @@ def process(
|
|
| 787 |
{
|
| 788 |
"index": i,
|
| 789 |
"size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
|
| 790 |
-
"group":
|
|
|
|
| 791 |
"structure": "IPPP — first frame full (I), rest contribute "
|
| 792 |
"only their selected patches (P).",
|
| 793 |
}
|
|
@@ -1249,6 +1304,14 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1249 |
"each. Dynamic mode mirrors codec_tools' readiness "
|
| 1250 |
"grouping (equal-energy groups).",
|
| 1251 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1252 |
|
| 1253 |
with gr.Accordion("Time window", open=False):
|
| 1254 |
with gr.Row():
|
|
@@ -1355,7 +1418,7 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1355 |
viz_mode, heatmap_alpha,
|
| 1356 |
start_sec, end_sec,
|
| 1357 |
saliency_signal, score_log_scale, bitcost_pct, fade_strength,
|
| 1358 |
-
gop,
|
| 1359 |
],
|
| 1360 |
outputs=[vis_out, canvas_out, info_out, chart_out],
|
| 1361 |
)
|
|
@@ -1367,7 +1430,7 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1367 |
video_in, sample_frames, patch_size, top_k, max_pixels,
|
| 1368 |
viz_mode, heatmap_alpha, start_sec, end_sec,
|
| 1369 |
saliency_signal, score_log_scale, bitcost_pct, fade_strength,
|
| 1370 |
-
gop,
|
| 1371 |
],
|
| 1372 |
)
|
| 1373 |
|
|
|
|
| 58 |
96.0, # bitcost_pct
|
| 59 |
0.55, # fade_strength
|
| 60 |
"dynamic", # gop
|
| 61 |
+
4, # target_canvases
|
| 62 |
)
|
| 63 |
|
| 64 |
|
|
|
|
| 448 |
proc.kill()
|
| 449 |
|
| 450 |
|
| 451 |
+
def _build_ippp_canvas(
    frames: List[np.ndarray], masks: List[np.ndarray],
    i_idx: int, p_range: range, patch: int,
) -> np.ndarray:
    """Build one IPPP canvas.

    The I-frame (``frames[i_idx]``) is cropped to a whole number of patches
    and placed at the top. Below it, every patch selected by the masks of
    the frames in ``p_range`` is laid out in a raster grid that is as wide
    as the I-frame block; unused cells of the last grid row stay white.

    Args:
        frames: Frames indexed by frame number.
        masks: Per-frame 2-D selection masks; a truthy cell ``[i, j]``
            selects the ``patch`` x ``patch`` tile at patch row ``i``,
            patch column ``j`` of the same-indexed frame.
        i_idx: Index of the I-frame.
        p_range: Indices of the P-frames, visited in order. Iteration stops
            at the first index that has no frame or no mask.
        patch: Patch edge length in pixels.

    Returns:
        The cropped I-frame alone when no patch was selected, otherwise the
        I-frame stacked on top of the patch grid.
    """
    i_frame = frames[i_idx]
    h, w = i_frame.shape[:2]
    hb, wb = h // patch, w // patch
    canvas_w = wb * patch
    i_block = i_frame[: hb * patch, :canvas_w].copy()

    # A P-frame needs both a frame and a mask; stop at whichever list ends
    # first instead of raising IndexError on a shorter ``masks`` list.
    usable = min(len(frames), len(masks))

    p_patches: List[np.ndarray] = []
    for k in p_range:
        if k >= usable:
            break
        f, m = frames[k], masks[k]
        # np.argwhere walks the mask in row-major order, i.e. the same
        # order as a nested ``for i ... for j ...`` scan.
        for i, j in np.argwhere(m):
            tile = f[i * patch:(i + 1) * patch, j * patch:(j + 1) * patch]
            # Mask cells that fall outside the frame's whole-patch area
            # give a short or empty tile which cannot fill a grid cell.
            if tile.shape[:2] != (patch, patch):
                continue
            p_patches.append(tile)

    # Also covers wb == 0: no full tile can exist then, so the division
    # below is never reached with a zero divisor.
    if not p_patches:
        return i_block

    rows = (len(p_patches) + wb - 1) // wb  # ceil(len / wb)
    p_grid = np.full((rows * patch, canvas_w, 3), 255, dtype=np.uint8)
    for idx, p in enumerate(p_patches):
        r, c = divmod(idx, wb)
        p_grid[r * patch:(r + 1) * patch, c * patch:(c + 1) * patch] = p
    return np.vstack([i_block, p_grid])
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
def _allocate_canvases_per_group(
    target_canvases: int, num_groups: int,
) -> List[int]:
    """Split a total canvas budget across groups as evenly as possible.

    Both arguments are coerced with ``int()`` and clamped to at least 1.
    The first ``target % groups`` groups receive one canvas more than the
    others, and every group is floored at one canvas so that no group ends
    up without any output.

    Note:
        Because of that floor the counts sum to ``max(target, groups)``:
        when there are more groups than requested canvases, the total
        exceeds ``target_canvases``.

    Returns:
        One canvas count per group (a single-element list when
        ``num_groups`` is below 1).
    """
    target = max(1, int(target_canvases))
    n = max(1, int(num_groups))
    base, rem = divmod(target, n)
    # With base == 0 the remainder bonus alone would leave the trailing
    # groups at zero, so the floor is applied in the same pass.
    return [max(1, base + (1 if i < rem else 0)) for i in range(n)]
|
| 497 |
+
|
| 498 |
+
|
| 499 |
def pack_canvases_per_group(
    frames: List[np.ndarray],
    masks: List[np.ndarray],
    groups: List[Tuple[int, int]],
    patch: int,
    target_canvases: int = 4,
) -> Tuple[List[np.ndarray], List[Tuple[int, int, int]], int]:
    """Pack roughly `target_canvases` IPPP canvases for the whole video,
    distributing them across GOP groups as evenly as possible.

    Each group's inclusive frame range [s..e] is split into K consecutive
    sub-ranges of (almost) equal length, where K is the number of canvases
    allocated to that group, capped at the group's frame count. Each
    sub-range [ss..ee] becomes one canvas:
      - frame ss is the I-frame: its whole image goes to the canvas top.
      - frames ss+1..ee are P-frames: only their mask-selected patches go
        below the I-frame, packed time-major in a raster grid.

    The canvas count is not guaranteed to equal `target_canvases`: it is
    lower when a group has fewer frames than its allocation or starts past
    the last frame, and higher when there are more groups than requested
    canvases (every group is allocated at least one).

    Args:
        frames: Frames indexed by frame number.
        masks: Per-frame patch-selection masks, parallel to `frames`.
        groups: Inclusive (start, end) frame-index pairs. An end index past
            the last frame is clamped to the last frame.
        patch: Patch edge length in pixels.
        target_canvases: Requested total number of canvases.

    Returns:
        canvases       — the packed canvases; a single white patch-sized
                         placeholder when nothing could be packed.
        sub_ranges     — (group_idx, sub_start, sub_end) tuples parallel to
                         canvases, for captions / debugging.
        total_selected — I-frame patches (counted as the full patch grid)
                         plus selected P-frame patches over all canvases.
    """
    if not groups or not frames:
        return [np.full((patch, patch, 3), 255, dtype=np.uint8)], [(0, 0, 0)], 0

    canvases: List[np.ndarray] = []
    sub_ranges: List[Tuple[int, int, int]] = []
    total_selected = 0
    last_frame = len(frames) - 1

    per_group_counts = _allocate_canvases_per_group(target_canvases, len(groups))

    for g_idx, (s, e) in enumerate(groups):
        if s > last_frame:
            continue
        # Clamp the group end to the available frames. Otherwise a later
        # sub-range could start past the last frame and its I-frame lookup
        # would raise IndexError. A malformed group (e < s) collapses to
        # the single frame s.
        e = max(s, min(e, last_frame))
        group_len = e - s + 1
        k = max(1, min(per_group_counts[g_idx], group_len))
        # Split [s..e] into k consecutive sub-ranges of (almost) equal
        # size; the first `rem` sub-ranges are one frame longer.
        base, rem = divmod(group_len, k)
        cursor = s
        for sub_i in range(k):
            sub_len = base + (1 if sub_i < rem else 0)
            ss = cursor
            ee = min(e, cursor + sub_len - 1)
            cursor = ee + 1
            canvas = _build_ippp_canvas(
                frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
                patch=patch,
            )
            canvases.append(canvas)
            sub_ranges.append((g_idx, ss, ee))
            # Accounting: the I-frame counts as fully kept, P-frames only
            # contribute the cells selected in their masks.
            hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
            total_selected += hb * wb
            for kk in range(ss + 1, ee + 1):
                if kk < len(masks):
                    total_selected += int(masks[kk].sum())

    if not canvases:
        canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
        sub_ranges = [(0, 0, 0)]
    return canvases, sub_ranges, total_selected
|
| 563 |
|
| 564 |
|
| 565 |
def make_charts(
|
|
|
|
| 681 |
bitcost_pct: float = 99.0,
|
| 682 |
fade_strength: float = 0.55,
|
| 683 |
gop: str = "global",
|
| 684 |
+
target_canvases: int = 4,
|
| 685 |
progress=gr.Progress(track_tqdm=False),
|
| 686 |
):
|
| 687 |
if not video_path:
|
|
|
|
| 773 |
vis_fps = max(2.0, min(8.0, (meta.get("fps") or 25.0) / 4.0))
|
| 774 |
write_mp4(vis, vis_path, vis_fps)
|
| 775 |
|
| 776 |
+
progress(0.85, desc="Packing canvases (IPPP)")
|
| 777 |
+
canvases, sub_ranges, n_selected = pack_canvases_per_group(
|
| 778 |
resized, masks, groups, int(patch_size),
|
| 779 |
+
target_canvases=int(target_canvases),
|
| 780 |
)
|
| 781 |
canvas_items: List[Tuple[str, str]] = []
|
| 782 |
for idx, canv in enumerate(canvases):
|
| 783 |
cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
|
| 784 |
cv2.imwrite(cp, canv)
|
| 785 |
+
g_idx, ss, ee = sub_ranges[idx] if idx < len(sub_ranges) else (0, idx, idx)
|
| 786 |
+
n_p = max(0, ee - ss)
|
| 787 |
caption = (
|
| 788 |
+
f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
|
| 789 |
+
f"I@#{ss} + {n_p} P-frame{'s' if n_p != 1 else ''}"
|
| 790 |
)
|
| 791 |
canvas_items.append((cp, caption))
|
| 792 |
|
|
|
|
| 807 |
"bitcost_pct": float(bitcost_pct),
|
| 808 |
"fade_strength": float(fade_strength),
|
| 809 |
"gop": gop_resolved,
|
| 810 |
+
"target_canvases": int(target_canvases),
|
| 811 |
},
|
| 812 |
"gop_groups": [
|
| 813 |
{
|
|
|
|
| 841 |
{
|
| 842 |
"index": i,
|
| 843 |
"size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
|
| 844 |
+
"group": int(sub_ranges[i][0]) if i < len(sub_ranges) else None,
|
| 845 |
+
"sub_range": list(sub_ranges[i][1:3]) if i < len(sub_ranges) else None,
|
| 846 |
"structure": "IPPP — first frame full (I), rest contribute "
|
| 847 |
"only their selected patches (P).",
|
| 848 |
}
|
|
|
|
| 1304 |
"each. Dynamic mode mirrors codec_tools' readiness "
|
| 1305 |
"grouping (equal-energy groups).",
|
| 1306 |
)
|
| 1307 |
+
target_canvases = gr.Slider(
|
| 1308 |
+
1, 16, value=4, step=1,
|
| 1309 |
+
label="Target canvases (total per video)",
|
| 1310 |
+
info="Fixed canvas count regardless of GOP. The budget is "
|
| 1311 |
+
"split across groups; each group is further sliced "
|
| 1312 |
+
"into sub-ranges of consecutive frames, one IPPP "
|
| 1313 |
+
"canvas per sub-range.",
|
| 1314 |
+
)
|
| 1315 |
|
| 1316 |
with gr.Accordion("Time window", open=False):
|
| 1317 |
with gr.Row():
|
|
|
|
| 1418 |
viz_mode, heatmap_alpha,
|
| 1419 |
start_sec, end_sec,
|
| 1420 |
saliency_signal, score_log_scale, bitcost_pct, fade_strength,
|
| 1421 |
+
gop, target_canvases,
|
| 1422 |
],
|
| 1423 |
outputs=[vis_out, canvas_out, info_out, chart_out],
|
| 1424 |
)
|
|
|
|
| 1430 |
video_in, sample_frames, patch_size, top_k, max_pixels,
|
| 1431 |
viz_mode, heatmap_alpha, start_sec, end_sec,
|
| 1432 |
saliency_signal, score_log_scale, bitcost_pct, fade_strength,
|
| 1433 |
+
gop, target_canvases,
|
| 1434 |
],
|
| 1435 |
)
|
| 1436 |
|