Commit ·
5a9b121
1
Parent(s): 210c709
Show GOP canvases as explicit I/P sections
Browse files
app.py
CHANGED
|
@@ -525,12 +525,12 @@ def _build_ippp_canvas(
|
|
| 525 |
frames: List[np.ndarray], masks: List[np.ndarray],
|
| 526 |
i_idx: int, p_range: range, patch: int,
|
| 527 |
) -> Tuple[np.ndarray, int]:
|
| 528 |
-
"""Build one
|
| 529 |
|
| 530 |
Layout:
|
| 531 |
1. The group's first frame is copied whole as the I-frame.
|
| 532 |
-
2.
|
| 533 |
-
I
|
| 534 |
|
| 535 |
Returns (canvas, n_patches) where n_patches is the number of selected
|
| 536 |
P-frame patches packed under the I-frame."""
|
|
@@ -540,11 +540,14 @@ def _build_ippp_canvas(
|
|
| 540 |
frame_h, frame_w = hb * patch, wb * patch
|
| 541 |
i_crop = i_frame[:frame_h, :frame_w].copy()
|
| 542 |
|
| 543 |
-
|
|
|
|
|
|
|
| 544 |
for k in p_range:
|
| 545 |
if k >= len(frames):
|
| 546 |
break
|
| 547 |
f, m = frames[k], masks[k]
|
|
|
|
| 548 |
for i in range(m.shape[0]):
|
| 549 |
for j in range(m.shape[1]):
|
| 550 |
if m[i, j]:
|
|
@@ -554,28 +557,29 @@ def _build_ippp_canvas(
|
|
| 554 |
j * patch:(j + 1) * patch,
|
| 555 |
].copy()
|
| 556 |
)
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
canvas = np.full((frame_h + packed_h, frame_w, 3), 250, dtype=np.uint8)
|
| 562 |
-
canvas[:frame_h, :frame_w] = i_crop
|
| 563 |
-
|
| 564 |
-
if packed_h > 0:
|
| 565 |
-
cv2.line(
|
| 566 |
-
canvas,
|
| 567 |
-
(0, frame_h - 1),
|
| 568 |
-
(frame_w - 1, frame_h - 1),
|
| 569 |
-
(99, 102, 241),
|
| 570 |
-
2,
|
| 571 |
-
lineType=cv2.LINE_AA,
|
| 572 |
-
)
|
| 573 |
for idx, tile in enumerate(packed_patches):
|
| 574 |
row = idx // wb
|
| 575 |
col = idx % wb
|
| 576 |
-
y0 =
|
| 577 |
x0 = col * patch
|
| 578 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 579 |
|
| 580 |
return canvas, n_patches
|
| 581 |
|
|
@@ -589,10 +593,9 @@ def pack_canvases_per_group(
|
|
| 589 |
) -> Tuple[List[np.ndarray], List[Tuple[int, int, int]], int]:
|
| 590 |
"""Pack exactly one IPPP canvas per GOP group.
|
| 591 |
|
| 592 |
-
Each group's first frame is kept whole as the I-frame, and
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
compatibility and is ignored.
|
| 596 |
|
| 597 |
Returns:
|
| 598 |
canvases — list of np.ndarray, length == number of groups.
|
|
@@ -892,10 +895,11 @@ def process(
|
|
| 892 |
src_start = int(fids[ss]) if ss < len(fids) else None
|
| 893 |
src_end = int(fids[ee]) if ee < len(fids) else None
|
| 894 |
p_frame_count = max(0, ee - ss)
|
|
|
|
| 895 |
p_patch_count = int(sum(int(m.sum()) for m in masks[ss + 1:ee + 1]))
|
| 896 |
caption = (
|
| 897 |
f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
|
| 898 |
-
f"sampled #{ss}-{ee} · src {src_start}-{src_end} · "
|
| 899 |
f"I src#{src_start} + {p_patch_count} P patches from "
|
| 900 |
f"{p_frame_count} frame{'s' if p_frame_count != 1 else ''}"
|
| 901 |
)
|
|
@@ -928,7 +932,7 @@ def process(
|
|
| 928 |
"bitcost_pct": float(bitcost_pct),
|
| 929 |
"fade_strength": float(fade_strength),
|
| 930 |
"gop": gop_resolved,
|
| 931 |
-
"canvas_policy": "
|
| 932 |
"i_frame_policy": "first_frame_full_in_each_group",
|
| 933 |
},
|
| 934 |
"gop_groups": [
|
|
@@ -941,8 +945,11 @@ def process(
|
|
| 941 |
"end_source_frame_id": int(fids[e]) if e < len(fids) else None,
|
| 942 |
"source_frame_ids": [int(fids[i]) for i in range(s, e + 1)],
|
| 943 |
"n_frames": int(e - s + 1),
|
|
|
|
| 944 |
"i_frame_source_id": int(fids[s]) if s < len(fids) else None,
|
|
|
|
| 945 |
"p_frame_count": int(max(0, e - s)),
|
|
|
|
| 946 |
"p_frame_selected_patches": int(sum(int(m.sum()) for m in masks[s + 1:e + 1])),
|
| 947 |
"selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
|
| 948 |
}
|
|
@@ -992,19 +999,31 @@ def process(
|
|
| 992 |
[int(fids[x]) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
|
| 993 |
if i < len(sub_ranges) else []
|
| 994 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
"i_frame_source_id": (
|
| 996 |
int(fids[sub_ranges[i][1]]) if i < len(sub_ranges) else None
|
| 997 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 998 |
"p_frame_count": (
|
| 999 |
int(max(0, sub_ranges[i][2] - sub_ranges[i][1]))
|
| 1000 |
if i < len(sub_ranges) else 0
|
| 1001 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1002 |
"p_frame_selected_patches": (
|
| 1003 |
int(sum(int(m.sum()) for m in masks[sub_ranges[i][1] + 1:sub_ranges[i][2] + 1]))
|
| 1004 |
if i < len(sub_ranges) else 0
|
| 1005 |
),
|
| 1006 |
-
"structure": "
|
| 1007 |
-
"
|
| 1008 |
}
|
| 1009 |
for i in range(len(canvases))
|
| 1010 |
],
|
|
@@ -1565,6 +1584,7 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1565 |
gop = gr.Radio(
|
| 1566 |
[
|
| 1567 |
("GOP = 4 — fixed 4-frame groups", "4"),
|
|
|
|
| 1568 |
("GOP = 8 — fixed 8-frame groups", "8"),
|
| 1569 |
("GOP = 16 — fixed 16-frame groups", "16"),
|
| 1570 |
("Codec-stream: adaptive groups by saliency energy", "dynamic"),
|
|
@@ -1572,10 +1592,11 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1572 |
value="8",
|
| 1573 |
label="GOP (group of pictures)",
|
| 1574 |
info="Splits sampled frames into GOP groups. Each group "
|
| 1575 |
-
"produces exactly one
|
| 1576 |
-
"frame stays whole as the I-frame, and later
|
| 1577 |
-
"
|
| 1578 |
-
"
|
|
|
|
| 1579 |
"Codec-stream mode adaptively groups by saliency "
|
| 1580 |
"energy, targeting roughly 8-64 sampled frames per group.",
|
| 1581 |
)
|
|
@@ -1650,10 +1671,10 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1650 |
gr.Markdown("### Packed canvases (one per GOP group)")
|
| 1651 |
gr.Markdown(
|
| 1652 |
"<small>Each canvas is one GOP group rendered in "
|
| 1653 |
-
"<b>
|
| 1654 |
-
"<b>I-frame</b> kept whole on top,
|
| 1655 |
-
"<b>P-frame</b>
|
| 1656 |
-
"time order.</small>"
|
| 1657 |
)
|
| 1658 |
canvas_out = gr.Gallery(
|
| 1659 |
label="", show_label=False,
|
|
|
|
| 525 |
frames: List[np.ndarray], masks: List[np.ndarray],
|
| 526 |
i_idx: int, p_range: range, patch: int,
|
| 527 |
) -> Tuple[np.ndarray, int]:
|
| 528 |
+
"""Build one GOP canvas with explicit I/P sections.
|
| 529 |
|
| 530 |
Layout:
|
| 531 |
1. The group's first frame is copied whole as the I-frame.
|
| 532 |
+
2. Each later P-frame gets its own packed section below, in time order.
|
| 533 |
+
So GOP=4 becomes I|P|P|P, GOP=5 becomes I|P|P|P|P, etc.
|
| 534 |
|
| 535 |
Returns (canvas, n_patches) where n_patches is the number of selected
|
| 536 |
P-frame patches packed under the I-frame."""
|
|
|
|
| 540 |
frame_h, frame_w = hb * patch, wb * patch
|
| 541 |
i_crop = i_frame[:frame_h, :frame_w].copy()
|
| 542 |
|
| 543 |
+
divider_h = 2
|
| 544 |
+
p_sections: List[np.ndarray] = []
|
| 545 |
+
n_patches = 0
|
| 546 |
for k in p_range:
|
| 547 |
if k >= len(frames):
|
| 548 |
break
|
| 549 |
f, m = frames[k], masks[k]
|
| 550 |
+
packed_patches: List[np.ndarray] = []
|
| 551 |
for i in range(m.shape[0]):
|
| 552 |
for j in range(m.shape[1]):
|
| 553 |
if m[i, j]:
|
|
|
|
| 557 |
j * patch:(j + 1) * patch,
|
| 558 |
].copy()
|
| 559 |
)
|
| 560 |
+
n_patches += len(packed_patches)
|
| 561 |
+
packed_rows = max(1, int(math.ceil(len(packed_patches) / max(1, wb))))
|
| 562 |
+
packed_h = packed_rows * patch
|
| 563 |
+
section_bg = np.full((packed_h, frame_w, 3), 246, dtype=np.uint8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
for idx, tile in enumerate(packed_patches):
|
| 565 |
row = idx // wb
|
| 566 |
col = idx % wb
|
| 567 |
+
y0 = row * patch
|
| 568 |
x0 = col * patch
|
| 569 |
+
section_bg[y0:y0 + patch, x0:x0 + patch] = tile
|
| 570 |
+
p_sections.append(section_bg)
|
| 571 |
+
|
| 572 |
+
total_h = frame_h + sum(divider_h + sec.shape[0] for sec in p_sections)
|
| 573 |
+
canvas = np.full((total_h, frame_w, 3), 250, dtype=np.uint8)
|
| 574 |
+
canvas[:frame_h, :frame_w] = i_crop
|
| 575 |
+
|
| 576 |
+
y = frame_h
|
| 577 |
+
for section in p_sections:
|
| 578 |
+
canvas[y:y + divider_h, :] = (99, 102, 241)
|
| 579 |
+
y += divider_h
|
| 580 |
+
sec_h = section.shape[0]
|
| 581 |
+
canvas[y:y + sec_h, :frame_w] = section
|
| 582 |
+
y += sec_h
|
| 583 |
|
| 584 |
return canvas, n_patches
|
| 585 |
|
|
|
|
| 593 |
) -> Tuple[List[np.ndarray], List[Tuple[int, int, int]], int]:
|
| 594 |
"""Pack exactly one IPPP canvas per GOP group.
|
| 595 |
|
| 596 |
+
Each group's first frame is kept whole as the I-frame, and every
|
| 597 |
+
later frame gets its own packed P section below it. `target_canvases`
|
| 598 |
+
is kept only for API compatibility and is ignored.
|
|
|
|
| 599 |
|
| 600 |
Returns:
|
| 601 |
canvases — list of np.ndarray, length == number of groups.
|
|
|
|
| 895 |
src_start = int(fids[ss]) if ss < len(fids) else None
|
| 896 |
src_end = int(fids[ee]) if ee < len(fids) else None
|
| 897 |
p_frame_count = max(0, ee - ss)
|
| 898 |
+
structure_label = " ".join(["I"] + ["P"] * p_frame_count)
|
| 899 |
p_patch_count = int(sum(int(m.sum()) for m in masks[ss + 1:ee + 1]))
|
| 900 |
caption = (
|
| 901 |
f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
|
| 902 |
+
f"{structure_label} · sampled #{ss}-{ee} · src {src_start}-{src_end} · "
|
| 903 |
f"I src#{src_start} + {p_patch_count} P patches from "
|
| 904 |
f"{p_frame_count} frame{'s' if p_frame_count != 1 else ''}"
|
| 905 |
)
|
|
|
|
| 932 |
"bitcost_pct": float(bitcost_pct),
|
| 933 |
"fade_strength": float(fade_strength),
|
| 934 |
"gop": gop_resolved,
|
| 935 |
+
"canvas_policy": "one_canvas_per_group_with_per_frame_p_sections",
|
| 936 |
"i_frame_policy": "first_frame_full_in_each_group",
|
| 937 |
},
|
| 938 |
"gop_groups": [
|
|
|
|
| 945 |
"end_source_frame_id": int(fids[e]) if e < len(fids) else None,
|
| 946 |
"source_frame_ids": [int(fids[i]) for i in range(s, e + 1)],
|
| 947 |
"n_frames": int(e - s + 1),
|
| 948 |
+
"structure_label": " ".join(["I"] + ["P"] * max(0, e - s)),
|
| 949 |
"i_frame_source_id": int(fids[s]) if s < len(fids) else None,
|
| 950 |
+
"p_source_frame_ids": [int(fids[i]) for i in range(s + 1, e + 1)],
|
| 951 |
"p_frame_count": int(max(0, e - s)),
|
| 952 |
+
"p_frame_patch_counts": [int(masks[i].sum()) for i in range(s + 1, e + 1)],
|
| 953 |
"p_frame_selected_patches": int(sum(int(m.sum()) for m in masks[s + 1:e + 1])),
|
| 954 |
"selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
|
| 955 |
}
|
|
|
|
| 999 |
[int(fids[x]) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
|
| 1000 |
if i < len(sub_ranges) else []
|
| 1001 |
),
|
| 1002 |
+
"structure_label": (
|
| 1003 |
+
" ".join(["I"] + ["P"] * max(0, sub_ranges[i][2] - sub_ranges[i][1]))
|
| 1004 |
+
if i < len(sub_ranges) else "I"
|
| 1005 |
+
),
|
| 1006 |
"i_frame_source_id": (
|
| 1007 |
int(fids[sub_ranges[i][1]]) if i < len(sub_ranges) else None
|
| 1008 |
),
|
| 1009 |
+
"p_source_frame_ids": (
|
| 1010 |
+
[int(fids[x]) for x in range(sub_ranges[i][1] + 1, sub_ranges[i][2] + 1)]
|
| 1011 |
+
if i < len(sub_ranges) else []
|
| 1012 |
+
),
|
| 1013 |
"p_frame_count": (
|
| 1014 |
int(max(0, sub_ranges[i][2] - sub_ranges[i][1]))
|
| 1015 |
if i < len(sub_ranges) else 0
|
| 1016 |
),
|
| 1017 |
+
"p_frame_patch_counts": (
|
| 1018 |
+
[int(masks[x].sum()) for x in range(sub_ranges[i][1] + 1, sub_ranges[i][2] + 1)]
|
| 1019 |
+
if i < len(sub_ranges) else []
|
| 1020 |
+
),
|
| 1021 |
"p_frame_selected_patches": (
|
| 1022 |
int(sum(int(m.sum()) for m in masks[sub_ranges[i][1] + 1:sub_ranges[i][2] + 1]))
|
| 1023 |
if i < len(sub_ranges) else 0
|
| 1024 |
),
|
| 1025 |
+
"structure": "Full I-frame on top; one packed P section per "
|
| 1026 |
+
"later frame, in time order.",
|
| 1027 |
}
|
| 1028 |
for i in range(len(canvases))
|
| 1029 |
],
|
|
|
|
| 1584 |
gop = gr.Radio(
|
| 1585 |
[
|
| 1586 |
("GOP = 4 — fixed 4-frame groups", "4"),
|
| 1587 |
+
("GOP = 5 — fixed 5-frame groups", "5"),
|
| 1588 |
("GOP = 8 — fixed 8-frame groups", "8"),
|
| 1589 |
("GOP = 16 — fixed 16-frame groups", "16"),
|
| 1590 |
("Codec-stream: adaptive groups by saliency energy", "dynamic"),
|
|
|
|
| 1592 |
value="8",
|
| 1593 |
label="GOP (group of pictures)",
|
| 1594 |
info="Splits sampled frames into GOP groups. Each group "
|
| 1595 |
+
"produces exactly one GOP canvas: the group's first "
|
| 1596 |
+
"frame stays whole as the I-frame, and each later "
|
| 1597 |
+
"frame gets its own P section below it. So GOP=4 "
|
| 1598 |
+
"becomes I P P P, GOP=5 becomes I P P P P. With 32 "
|
| 1599 |
+
"sampled frames and GOP=8, this yields 4 canvases. "
|
| 1600 |
"Codec-stream mode adaptively groups by saliency "
|
| 1601 |
"energy, targeting roughly 8-64 sampled frames per group.",
|
| 1602 |
)
|
|
|
|
| 1671 |
gr.Markdown("### Packed canvases (one per GOP group)")
|
| 1672 |
gr.Markdown(
|
| 1673 |
"<small>Each canvas is one GOP group rendered in "
|
| 1674 |
+
"<b>I/P structure</b>: the group's first frame is "
|
| 1675 |
+
"the <b>I-frame</b> kept whole on top, and each "
|
| 1676 |
+
"later frame gets its own packed <b>P-frame</b> "
|
| 1677 |
+
"section below in time order.</small>"
|
| 1678 |
)
|
| 1679 |
canvas_out = gr.Gallery(
|
| 1680 |
label="", show_label=False,
|