Spaces:

FeilongTang
/

OneVision-Encoder-Codec-View

Running

App Files Files Community

FeilongTang committed 24 days ago

Commit

210c709

1 Parent(s): 257cddf

Align packed canvases with GOP groups

Browse files

Files changed (1) hide show

app.py +86 -37

app.py CHANGED Viewed

@@ -18,8 +18,8 @@ Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
        full color, dropped patches are faded to a gray-white wash so the
        viewer can see exactly which patches the codec stage chose.
     6. Pack one canvas per GOP group: the first frame of each group is
-       kept whole as the I-frame, and later frames only overwrite their
-       selected patches as P-frame updates.
 """
 import json
@@ -525,27 +525,22 @@ def _build_ippp_canvas(
     frames: List[np.ndarray], masks: List[np.ndarray],
     i_idx: int, p_range: range, patch: int,
 ) -> Tuple[np.ndarray, int]:
-    """Build one IPPP canvas at the *same dimensions as the I-frame*.
-    Codec convention: every frame in a group shares the picture size; a
-    P-frame only encodes the macroblocks that need to change. So:
-      1. Initialise the canvas to the I-frame's full image.
-      2. For each P-frame in time order, replace each saliency-selected
-         patch position with the P-frame's pixels at that position.
-      3. The canvas now reads as 'what the encoder would have reconstructed
-         at the end of this group' — same shape as the I-frame, with the
-         high-energy regions updated by later P-frames.
-    Returns (canvas, n_overlays) where n_overlays is the count of P-frame
-    patches that overwrote a position (a position may be hit multiple
-    times by different P-frames; we count each hit)."""
     i_frame = frames[i_idx]
     h, w = i_frame.shape[:2]
     hb, wb = h // patch, w // patch
-    canvas_h, canvas_w = hb * patch, wb * patch
-    canvas = i_frame[:canvas_h, :canvas_w].copy()
-    n_overlays = 0
     for k in p_range:
         if k >= len(frames):
             break
@@ -553,15 +548,36 @@ def _build_ippp_canvas(
         for i in range(m.shape[0]):
             for j in range(m.shape[1]):
                 if m[i, j]:
-                    canvas[
-                        i * patch:(i + 1) * patch,
-                        j * patch:(j + 1) * patch,
-                    ] = f[
-                        i * patch:(i + 1) * patch,
-                        j * patch:(j + 1) * patch,
-                    ]
-                    n_overlays += 1
-    return canvas, n_overlays
 def pack_canvases_per_group(
@@ -595,14 +611,14 @@ def pack_canvases_per_group(
         if s >= len(frames):
             continue
         ss, ee = s, e
-        canvas, n_p_overlays = _build_ippp_canvas(
             frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
             patch=patch,
         )
         canvases.append(canvas)
         sub_ranges.append((g_idx, ss, ee))
         hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
-        total_selected += hb * wb + n_p_overlays
     if not canvases:
         canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
@@ -873,10 +889,15 @@ def process(
         cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
         cv2.imwrite(cp, canv)
         g_idx, ss, ee = sub_ranges[idx] if idx < len(sub_ranges) else (0, idx, idx)
-        n_p = max(0, ee - ss)
         caption = (
             f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
-            f"I@#{ss} + {n_p} P-frame{'s' if n_p != 1 else ''}"
         )
         canvas_items.append((cp, caption))
@@ -914,7 +935,15 @@ def process(
             {
                 "start_frame_idx": int(s),
                 "end_frame_idx": int(e),
                 "n_frames": int(e - s + 1),
                 "selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
             }
             for (s, e) in groups
@@ -955,8 +984,27 @@ def process(
                 "size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
                 "group": int(sub_ranges[i][0]) if i < len(sub_ranges) else None,
                 "sub_range": list(sub_ranges[i][1:3]) if i < len(sub_ranges) else None,
-                "structure": "IPPP — first frame full (I), rest contribute "
-                             "only their selected patches (P).",
             }
             for i in range(len(canvases))
         ],
@@ -1603,12 +1651,13 @@ with gr.Blocks(**_BLOCK_KW) as demo:
                         gr.Markdown(
                             "<small>Each canvas is one GOP group rendered in "
                             "<b>IPPP order</b>: the group's first frame is the "
-                            "<b>I-frame</b> kept whole (top), followed by the "
-                            "<b>P-frame</b> selected patches packed below.</small>"
                         )
                         canvas_out = gr.Gallery(
                             label="", show_label=False,
-                            columns=2, rows=2, height=420,
                             object_fit="contain",
                             preview=True,
                         )

        full color, dropped patches are faded to a gray-white wash so the
        viewer can see exactly which patches the codec stage chose.
     6. Pack one canvas per GOP group: the first frame of each group is
+       kept whole as the I-frame, and later frames contribute only their
+       selected patches packed below it in time order.
 """
 import json
     frames: List[np.ndarray], masks: List[np.ndarray],
     i_idx: int, p_range: range, patch: int,
 ) -> Tuple[np.ndarray, int]:
+    """Build one IPPP canvas with the I-frame on top and packed P patches below.
+    Layout:
+      1. The group's first frame is copied whole as the I-frame.
+      2. Every selected patch from later P-frames is appended below the
+         I-frame in time-major raster order.
+    Returns (canvas, n_patches) where n_patches is the number of selected
+    P-frame patches packed under the I-frame."""
     i_frame = frames[i_idx]
     h, w = i_frame.shape[:2]
     hb, wb = h // patch, w // patch
+    frame_h, frame_w = hb * patch, wb * patch
+    i_crop = i_frame[:frame_h, :frame_w].copy()
+    packed_patches: List[np.ndarray] = []
     for k in p_range:
         if k >= len(frames):
             break
         for i in range(m.shape[0]):
             for j in range(m.shape[1]):
                 if m[i, j]:
+                    packed_patches.append(
+                        f[
+                            i * patch:(i + 1) * patch,
+                            j * patch:(j + 1) * patch,
+                        ].copy()
+                    )
+    n_patches = len(packed_patches)
+    packed_rows = int(math.ceil(n_patches / max(1, wb))) if n_patches else 0
+    packed_h = packed_rows * patch
+    canvas = np.full((frame_h + packed_h, frame_w, 3), 250, dtype=np.uint8)
+    canvas[:frame_h, :frame_w] = i_crop
+    if packed_h > 0:
+        cv2.line(
+            canvas,
+            (0, frame_h - 1),
+            (frame_w - 1, frame_h - 1),
+            (99, 102, 241),
+            2,
+            lineType=cv2.LINE_AA,
+        )
+        for idx, tile in enumerate(packed_patches):
+            row = idx // wb
+            col = idx % wb
+            y0 = frame_h + row * patch
+            x0 = col * patch
+            canvas[y0:y0 + patch, x0:x0 + patch] = tile
+    return canvas, n_patches
 def pack_canvases_per_group(
         if s >= len(frames):
             continue
         ss, ee = s, e
+        canvas, n_patches = _build_ippp_canvas(
             frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
             patch=patch,
         )
         canvases.append(canvas)
         sub_ranges.append((g_idx, ss, ee))
         hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
+        total_selected += hb * wb + n_patches
     if not canvases:
         canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
         cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
         cv2.imwrite(cp, canv)
         g_idx, ss, ee = sub_ranges[idx] if idx < len(sub_ranges) else (0, idx, idx)
+        src_start = int(fids[ss]) if ss < len(fids) else None
+        src_end = int(fids[ee]) if ee < len(fids) else None
+        p_frame_count = max(0, ee - ss)
+        p_patch_count = int(sum(int(m.sum()) for m in masks[ss + 1:ee + 1]))
         caption = (
             f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
+            f"sampled #{ss}-{ee} · src {src_start}-{src_end} · "
+            f"I src#{src_start} + {p_patch_count} P patches from "
+            f"{p_frame_count} frame{'s' if p_frame_count != 1 else ''}"
         )
         canvas_items.append((cp, caption))
             {
                 "start_frame_idx": int(s),
                 "end_frame_idx": int(e),
+                "start_sample_idx": int(s),
+                "end_sample_idx": int(e),
+                "start_source_frame_id": int(fids[s]) if s < len(fids) else None,
+                "end_source_frame_id": int(fids[e]) if e < len(fids) else None,
+                "source_frame_ids": [int(fids[i]) for i in range(s, e + 1)],
                 "n_frames": int(e - s + 1),
+                "i_frame_source_id": int(fids[s]) if s < len(fids) else None,
+                "p_frame_count": int(max(0, e - s)),
+                "p_frame_selected_patches": int(sum(int(m.sum()) for m in masks[s + 1:e + 1])),
                 "selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
             }
             for (s, e) in groups
                 "size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
                 "group": int(sub_ranges[i][0]) if i < len(sub_ranges) else None,
                 "sub_range": list(sub_ranges[i][1:3]) if i < len(sub_ranges) else None,
+                "sampled_indices": (
+                    [int(x) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
+                    if i < len(sub_ranges) else []
+                ),
+                "source_frame_ids": (
+                    [int(fids[x]) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
+                    if i < len(sub_ranges) else []
+                ),
+                "i_frame_source_id": (
+                    int(fids[sub_ranges[i][1]]) if i < len(sub_ranges) else None
+                ),
+                "p_frame_count": (
+                    int(max(0, sub_ranges[i][2] - sub_ranges[i][1]))
+                    if i < len(sub_ranges) else 0
+                ),
+                "p_frame_selected_patches": (
+                    int(sum(int(m.sum()) for m in masks[sub_ranges[i][1] + 1:sub_ranges[i][2] + 1]))
+                    if i < len(sub_ranges) else 0
+                ),
+                "structure": "IPPP — full I-frame on top, selected P patches "
+                             "packed below in time-major order.",
             }
             for i in range(len(canvases))
         ],
                         gr.Markdown(
                             "<small>Each canvas is one GOP group rendered in "
                             "<b>IPPP order</b>: the group's first frame is the "
+                            "<b>I-frame</b> kept whole on top, followed by the "
+                            "<b>P-frame</b> selected patches packed below in "
+                            "time order.</small>"
                         )
                         canvas_out = gr.Gallery(
                             label="", show_label=False,
+                            columns=2, rows=2, height=520,
                             object_fit="contain",
                             preview=True,
                         )