FeilongTang committed on
Commit
210c709
·
1 Parent(s): 257cddf

Align packed canvases with GOP groups

Browse files
Files changed (1) hide show
  1. app.py +86 -37
app.py CHANGED
@@ -18,8 +18,8 @@ Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
18
  full color, dropped patches are faded to a gray-white wash so the
19
  viewer can see exactly which patches the codec stage chose.
20
  6. Pack one canvas per GOP group: the first frame of each group is
21
- kept whole as the I-frame, and later frames only overwrite their
22
- selected patches as P-frame updates.
23
  """
24
 
25
  import json
@@ -525,27 +525,22 @@ def _build_ippp_canvas(
525
  frames: List[np.ndarray], masks: List[np.ndarray],
526
  i_idx: int, p_range: range, patch: int,
527
  ) -> Tuple[np.ndarray, int]:
528
- """Build one IPPP canvas at the *same dimensions as the I-frame*.
529
-
530
- Codec convention: every frame in a group shares the picture size; a
531
- P-frame only encodes the macroblocks that need to change. So:
532
- 1. Initialise the canvas to the I-frame's full image.
533
- 2. For each P-frame in time order, replace each saliency-selected
534
- patch position with the P-frame's pixels at that position.
535
- 3. The canvas now reads as 'what the encoder would have reconstructed
536
- at the end of this group' — same shape as the I-frame, with the
537
- high-energy regions updated by later P-frames.
538
-
539
- Returns (canvas, n_overlays) where n_overlays is the count of P-frame
540
- patches that overwrote a position (a position may be hit multiple
541
- times by different P-frames; we count each hit)."""
542
  i_frame = frames[i_idx]
543
  h, w = i_frame.shape[:2]
544
  hb, wb = h // patch, w // patch
545
- canvas_h, canvas_w = hb * patch, wb * patch
546
- canvas = i_frame[:canvas_h, :canvas_w].copy()
547
 
548
- n_overlays = 0
549
  for k in p_range:
550
  if k >= len(frames):
551
  break
@@ -553,15 +548,36 @@ def _build_ippp_canvas(
553
  for i in range(m.shape[0]):
554
  for j in range(m.shape[1]):
555
  if m[i, j]:
556
- canvas[
557
- i * patch:(i + 1) * patch,
558
- j * patch:(j + 1) * patch,
559
- ] = f[
560
- i * patch:(i + 1) * patch,
561
- j * patch:(j + 1) * patch,
562
- ]
563
- n_overlays += 1
564
- return canvas, n_overlays
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
566
 
567
  def pack_canvases_per_group(
@@ -595,14 +611,14 @@ def pack_canvases_per_group(
595
  if s >= len(frames):
596
  continue
597
  ss, ee = s, e
598
- canvas, n_p_overlays = _build_ippp_canvas(
599
  frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
600
  patch=patch,
601
  )
602
  canvases.append(canvas)
603
  sub_ranges.append((g_idx, ss, ee))
604
  hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
605
- total_selected += hb * wb + n_p_overlays
606
 
607
  if not canvases:
608
  canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
@@ -873,10 +889,15 @@ def process(
873
  cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
874
  cv2.imwrite(cp, canv)
875
  g_idx, ss, ee = sub_ranges[idx] if idx < len(sub_ranges) else (0, idx, idx)
876
- n_p = max(0, ee - ss)
 
 
 
877
  caption = (
878
  f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
879
- f"I@#{ss} + {n_p} P-frame{'s' if n_p != 1 else ''}"
 
 
880
  )
881
  canvas_items.append((cp, caption))
882
 
@@ -914,7 +935,15 @@ def process(
914
  {
915
  "start_frame_idx": int(s),
916
  "end_frame_idx": int(e),
 
 
 
 
 
917
  "n_frames": int(e - s + 1),
 
 
 
918
  "selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
919
  }
920
  for (s, e) in groups
@@ -955,8 +984,27 @@ def process(
955
  "size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
956
  "group": int(sub_ranges[i][0]) if i < len(sub_ranges) else None,
957
  "sub_range": list(sub_ranges[i][1:3]) if i < len(sub_ranges) else None,
958
- "structure": "IPPP — first frame full (I), rest contribute "
959
- "only their selected patches (P).",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
960
  }
961
  for i in range(len(canvases))
962
  ],
@@ -1603,12 +1651,13 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1603
  gr.Markdown(
1604
  "<small>Each canvas is one GOP group rendered in "
1605
  "<b>IPPP order</b>: the group's first frame is the "
1606
- "<b>I-frame</b> kept whole (top), followed by the "
1607
- "<b>P-frame</b> selected patches packed below.</small>"
 
1608
  )
1609
  canvas_out = gr.Gallery(
1610
  label="", show_label=False,
1611
- columns=2, rows=2, height=420,
1612
  object_fit="contain",
1613
  preview=True,
1614
  )
 
18
  full color, dropped patches are faded to a gray-white wash so the
19
  viewer can see exactly which patches the codec stage chose.
20
  6. Pack one canvas per GOP group: the first frame of each group is
21
+ kept whole as the I-frame, and later frames contribute only their
22
+ selected patches packed below it in time order.
23
  """
24
 
25
  import json
 
525
  frames: List[np.ndarray], masks: List[np.ndarray],
526
  i_idx: int, p_range: range, patch: int,
527
  ) -> Tuple[np.ndarray, int]:
528
+ """Build one IPPP canvas with the I-frame on top and packed P patches below.
529
+
530
+ Layout:
531
+ 1. The group's first frame is copied whole as the I-frame.
532
+ 2. Every selected patch from later P-frames is appended below the
533
+ I-frame in time-major raster order.
534
+
535
+ Returns (canvas, n_patches) where n_patches is the number of selected
536
+ P-frame patches packed under the I-frame."""
 
 
 
 
 
537
  i_frame = frames[i_idx]
538
  h, w = i_frame.shape[:2]
539
  hb, wb = h // patch, w // patch
540
+ frame_h, frame_w = hb * patch, wb * patch
541
+ i_crop = i_frame[:frame_h, :frame_w].copy()
542
 
543
+ packed_patches: List[np.ndarray] = []
544
  for k in p_range:
545
  if k >= len(frames):
546
  break
 
548
  for i in range(m.shape[0]):
549
  for j in range(m.shape[1]):
550
  if m[i, j]:
551
+ packed_patches.append(
552
+ f[
553
+ i * patch:(i + 1) * patch,
554
+ j * patch:(j + 1) * patch,
555
+ ].copy()
556
+ )
557
+
558
+ n_patches = len(packed_patches)
559
+ packed_rows = int(math.ceil(n_patches / max(1, wb))) if n_patches else 0
560
+ packed_h = packed_rows * patch
561
+ canvas = np.full((frame_h + packed_h, frame_w, 3), 250, dtype=np.uint8)
562
+ canvas[:frame_h, :frame_w] = i_crop
563
+
564
+ if packed_h > 0:
565
+ cv2.line(
566
+ canvas,
567
+ (0, frame_h - 1),
568
+ (frame_w - 1, frame_h - 1),
569
+ (99, 102, 241),
570
+ 2,
571
+ lineType=cv2.LINE_AA,
572
+ )
573
+ for idx, tile in enumerate(packed_patches):
574
+ row = idx // wb
575
+ col = idx % wb
576
+ y0 = frame_h + row * patch
577
+ x0 = col * patch
578
+ canvas[y0:y0 + patch, x0:x0 + patch] = tile
579
+
580
+ return canvas, n_patches
581
 
582
 
583
  def pack_canvases_per_group(
 
611
  if s >= len(frames):
612
  continue
613
  ss, ee = s, e
614
+ canvas, n_patches = _build_ippp_canvas(
615
  frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
616
  patch=patch,
617
  )
618
  canvases.append(canvas)
619
  sub_ranges.append((g_idx, ss, ee))
620
  hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
621
+ total_selected += hb * wb + n_patches
622
 
623
  if not canvases:
624
  canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
 
889
  cp = os.path.join(out_dir, f"canvas_{idx:03d}.png")
890
  cv2.imwrite(cp, canv)
891
  g_idx, ss, ee = sub_ranges[idx] if idx < len(sub_ranges) else (0, idx, idx)
892
+ src_start = int(fids[ss]) if ss < len(fids) else None
893
+ src_end = int(fids[ee]) if ee < len(fids) else None
894
+ p_frame_count = max(0, ee - ss)
895
+ p_patch_count = int(sum(int(m.sum()) for m in masks[ss + 1:ee + 1]))
896
  caption = (
897
  f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
898
+ f"sampled #{ss}-{ee} · src {src_start}-{src_end} · "
899
+ f"I src#{src_start} + {p_patch_count} P patches from "
900
+ f"{p_frame_count} frame{'s' if p_frame_count != 1 else ''}"
901
  )
902
  canvas_items.append((cp, caption))
903
 
 
935
  {
936
  "start_frame_idx": int(s),
937
  "end_frame_idx": int(e),
938
+ "start_sample_idx": int(s),
939
+ "end_sample_idx": int(e),
940
+ "start_source_frame_id": int(fids[s]) if s < len(fids) else None,
941
+ "end_source_frame_id": int(fids[e]) if e < len(fids) else None,
942
+ "source_frame_ids": [int(fids[i]) for i in range(s, e + 1)],
943
  "n_frames": int(e - s + 1),
944
+ "i_frame_source_id": int(fids[s]) if s < len(fids) else None,
945
+ "p_frame_count": int(max(0, e - s)),
946
+ "p_frame_selected_patches": int(sum(int(m.sum()) for m in masks[s + 1:e + 1])),
947
  "selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
948
  }
949
  for (s, e) in groups
 
984
  "size": f"{canvases[i].shape[1]}x{canvases[i].shape[0]}",
985
  "group": int(sub_ranges[i][0]) if i < len(sub_ranges) else None,
986
  "sub_range": list(sub_ranges[i][1:3]) if i < len(sub_ranges) else None,
987
+ "sampled_indices": (
988
+ [int(x) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
989
+ if i < len(sub_ranges) else []
990
+ ),
991
+ "source_frame_ids": (
992
+ [int(fids[x]) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
993
+ if i < len(sub_ranges) else []
994
+ ),
995
+ "i_frame_source_id": (
996
+ int(fids[sub_ranges[i][1]]) if i < len(sub_ranges) else None
997
+ ),
998
+ "p_frame_count": (
999
+ int(max(0, sub_ranges[i][2] - sub_ranges[i][1]))
1000
+ if i < len(sub_ranges) else 0
1001
+ ),
1002
+ "p_frame_selected_patches": (
1003
+ int(sum(int(m.sum()) for m in masks[sub_ranges[i][1] + 1:sub_ranges[i][2] + 1]))
1004
+ if i < len(sub_ranges) else 0
1005
+ ),
1006
+ "structure": "IPPP — full I-frame on top, selected P patches "
1007
+ "packed below in time-major order.",
1008
  }
1009
  for i in range(len(canvases))
1010
  ],
 
1651
  gr.Markdown(
1652
  "<small>Each canvas is one GOP group rendered in "
1653
  "<b>IPPP order</b>: the group's first frame is the "
1654
+ "<b>I-frame</b> kept whole on top, followed by the "
1655
+ "<b>P-frame</b> selected patches packed below in "
1656
+ "time order.</small>"
1657
  )
1658
  canvas_out = gr.Gallery(
1659
  label="", show_label=False,
1660
+ columns=2, rows=2, height=520,
1661
  object_fit="contain",
1662
  preview=True,
1663
  )