FeilongTang committed on
Commit
5a9b121
·
1 Parent(s): 210c709

Show GOP canvases as explicit I/P sections

Browse files
Files changed (1) hide show
  1. app.py +59 -38
app.py CHANGED
@@ -525,12 +525,12 @@ def _build_ippp_canvas(
525
  frames: List[np.ndarray], masks: List[np.ndarray],
526
  i_idx: int, p_range: range, patch: int,
527
  ) -> Tuple[np.ndarray, int]:
528
- """Build one IPPP canvas with the I-frame on top and packed P patches below.
529
 
530
  Layout:
531
  1. The group's first frame is copied whole as the I-frame.
532
- 2. Every selected patch from later P-frames is appended below the
533
- I-frame in time-major raster order.
534
 
535
  Returns (canvas, n_patches) where n_patches is the number of selected
536
  P-frame patches packed under the I-frame."""
@@ -540,11 +540,14 @@ def _build_ippp_canvas(
540
  frame_h, frame_w = hb * patch, wb * patch
541
  i_crop = i_frame[:frame_h, :frame_w].copy()
542
 
543
- packed_patches: List[np.ndarray] = []
 
 
544
  for k in p_range:
545
  if k >= len(frames):
546
  break
547
  f, m = frames[k], masks[k]
 
548
  for i in range(m.shape[0]):
549
  for j in range(m.shape[1]):
550
  if m[i, j]:
@@ -554,28 +557,29 @@ def _build_ippp_canvas(
554
  j * patch:(j + 1) * patch,
555
  ].copy()
556
  )
557
-
558
- n_patches = len(packed_patches)
559
- packed_rows = int(math.ceil(n_patches / max(1, wb))) if n_patches else 0
560
- packed_h = packed_rows * patch
561
- canvas = np.full((frame_h + packed_h, frame_w, 3), 250, dtype=np.uint8)
562
- canvas[:frame_h, :frame_w] = i_crop
563
-
564
- if packed_h > 0:
565
- cv2.line(
566
- canvas,
567
- (0, frame_h - 1),
568
- (frame_w - 1, frame_h - 1),
569
- (99, 102, 241),
570
- 2,
571
- lineType=cv2.LINE_AA,
572
- )
573
  for idx, tile in enumerate(packed_patches):
574
  row = idx // wb
575
  col = idx % wb
576
- y0 = frame_h + row * patch
577
  x0 = col * patch
578
- canvas[y0:y0 + patch, x0:x0 + patch] = tile
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
  return canvas, n_patches
581
 
@@ -589,10 +593,9 @@ def pack_canvases_per_group(
589
  ) -> Tuple[List[np.ndarray], List[Tuple[int, int, int]], int]:
590
  """Pack exactly one IPPP canvas per GOP group.
591
 
592
- Each group's first frame is kept whole as the I-frame, and the
593
- remaining frames in that same group contribute only their selected
594
- patches as P-frame overlays. `target_canvases` is kept only for API
595
- compatibility and is ignored.
596
 
597
  Returns:
598
  canvases — list of np.ndarray, length == number of groups.
@@ -892,10 +895,11 @@ def process(
892
  src_start = int(fids[ss]) if ss < len(fids) else None
893
  src_end = int(fids[ee]) if ee < len(fids) else None
894
  p_frame_count = max(0, ee - ss)
 
895
  p_patch_count = int(sum(int(m.sum()) for m in masks[ss + 1:ee + 1]))
896
  caption = (
897
  f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
898
- f"sampled #{ss}-{ee} · src {src_start}-{src_end} · "
899
  f"I src#{src_start} + {p_patch_count} P patches from "
900
  f"{p_frame_count} frame{'s' if p_frame_count != 1 else ''}"
901
  )
@@ -928,7 +932,7 @@ def process(
928
  "bitcost_pct": float(bitcost_pct),
929
  "fade_strength": float(fade_strength),
930
  "gop": gop_resolved,
931
- "canvas_policy": "one_ippp_canvas_per_group",
932
  "i_frame_policy": "first_frame_full_in_each_group",
933
  },
934
  "gop_groups": [
@@ -941,8 +945,11 @@ def process(
941
  "end_source_frame_id": int(fids[e]) if e < len(fids) else None,
942
  "source_frame_ids": [int(fids[i]) for i in range(s, e + 1)],
943
  "n_frames": int(e - s + 1),
 
944
  "i_frame_source_id": int(fids[s]) if s < len(fids) else None,
 
945
  "p_frame_count": int(max(0, e - s)),
 
946
  "p_frame_selected_patches": int(sum(int(m.sum()) for m in masks[s + 1:e + 1])),
947
  "selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
948
  }
@@ -992,19 +999,31 @@ def process(
992
  [int(fids[x]) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
993
  if i < len(sub_ranges) else []
994
  ),
 
 
 
 
995
  "i_frame_source_id": (
996
  int(fids[sub_ranges[i][1]]) if i < len(sub_ranges) else None
997
  ),
 
 
 
 
998
  "p_frame_count": (
999
  int(max(0, sub_ranges[i][2] - sub_ranges[i][1]))
1000
  if i < len(sub_ranges) else 0
1001
  ),
 
 
 
 
1002
  "p_frame_selected_patches": (
1003
  int(sum(int(m.sum()) for m in masks[sub_ranges[i][1] + 1:sub_ranges[i][2] + 1]))
1004
  if i < len(sub_ranges) else 0
1005
  ),
1006
- "structure": "IPPP — full I-frame on top, selected P patches "
1007
- "packed below in time-major order.",
1008
  }
1009
  for i in range(len(canvases))
1010
  ],
@@ -1565,6 +1584,7 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1565
  gop = gr.Radio(
1566
  [
1567
  ("GOP = 4 — fixed 4-frame groups", "4"),
 
1568
  ("GOP = 8 — fixed 8-frame groups", "8"),
1569
  ("GOP = 16 — fixed 16-frame groups", "16"),
1570
  ("Codec-stream: adaptive groups by saliency energy", "dynamic"),
@@ -1572,10 +1592,11 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1572
  value="8",
1573
  label="GOP (group of pictures)",
1574
  info="Splits sampled frames into GOP groups. Each group "
1575
- "produces exactly one IPPP canvas: the group's first "
1576
- "frame stays whole as the I-frame, and later frames "
1577
- "only contribute selected patches as P-updates. With "
1578
- "32 sampled frames and GOP=8, this yields 4 canvases. "
 
1579
  "Codec-stream mode adaptively groups by saliency "
1580
  "energy, targeting roughly 8-64 sampled frames per group.",
1581
  )
@@ -1650,10 +1671,10 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1650
  gr.Markdown("### Packed canvases (one per GOP group)")
1651
  gr.Markdown(
1652
  "<small>Each canvas is one GOP group rendered in "
1653
- "<b>IPPP order</b>: the group's first frame is the "
1654
- "<b>I-frame</b> kept whole on top, followed by the "
1655
- "<b>P-frame</b> selected patches packed below in "
1656
- "time order.</small>"
1657
  )
1658
  canvas_out = gr.Gallery(
1659
  label="", show_label=False,
 
525
  frames: List[np.ndarray], masks: List[np.ndarray],
526
  i_idx: int, p_range: range, patch: int,
527
  ) -> Tuple[np.ndarray, int]:
528
+ """Build one GOP canvas with explicit I/P sections.
529
 
530
  Layout:
531
  1. The group's first frame is copied whole as the I-frame.
532
+ 2. Each later P-frame gets its own packed section below, in time order.
533
+ So GOP=4 becomes I|P|P|P, GOP=5 becomes I|P|P|P|P, etc.
534
 
535
  Returns (canvas, n_patches) where n_patches is the number of selected
536
  P-frame patches packed under the I-frame."""
 
540
  frame_h, frame_w = hb * patch, wb * patch
541
  i_crop = i_frame[:frame_h, :frame_w].copy()
542
 
543
+ divider_h = 2
544
+ p_sections: List[np.ndarray] = []
545
+ n_patches = 0
546
  for k in p_range:
547
  if k >= len(frames):
548
  break
549
  f, m = frames[k], masks[k]
550
+ packed_patches: List[np.ndarray] = []
551
  for i in range(m.shape[0]):
552
  for j in range(m.shape[1]):
553
  if m[i, j]:
 
557
  j * patch:(j + 1) * patch,
558
  ].copy()
559
  )
560
+ n_patches += len(packed_patches)
561
+ packed_rows = max(1, int(math.ceil(len(packed_patches) / max(1, wb))))
562
+ packed_h = packed_rows * patch
563
+ section_bg = np.full((packed_h, frame_w, 3), 246, dtype=np.uint8)
 
 
 
 
 
 
 
 
 
 
 
 
564
  for idx, tile in enumerate(packed_patches):
565
  row = idx // wb
566
  col = idx % wb
567
+ y0 = row * patch
568
  x0 = col * patch
569
+ section_bg[y0:y0 + patch, x0:x0 + patch] = tile
570
+ p_sections.append(section_bg)
571
+
572
+ total_h = frame_h + sum(divider_h + sec.shape[0] for sec in p_sections)
573
+ canvas = np.full((total_h, frame_w, 3), 250, dtype=np.uint8)
574
+ canvas[:frame_h, :frame_w] = i_crop
575
+
576
+ y = frame_h
577
+ for section in p_sections:
578
+ canvas[y:y + divider_h, :] = (99, 102, 241)
579
+ y += divider_h
580
+ sec_h = section.shape[0]
581
+ canvas[y:y + sec_h, :frame_w] = section
582
+ y += sec_h
583
 
584
  return canvas, n_patches
585
 
 
593
  ) -> Tuple[List[np.ndarray], List[Tuple[int, int, int]], int]:
594
  """Pack exactly one IPPP canvas per GOP group.
595
 
596
+ Each group's first frame is kept whole as the I-frame, and every
597
+ later frame gets its own packed P section below it. `target_canvases`
598
+ is kept only for API compatibility and is ignored.
 
599
 
600
  Returns:
601
  canvases — list of np.ndarray, length == number of groups.
 
895
  src_start = int(fids[ss]) if ss < len(fids) else None
896
  src_end = int(fids[ee]) if ee < len(fids) else None
897
  p_frame_count = max(0, ee - ss)
898
+ structure_label = " ".join(["I"] + ["P"] * p_frame_count)
899
  p_patch_count = int(sum(int(m.sum()) for m in masks[ss + 1:ee + 1]))
900
  caption = (
901
  f"Canvas {idx + 1}/{len(canvases)} · group {g_idx + 1} · "
902
+ f"{structure_label} · sampled #{ss}-{ee} · src {src_start}-{src_end} · "
903
  f"I src#{src_start} + {p_patch_count} P patches from "
904
  f"{p_frame_count} frame{'s' if p_frame_count != 1 else ''}"
905
  )
 
932
  "bitcost_pct": float(bitcost_pct),
933
  "fade_strength": float(fade_strength),
934
  "gop": gop_resolved,
935
+ "canvas_policy": "one_canvas_per_group_with_per_frame_p_sections",
936
  "i_frame_policy": "first_frame_full_in_each_group",
937
  },
938
  "gop_groups": [
 
945
  "end_source_frame_id": int(fids[e]) if e < len(fids) else None,
946
  "source_frame_ids": [int(fids[i]) for i in range(s, e + 1)],
947
  "n_frames": int(e - s + 1),
948
+ "structure_label": " ".join(["I"] + ["P"] * max(0, e - s)),
949
  "i_frame_source_id": int(fids[s]) if s < len(fids) else None,
950
+ "p_source_frame_ids": [int(fids[i]) for i in range(s + 1, e + 1)],
951
  "p_frame_count": int(max(0, e - s)),
952
+ "p_frame_patch_counts": [int(masks[i].sum()) for i in range(s + 1, e + 1)],
953
  "p_frame_selected_patches": int(sum(int(m.sum()) for m in masks[s + 1:e + 1])),
954
  "selected": int(sum(int(m.sum()) for m in masks[s:e + 1])),
955
  }
 
999
  [int(fids[x]) for x in range(sub_ranges[i][1], sub_ranges[i][2] + 1)]
1000
  if i < len(sub_ranges) else []
1001
  ),
1002
+ "structure_label": (
1003
+ " ".join(["I"] + ["P"] * max(0, sub_ranges[i][2] - sub_ranges[i][1]))
1004
+ if i < len(sub_ranges) else "I"
1005
+ ),
1006
  "i_frame_source_id": (
1007
  int(fids[sub_ranges[i][1]]) if i < len(sub_ranges) else None
1008
  ),
1009
+ "p_source_frame_ids": (
1010
+ [int(fids[x]) for x in range(sub_ranges[i][1] + 1, sub_ranges[i][2] + 1)]
1011
+ if i < len(sub_ranges) else []
1012
+ ),
1013
  "p_frame_count": (
1014
  int(max(0, sub_ranges[i][2] - sub_ranges[i][1]))
1015
  if i < len(sub_ranges) else 0
1016
  ),
1017
+ "p_frame_patch_counts": (
1018
+ [int(masks[x].sum()) for x in range(sub_ranges[i][1] + 1, sub_ranges[i][2] + 1)]
1019
+ if i < len(sub_ranges) else []
1020
+ ),
1021
  "p_frame_selected_patches": (
1022
  int(sum(int(m.sum()) for m in masks[sub_ranges[i][1] + 1:sub_ranges[i][2] + 1]))
1023
  if i < len(sub_ranges) else 0
1024
  ),
1025
+ "structure": "Full I-frame on top; one packed P section per "
1026
+ "later frame, in time order.",
1027
  }
1028
  for i in range(len(canvases))
1029
  ],
 
1584
  gop = gr.Radio(
1585
  [
1586
  ("GOP = 4 — fixed 4-frame groups", "4"),
1587
+ ("GOP = 5 — fixed 5-frame groups", "5"),
1588
  ("GOP = 8 — fixed 8-frame groups", "8"),
1589
  ("GOP = 16 — fixed 16-frame groups", "16"),
1590
  ("Codec-stream: adaptive groups by saliency energy", "dynamic"),
 
1592
  value="8",
1593
  label="GOP (group of pictures)",
1594
  info="Splits sampled frames into GOP groups. Each group "
1595
+ "produces exactly one GOP canvas: the group's first "
1596
+ "frame stays whole as the I-frame, and each later "
1597
+ "frame gets its own P section below it. So GOP=4 "
1598
+ "becomes I P P P, GOP=5 becomes I P P P P. With 32 "
1599
+ "sampled frames and GOP=8, this yields 4 canvases. "
1600
  "Codec-stream mode adaptively groups by saliency "
1601
  "energy, targeting roughly 8-64 sampled frames per group.",
1602
  )
 
1671
  gr.Markdown("### Packed canvases (one per GOP group)")
1672
  gr.Markdown(
1673
  "<small>Each canvas is one GOP group rendered in "
1674
+ "<b>I/P structure</b>: the group's first frame is "
1675
+ "the <b>I-frame</b> kept whole on top, and each "
1676
+ "later frame gets its own packed <b>P-frame</b> "
1677
+ "section below in time order.</small>"
1678
  )
1679
  canvas_out = gr.Gallery(
1680
  label="", show_label=False,