FeilongTang committed on
Commit
e3d4cd8
·
1 Parent(s): 047a9df

IPPP canvas: overlay P-frame patches onto I-frame at same dimensions

Browse files

Replaces the I-on-top + P-grid-below layout with the proper codec
semantics: every frame in a group shares the picture size; a P-frame
only encodes the macroblocks that need to change.

Per sub-range:
1. Start canvas = I-frame's full image (canvas_h × canvas_w =
hb*patch × wb*patch).
2. For each P-frame in time order, for every saliency-selected
patch position (i, j), overwrite canvas[i*patch:(i+1)*patch,
j*patch:(j+1)*patch] with the P-frame's pixels at that position.
3. Final canvas is what the encoder would have reconstructed at the
end of this sub-range. Moving subjects produce visible 'ghost'
trails because successive P-frame overlays at overlapping
positions show the motion.

Every canvas now has exactly the I-frame's patch-aligned dimensions
(hb*patch × wb*patch), regardless of how many P-frames the sub-range had.

Accounting: total_selected_patches_incl_i_frames now sums the I-frame
grid (hb*wb) plus the count of P-frame overlay hits (same position can
be overlaid by multiple P-frames; each hit counts).

Files changed (1) hide show
  1. app.py +34 -26
app.py CHANGED
@@ -451,16 +451,28 @@ def write_mp4(frames: List[np.ndarray], path: str, fps: float) -> None:
451
  def _build_ippp_canvas(
452
  frames: List[np.ndarray], masks: List[np.ndarray],
453
  i_idx: int, p_range: range, patch: int,
454
- ) -> np.ndarray:
455
- """Build one IPPP canvas: full I-frame on top, P-frame selected patches
456
- in a wb-wide raster grid below."""
 
 
 
 
 
 
 
 
 
 
 
 
457
  i_frame = frames[i_idx]
458
  h, w = i_frame.shape[:2]
459
  hb, wb = h // patch, w // patch
460
- canvas_w = wb * patch
461
- i_block = i_frame[: hb * patch, : canvas_w].copy()
462
 
463
- p_patches: List[np.ndarray] = []
464
  for k in p_range:
465
  if k >= len(frames):
466
  break
@@ -468,19 +480,15 @@ def _build_ippp_canvas(
468
  for i in range(m.shape[0]):
469
  for j in range(m.shape[1]):
470
  if m[i, j]:
471
- p_patches.append(
472
- f[i * patch:(i + 1) * patch, j * patch:(j + 1) * patch].copy()
473
- )
474
-
475
- if not p_patches:
476
- return i_block
477
-
478
- rows = (len(p_patches) + wb - 1) // wb
479
- p_grid = np.full((rows * patch, canvas_w, 3), 255, dtype=np.uint8)
480
- for idx, p in enumerate(p_patches):
481
- r, c = divmod(idx, wb)
482
- p_grid[r * patch:(r + 1) * patch, c * patch:(c + 1) * patch] = p
483
- return np.vstack([i_block, p_grid])
484
 
485
 
486
  def _allocate_canvases_per_group(
@@ -542,19 +550,19 @@ def pack_canvases_per_group(
542
  ss = cursor
543
  ee = min(e, cursor + sub_len - 1)
544
  cursor = ee + 1
545
- canvas = _build_ippp_canvas(
546
  frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
547
  patch=patch,
548
  )
549
  canvases.append(canvas)
550
  sub_ranges.append((g_idx, ss, ee))
551
- # Accounting
552
- i_h, i_w = canvas.shape[:2]
 
 
 
553
  hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
554
- total_selected += hb * wb # I-frame counts as fully kept.
555
- for kk in range(ss + 1, ee + 1):
556
- if kk < len(masks):
557
- total_selected += int(masks[kk].sum())
558
 
559
  if not canvases:
560
  canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
 
451
  def _build_ippp_canvas(
452
  frames: List[np.ndarray], masks: List[np.ndarray],
453
  i_idx: int, p_range: range, patch: int,
454
+ ) -> Tuple[np.ndarray, int]:
455
+ """Build one IPPP canvas at the *same dimensions as the I-frame*.
456
+
457
+ Codec convention: every frame in a group shares the picture size; a
458
+ P-frame only encodes the macroblocks that need to change. So:
459
+ 1. Initialise the canvas to the I-frame's full image.
460
+ 2. For each P-frame in time order, replace each saliency-selected
461
+ patch position with the P-frame's pixels at that position.
462
+ 3. The canvas now reads as 'what the encoder would have reconstructed
463
+ at the end of this group' — same shape as the I-frame, with the
464
+ high-energy regions updated by later P-frames.
465
+
466
+ Returns (canvas, n_overlays) where n_overlays is the count of P-frame
467
+ patches that overwrote a position (a position may be hit multiple
468
+ times by different P-frames; we count each hit)."""
469
  i_frame = frames[i_idx]
470
  h, w = i_frame.shape[:2]
471
  hb, wb = h // patch, w // patch
472
+ canvas_h, canvas_w = hb * patch, wb * patch
473
+ canvas = i_frame[:canvas_h, :canvas_w].copy()
474
 
475
+ n_overlays = 0
476
  for k in p_range:
477
  if k >= len(frames):
478
  break
 
480
  for i in range(m.shape[0]):
481
  for j in range(m.shape[1]):
482
  if m[i, j]:
483
+ canvas[
484
+ i * patch:(i + 1) * patch,
485
+ j * patch:(j + 1) * patch,
486
+ ] = f[
487
+ i * patch:(i + 1) * patch,
488
+ j * patch:(j + 1) * patch,
489
+ ]
490
+ n_overlays += 1
491
+ return canvas, n_overlays
 
 
 
 
492
 
493
 
494
  def _allocate_canvases_per_group(
 
550
  ss = cursor
551
  ee = min(e, cursor + sub_len - 1)
552
  cursor = ee + 1
553
+ canvas, n_p_overlays = _build_ippp_canvas(
554
  frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
555
  patch=patch,
556
  )
557
  canvases.append(canvas)
558
  sub_ranges.append((g_idx, ss, ee))
559
+ # Accounting:
560
+ # - I-frame counts as the full grid (anchor, every position
561
+ # starts from it).
562
+ # - Each P-frame overlay is +1 (positions may be overlaid
563
+ # multiple times by later P-frames; we count each hit).
564
  hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
565
+ total_selected += hb * wb + n_p_overlays
 
 
 
566
 
567
  if not canvases:
568
  canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]