IPPP canvas: overlay P-frame patches onto I-frame at same dimensions
Replaces the I-on-top + P-grid-below layout with the proper codec
semantics: every frame in a group shares the picture size; a P-frame
only encodes the macroblocks that need to change.
Per sub-range:
1. Start canvas = the I-frame's image, cropped to a whole number of
patches (canvas_h × canvas_w = hb*patch × wb*patch).
2. For each P-frame in time order, for every saliency-selected
patch position (i, j), overwrite canvas[i*patch:(i+1)*patch,
j*patch:(j+1)*patch] with the P-frame's pixels at that position.
3. The final canvas is what the encoder would have reconstructed at the
end of this sub-range. Moving subjects leave visible 'ghost' trails
because successive P-frames overwrite patches at different, sometimes
overlapping, positions, so the canvas shows the motion over time.
Every canvas now has exactly the I-frame's dimensions, so they all
match each other regardless of how many P-frames the sub-range had.
Accounting: total_selected_patches_incl_i_frames now sums the I-frame
grid (hb*wb) plus the count of P-frame overlay hits (the same position
can be overlaid by multiple P-frames; each hit counts).
|
@@ -451,16 +451,28 @@ def write_mp4(frames: List[np.ndarray], path: str, fps: float) -> None:
|
|
| 451 |
def _build_ippp_canvas(
|
| 452 |
frames: List[np.ndarray], masks: List[np.ndarray],
|
| 453 |
i_idx: int, p_range: range, patch: int,
|
| 454 |
-
) -> np.ndarray:
|
| 455 |
-
"""Build one IPPP canvas
|
| 456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
i_frame = frames[i_idx]
|
| 458 |
h, w = i_frame.shape[:2]
|
| 459 |
hb, wb = h // patch, w // patch
|
| 460 |
-
canvas_w = wb * patch
|
| 461 |
-
|
| 462 |
|
| 463 |
-
|
| 464 |
for k in p_range:
|
| 465 |
if k >= len(frames):
|
| 466 |
break
|
|
@@ -468,19 +480,15 @@ def _build_ippp_canvas(
|
|
| 468 |
for i in range(m.shape[0]):
|
| 469 |
for j in range(m.shape[1]):
|
| 470 |
if m[i, j]:
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
for idx, p in enumerate(p_patches):
|
| 481 |
-
r, c = divmod(idx, wb)
|
| 482 |
-
p_grid[r * patch:(r + 1) * patch, c * patch:(c + 1) * patch] = p
|
| 483 |
-
return np.vstack([i_block, p_grid])
|
| 484 |
|
| 485 |
|
| 486 |
def _allocate_canvases_per_group(
|
|
@@ -542,19 +550,19 @@ def pack_canvases_per_group(
|
|
| 542 |
ss = cursor
|
| 543 |
ee = min(e, cursor + sub_len - 1)
|
| 544 |
cursor = ee + 1
|
| 545 |
-
canvas = _build_ippp_canvas(
|
| 546 |
frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
|
| 547 |
patch=patch,
|
| 548 |
)
|
| 549 |
canvases.append(canvas)
|
| 550 |
sub_ranges.append((g_idx, ss, ee))
|
| 551 |
-
# Accounting
|
| 552 |
-
|
|
|
|
|
|
|
|
|
|
| 553 |
hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
|
| 554 |
-
total_selected += hb * wb
|
| 555 |
-
for kk in range(ss + 1, ee + 1):
|
| 556 |
-
if kk < len(masks):
|
| 557 |
-
total_selected += int(masks[kk].sum())
|
| 558 |
|
| 559 |
if not canvases:
|
| 560 |
canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
|
|
|
|
| 451 |
def _build_ippp_canvas(
|
| 452 |
frames: List[np.ndarray], masks: List[np.ndarray],
|
| 453 |
i_idx: int, p_range: range, patch: int,
|
| 454 |
+
) -> Tuple[np.ndarray, int]:
|
| 455 |
+
"""Build one IPPP canvas at the *same dimensions as the I-frame*.
|
| 456 |
+
|
| 457 |
+
Codec convention: every frame in a group shares the picture size; a
|
| 458 |
+
P-frame only encodes the macroblocks that need to change. So:
|
| 459 |
+
1. Initialise the canvas to the I-frame's full image.
|
| 460 |
+
2. For each P-frame in time order, replace each saliency-selected
|
| 461 |
+
patch position with the P-frame's pixels at that position.
|
| 462 |
+
3. The canvas now reads as 'what the encoder would have reconstructed
|
| 463 |
+
at the end of this group' — same shape as the I-frame, with the
|
| 464 |
+
high-energy regions updated by later P-frames.
|
| 465 |
+
|
| 466 |
+
Returns (canvas, n_overlays) where n_overlays is the count of P-frame
|
| 467 |
+
patches that overwrote a position (a position may be hit multiple
|
| 468 |
+
times by different P-frames; we count each hit)."""
|
| 469 |
i_frame = frames[i_idx]
|
| 470 |
h, w = i_frame.shape[:2]
|
| 471 |
hb, wb = h // patch, w // patch
|
| 472 |
+
canvas_h, canvas_w = hb * patch, wb * patch
|
| 473 |
+
canvas = i_frame[:canvas_h, :canvas_w].copy()
|
| 474 |
|
| 475 |
+
n_overlays = 0
|
| 476 |
for k in p_range:
|
| 477 |
if k >= len(frames):
|
| 478 |
break
|
|
|
|
| 480 |
for i in range(m.shape[0]):
|
| 481 |
for j in range(m.shape[1]):
|
| 482 |
if m[i, j]:
|
| 483 |
+
canvas[
|
| 484 |
+
i * patch:(i + 1) * patch,
|
| 485 |
+
j * patch:(j + 1) * patch,
|
| 486 |
+
] = f[
|
| 487 |
+
i * patch:(i + 1) * patch,
|
| 488 |
+
j * patch:(j + 1) * patch,
|
| 489 |
+
]
|
| 490 |
+
n_overlays += 1
|
| 491 |
+
return canvas, n_overlays
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
|
| 493 |
|
| 494 |
def _allocate_canvases_per_group(
|
|
|
|
| 550 |
ss = cursor
|
| 551 |
ee = min(e, cursor + sub_len - 1)
|
| 552 |
cursor = ee + 1
|
| 553 |
+
canvas, n_p_overlays = _build_ippp_canvas(
|
| 554 |
frames, masks, i_idx=ss, p_range=range(ss + 1, ee + 1),
|
| 555 |
patch=patch,
|
| 556 |
)
|
| 557 |
canvases.append(canvas)
|
| 558 |
sub_ranges.append((g_idx, ss, ee))
|
| 559 |
+
# Accounting:
|
| 560 |
+
# - I-frame counts as the full grid (anchor, every position
|
| 561 |
+
# starts from it).
|
| 562 |
+
# - Each P-frame overlay is +1 (positions may be overlaid
|
| 563 |
+
# multiple times by later P-frames; we count each hit).
|
| 564 |
hb, wb = frames[ss].shape[0] // patch, frames[ss].shape[1] // patch
|
| 565 |
+
total_selected += hb * wb + n_p_overlays
|
|
|
|
|
|
|
|
|
|
| 566 |
|
| 567 |
if not canvases:
|
| 568 |
canvases = [np.full((patch, patch, 3), 255, dtype=np.uint8)]
|