Commit ·
34df0c5
1
Parent(s): 2f7884e
Restore original visualization resize behavior
Browse files
app.py
CHANGED
|
@@ -8,8 +8,8 @@ high local complexity = roughly what the encoder would spend bits on).
|
|
| 8 |
|
| 9 |
Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
|
| 10 |
1. Uniformly sample N frames from the input video.
|
| 11 |
-
2.
|
| 12 |
-
|
| 13 |
3. Slice every frame into a patch grid; score each patch by its
|
| 14 |
Sobel gradient magnitude mean.
|
| 15 |
4. Pick the top-K highest-scoring patches under the selected GOP
|
|
@@ -22,6 +22,7 @@ Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
|
|
| 22 |
"""
|
| 23 |
|
| 24 |
import json
|
|
|
|
| 25 |
import os
|
| 26 |
import shutil
|
| 27 |
import subprocess
|
|
@@ -48,7 +49,7 @@ DEMO_PRESET = (
|
|
| 48 |
DEMO_VIDEO_PATH, # video_in
|
| 49 |
16, # sample_frames
|
| 50 |
14, # patch_size
|
| 51 |
-
|
| 52 |
150000, # max_pixels
|
| 53 |
"sbs", # viz_mode
|
| 54 |
0.55, # heatmap_alpha
|
|
@@ -63,22 +64,16 @@ DEMO_PRESET = (
|
|
| 63 |
|
| 64 |
|
| 65 |
def smart_resize(frame: np.ndarray, max_pixels: int, factor: int) -> np.ndarray:
|
| 66 |
-
"""Resize
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
`max_pixels` is kept for API compatibility with earlier revisions, but
|
| 78 |
-
the frame token count is now controlled by `factor` directly.
|
| 79 |
-
"""
|
| 80 |
-
side_px = int(factor) * int(factor)
|
| 81 |
-
return cv2.resize(frame, (side_px, side_px), interpolation=cv2.INTER_AREA)
|
| 82 |
|
| 83 |
|
| 84 |
def sample_frame_ids(total: int, n: int) -> List[int]:
|
|
@@ -1399,14 +1394,12 @@ with gr.Blocks(**_BLOCK_KW) as demo:
|
|
| 1399 |
4, 64, value=16, step=1, label="Sampled frames",
|
| 1400 |
)
|
| 1401 |
top_k = gr.Slider(
|
| 1402 |
-
16, 16384, value=
|
| 1403 |
label="Total patches budget (whole video)",
|
| 1404 |
-
info="
|
| 1405 |
-
"
|
| 1406 |
-
"
|
| 1407 |
-
"
|
| 1408 |
-
"budget < sample_frames x patch_size^2, the full-frame "
|
| 1409 |
-
"baseline will use fewer frames than codec.",
|
| 1410 |
)
|
| 1411 |
patch_size = gr.Radio(
|
| 1412 |
PATCH_CHOICES, value=14, label="Patch size (px)",
|
|
|
|
| 8 |
|
| 9 |
Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
|
| 10 |
1. Uniformly sample N frames from the input video.
|
| 11 |
+
2. smart_resize each frame so dims are multiples of `patch` and the
|
| 12 |
+
total pixel count <= max_pixels.
|
| 13 |
3. Slice every frame into a patch grid; score each patch by its
|
| 14 |
Sobel gradient magnitude mean.
|
| 15 |
4. Pick the top-K highest-scoring patches under the selected GOP
|
|
|
|
| 22 |
"""
|
| 23 |
|
| 24 |
import json
|
| 25 |
+
import math
|
| 26 |
import os
|
| 27 |
import shutil
|
| 28 |
import subprocess
|
|
|
|
| 49 |
DEMO_VIDEO_PATH, # video_in
|
| 50 |
16, # sample_frames
|
| 51 |
14, # patch_size
|
| 52 |
+
1024, # total_patches
|
| 53 |
150000, # max_pixels
|
| 54 |
"sbs", # viz_mode
|
| 55 |
0.55, # heatmap_alpha
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
def smart_resize(frame: np.ndarray, max_pixels: int, factor: int) -> np.ndarray:
    """Resize ``frame`` so both sides are multiples of ``factor``.

    When the frame holds more than ``max_pixels`` pixels, both sides are
    first scaled down by the same ratio so the area fits the budget. Each
    side is then floored to a multiple of ``factor`` and clamped to at least
    ``factor``. Because of that clamp, the result can still exceed
    ``max_pixels`` when the budget is smaller than ``factor * factor`` or
    when one side of the frame is much shorter than the other.

    Args:
        frame: Image array whose first two axes are (height, width).
        max_pixels: Target upper bound on ``height * width`` after resizing.
        factor: Grid step in pixels that both output sides must be a
            multiple of; must be at least 1.

    Returns:
        The frame resampled to the computed size with ``cv2.INTER_AREA``.

    Raises:
        ValueError: If ``factor`` is less than 1 or ``max_pixels`` is
            negative.
    """
    # Accept integral floats (e.g. 14.0) so the sizes passed to cv2 stay ints.
    factor = int(factor)
    if factor < 1:
        raise ValueError(f"factor must be >= 1, got {factor}")
    if max_pixels < 0:
        raise ValueError(f"max_pixels must be >= 0, got {max_pixels}")

    height, width = frame.shape[:2]
    area = height * width
    if area > max_pixels:
        # Same ratio on both sides keeps the aspect ratio (up to rounding).
        shrink = math.sqrt(max_pixels / area)
        height = int(height * shrink)
        width = int(width * shrink)

    # Floor to the patch grid; never go below a single patch per side.
    height = max(factor, (height // factor) * factor)
    width = max(factor, (width // factor) * factor)

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def sample_frame_ids(total: int, n: int) -> List[int]:
|
|
|
|
| 1394 |
4, 64, value=16, step=1, label="Sampled frames",
|
| 1395 |
)
|
| 1396 |
top_k = gr.Slider(
|
| 1397 |
+
16, 16384, value=1024, step=16,
|
| 1398 |
label="Total patches budget (whole video)",
|
| 1399 |
+
info="The single budget shared across the whole video. "
|
| 1400 |
+
"The uniform full-frame baseline will fit as many "
|
| 1401 |
+
"complete frames as this budget allows; the codec path "
|
| 1402 |
+
"spends the same budget on saliency-selected patches.",
|
|
|
|
|
|
|
| 1403 |
)
|
| 1404 |
patch_size = gr.Radio(
|
| 1405 |
PATCH_CHOICES, value=14, label="Patch size (px)",
|