FeilongTang committed on
Commit
34df0c5
·
1 Parent(s): 2f7884e

Restore original visualization resize behavior

Browse files
Files changed (1) hide show
  1. app.py +19 -26
app.py CHANGED
@@ -8,8 +8,8 @@ high local complexity = roughly what the encoder would spend bits on).
8
 
9
  Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
10
  1. Uniformly sample N frames from the input video.
11
- 2. Resize each sampled frame to a fixed square patch grid driven by
12
- `patch_size`.
13
  3. Slice every frame into a patch grid; score each patch by its
14
  Sobel gradient magnitude mean.
15
  4. Pick the top-K highest-scoring patches under the selected GOP
@@ -22,6 +22,7 @@ Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
22
  """
23
 
24
  import json
 
25
  import os
26
  import shutil
27
  import subprocess
@@ -48,7 +49,7 @@ DEMO_PRESET = (
48
  DEMO_VIDEO_PATH, # video_in
49
  16, # sample_frames
50
  14, # patch_size
51
- 3136, # total_patches (= 16 * 14^2)
52
  150000, # max_pixels
53
  "sbs", # viz_mode
54
  0.55, # heatmap_alpha
@@ -63,22 +64,16 @@ DEMO_PRESET = (
63
 
64
 
65
  def smart_resize(frame: np.ndarray, max_pixels: int, factor: int) -> np.ndarray:
66
- """Resize each frame to a square patch grid.
67
-
68
- The demo uses `factor` as both:
69
- - patch size in pixels
70
- - patches per side in the resized frame
71
-
72
- So patch_size=14 means:
73
- - each patch is 14 x 14 pixels
74
- - each frame is resized to 14 x 14 patches
75
- - each frame therefore contributes 14^2 = 196 patch slots
76
-
77
- `max_pixels` is kept for API compatibility with earlier revisions, but
78
- the frame token count is now controlled by `factor` directly.
79
- """
80
- side_px = int(factor) * int(factor)
81
- return cv2.resize(frame, (side_px, side_px), interpolation=cv2.INTER_AREA)
82
 
83
 
84
  def sample_frame_ids(total: int, n: int) -> List[int]:
@@ -1399,14 +1394,12 @@ with gr.Blocks(**_BLOCK_KW) as demo:
1399
  4, 64, value=16, step=1, label="Sampled frames",
1400
  )
1401
  top_k = gr.Slider(
1402
- 16, 16384, value=3136, step=16,
1403
  label="Total patches budget (whole video)",
1404
- info="Default = sample_frames x patch_size^2 "
1405
- "(16 x 14^2 = 3136). The uniform baseline spends "
1406
- "this budget on evenly sampled complete frames; the "
1407
- "codec path spends it on saliency-selected patches. If "
1408
- "budget < sample_frames x patch_size^2, the full-frame "
1409
- "baseline will use fewer frames than codec.",
1410
  )
1411
  patch_size = gr.Radio(
1412
  PATCH_CHOICES, value=14, label="Patch size (px)",
 
8
 
9
  Pipeline (mirrors codec_tools/pipeline/process_video_bitcost_readiness.py):
10
  1. Uniformly sample N frames from the input video.
11
+ 2. smart_resize each frame so both dimensions are multiples of
12
+ `patch_size` and the total pixel count is <= `max_pixels`.
13
  3. Slice every frame into a patch grid; score each patch by its
14
  Sobel gradient magnitude mean.
15
  4. Pick the top-K highest-scoring patches under the selected GOP
 
22
  """
23
 
24
  import json
25
+ import math
26
  import os
27
  import shutil
28
  import subprocess
 
49
  DEMO_VIDEO_PATH, # video_in
50
  16, # sample_frames
51
  14, # patch_size
52
+ 1024, # total_patches
53
  150000, # max_pixels
54
  "sbs", # viz_mode
55
  0.55, # heatmap_alpha
 
64
 
65
 
66
  def smart_resize(frame: np.ndarray, max_pixels: int, factor: int) -> np.ndarray:
67
+ """Resize so h,w are multiples of `factor` and h*w <= max_pixels."""
68
+ h, w = frame.shape[:2]
69
+ pixels = h * w
70
+ if pixels > max_pixels:
71
+ scale = math.sqrt(max_pixels / pixels)
72
+ h = max(factor, int(h * scale))
73
+ w = max(factor, int(w * scale))
74
+ h = max(factor, (h // factor) * factor)
75
+ w = max(factor, (w // factor) * factor)
76
+ return cv2.resize(frame, (w, h), interpolation=cv2.INTER_AREA)
 
 
 
 
 
 
77
 
78
 
79
  def sample_frame_ids(total: int, n: int) -> List[int]:
 
1394
  4, 64, value=16, step=1, label="Sampled frames",
1395
  )
1396
  top_k = gr.Slider(
1397
+ 16, 16384, value=1024, step=16,
1398
  label="Total patches budget (whole video)",
1399
+ info="The single budget shared across the whole video. "
1400
+ "The uniform full-frame baseline will fit as many "
1401
+ "complete frames as this budget allows; the codec path "
1402
+ "spends the same budget on saliency-selected patches.",
 
 
1403
  )
1404
  patch_size = gr.Radio(
1405
  PATCH_CHOICES, value=14, label="Patch size (px)",