forked from WeChatCV/Stand-In_Preprocessor_ComfyUI
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcropper.py
More file actions
68 lines (50 loc) · 2.46 KB
/
cropper.py
File metadata and controls
68 lines (50 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import torch
class VideoFramePreprocessor:
    """Node that trims and center-crops a batch of video frames.

    The frame count is reduced to the largest value of the form ``4n + 1``
    that does not exceed the input count (trailing frames are dropped), and
    the height and width are center-cropped down to multiples of 16.
    Besides the processed frames, the node outputs the resulting width,
    height and frame count as integers.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: a single required ``images`` of type ``IMAGE``."""
        return {
            "required": {
                "images": ("IMAGE",),  # Input is a batch of video frames
            }
        }

    # Output slots: the processed frames followed by three INT values.
    # RETURN_NAMES lists them in the same order as process_frames() returns them.
    RETURN_TYPES = ("IMAGE", "INT", "INT", "INT")
    RETURN_NAMES = ("processed_images", "width", "height", "frame_count")
    FUNCTION = "process_frames"
    CATEGORY = "Stand-In"

    def process_frames(self, images: torch.Tensor):
        """Trim the frame count to ``4n + 1`` and crop H/W to multiples of 16.

        Args:
            images: 4-D tensor indexed as (frames, height, width, last_dim).
                The last dimension is passed through untouched (presumably
                the colour channels).

        Returns:
            A tuple ``(processed_images, width, height, frame_count)`` where
            ``processed_images`` is a slice of ``images`` and the three
            integers describe its final shape.

        Raises:
            ValueError: If ``images`` is not 4-D, contains no frames, or has
                a height or width smaller than 16 (nothing would be left
                after cropping).
        """
        if images.dim() != 4:
            raise ValueError("Input must be a batch of images (video frames).")

        total_frames, original_h, original_w, _ = images.shape

        # Without this guard an empty batch would yield a negative target
        # frame count (Python's modulo of -1 is 3) and an output that does
        # not satisfy the 4n+1 guarantee.
        if total_frames < 1:
            raise ValueError("Input batch contains no frames.")

        # Flooring a dimension below 16 to a multiple of 16 gives 0, which
        # would silently produce a zero-area image.
        if original_h < 16 or original_w < 16:
            raise ValueError(
                "Frames must be at least 16x16 to be cropped to a multiple of 16, "
                f"got {original_w}x{original_h}."
            )

        print(f"Original video specs: {total_frames} frames, {original_w}x{original_h}")

        # 1. Trim frame count to be 4n+1 by dropping frames from the end.
        new_total_frames = total_frames - ((total_frames - 1) % 4)
        if new_total_frames != total_frames:
            print(f"Trimming frames to be 4n+1: {total_frames} -> {new_total_frames}")
            images = images[:new_total_frames, :, :, :]
        else:
            print("Frame count already meets 4n+1 requirement. No trimming needed.")

        # 2. Crop dimensions to the nearest multiple of 16 (rounding down).
        new_h = (original_h // 16) * 16
        new_w = (original_w // 16) * 16
        if new_h != original_h or new_w != original_w:
            print(f"Cropping dimensions to a multiple of 16: {original_w}x{original_h} -> {new_w}x{new_h}")
            # Center the crop window; when the excess is odd, the extra
            # row/column is removed from the bottom/right edge.
            h_start = (original_h - new_h) // 2
            w_start = (original_w - new_w) // 2
            processed_images = images[:, h_start : h_start + new_h, w_start : w_start + new_w, :]
        else:
            print("Dimensions are already multiples of 16. No cropping needed.")
            processed_images = images

        # Report the final dimensions straight from the processed tensor.
        final_frames, final_h, final_w, _ = processed_images.shape
        print(f"Final video specs: {final_frames} frames, {final_w}x{final_h}")

        return (processed_images, final_w, final_h, final_frames)
# Registration tables for this module's node. Both dictionaries are keyed by
# the same node identifier, so it is defined once to keep them in sync.
_NODE_ID = "VideoFramePreprocessor"

# Node identifier -> implementing class.
NODE_CLASS_MAPPINGS = {_NODE_ID: VideoFramePreprocessor}

# Node identifier -> human-readable display name.
NODE_DISPLAY_NAME_MAPPINGS = {_NODE_ID: "Stand-In Trimmer & Cropper"}