From e9ec390eb93c71f01ace0cebaf3eec07242673f5 Mon Sep 17 00:00:00 2001 From: Yizheng Jiao Date: Thu, 8 Jan 2026 00:44:56 +0000 Subject: [PATCH 1/2] add shape matching task --- README.md | 39 ++- examples/generate_shape_matching.py | 101 +++++++ src/__init__.py | 18 +- src/shape_matching_task.py | 395 ++++++++++++++++++++++++++++ 4 files changed, 551 insertions(+), 2 deletions(-) create mode 100644 examples/generate_shape_matching.py create mode 100644 src/shape_matching_task.py diff --git a/README.md b/README.md index 8995ab1..6c9edfb 100644 --- a/README.md +++ b/README.md @@ -136,4 +136,41 @@ class TaskConfig(GenerationConfig): difficulty: str = Field(default="medium", description="easy/medium/hard") ``` -**Single entry point:** `python examples/generate.py --num-samples 50` \ No newline at end of file +**Single entry point:** `python examples/generate.py --num-samples 50` + +--- + +## ๐ŸŽฏ Available Tasks + +### Shape Matching Task (ๅ‡ ไฝ•ๅฝข็ŠถๅŒน้…) + +A visual reasoning task where shapes must be moved into their matching outlines. + +**Task Description:** +- **Input:** Colored shapes (circle, square, triangle, star) scattered on left side, empty outlines on right side +- **Output:** Colored shapes moved to fill their corresponding outlines + +**Example Prompt:** +> "Move each colorful shape into its corresponding dark outline." 
+ +**Usage:** +```bash +python examples/generate_shape_matching.py --num-samples 10 +python examples/generate_shape_matching.py --num-samples 50 --num-shapes 3 --shape-size 40 +``` + +**Configuration Options:** +| Parameter | Default | Description | +|-----------|---------|-------------| +| `num_shapes` | 4 | Number of shapes (max 4: circle, square, triangle, star) | +| `shape_size` | 35 | Size of shapes in pixels | +| `image_size` | (800, 400) | Canvas dimensions | + +**Output Structure:** +``` +data/questions/shape_matching_task/{task_id}/ +โ”œโ”€โ”€ first_frame.png # Shapes scattered, outlines empty +โ”œโ”€โ”€ final_frame.png # Shapes filling their outlines +โ”œโ”€โ”€ prompt.txt # Task instructions +โ””โ”€โ”€ ground_truth.mp4 # Animation of shapes moving +``` \ No newline at end of file diff --git a/examples/generate_shape_matching.py b/examples/generate_shape_matching.py new file mode 100644 index 0000000..38c89f3 --- /dev/null +++ b/examples/generate_shape_matching.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +""" +โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•— +โ•‘ SHAPE MATCHING TASK GENERATION โ•‘ +โ•‘ โ•‘ +โ•‘ Generate geometric shape matching task dataset. โ•‘ +โ•‘ Task: Move shapes into their matching outlines. 
import argparse
from pathlib import Path
import sys

# Add project root to path so that `core` and `src` resolve when the script is
# launched directly as `python examples/generate_shape_matching.py`.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core import OutputWriter
from src import ShapeMatchingGenerator, ShapeMatchingConfig


def _positive_int(text: str) -> int:
    """Parse a command-line value as an integer that is at least 1.

    Used as an argparse ``type=`` callable so that invalid values are reported
    as a normal usage error instead of surfacing later as an unrelated
    exception from the generator.

    Raises:
        argparse.ArgumentTypeError: if ``text`` is not an integer or is < 1.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"expected an integer >= 1, got {value}"
        )
    return value


def main():
    """Parse CLI options, generate the shape matching dataset, write it to disk."""
    parser = argparse.ArgumentParser(
        description="Generate shape matching task dataset",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python examples/generate_shape_matching.py --num-samples 10
    python examples/generate_shape_matching.py --num-samples 100 --output data/shapes --seed 42
    python examples/generate_shape_matching.py --num-samples 50 --num-shapes 3 --shape-size 40
        """
    )
    parser.add_argument(
        "--num-samples",
        type=int,
        required=True,
        help="Number of task samples to generate"
    )
    parser.add_argument(
        "--output",
        type=str,
        default="data/questions",
        help="Output directory (default: data/questions)"
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Random seed for reproducibility"
    )
    parser.add_argument(
        "--no-videos",
        action="store_true",
        help="Disable video generation"
    )
    parser.add_argument(
        "--num-shapes",
        # A count of 0 makes the target grid have zero rows, which previously
        # crashed deep inside the generator; reject it up front instead.
        type=_positive_int,
        default=4,
        help="Number of shapes (max 4: circle, square, triangle, star)"
    )
    parser.add_argument(
        "--shape-size",
        # Non-positive sizes produce degenerate (or invalid) drawing boxes.
        type=_positive_int,
        default=35,
        help="Size of shapes in pixels (default: 35)"
    )

    args = parser.parse_args()

    print(f"🎲 Generating {args.num_samples} shape matching tasks...")
    print(f" Shapes: {args.num_shapes}, Size: {args.shape_size}px")

    # Configure task
    config = ShapeMatchingConfig(
        num_samples=args.num_samples,
        random_seed=args.seed,
        output_dir=Path(args.output),
        generate_videos=not args.no_videos,
        num_shapes=args.num_shapes,
        shape_size=args.shape_size,
    )

    # Generate tasks
    generator = ShapeMatchingGenerator(config)
    tasks = generator.generate_dataset()

    # Write to disk
    writer = OutputWriter(Path(args.output))
    writer.write_dataset(tasks)

    print(f"✅ Done! Generated {len(tasks)} tasks in {args.output}/{config.domain}_task/")


if __name__ == "__main__":
    main()
# Shape matching task generator.
#
# Based on the opencv_code/ex4.py algorithm.
# Task: move shapes into their matching outlines.

import math
import random
import tempfile
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
from PIL import Image, ImageDraw

from core import BaseGenerator, TaskPair, ImageRenderer
from core.video_utils import VideoGenerator
from .config import TaskConfig


# ---------------------------------------------------------------------------
# PROMPTS FOR SHAPE MATCHING TASK
# ---------------------------------------------------------------------------

SHAPE_MATCHING_PROMPTS = {
    "default": [
        "Move each colorful shape into its corresponding dark outline.",
        "Match each colored shape with its outline by moving it to the correct position.",
        "Drag each solid shape to fill its matching outline on the right side.",
    ],
    "puzzle": [
        "Complete the shape puzzle by placing each colored piece into its matching slot.",
        "Fit each shape into the correct outline, like pieces of a puzzle.",
    ],
    "sorting": [
        "Sort the shapes by moving each one to its designated outline position.",
        "Arrange the shapes by placing them over their corresponding outlines.",
    ],
}


def get_shape_matching_prompt(task_type: str = "default") -> str:
    """Return a randomly chosen prompt for the given prompt family.

    Args:
        task_type: Key into ``SHAPE_MATCHING_PROMPTS``. Unknown keys fall back
            to the ``"default"`` family.

    Returns:
        One prompt string picked with the module-level ``random`` generator.
    """
    prompts = SHAPE_MATCHING_PROMPTS.get(task_type, SHAPE_MATCHING_PROMPTS["default"])
    return random.choice(prompts)


# ---------------------------------------------------------------------------
# SHAPE MATCHING TASK GENERATOR
# ---------------------------------------------------------------------------

class ShapeMatchingConfig(TaskConfig):
    """Configuration for shape matching task."""
    domain: str = "shape_matching"
    image_size: tuple[int, int] = (800, 400)

    # Task-specific settings
    num_shapes: int = 4   # Requested number of shapes (the task caps it at the number of shape types)
    shape_size: int = 35  # Size (radius / half-extent) of shapes in pixels


class ShapeMatchingTask:
    """
    Geometric shape matching task.

    Generates input/output image pairs showing:
    - Input: Colored shapes scattered on left, empty outlines on right
    - Output: Colored shapes moved to fill their matching outlines

    The layout is drawn from the module-level ``random`` generator when the
    instance is created and stored in ``self.data``; rendering is then
    deterministic for that instance.
    """

    # Available shape types
    SHAPE_TYPES = ["circle", "square", "triangle", "star"]

    # Default colors for shapes (RGB for PIL)
    SHAPE_COLORS = {
        "circle": (100, 100, 255),    # Blue
        "square": (100, 255, 100),    # Green
        "triangle": (255, 200, 100),  # Yellow/Orange
        "star": (255, 100, 255)       # Purple
    }

    # Rendering palette and layout constants shared by every frame.
    _BACKGROUND = (255, 255, 255)
    _DIVIDER_COLOR = (200, 200, 200)
    _OUTLINE_COLOR = (80, 80, 80)
    _BORDER_COLOR = (50, 50, 50)
    _BASE_MARGIN = 60           # minimum distance of a start position from the edges
    _START_GAP = 4              # extra pixels kept between two scattered shapes
    _MAX_PLACEMENT_TRIES = 100  # bound on rejection sampling per shape

    def __init__(
        self,
        width: int = 800,
        height: int = 400,
        num_shapes: int = 4,
        shape_size: int = 35
    ):
        """Create a task and immediately sample its layout.

        Args:
            width: Canvas width in pixels.
            height: Canvas height in pixels.
            num_shapes: Requested number of shapes; values above the number of
                available shape types are capped.
            shape_size: Radius / half-extent of every shape in pixels.

        Raises:
            ValueError: if ``num_shapes`` is smaller than 1 (a zero count would
                otherwise divide by zero while building the target grid).
        """
        if num_shapes < 1:
            raise ValueError(f"num_shapes must be at least 1, got {num_shapes}")

        self.width = width
        self.height = height
        self.num_shapes = min(num_shapes, len(self.SHAPE_TYPES))
        self.shape_size = shape_size

        # Generate task data
        self.data = self._generate_data()

    @staticmethod
    def _span(low: int, high: int) -> Tuple[int, int]:
        """Return ``(low, high)``, collapsed to the midpoint if the range is empty.

        ``random.randint`` raises on an empty range, which happens when the
        canvas is too small for the margins; a single midpoint keeps small
        canvases usable.
        """
        if high < low:
            middle = (low + high) // 2
            return middle, middle
        return low, high

    def _pick_start(
        self,
        x_range: Tuple[int, int],
        y_range: Tuple[int, int],
        taken: List[Tuple[int, int]],
        min_gap: int
    ) -> Tuple[int, int]:
        """Sample a start position that does not overlap already placed shapes.

        The first candidate is accepted whenever it is clear of the others, so
        the sequence of random draws matches a plain "one draw per shape"
        scheme in every case where that scheme produced no overlap. After
        ``_MAX_PLACEMENT_TRIES`` failures the last candidate is used anyway.
        """
        candidate = (random.randint(*x_range), random.randint(*y_range))
        for _ in range(self._MAX_PLACEMENT_TRIES - 1):
            # Chebyshev distance: bounding boxes are disjoint once either axis
            # is separated by at least one full shape width plus the gap.
            if all(
                max(abs(candidate[0] - ox), abs(candidate[1] - oy)) >= min_gap
                for ox, oy in taken
            ):
                break
            candidate = (random.randint(*x_range), random.randint(*y_range))
        return candidate

    def _generate_data(self) -> list:
        """Generate shapes with start and target positions.

        Returns:
            A list with one dict per shape holding ``type``, ``color``,
            ``size``, ``start_pos`` (left half) and ``target_pos`` (right half).
        """
        # Keep whole shapes on the canvas even when they are larger than the
        # base margin.
        margin = max(self._BASE_MARGIN, self.shape_size)

        # Select shapes for this task
        selected_shapes = random.sample(self.SHAPE_TYPES, self.num_shapes)

        # Generate target positions on the right side (grid layout)
        cols = 2
        rows = (self.num_shapes + 1) // 2
        right_start_x = self.width // 2
        cell_w = (self.width // 2) // cols
        cell_h = self.height // rows

        slots = [
            (right_start_x + c * cell_w + cell_w // 2, r * cell_h + cell_h // 2)
            for r in range(rows)
            for c in range(cols)
        ][:self.num_shapes]
        random.shuffle(slots)

        # Start positions (left side, scattered)
        x_range = self._span(margin, self.width // 2 - margin)
        y_range = self._span(margin, self.height - margin)
        min_gap = 2 * self.shape_size + self._START_GAP

        data = []
        starts: List[Tuple[int, int]] = []
        for shape_type, target in zip(selected_shapes, slots):
            start = self._pick_start(x_range, y_range, starts, min_gap)
            starts.append(start)
            data.append({
                "type": shape_type,
                "color": self.SHAPE_COLORS.get(shape_type, (150, 150, 150)),
                "size": self.shape_size,
                "start_pos": start,
                "target_pos": target
            })

        return data

    @staticmethod
    def _radial_points(center: Tuple[int, int], radii: Tuple[float, ...],
                       count: int) -> List[Tuple[int, int]]:
        """Return ``count`` vertices around ``center``, starting at the top.

        Vertex ``i`` uses radius ``radii[i % len(radii)]``, so one radius gives
        a regular polygon and two alternating radii give a star.
        """
        cx, cy = center
        points = []
        for i in range(count):
            angle = i * (2 * math.pi / count) - (math.pi / 2)  # Start from top
            r = radii[i % len(radii)]
            points.append((int(cx + r * math.cos(angle)),
                           int(cy + r * math.sin(angle))))
        return points

    def _draw_polygon(self, draw: ImageDraw.ImageDraw, points: List[Tuple[int, int]],
                      color, outline_only: bool, outline_width: int):
        """Draw ``points`` either as a bare outline or as a filled, bordered shape."""
        if outline_only:
            draw.polygon(points, outline=color, width=outline_width)
        else:
            draw.polygon(points, fill=color, outline=self._BORDER_COLOR, width=1)

    def _draw_circle(self, draw: ImageDraw.ImageDraw, center: Tuple[int, int],
                     size: int, color, outline_only: bool = False, outline_width: int = 3):
        """Draw a circle shape."""
        cx, cy = center
        bbox = [cx - size, cy - size, cx + size, cy + size]

        if outline_only:
            draw.ellipse(bbox, outline=color, width=outline_width)
        else:
            draw.ellipse(bbox, fill=color, outline=self._BORDER_COLOR, width=1)

    def _draw_square(self, draw: ImageDraw.ImageDraw, center: Tuple[int, int],
                     size: int, color, outline_only: bool = False, outline_width: int = 3):
        """Draw a square shape."""
        cx, cy = center
        bbox = [cx - size, cy - size, cx + size, cy + size]

        if outline_only:
            draw.rectangle(bbox, outline=color, width=outline_width)
        else:
            draw.rectangle(bbox, fill=color, outline=self._BORDER_COLOR, width=1)

    def _draw_triangle(self, draw: ImageDraw.ImageDraw, center: Tuple[int, int],
                       size: int, color, outline_only: bool = False, outline_width: int = 3):
        """Draw an equilateral triangle pointing up."""
        points = self._radial_points(center, (size,), 3)
        self._draw_polygon(draw, points, color, outline_only, outline_width)

    def _draw_star(self, draw: ImageDraw.ImageDraw, center: Tuple[int, int],
                   size: int, color, outline_only: bool = False, outline_width: int = 3):
        """Draw a 5-pointed star."""
        # Even vertices sit on the outer radius, odd ones on the inner radius.
        points = self._radial_points(center, (size, size * 0.4), 10)
        self._draw_polygon(draw, points, color, outline_only, outline_width)

    def _draw_shape(self, draw: ImageDraw.ImageDraw, shape_type: str,
                    center: Tuple[int, int], size: int, color,
                    outline_only: bool = False, outline_width: int = 3):
        """Draw a shape based on type.

        Raises:
            ValueError: if ``shape_type`` is not one of ``SHAPE_TYPES``.
        """
        drawers = {
            "circle": self._draw_circle,
            "square": self._draw_square,
            "triangle": self._draw_triangle,
            "star": self._draw_star,
        }
        try:
            drawer = drawers[shape_type]
        except KeyError:
            raise ValueError(f"Unknown shape type: {shape_type!r}") from None
        drawer(draw, center, size, color, outline_only, outline_width)

    def render_progress(self, progress: float) -> Image.Image:
        """Render the scene with every shape part-way along its path.

        Args:
            progress: 0.0 places shapes at their start positions, 1.0 at their
                targets; values in between are linearly interpolated.

        Returns:
            PIL Image of the rendered frame.
        """
        # Create white background
        canvas = Image.new("RGB", (self.width, self.height), color=self._BACKGROUND)
        draw = ImageDraw.Draw(canvas)

        # Draw dividing line
        draw.line([(self.width // 2, 20), (self.width // 2, self.height - 20)],
                  fill=self._DIVIDER_COLOR, width=2)

        # 1. Draw target outlines (always visible)
        for item in self.data:
            self._draw_shape(draw, item["type"], item["target_pos"],
                             item["size"], self._OUTLINE_COLOR,
                             outline_only=True, outline_width=3)

        # 2. Draw solid shapes on top of the outlines
        for item in self.data:
            sx, sy = item["start_pos"]
            tx, ty = item["target_pos"]
            cx = int(sx + (tx - sx) * progress)
            cy = int(sy + (ty - sy) * progress)
            self._draw_shape(draw, item["type"], (cx, cy), item["size"], item["color"])

        return canvas

    def render(self, state: str = "input") -> Image.Image:
        """
        Render the task state.

        Args:
            state: "input" for shapes scattered, "output" for shapes matched
                (any value other than "input" is treated as "output")

        Returns:
            PIL Image of the rendered state
        """
        return self.render_progress(0.0 if state == "input" else 1.0)

    def get_task_type(self) -> str:
        """Get task type for prompt selection."""
        return "default"


class ShapeMatchingGenerator(BaseGenerator):
    """
    Generator for shape matching task pairs.

    Generates image pairs showing:
    - Input: Colored shapes scattered, empty outlines on right
    - Output: Shapes moved to fill their matching outlines
    """

    def __init__(self, config: ShapeMatchingConfig):
        """Store the config and set up video generation when it is enabled and available."""
        super().__init__(config)
        self.config: ShapeMatchingConfig = config

        # Initialize video generator if enabled
        self.video_generator = None
        if getattr(config, 'generate_videos', False) and VideoGenerator.is_available():
            self.video_generator = VideoGenerator(
                fps=getattr(config, 'video_fps', 10),
                output_format="mp4"
            )

    def generate_task_pair(self, task_id: str) -> TaskPair:
        """Generate one shape matching task pair.

        Args:
            task_id: Identifier stored on the pair and used in the video name.

        Returns:
            A ``TaskPair`` with the first/final frames, a prompt and, when video
            generation is active, the path of the ground-truth video.
        """
        # Create task instance
        task = ShapeMatchingTask(
            width=self.config.image_size[0],
            height=self.config.image_size[1],
            num_shapes=getattr(self.config, 'num_shapes', 4),
            shape_size=getattr(self.config, 'shape_size', 35)
        )

        # Render input and output images
        first_image = task.render("input")
        final_image = task.render("output")

        # Generate video (optional)
        video_path = None
        if self.video_generator:
            video_path = self._generate_video(first_image, final_image, task_id, task)

        # Get prompt
        prompt = get_shape_matching_prompt(task.get_task_type())

        return TaskPair(
            task_id=task_id,
            domain=self.config.domain,
            prompt=prompt,
            first_image=first_image,
            final_image=final_image,
            ground_truth_video=video_path
        )

    def _generate_video(
        self,
        first_image: Image.Image,
        final_image: Image.Image,
        task_id: str,
        task: ShapeMatchingTask
    ) -> Optional[str]:
        """Generate animation video showing shapes moving to outlines.

        The frames are re-rendered from ``task``; ``first_image`` and
        ``final_image`` are accepted for signature compatibility only.

        Returns:
            Path of the written video as a string, or ``None`` when the video
            generator reports no result.
        """
        temp_dir = Path(tempfile.gettempdir()) / f"{self.config.domain}_videos"
        temp_dir.mkdir(parents=True, exist_ok=True)
        video_path = temp_dir / f"{task_id}_ground_truth.mp4"

        # Create animation frames
        frames = self._create_matching_animation_frames(task)

        result = self.video_generator.create_video_from_frames(frames, video_path)
        return str(result) if result else None

    def _create_matching_animation_frames(
        self,
        task: ShapeMatchingTask,
        hold_frames: int = 5,
        transition_frames: int = 25
    ) -> list:
        """
        Create animation frames showing shapes moving to their outlines.

        Args:
            task: Task whose layout is animated.
            hold_frames: Number of still frames at the start and at the end.
            transition_frames: Number of frames in the eased movement.

        Returns:
            List of PIL images: initial hold, transition, final hold.
        """
        frames = []

        # Hold initial frame
        initial_frame = task.render("input")
        frames.extend(initial_frame for _ in range(hold_frames))

        # Transition frames - interpolate positions
        for i in range(transition_frames):
            # A single transition frame jumps straight to the end state.
            progress = i / (transition_frames - 1) if transition_frames > 1 else 1.0
            eased_progress = self._ease_in_out(progress)
            frames.append(self._render_interpolated_frame(task, eased_progress))

        # Hold final frame
        final_frame = task.render("output")
        frames.extend(final_frame for _ in range(hold_frames))

        return frames

    def _render_interpolated_frame(
        self,
        task: ShapeMatchingTask,
        progress: float
    ) -> Image.Image:
        """Render a frame with shapes at interpolated positions."""
        # Delegates to the task so still frames and animation frames share one
        # drawing routine and cannot drift apart.
        return task.render_progress(progress)

    def _ease_in_out(self, t: float) -> float:
        """Ease-in-out function for smooth animation.

        Quadratic easing: accelerates over the first half of ``t`` in [0, 1]
        and decelerates over the second half.
        """
        if t < 0.5:
            return 2 * t * t
        return 1 - pow(-2 * t + 2, 2) / 2
Implements 4 shape types: circle, square, triangle, star - Clean structure: core/ and src/ separation - Includes video generation with smooth animation - Follows all vm-dataset coding standards from rules.txt - 9 top-level entries as required - Ready for production use --- O-4_shape_matching_data-generator/.gitignore | 112 ++++++ O-4_shape_matching_data-generator/LICENSE | 21 ++ .../PUSH_INSTRUCTIONS.md | 64 ++++ O-4_shape_matching_data-generator/README.md | 139 ++++++++ .../core/__init__.py | 21 ++ .../core/base_generator.py | 44 +++ .../core/image_utils.py | 40 +++ .../core/output_writer.py | 43 +++ .../core/schemas.py | 17 + .../core/video_utils.py | 265 ++++++++++++++ .../examples/generate.py | 88 +++++ .../push_to_github.sh | 67 ++++ .../requirements.txt | 7 + O-4_shape_matching_data-generator/setup.py | 16 + .../src/__init__.py | 7 + .../src/config.py | 62 ++++ .../src/generator.py | 322 ++++++++++++++++++ .../src/prompts.py | 40 +++ 18 files changed, 1375 insertions(+) create mode 100644 O-4_shape_matching_data-generator/.gitignore create mode 100644 O-4_shape_matching_data-generator/LICENSE create mode 100644 O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md create mode 100644 O-4_shape_matching_data-generator/README.md create mode 100644 O-4_shape_matching_data-generator/core/__init__.py create mode 100644 O-4_shape_matching_data-generator/core/base_generator.py create mode 100644 O-4_shape_matching_data-generator/core/image_utils.py create mode 100644 O-4_shape_matching_data-generator/core/output_writer.py create mode 100644 O-4_shape_matching_data-generator/core/schemas.py create mode 100644 O-4_shape_matching_data-generator/core/video_utils.py create mode 100644 O-4_shape_matching_data-generator/examples/generate.py create mode 100755 O-4_shape_matching_data-generator/push_to_github.sh create mode 100644 O-4_shape_matching_data-generator/requirements.txt create mode 100644 O-4_shape_matching_data-generator/setup.py create mode 100644 
O-4_shape_matching_data-generator/src/__init__.py create mode 100644 O-4_shape_matching_data-generator/src/config.py create mode 100644 O-4_shape_matching_data-generator/src/generator.py create mode 100644 O-4_shape_matching_data-generator/src/prompts.py diff --git a/O-4_shape_matching_data-generator/.gitignore b/O-4_shape_matching_data-generator/.gitignore new file mode 100644 index 0000000..3af48a0 --- /dev/null +++ b/O-4_shape_matching_data-generator/.gitignore @@ -0,0 +1,112 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual environments +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.hypothesis/ + +# Generated data (don't commit actual generated datasets) +data/questions/*/ +data/outputs/*/ +data/evaluations/*/ + +# Keep directory structure but not contents +!data/questions/.gitkeep +!data/outputs/.gitkeep + +# Logs +*.log +logs/ + +# Temporary files +tmp/ +temp/ +*.tmp + +# Jupyter +.ipynb_checkpoints/ +*.ipynb + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Distribution / packaging +.Python +*.manifest +*.spec + +# Environments +.env +.env.local +.venv + +# PyCharm +.idea/ + +# VS Code +.vscode/ + +# macOS +.DS_Store +.AppleDouble +.LSOverride + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Windows +Thumbs.db +ehthumbs.db +Desktop.ini +$RECYCLE.BIN/ diff --git a/O-4_shape_matching_data-generator/LICENSE b/O-4_shape_matching_data-generator/LICENSE new file mode 100644 index 0000000..bfb7224 --- /dev/null +++ b/O-4_shape_matching_data-generator/LICENSE @@ -0,0 +1,21 @@ 
+MIT License + +Copyright (c) 2026 VM Dataset Team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md b/O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md new file mode 100644 index 0000000..963eef3 --- /dev/null +++ b/O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md @@ -0,0 +1,64 @@ +# ๆŽจ้€ๅˆฐGitHubไป“ๅบ“็š„ๆญฅ้ชค + +## ๅฝ“ๅ‰็Šถๆ€ +โœ… Gitไป“ๅบ“ๅทฒๅˆๅง‹ๅŒ– +โœ… ๆ‰€ๆœ‰ๆ–‡ไปถๅทฒๆไบคๅˆฐๆœฌๅœฐmainๅˆ†ๆ”ฏ +โœ… Commit ID: fddfd20 + +## ๆŽจ้€ๆญฅ้ชค + +### ๆ–นๅผ1: ๆŽจ้€ๅˆฐ vm-dataset ็ป„็ป‡๏ผˆๆŽจ่๏ผ‰ + +1. ๅœจๆต่งˆๅ™จไธญๆ‰“ๅผ€: https://github.com/organizations/vm-dataset/repositories/new + +2. ๅกซๅ†™ไป“ๅบ“ไฟกๆฏ๏ผš + - Repository name: `O-4_shape_matching_data-generator` + - Description: `Shape matching task data generator for visual reasoning dataset` + - Visibility: Public + - โš ๏ธ ไธ่ฆๅ‹พ้€‰ "Add a README file" + - โš ๏ธ ไธ่ฆๅ‹พ้€‰ "Add .gitignore" + - โš ๏ธ ไธ่ฆๅ‹พ้€‰ "Choose a license" + +3. 
็‚นๅ‡ป "Create repository" + +4. ๅœจ็ปˆ็ซฏๆ‰ง่กŒไปฅไธ‹ๅ‘ฝไปค๏ผš + ```bash + cd /workspaces/template-data-generator/O-4_shape_matching_data-generator + git remote add origin https://github.com/vm-dataset/O-4_shape_matching_data-generator.git + git branch -M main + git push -u origin main + ``` + +### ๆ–นๅผ2: ๆŽจ้€ๅˆฐไธชไบบ่ดฆๆˆท๏ผˆไธดๆ—ถๆ–นๆกˆ๏ผ‰ + +1. ๅœจๆต่งˆๅ™จไธญๆ‰“ๅผ€: https://github.com/new + +2. ๅกซๅ†™ไป“ๅบ“ไฟกๆฏ๏ผš + - Repository name: `O-4_shape_matching_data-generator` + - Description: `Shape matching task data generator for visual reasoning dataset` + - Visibility: Public + - โš ๏ธ ไธ่ฆๅ‹พ้€‰ไปปไฝ•ๅˆๅง‹ๅŒ–้€‰้กน + +3. ๅœจ็ปˆ็ซฏๆ‰ง่กŒ๏ผš + ```bash + cd /workspaces/template-data-generator/O-4_shape_matching_data-generator + git remote add origin https://github.com/jyizheng/O-4_shape_matching_data-generator.git + git branch -M main + git push -u origin main + ``` + +4. ไน‹ๅŽๅฏไปฅ้€š่ฟ‡TransferๅŠŸ่ƒฝ่ฝฌ็งปๅˆฐvm-dataset็ป„็ป‡ + +## ้ชŒ่ฏ + +ๆŽจ้€ๆˆๅŠŸๅŽ๏ผŒ่ฎฟ้—ฎไป“ๅบ“URL็กฎ่ฎค๏ผš +- ๆ‰€ๆœ‰ๆ–‡ไปถ้ƒฝๅทฒไธŠไผ  +- README.mdๆญฃ็กฎๆ˜พ็คบ +- ๆ–‡ไปถ็ป“ๆž„ๅฎŒๆ•ด + +## ้กน็›ฎไฟกๆฏ + +- Domain: shape_matching +- Task IDๆ ผๅผ: shape_matching_XXXX +- ๅŒ…ๅซ16ไธชๆ–‡ไปถ๏ผŒ1244่กŒไปฃ็  +- ็ฌฆๅˆG-1ๆจกๆฟๅ’Œrules.txt่ง„่Œƒ diff --git a/O-4_shape_matching_data-generator/README.md b/O-4_shape_matching_data-generator/README.md new file mode 100644 index 0000000..57bcdc6 --- /dev/null +++ b/O-4_shape_matching_data-generator/README.md @@ -0,0 +1,139 @@ +# O-4 Shape Matching Data Generator ๐Ÿ”ท + +A data generator for creating synthetic "Shape Matching" reasoning tasks. This generator creates datasets where colored shapes must be moved into their corresponding dark outlines. + +--- + +## ๐Ÿš€ Quick Start + +```bash +# 1. Clone the repository +git clone https://github.com/vm-dataset/O-4_shape_matching_data-generator.git +cd O-4_shape_matching_data-generator + +# 2. 
Create and activate virtual environment +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# 3. Install dependencies +pip install --upgrade pip +pip install -r requirements.txt +pip install -e . + +# 4. Generate tasks +python examples/generate.py --num-samples 50 +``` + +--- + +## ๐Ÿ“ Structure + +``` +O-4_shape_matching_data-generator/ +โ”œโ”€โ”€ core/ # Standard utilities +โ”‚ โ”œโ”€โ”€ base_generator.py # Abstract base class +โ”‚ โ”œโ”€โ”€ schemas.py # Pydantic models +โ”‚ โ”œโ”€โ”€ image_utils.py # Image helpers +โ”‚ โ”œโ”€โ”€ video_utils.py # Video generation +โ”‚ โ””โ”€โ”€ output_writer.py # File output +โ”œโ”€โ”€ src/ # Shape matching task logic +โ”‚ โ”œโ”€โ”€ generator.py # Shape matching generator +โ”‚ โ”œโ”€โ”€ prompts.py # Task prompt templates +โ”‚ โ””โ”€โ”€ config.py # Task configuration +โ”œโ”€โ”€ examples/ +โ”‚ โ””โ”€โ”€ generate.py # Entry point +โ””โ”€โ”€ data/questions/ # Generated output +``` + +--- + +## ๐Ÿ“ฆ Output Format + +This generator produces: + +``` +data/questions/shape_matching_task/{task_id}/ +โ”œโ”€โ”€ first_frame.png # Initial state with shapes scattered (REQUIRED) +โ”œโ”€โ”€ final_frame.png # Final state with shapes matching outlines (REQUIRED) +โ”œโ”€โ”€ prompt.txt # Instructions (REQUIRED) +โ””โ”€โ”€ ground_truth.mp4 # Solution video (OPTIONAL) +``` + +--- + +## ๐ŸŽฏ Task Description + +This generator creates **shape matching tasks** with the following characteristics: + +1. **Initial Frame**: A scene containing: + - Colored shapes scattered on the left side (circle, square, triangle, star) + - Dark outline targets on the right side (matching the shapes) + - White background with a dividing line + +2. **Animation Process**: Each colored shape moves from its starting position to its matching outline + +3. **Final Frame**: All colored shapes are aligned with their corresponding outlines + +4. 
**Task Requirements**: + - Move each colorful shape into its corresponding dark outline + - Shapes must match their target outlines exactly + +### Task Specifications + +- **Domain**: `shape_matching` +- **Image size**: 800ร—400 pixels +- **Background**: Pure white with a dividing line +- **FPS**: 30 frames per second +- **Shapes**: circle / square / triangle / star (up to 4 shapes) +- **Animation**: hold 1s at start โ†’ linear move 2s โ†’ hold 1s at end +- **Target outlines**: Always visible on the right side + +### Prompt Format + +The prompt provides clear instructions for the task: + +``` +Move each colorful shape into its corresponding dark outline. +``` + +--- + +## ๐ŸŽจ Customization + +### Basic Usage + +```bash +# Generate 100 samples +python examples/generate.py --num-samples 100 + +# Custom output directory +python examples/generate.py --num-samples 50 --output data/my_shapes + +# Set random seed for reproducibility +python examples/generate.py --num-samples 50 --seed 42 + +# Disable video generation +python examples/generate.py --num-samples 50 --no-videos +``` + +### Configuration + +Modify [src/config.py](src/config.py) to customize: + +- `domain`: Task domain name (default: `shape_matching`) +- `image_size`: Image dimensions (default: `(800, 400)`) +- `num_shapes`: Number of shapes in task (default: `4`, max: `4`) +- `shape_size`: Size of shapes in pixels (default: `35`) +- `video_fps`: Video frame rate (default: `30`) + +--- + +## ๐Ÿ“„ License + +MIT License - see LICENSE file for details. + +--- + +## ๐Ÿค Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. diff --git a/O-4_shape_matching_data-generator/core/__init__.py b/O-4_shape_matching_data-generator/core/__init__.py new file mode 100644 index 0000000..523fda1 --- /dev/null +++ b/O-4_shape_matching_data-generator/core/__init__.py @@ -0,0 +1,21 @@ +""" +Core utilities for template-data-generator. + +DO NOT MODIFY - This is framework code. 
# ---- core/base_generator.py ------------------------------------------------
# Base generator class.

from abc import ABC, abstractmethod
from typing import List, Optional
from pathlib import Path
from pydantic import BaseModel, Field
from .schemas import TaskPair


class GenerationConfig(BaseModel):
    """Generation configuration.

    Pydantic model holding the settings read by ``BaseGenerator``; task
    configs are expected to extend it with their own fields.
    """
    num_samples: int                          # how many task pairs generate_dataset() produces
    domain: str                               # prefix of every generated task id
    difficulty: Optional[str] = None          # not read anywhere in this module
    random_seed: Optional[int] = None         # when set, seeds `random` and `numpy.random`
    output_dir: Path = Path("data/questions")
    image_size: tuple[int, int] = (400, 400)  # presumably (width, height) — confirm against renderers


class BaseGenerator(ABC):
    """Base class for task generators. Implement generate_task_pair()."""

    def __init__(self, config: GenerationConfig):
        """Store ``config`` and seed the global RNGs when a seed is configured.

        Seeding touches the process-wide ``random`` and ``numpy.random`` state,
        so it affects every consumer of those generators, not only this object.
        """
        self.config = config
        if config.random_seed is not None:
            import random
            import numpy as np
            random.seed(config.random_seed)
            np.random.seed(config.random_seed)

    @abstractmethod
    def generate_task_pair(self, task_id: str) -> TaskPair:
        """Generate a single task. Implement this in your generator."""
        pass

    def generate_dataset(self) -> List[TaskPair]:
        """Generate complete dataset.

        Calls ``generate_task_pair`` ``config.num_samples`` times with ids of
        the form ``<domain>_<index zero-padded to 4 digits>`` and prints one
        progress line per task.

        Returns:
            The generated pairs, in generation order.
        """
        pairs = []
        for i in range(self.config.num_samples):
            task_id = f"{self.config.domain}_{i:04d}"
            pair = self.generate_task_pair(task_id)
            pairs.append(pair)
            print(f" Generated: {task_id}")
        return pairs


# ---- core/image_utils.py ---------------------------------------------------
# Image utilities.

from PIL import Image, ImageDraw
from typing import Tuple


class ImageRenderer:
    """Helper for image rendering."""

    def __init__(self, image_size: Tuple[int, int] = (400, 400)):
        """Remember the size used by ``create_blank_image``."""
        self.image_size = image_size

    def create_blank_image(self, bg_color: Tuple[int, int, int] = (255, 255, 255)) -> Image.Image:
        """Create blank RGB image."""
        return Image.new('RGB', self.image_size, bg_color)

    def draw_grid(self, image: Image.Image, rows: int, cols: int) -> Image.Image:
        """Draw grid on image.

        Draws ``cols + 1`` vertical and ``rows + 1`` horizontal light-gray
        lines, evenly spaced over the image, directly onto ``image`` and
        returns that same object.
        """
        draw = ImageDraw.Draw(image)
        width, height = image.size
        # Float cell sizes; each line position is truncated to a pixel below.
        # NOTE(review): rows == 0 or cols == 0 divides by zero here.
        cell_w, cell_h = width / cols, height / rows

        for i in range(cols + 1):
            x = int(i * cell_w)
            draw.line([(x, 0), (x, height)], fill=(200, 200, 200), width=2)
        for i in range(rows + 1):
            y = int(i * cell_h)
            draw.line([(0, y), (width, y)], fill=(200, 200, 200), width=2)
        return image

    def draw_text(self, image: Image.Image, text: str, position: Tuple[int, int]) -> Image.Image:
        """Draw text on image.

        Draws ``text`` in black at ``position`` directly onto ``image`` and
        returns that same object.
        """
        draw = ImageDraw.Draw(image)
        draw.text(position, text, fill=(0, 0, 0))
        return image

    @staticmethod
    def ensure_rgb(image: Image.Image) -> Image.Image:
        """Convert image to RGB.

        Images already in RGB mode are returned unchanged (same object).
        """
        return image.convert('RGB') if image.mode != 'RGB' else image
b/O-4_shape_matching_data-generator/core/output_writer.py new file mode 100644 index 0000000..87f39fc --- /dev/null +++ b/O-4_shape_matching_data-generator/core/output_writer.py @@ -0,0 +1,43 @@ +"""Output writer for standard format.""" + +import shutil +from pathlib import Path +from typing import List +from .schemas import TaskPair +from .image_utils import ImageRenderer + + +class OutputWriter: + """Writes tasks to standard folder structure.""" + + def __init__(self, output_dir: Path): + self.output_dir = Path(output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + + def write_task_pair(self, task_pair: TaskPair) -> Path: + """Write single task to disk.""" + task_dir = self.output_dir / f"{task_pair.domain}_task" / task_pair.task_id + task_dir.mkdir(parents=True, exist_ok=True) + + # Write images + ImageRenderer.ensure_rgb(task_pair.first_image).save(task_dir / "first_frame.png") + + if task_pair.final_image: + ImageRenderer.ensure_rgb(task_pair.final_image).save(task_dir / "final_frame.png") + + # Write prompt + (task_dir / "prompt.txt").write_text(task_pair.prompt) + + # Write video if provided (preserve original extension) + if task_pair.ground_truth_video and Path(task_pair.ground_truth_video).exists(): + video_src = Path(task_pair.ground_truth_video) + video_ext = video_src.suffix # .mp4 or .avi + shutil.copy(video_src, task_dir / f"ground_truth{video_ext}") + + return task_dir + + def write_dataset(self, task_pairs: List[TaskPair]) -> Path: + """Write all tasks to disk.""" + for pair in task_pairs: + self.write_task_pair(pair) + return self.output_dir diff --git a/O-4_shape_matching_data-generator/core/schemas.py b/O-4_shape_matching_data-generator/core/schemas.py new file mode 100644 index 0000000..868f9ee --- /dev/null +++ b/O-4_shape_matching_data-generator/core/schemas.py @@ -0,0 +1,17 @@ +"""Pydantic schemas for task data.""" + +from typing import Optional, Any +from pydantic import BaseModel + + +class TaskPair(BaseModel): + """A task pair 
with initial and final states.""" + task_id: str + domain: str + prompt: str + first_image: Any # PIL Image + final_image: Optional[Any] = None # PIL Image + ground_truth_video: Optional[str] = None # Path to video (optional) + + class Config: + arbitrary_types_allowed = True diff --git a/O-4_shape_matching_data-generator/core/video_utils.py b/O-4_shape_matching_data-generator/core/video_utils.py new file mode 100644 index 0000000..d7a3f13 --- /dev/null +++ b/O-4_shape_matching_data-generator/core/video_utils.py @@ -0,0 +1,265 @@ +"""Video generation utilities - Generic framework code (DO NOT MODIFY).""" + +from pathlib import Path +from typing import List, Tuple, Optional +from PIL import Image + +# Check if cv2 is available +import importlib.util + +CV2_AVAILABLE = importlib.util.find_spec("cv2") is not None + +if CV2_AVAILABLE: + import cv2 + import numpy as np +else: + cv2 = None + np = None + print("โš ๏ธ Warning: opencv-python not installed. Video generation disabled.") + print(" Install with: pip install opencv-python==4.8.1.78") + + +class VideoGenerator: + """ + Generate videos from image sequences. + + This is a generic utility class - use it in your custom generator. + """ + + def __init__(self, fps: int = 10, output_format: str = "mp4"): + """ + Initialize video generator. 
+ + Args: + fps: Frames per second + output_format: Video format - "mp4" (recommended) or "avi" + """ + self.fps = fps + self.output_format = output_format + + # Use H.264 for mp4 (better compatibility) or XVID for avi + if output_format == "mp4": + self.codec = 'mp4v' # Most compatible mp4 codec + self.extension = '.mp4' + else: + self.codec = 'XVID' + self.extension = '.avi' + + if not CV2_AVAILABLE: + raise ImportError("opencv-python is required for video generation") + + @staticmethod + def is_available() -> bool: + """Check if video generation is available.""" + return CV2_AVAILABLE + + def create_video_from_frames( + self, + frames: List[Image.Image], + output_path: Path, + size: Optional[Tuple[int, int]] = None + ) -> Path: + """ + Create video from PIL Image frames. + + Args: + frames: List of PIL Images + output_path: Path to save video (extension will be corrected) + size: Optional (width, height) tuple. If None, uses first frame size + + Returns: + Path to created video file + """ + if not frames: + raise ValueError("No frames provided") + + # Get video size + if size is None: + size = frames[0].size + + width, height = size + + # Ensure correct extension + output_path = Path(output_path) + output_path = output_path.with_suffix(self.extension) + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Initialize video writer + fourcc = cv2.VideoWriter_fourcc(*self.codec) + + writer = cv2.VideoWriter( + str(output_path), + fourcc, + self.fps, + (width, height) + ) + + # Write frames + for frame in frames: + # Ensure RGB and correct size + if frame.size != size: + frame = frame.resize(size, Image.Resampling.LANCZOS) + + # Convert PIL Image to OpenCV format (BGR) + frame_rgb = frame.convert('RGB') + frame_array = np.array(frame_rgb) + frame_bgr = cv2.cvtColor(frame_array, cv2.COLOR_RGB2BGR) + + writer.write(frame_bgr) + + writer.release() + return output_path + + def create_crossfade_video( + self, + start_image: Image.Image, + end_image: Image.Image, + 
output_path: Path, + hold_frames: int = 5, + transition_frames: int = 15 + ) -> Optional[Path]: + """ + Create video with smooth cross-fade transition between two images. + + Args: + start_image: Initial image + end_image: Final image + output_path: Path to save video + hold_frames: Frames to hold at start and end + transition_frames: Frames for transition + + Returns: + Path to video file, or None if cv2 not available + """ + if not CV2_AVAILABLE: + return None + + frames = [] + + # Hold initial position + for _ in range(hold_frames): + frames.append(start_image.copy()) + + # Smooth cross-fade transition + start_rgba = start_image.convert('RGBA') + end_rgba = end_image.convert('RGBA') + + # Ensure same size + if start_rgba.size != end_rgba.size: + end_rgba = end_rgba.resize(start_rgba.size, Image.Resampling.LANCZOS) + + for i in range(transition_frames): + alpha = i / (transition_frames - 1) if transition_frames > 1 else 1.0 + blended = Image.blend(start_rgba, end_rgba, alpha) + frames.append(blended.convert('RGB')) + + # Hold final position + for _ in range(hold_frames): + frames.append(end_image.copy()) + + return self.create_video_from_frames(frames, output_path) + + def create_sliding_fade_video( + self, + start_image: Image.Image, + end_image: Image.Image, + output_path: Path, + hold_frames: int = 5, + transition_frames: int = 15 + ) -> Optional[Path]: + """ + Create video with sliding transition where pieces fade out then fade in. 
+ + The piece slides smoothly from start to end position, but also: + - Fades out (becomes transparent) in the first half + - Fades in (becomes opaque) in the second half + + Args: + start_image: Initial image + end_image: Final image + output_path: Path to save video + hold_frames: Frames to hold at start and end + transition_frames: Frames for transition + + Returns: + Path to video file, or None if cv2 not available + """ + if not CV2_AVAILABLE: + return None + + frames = [] + + # Hold initial position + for _ in range(hold_frames): + frames.append(start_image.copy()) + + # Sliding transition with fade out/fade in + start_rgba = start_image.convert('RGBA') + end_rgba = end_image.convert('RGBA') + + # Ensure same size + if start_rgba.size != end_rgba.size: + end_rgba = end_rgba.resize(start_rgba.size, Image.Resampling.LANCZOS) + + for i in range(transition_frames): + # Progress through transition (0 to 1) + progress = i / (transition_frames - 1) if transition_frames > 1 else 1.0 + + # Fade curve: fade out in first half, fade in in second half + # Creates a dip in opacity in the middle + if progress < 0.5: + # Fading out: opacity goes from 1.0 to 0.2 + opacity = 1.0 - (progress * 2) * 0.8 + else: + # Fading in: opacity goes from 0.2 to 1.0 + opacity = 0.2 + ((progress - 0.5) * 2) * 0.8 + + # Blend the positions (sliding motion) + blended = Image.blend(start_rgba, end_rgba, progress) + + # Apply opacity effect by blending with semi-transparent version + transparent = Image.new('RGBA', blended.size, (0, 0, 0, 0)) + faded = Image.blend(transparent, blended, opacity) + + frames.append(faded.convert('RGB')) + + # Hold final position + for _ in range(hold_frames): + frames.append(end_image.copy()) + + return self.create_video_from_frames(frames, output_path) + + def interpolate_frames( + self, + start_frame: Image.Image, + end_frame: Image.Image, + num_intermediate: int = 10 + ) -> List[Image.Image]: + """ + Create smooth transition between two frames using alpha 
blending. + + Args: + start_frame: Initial frame + end_frame: Final frame + num_intermediate: Number of intermediate frames to generate + + Returns: + List of frames including start, intermediates, and end + """ + frames = [start_frame] + + # Ensure same size and mode + if start_frame.size != end_frame.size: + end_frame = end_frame.resize(start_frame.size, Image.Resampling.LANCZOS) + + start_frame = start_frame.convert('RGBA') + end_frame = end_frame.convert('RGBA') + + # Generate intermediate frames + for i in range(1, num_intermediate + 1): + alpha = i / (num_intermediate + 1) + blended = Image.blend(start_frame, end_frame, alpha) + frames.append(blended.convert('RGB')) + + frames.append(end_frame.convert('RGB')) + return frames diff --git a/O-4_shape_matching_data-generator/examples/generate.py b/O-4_shape_matching_data-generator/examples/generate.py new file mode 100644 index 0000000..6e159cb --- /dev/null +++ b/O-4_shape_matching_data-generator/examples/generate.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•— +โ•‘ TASK GENERATION SCRIPT โ•‘ +โ•‘ โ•‘ +โ•‘ Run this to generate your dataset. โ•‘ +โ•‘ Customize TaskConfig and TaskGenerator in src/ for your task. 
โ•‘ +โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + +Usage: + python examples/generate.py --num-samples 100 + python examples/generate.py --num-samples 100 --output data/my_task --seed 42 +""" + +import argparse +from pathlib import Path +import sys + +# Add project root to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from core import OutputWriter +from src import TaskGenerator, TaskConfig + + +def main(): + parser = argparse.ArgumentParser( + description="Generate task dataset", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python examples/generate.py --num-samples 10 + python examples/generate.py --num-samples 100 --output data/output --seed 42 + """ + ) + parser.add_argument( + "--num-samples", + type=int, + required=True, + help="Number of task samples to generate" + ) + parser.add_argument( + "--output", + type=str, + default="data/questions", + help="Output directory (default: data/questions)" + ) + parser.add_argument( + "--seed", + type=int, + default=None, + help="Random seed for reproducibility" + ) + parser.add_argument( + "--no-videos", + action="store_true", + help="Disable video generation" + ) + + args = parser.parse_args() + + print(f"๐ŸŽฒ Generating {args.num_samples} tasks...") + + # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + # Configure your task here + # Add any additional TaskConfig parameters as needed + # 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + config = TaskConfig( + num_samples=args.num_samples, + random_seed=args.seed, + output_dir=Path(args.output), + generate_videos=not args.no_videos, + ) + + # Generate tasks + generator = TaskGenerator(config) + tasks = generator.generate_dataset() + + # Write to disk + writer = OutputWriter(Path(args.output)) + writer.write_dataset(tasks) + + print(f"โœ… Done! Generated {len(tasks)} tasks in {args.output}/{config.domain}_task/") + + +if __name__ == "__main__": + main() diff --git a/O-4_shape_matching_data-generator/push_to_github.sh b/O-4_shape_matching_data-generator/push_to_github.sh new file mode 100755 index 0000000..4ab9159 --- /dev/null +++ b/O-4_shape_matching_data-generator/push_to_github.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +echo "๐Ÿ“ฆ ๅ‡†ๅค‡ๆŽจ้€ O-4_shape_matching_data-generator ๅˆฐ GitHub" +echo "" +echo "่ฏท้€‰ๆ‹ฉๆŽจ้€็›ฎๆ ‡๏ผš" +echo "1) vm-dataset/O-4_shape_matching_data-generator (ๆŽจ่)" +echo "2) jyizheng/O-4_shape_matching_data-generator (ไธชไบบ่ดฆๆˆท)" +echo "" +read -p "่ฏท่พ“ๅ…ฅ้€‰้กน (1 ๆˆ– 2): " choice + +case $choice in + 1) + REPO_URL="https://github.com/vm-dataset/O-4_shape_matching_data-generator.git" + echo "" + echo "โš ๏ธ ่ฏทๅ…ˆๅœจๆต่งˆๅ™จไธญๅˆ›ๅปบไป“ๅบ“๏ผš" + echo " https://github.com/organizations/vm-dataset/repositories/new" + echo "" + echo "ไป“ๅบ“่ฎพ็ฝฎ๏ผš" + echo " - Name: O-4_shape_matching_data-generator" + echo " - Description: Shape matching task data generator for visual reasoning dataset" + echo " - Public" + echo " - ไธ่ฆๅˆๅง‹ๅŒ–ไปปไฝ•ๆ–‡ไปถ" + echo "" + read -p "ๅทฒๅˆ›ๅปบ๏ผŸๆŒ‰Enter็ปง็ปญ... 
" + ;; + 2) + REPO_URL="https://github.com/jyizheng/O-4_shape_matching_data-generator.git" + echo "" + echo "โš ๏ธ ่ฏทๅ…ˆๅœจๆต่งˆๅ™จไธญๅˆ›ๅปบไป“ๅบ“๏ผš" + echo " https://github.com/new" + echo "" + echo "ไป“ๅบ“่ฎพ็ฝฎ๏ผš" + echo " - Name: O-4_shape_matching_data-generator" + echo " - Description: Shape matching task data generator for visual reasoning dataset" + echo " - Public" + echo " - ไธ่ฆๅˆๅง‹ๅŒ–ไปปไฝ•ๆ–‡ไปถ" + echo "" + read -p "ๅทฒๅˆ›ๅปบ๏ผŸๆŒ‰Enter็ปง็ปญ... " + ;; + *) + echo "ๆ— ๆ•ˆ้€‰้กน" + exit 1 + ;; +esac + +echo "" +echo "๐Ÿ”— ้…็ฝฎ่ฟœ็จ‹ไป“ๅบ“..." +git remote add origin $REPO_URL 2>/dev/null || git remote set-url origin $REPO_URL + +echo "๐ŸŒฟ ็กฎไฟๅœจmainๅˆ†ๆ”ฏ..." +git branch -M main + +echo "๐Ÿš€ ๆŽจ้€ๅˆฐGitHub..." +git push -u origin main + +if [ $? -eq 0 ]; then + echo "" + echo "โœ… ๆˆๅŠŸๆŽจ้€ๅˆฐ $REPO_URL" + echo "" + echo "่ฎฟ้—ฎไป“ๅบ“: ${REPO_URL%.git}" +else + echo "" + echo "โŒ ๆŽจ้€ๅคฑ่ดฅ๏ผŒ่ฏทๆฃ€ๆŸฅ๏ผš" + echo " 1. ๆ˜ฏๅฆๅทฒๅœจGitHubไธŠๅˆ›ๅปบไป“ๅบ“" + echo " 2. ๆ˜ฏๅฆๆœ‰ๆŽจ้€ๆƒ้™" + echo " 3. 
็ฝ‘็ปœ่ฟžๆŽฅๆ˜ฏๅฆๆญฃๅธธ" +fi diff --git a/O-4_shape_matching_data-generator/requirements.txt b/O-4_shape_matching_data-generator/requirements.txt new file mode 100644 index 0000000..5aaf63f --- /dev/null +++ b/O-4_shape_matching_data-generator/requirements.txt @@ -0,0 +1,7 @@ +# Core dependencies +numpy==1.26.4 +Pillow==10.4.0 +pydantic==2.10.5 + +# Video generation +opencv-python==4.10.0.84 diff --git a/O-4_shape_matching_data-generator/setup.py b/O-4_shape_matching_data-generator/setup.py new file mode 100644 index 0000000..2e8c717 --- /dev/null +++ b/O-4_shape_matching_data-generator/setup.py @@ -0,0 +1,16 @@ +from setuptools import setup, find_packages + +setup( + name="shape-matching-task-generator", + version="0.1.0", + description="Shape matching task data generator", + author="VM Dataset Team", + packages=find_packages(), + install_requires=[ + "numpy>=1.26.4", + "Pillow>=10.4.0", + "pydantic>=2.10.5", + "opencv-python>=4.10.0.84", + ], + python_requires=">=3.8", +) diff --git a/O-4_shape_matching_data-generator/src/__init__.py b/O-4_shape_matching_data-generator/src/__init__.py new file mode 100644 index 0000000..0853015 --- /dev/null +++ b/O-4_shape_matching_data-generator/src/__init__.py @@ -0,0 +1,7 @@ +"""Shape matching task generator.""" + +from .config import TaskConfig +from .generator import TaskGenerator +from .prompts import get_prompt + +__all__ = ["TaskConfig", "TaskGenerator", "get_prompt"] diff --git a/O-4_shape_matching_data-generator/src/config.py b/O-4_shape_matching_data-generator/src/config.py new file mode 100644 index 0000000..add91c6 --- /dev/null +++ b/O-4_shape_matching_data-generator/src/config.py @@ -0,0 +1,62 @@ +""" +โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•— +โ•‘ YOUR TASK CONFIGURATION โ•‘ +โ•‘ โ•‘ +โ•‘ CUSTOMIZE THIS FILE to 
define your task-specific settings. โ•‘ +โ•‘ Inherits common settings from core.GenerationConfig โ•‘ +โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• +""" + +from pydantic import Field +from core import GenerationConfig + + +class TaskConfig(GenerationConfig): + """ + Your task-specific configuration. + + CUSTOMIZE THIS CLASS to add your task's hyperparameters. + + Inherited from GenerationConfig: + - num_samples: int # Number of samples to generate + - domain: str # Task domain name + - difficulty: Optional[str] # Difficulty level + - random_seed: Optional[int] # For reproducibility + - output_dir: Path # Where to save outputs + - image_size: tuple[int, int] # Image dimensions + """ + + # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + # OVERRIDE DEFAULTS + # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + + domain: str = Field(default="shape_matching") + image_size: tuple[int, int] = Field(default=(800, 400)) + + # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + # VIDEO SETTINGS + # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + + generate_videos: bool = Field( + default=True, + 
description="Whether to generate ground truth videos" + ) + + video_fps: int = Field( + default=30, + description="Video frame rate" + ) + + # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + # TASK-SPECIFIC SETTINGS + # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + + num_shapes: int = Field( + default=4, + description="Number of shapes in the task (max 4)" + ) + + shape_size: int = Field( + default=35, + description="Size (radius) of shapes in pixels" + ) diff --git a/O-4_shape_matching_data-generator/src/generator.py b/O-4_shape_matching_data-generator/src/generator.py new file mode 100644 index 0000000..ca06a36 --- /dev/null +++ b/O-4_shape_matching_data-generator/src/generator.py @@ -0,0 +1,322 @@ +""" +โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•— +โ•‘ SHAPE MATCHING TASK GENERATOR โ•‘ +โ•‘ โ•‘ +โ•‘ Implements: Move shapes into their matching outlines โ•‘ +โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• +""" + +import random +import math +import tempfile +from typing import Optional, Tuple +from pathlib import Path +from PIL import Image, ImageDraw + +from core import BaseGenerator, TaskPair +from core.video_utils import VideoGenerator +from .config import TaskConfig +from .prompts import get_prompt + + +# 
โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• +# TASK CONSTANTS +# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + +# Available shape types +SHAPE_TYPES = ["circle", "square", "triangle", "star"] + +# Default colors for shapes (RGB for PIL) +SHAPE_COLORS = { + "circle": (100, 100, 255), # Blue + "square": (100, 255, 100), # Green + "triangle": (255, 200, 100), # Yellow/Orange + "star": (255, 100, 255) # Purple +} + +# Animation parameters +HOLD_DURATION = 1.0 # seconds to hold at start/end +MOVE_DURATION = 2.0 # seconds to move + + +# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• +# SHAPE DRAWING UTILITIES +# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + +def draw_circle(draw: ImageDraw.Draw, center: Tuple[int, int], + size: int, color, outline_only: bool = False, outline_width: int = 3): + """Draw a circle shape.""" + cx, cy = center + bbox = [cx - size, cy - size, cx + size, cy + size] + + if outline_only: + draw.ellipse(bbox, outline=color, width=outline_width) + else: + draw.ellipse(bbox, fill=color, outline=(50, 50, 50), width=1) + + +def draw_square(draw: ImageDraw.Draw, center: Tuple[int, int], + size: int, color, outline_only: bool = False, outline_width: int = 3): + """Draw a square shape.""" + cx, cy = center + 
bbox = [cx - size, cy - size, cx + size, cy + size] + + if outline_only: + draw.rectangle(bbox, outline=color, width=outline_width) + else: + draw.rectangle(bbox, fill=color, outline=(50, 50, 50), width=1) + + +def draw_triangle(draw: ImageDraw.Draw, center: Tuple[int, int], + size: int, color, outline_only: bool = False, outline_width: int = 3): + """Draw an equilateral triangle pointing up.""" + cx, cy = center + points = [] + + for i in range(3): + angle = i * (2 * math.pi / 3) - (math.pi / 2) # Start from top + x = int(cx + size * math.cos(angle)) + y = int(cy + size * math.sin(angle)) + points.append((x, y)) + + if outline_only: + draw.polygon(points, outline=color, width=outline_width) + else: + draw.polygon(points, fill=color, outline=(50, 50, 50), width=1) + + +def draw_star(draw: ImageDraw.Draw, center: Tuple[int, int], + size: int, color, outline_only: bool = False, outline_width: int = 3): + """Draw a 5-pointed star.""" + cx, cy = center + points = [] + outer_r = size + inner_r = size * 0.4 + + for i in range(10): + angle = i * (2 * math.pi / 10) - (math.pi / 2) + r = outer_r if i % 2 == 0 else inner_r + x = int(cx + r * math.cos(angle)) + y = int(cy + r * math.sin(angle)) + points.append((x, y)) + + if outline_only: + draw.polygon(points, outline=color, width=outline_width) + else: + draw.polygon(points, fill=color, outline=(50, 50, 50), width=1) + + +def draw_shape(draw: ImageDraw.Draw, shape_type: str, + center: Tuple[int, int], size: int, color, + outline_only: bool = False, outline_width: int = 3): + """Draw a shape based on type.""" + if shape_type == "circle": + draw_circle(draw, center, size, color, outline_only, outline_width) + elif shape_type == "square": + draw_square(draw, center, size, color, outline_only, outline_width) + elif shape_type == "triangle": + draw_triangle(draw, center, size, color, outline_only, outline_width) + elif shape_type == "star": + draw_star(draw, center, size, color, outline_only, outline_width) + + +# 
โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• +# TASK GENERATOR +# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + +class TaskGenerator(BaseGenerator): + """ + Shape matching task generator. + + Creates tasks where colored shapes must be moved to match dark outlines. + """ + + def __init__(self, config: TaskConfig): + super().__init__(config) + self.width, self.height = config.image_size + self.num_shapes = min(config.num_shapes, len(SHAPE_TYPES)) + self.shape_size = config.shape_size + + # Initialize video generator if enabled + self.video_generator = None + if config.generate_videos and VideoGenerator.is_available(): + self.video_generator = VideoGenerator(fps=config.video_fps, output_format="mp4") + + def generate_task_pair(self, task_id: str) -> TaskPair: + """Generate one shape matching task pair.""" + + # Generate task data + task_data = self._generate_task_data() + + # Render images + first_image = self._render_frame(task_data, state="input") + final_image = self._render_frame(task_data, state="output") + + # Generate video (optional) + video_path = None + if self.config.generate_videos and self.video_generator: + video_path = self._generate_video(task_id, task_data) + + # Get prompt + prompt = get_prompt("default") + + return TaskPair( + task_id=task_id, + domain=self.config.domain, + prompt=prompt, + first_image=first_image, + final_image=final_image, + ground_truth_video=video_path + ) + + def _generate_task_data(self) -> dict: + """Generate shapes with start and target positions.""" + shapes = [] + margin = 60 + + # Select shapes for this task + selected_shapes = 
random.sample(SHAPE_TYPES, self.num_shapes) + + # Generate target positions on the right side (grid layout) + cols = 2 + rows = (self.num_shapes + 1) // 2 + right_start_x = self.width // 2 + cell_w = (self.width // 2) // cols + cell_h = self.height // rows + + slots = [] + for r in range(rows): + for c in range(cols): + if len(slots) < self.num_shapes: + cx = right_start_x + c * cell_w + cell_w // 2 + cy = r * cell_h + cell_h // 2 + slots.append((cx, cy)) + + random.shuffle(slots) + + for i, shape_type in enumerate(selected_shapes): + # Target position (right side) + tx, ty = slots[i] + + # Start position (left side, scattered) + sx = random.randint(margin, self.width // 2 - margin) + sy = random.randint(margin, self.height - margin) + + shapes.append({ + "type": shape_type, + "color": SHAPE_COLORS.get(shape_type, (150, 150, 150)), + "size": self.shape_size, + "start_pos": (sx, sy), + "target_pos": (tx, ty) + }) + + return {"shapes": shapes} + + def _render_frame(self, task_data: dict, state: str = "input") -> Image.Image: + """ + Render a frame. + + Args: + task_data: Task data containing shapes + state: "input" for shapes scattered, "output" for shapes matched + """ + # Create white background + canvas = Image.new("RGB", (self.width, self.height), color=(255, 255, 255)) + draw = ImageDraw.Draw(canvas) + + # Draw dividing line + draw.line([(self.width // 2, 20), (self.width // 2, self.height - 20)], + fill=(200, 200, 200), width=2) + + # 1. Draw target outlines (always visible) + outline_color = (80, 80, 80) + for shape in task_data["shapes"]: + draw_shape(draw, shape["type"], shape["target_pos"], + shape["size"], outline_color, outline_only=True, outline_width=3) + + # 2. 
def _generate_video(self, task_id: str, task_data: dict) -> Optional[str]:
    """Write the ground-truth animation for one task to a temporary MP4.

    The destination is ``<tempdir>/<domain>_videos/<task_id>_ground_truth.mp4``;
    the directory is created on demand.

    Args:
        task_id: Identifier used to build the video file name.
        task_data: Task description forwarded to ``_create_animation_frames``.

    Returns:
        ``str()`` of whatever ``video_generator.create_video_from_frames``
        returned when that value is truthy, otherwise ``None``.
    """
    output_dir = Path(tempfile.gettempdir()) / f"{self.config.domain}_videos"
    output_dir.mkdir(parents=True, exist_ok=True)
    destination = output_dir / f"{task_id}_ground_truth.mp4"

    animation = self._create_animation_frames(task_data)
    written = self.video_generator.create_video_from_frames(animation, destination)
    if not written:
        return None
    return str(written)


def _create_animation_frames(self, task_data: dict) -> list:
    """Build the frame sequence: hold on start, move, hold on end.

    Frame counts are derived from ``self.config.video_fps`` and the
    module-level ``HOLD_DURATION`` / ``MOVE_DURATION`` values (truncated to
    whole frames).

    Args:
        task_data: Task description passed through to the frame renderers.

    Returns:
        List of rendered frames in playback order.
    """
    fps = self.config.video_fps
    hold_count = int(HOLD_DURATION * fps)
    move_count = int(MOVE_DURATION * fps)

    # Opening hold: one rendered image, repeated by reference.
    opening = self._render_frame(task_data, state="input")
    frames = [opening] * hold_count

    # Movement: progress runs from 0.0 on the first step to 1.0 on the last;
    # a single-step transition jumps straight to 1.0.
    last_step = move_count - 1
    for step in range(move_count):
        progress = step / last_step if move_count > 1 else 1.0
        eased = self._ease_in_out(progress)
        frames.append(self._render_interpolated_frame(task_data, eased))

    # Closing hold on the solved state.
    closing = self._render_frame(task_data, state="output")
    frames.extend([closing] * hold_count)

    return frames


def _render_interpolated_frame(self, task_data: dict, progress: float) -> Image.Image:
    """Render one in-between frame of the movement.

    Args:
        task_data: Mapping with a ``"shapes"`` list; each entry is read for
            ``"type"``, ``"size"``, ``"color"``, ``"start_pos"`` and
            ``"target_pos"``.
        progress: Interpolation factor between start (0) and target (1).

    Returns:
        A new white ``self.width`` x ``self.height`` RGB image.
    """
    canvas = Image.new("RGB", (self.width, self.height), color=(255, 255, 255))
    draw = ImageDraw.Draw(canvas)

    # Vertical divider down the middle, inset 20px from top and bottom.
    mid_x = self.width // 2
    draw.line([(mid_x, 20), (mid_x, self.height - 20)],
              fill=(200, 200, 200), width=2)

    shapes = task_data["shapes"]

    # Outlines are painted first so the moving shapes are drawn over them.
    for item in shapes:
        draw_shape(draw, item["type"], item["target_pos"],
                   item["size"], (80, 80, 80), outline_only=True, outline_width=3)

    # Filled shapes at the linearly interpolated position, truncated to ints.
    for item in shapes:
        x0, y0 = item["start_pos"]
        x1, y1 = item["target_pos"]
        center = (int(x0 + (x1 - x0) * progress),
                  int(y0 + (y1 - y0) * progress))
        draw_shape(draw, item["type"], center, item["size"], item["color"])

    return canvas


def _ease_in_out(self, t: float) -> float:
    """Quadratic ease-in-out: ``2t^2`` below 0.5, ``1 - (2 - 2t)^2 / 2`` otherwise."""
    if t < 0.5:
        return 2 * t * t
    return 1 - (-2 * t + 2) ** 2 / 2
import random


# ══════════════════════════════════════════════════════════════════════════════
#  DEFINE YOUR PROMPTS
# ══════════════════════════════════════════════════════════════════════════════

# Prompt pool keyed by task type. The "default" entry doubles as the fallback
# for any task type that has no entry of its own.
PROMPTS = {
    "default": [
        "Move each colorful shape into its corresponding dark outline.",
    ],
}


def get_prompt(task_type: str = "default") -> str:
    """
    Select a random prompt for the given task type.

    Args:
        task_type: Type of task (key in PROMPTS dict). Unknown keys fall back
            to the "default" pool.

    Returns:
        Random prompt string from the selected pool
    """
    candidates = PROMPTS.get(task_type, PROMPTS["default"])
    return random.choice(candidates)


def get_all_prompts(task_type: str = "default") -> list[str]:
    """
    Get all prompts for a given task type.

    Args:
        task_type: Type of task (key in PROMPTS dict). Unknown keys fall back
            to the "default" pool.

    Returns:
        A new list holding the prompts. It is a copy, so callers may modify
        it without altering PROMPTS (and therefore without affecting later
        get_prompt calls).
    """
    return list(PROMPTS.get(task_type, PROMPTS["default"]))