From e9ec390eb93c71f01ace0cebaf3eec07242673f5 Mon Sep 17 00:00:00 2001
From: Yizheng Jiao <jyizheng@gmail.com>
Date: Thu, 8 Jan 2026 00:44:56 +0000
Subject: [PATCH 1/2] add shape matching task

---
 README.md                           |  39 ++-
 examples/generate_shape_matching.py | 101 +++++++
 src/__init__.py                     |  18 +-
 src/shape_matching_task.py          | 395 ++++++++++++++++++++++++++++
 4 files changed, 551 insertions(+), 2 deletions(-)
 create mode 100644 examples/generate_shape_matching.py
 create mode 100644 src/shape_matching_task.py

diff --git a/README.md b/README.md
index 8995ab1..6c9edfb 100644
--- a/README.md
+++ b/README.md
@@ -136,4 +136,41 @@ class TaskConfig(GenerationConfig):
     difficulty: str = Field(default="medium", description="easy/medium/hard")
 ```
 
-**Single entry point:** `python examples/generate.py --num-samples 50`
\ No newline at end of file
+**Single entry point:** `python examples/generate.py --num-samples 50`
+
+---
+
+## 🎯 Available Tasks
+
+### Shape Matching Task (几何形状匹配)
+
+A visual reasoning task where shapes must be moved into their matching outlines.
+
+**Task Description:**
+- **Input:** Colored shapes (circle, square, triangle, star) scattered on the left side, empty outlines on the right side
+- **Output:** Colored shapes moved to fill their corresponding outlines
+
+**Example Prompt:**
+> "Move each colorful shape into its corresponding dark outline."
+
+**Usage:**
+```bash
+python examples/generate_shape_matching.py --num-samples 10
+python examples/generate_shape_matching.py --num-samples 50 --num-shapes 3 --shape-size 40
+```
+
+**Configuration Options:**
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `num_shapes` | 4 | Number of shapes (max 4: circle, square, triangle, star) |
+| `shape_size` | 35 | Radius (half-extent) of each shape in pixels |
+| `image_size` | (800, 400) | Canvas dimensions as (width, height) in pixels |
+
+**Output Structure:**
+```
+data/questions/shape_matching_task/{task_id}/
+├── first_frame.png    # Shapes scattered, outlines empty
+├── final_frame.png    # Shapes filling their outlines
+├── prompt.txt         # Task instructions
+└── ground_truth.mp4   # Animation of shapes moving (only when video generation is enabled)
+```
\ No newline at end of file
diff --git a/examples/generate_shape_matching.py b/examples/generate_shape_matching.py
new file mode 100644
index 0000000..38c89f3
--- /dev/null
+++ b/examples/generate_shape_matching.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                      SHAPE MATCHING TASK GENERATION                           ║
+║                                                                               ║
+║  Generate geometric shape matching task dataset.                              ║
+║  Task: Move shapes into their matching outlines.                              ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+Usage:
+    python examples/generate_shape_matching.py --num-samples 10
+    python examples/generate_shape_matching.py --num-samples 100 --output data/shapes --seed 42
+    python examples/generate_shape_matching.py --num-samples 50 --num-shapes 3 --shape-size 40
+"""
+
+import argparse
+from pathlib import Path
+import sys
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from core import OutputWriter
+from src import ShapeMatchingGenerator, ShapeMatchingConfig
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate shape matching task dataset",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    python examples/generate_shape_matching.py --num-samples 10
+    python examples/generate_shape_matching.py --num-samples 100 --output data/shapes --seed 42
+    python examples/generate_shape_matching.py --num-samples 50 --num-shapes 3 --shape-size 40
+        """
+    )
+    parser.add_argument(
+        "--num-samples",
+        type=int,
+        required=True,
+        help="Number of task samples to generate"
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="data/questions",
+        help="Output directory (default: data/questions)"
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for reproducibility"
+    )
+    parser.add_argument(
+        "--no-videos",
+        action="store_true",
+        help="Disable video generation"
+    )
+    parser.add_argument(
+        "--num-shapes",
+        type=int,
+        default=4,
+        help="Number of shapes (max 4: circle, square, triangle, star)"
+    )
+    parser.add_argument(
+        "--shape-size",
+        type=int,
+        default=35,
+        help="Size of shapes in pixels (default: 35)"
+    )
+    
+    args = parser.parse_args()
+    
+    print(f"🎲 Generating {args.num_samples} shape matching tasks...")
+    print(f"   Shapes: {args.num_shapes}, Size: {args.shape_size}px")
+    
+    # Configure task
+    config = ShapeMatchingConfig(
+        num_samples=args.num_samples,
+        random_seed=args.seed,
+        output_dir=Path(args.output),
+        generate_videos=not args.no_videos,
+        num_shapes=args.num_shapes,
+        shape_size=args.shape_size,
+    )
+    
+    # Generate tasks
+    generator = ShapeMatchingGenerator(config)
+    tasks = generator.generate_dataset()
+    
+    # Write to disk
+    writer = OutputWriter(Path(args.output))
+    writer.write_dataset(tasks)
+    
+    print(f"✅ Done! Generated {len(tasks)} tasks in {args.output}/{config.domain}_task/")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/__init__.py b/src/__init__.py
index b215fa2..8ad8276 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -5,10 +5,26 @@
     - config.py   : Task-specific configuration (TaskConfig)
     - generator.py: Task generation logic (TaskGenerator)
     - prompts.py  : Task prompts/instructions (get_prompt)
+    - shape_matching_task.py: Shape matching task (ShapeMatchingGenerator)
 """
 
 from .config import TaskConfig
 from .generator import TaskGenerator
 from .prompts import get_prompt
+from .shape_matching_task import (
+    ShapeMatchingConfig,
+    ShapeMatchingTask,
+    ShapeMatchingGenerator,
+    get_shape_matching_prompt
+)
 
-__all__ = ["TaskConfig", "TaskGenerator", "get_prompt"]
+__all__ = [
+    "TaskConfig",
+    "TaskGenerator", 
+    "get_prompt",
+    # Shape matching task
+    "ShapeMatchingConfig",
+    "ShapeMatchingTask",
+    "ShapeMatchingGenerator",
+    "get_shape_matching_prompt",
+]
diff --git a/src/shape_matching_task.py b/src/shape_matching_task.py
new file mode 100644
index 0000000..43737c1
--- /dev/null
+++ b/src/shape_matching_task.py
@@ -0,0 +1,395 @@
+"""
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                      SHAPE MATCHING TASK GENERATOR                            ║
+║                                                                               ║
+║  Based on opencv_code/ex4.py algorithm                                        ║
+║  Task: Move shapes into their matching outlines                               ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+"""
+
+import random
+import math
+from typing import Optional, List, Tuple
+from PIL import Image, ImageDraw
+import numpy as np
+
+from core import BaseGenerator, TaskPair, ImageRenderer
+from core.video_utils import VideoGenerator
+from .config import TaskConfig
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+#  PROMPTS FOR SHAPE MATCHING TASK
+# ══════════════════════════════════════════════════════════════════════════════
+
+SHAPE_MATCHING_PROMPTS = {
+    "default": [
+        "Move each colorful shape into its corresponding dark outline.",
+        "Match each colored shape with its outline by moving it to the correct position.",
+        "Drag each solid shape to fill its matching outline on the right side.",
+    ],
+    "puzzle": [
+        "Complete the shape puzzle by placing each colored piece into its matching slot.",
+        "Fit each shape into the correct outline, like pieces of a puzzle.",
+    ],
+    "sorting": [
+        "Sort the shapes by moving each one to its designated outline position.",
+        "Arrange the shapes by placing them over their corresponding outlines.",
+    ],
+}
+
+
+def get_shape_matching_prompt(task_type: str = "default") -> str:
+    """Select a random prompt for shape matching task."""
+    prompts = SHAPE_MATCHING_PROMPTS.get(task_type, SHAPE_MATCHING_PROMPTS["default"])
+    return random.choice(prompts)
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+#  SHAPE MATCHING TASK GENERATOR
+# ══════════════════════════════════════════════════════════════════════════════
+
+class ShapeMatchingConfig(TaskConfig):
+    """Configuration for shape matching task."""
+    domain: str = "shape_matching"
+    image_size: tuple[int, int] = (800, 400)
+    
+    # Task-specific settings
+    num_shapes: int = 4  # Number of shapes
+    shape_size: int = 35  # Size (radius) of shapes
+
+
+class ShapeMatchingTask:
+    """
+    Geometric shape matching task.
+    
+    Generates input/output image pairs showing:
+    - Input: Colored shapes scattered on left, empty outlines on right
+    - Output: Colored shapes moved to fill their matching outlines
+    """
+    
+    # Available shape types
+    SHAPE_TYPES = ["circle", "square", "triangle", "star"]
+    
+    # Default colors for shapes (RGB for PIL)
+    SHAPE_COLORS = {
+        "circle": (100, 100, 255),    # Blue
+        "square": (100, 255, 100),    # Green
+        "triangle": (255, 200, 100),  # Yellow/Orange
+        "star": (255, 100, 255)       # Purple
+    }
+    
+    def __init__(
+        self,
+        width: int = 800,
+        height: int = 400,
+        num_shapes: int = 4,
+        shape_size: int = 35
+    ):
+        self.width = width
+        self.height = height
+        self.num_shapes = min(num_shapes, len(self.SHAPE_TYPES))
+        self.shape_size = shape_size
+        
+        # Generate task data
+        self.data = self._generate_data()
+    
+    def _generate_data(self) -> list:
+        """Generate shapes with start and target positions."""
+        data = []
+        margin = 60
+        
+        # Select shapes for this task
+        selected_shapes = random.sample(self.SHAPE_TYPES, self.num_shapes)
+        
+        # Generate target positions on the right side (grid layout)
+        cols = 2
+        rows = (self.num_shapes + 1) // 2
+        right_start_x = self.width // 2
+        cell_w = (self.width // 2) // cols
+        cell_h = self.height // rows
+        
+        slots = []
+        for r in range(rows):
+            for c in range(cols):
+                if len(slots) < self.num_shapes:
+                    cx = right_start_x + c * cell_w + cell_w // 2
+                    cy = r * cell_h + cell_h // 2
+                    slots.append((cx, cy))
+        
+        random.shuffle(slots)
+        
+        for i, shape_type in enumerate(selected_shapes):
+            # Target position (right side)
+            tx, ty = slots[i]
+            
+            # Start position (left side, scattered)
+            sx = random.randint(margin, self.width // 2 - margin)
+            sy = random.randint(margin, self.height - margin)
+            
+            data.append({
+                "type": shape_type,
+                "color": self.SHAPE_COLORS.get(shape_type, (150, 150, 150)),
+                "size": self.shape_size,
+                "start_pos": (sx, sy),
+                "target_pos": (tx, ty)
+            })
+        
+        return data
+    
+    def _draw_circle(self, draw: ImageDraw.Draw, center: Tuple[int, int], 
+                     size: int, color, outline_only: bool = False, outline_width: int = 3):
+        """Draw a circle shape."""
+        cx, cy = center
+        bbox = [cx - size, cy - size, cx + size, cy + size]
+        
+        if outline_only:
+            draw.ellipse(bbox, outline=color, width=outline_width)
+        else:
+            draw.ellipse(bbox, fill=color, outline=(50, 50, 50), width=1)
+    
+    def _draw_square(self, draw: ImageDraw.Draw, center: Tuple[int, int],
+                     size: int, color, outline_only: bool = False, outline_width: int = 3):
+        """Draw a square shape."""
+        cx, cy = center
+        bbox = [cx - size, cy - size, cx + size, cy + size]
+        
+        if outline_only:
+            draw.rectangle(bbox, outline=color, width=outline_width)
+        else:
+            draw.rectangle(bbox, fill=color, outline=(50, 50, 50), width=1)
+    
+    def _draw_triangle(self, draw: ImageDraw.Draw, center: Tuple[int, int],
+                       size: int, color, outline_only: bool = False, outline_width: int = 3):
+        """Draw an equilateral triangle pointing up."""
+        cx, cy = center
+        points = []
+        
+        for i in range(3):
+            angle = i * (2 * math.pi / 3) - (math.pi / 2)  # Start from top
+            x = int(cx + size * math.cos(angle))
+            y = int(cy + size * math.sin(angle))
+            points.append((x, y))
+        
+        if outline_only:
+            draw.polygon(points, outline=color, width=outline_width)
+        else:
+            draw.polygon(points, fill=color, outline=(50, 50, 50), width=1)
+    
+    def _draw_star(self, draw: ImageDraw.Draw, center: Tuple[int, int],
+                   size: int, color, outline_only: bool = False, outline_width: int = 3):
+        """Draw a 5-pointed star."""
+        cx, cy = center
+        points = []
+        outer_r = size
+        inner_r = size * 0.4
+        
+        for i in range(10):
+            angle = i * (2 * math.pi / 10) - (math.pi / 2)
+            r = outer_r if i % 2 == 0 else inner_r
+            x = int(cx + r * math.cos(angle))
+            y = int(cy + r * math.sin(angle))
+            points.append((x, y))
+        
+        if outline_only:
+            draw.polygon(points, outline=color, width=outline_width)
+        else:
+            draw.polygon(points, fill=color, outline=(50, 50, 50), width=1)
+    
+    def _draw_shape(self, draw: ImageDraw.Draw, shape_type: str, 
+                    center: Tuple[int, int], size: int, color,
+                    outline_only: bool = False, outline_width: int = 3):
+        """Draw a shape based on type."""
+        if shape_type == "circle":
+            self._draw_circle(draw, center, size, color, outline_only, outline_width)
+        elif shape_type == "square":
+            self._draw_square(draw, center, size, color, outline_only, outline_width)
+        elif shape_type == "triangle":
+            self._draw_triangle(draw, center, size, color, outline_only, outline_width)
+        elif shape_type == "star":
+            self._draw_star(draw, center, size, color, outline_only, outline_width)
+    
+    def render(self, state: str = "input") -> Image.Image:
+        """
+        Render the task state.
+        
+        Args:
+            state: "input" for shapes scattered, "output" for shapes matched
+            
+        Returns:
+            PIL Image of the rendered state
+        """
+        # Create white background
+        canvas = Image.new("RGB", (self.width, self.height), color=(255, 255, 255))
+        draw = ImageDraw.Draw(canvas)
+        
+        # Draw dividing line
+        draw.line([(self.width // 2, 20), (self.width // 2, self.height - 20)],
+                 fill=(200, 200, 200), width=2)
+        
+        # 1. Draw target outlines (always visible)
+        outline_color = (80, 80, 80)
+        for item in self.data:
+            self._draw_shape(draw, item["type"], item["target_pos"],
+                           item["size"], outline_color, outline_only=True, outline_width=3)
+        
+        # 2. Draw solid shapes
+        for item in self.data:
+            if state == "input":
+                # Shapes at start positions (left side)
+                pos = item["start_pos"]
+            else:
+                # Shapes at target positions (matching outlines)
+                pos = item["target_pos"]
+            
+            self._draw_shape(draw, item["type"], pos, item["size"], item["color"])
+        
+        return canvas
+    
+    def get_task_type(self) -> str:
+        """Get task type for prompt selection."""
+        return "default"
+
+
+class ShapeMatchingGenerator(BaseGenerator):
+    """
+    Generator for shape matching task pairs.
+    
+    Generates image pairs showing:
+    - Input: Colored shapes scattered, empty outlines on right
+    - Output: Shapes moved to fill their matching outlines
+    """
+    
+    def __init__(self, config: ShapeMatchingConfig):
+        super().__init__(config)
+        self.config: ShapeMatchingConfig = config
+        
+        # Initialize video generator if enabled
+        self.video_generator = None
+        if getattr(config, 'generate_videos', False) and VideoGenerator.is_available():
+            self.video_generator = VideoGenerator(
+                fps=getattr(config, 'video_fps', 10),
+                output_format="mp4"
+            )
+    
+    def generate_task_pair(self, task_id: str) -> TaskPair:
+        """Generate one shape matching task pair."""
+        
+        # Create task instance
+        task = ShapeMatchingTask(
+            width=self.config.image_size[0],
+            height=self.config.image_size[1],
+            num_shapes=getattr(self.config, 'num_shapes', 4),
+            shape_size=getattr(self.config, 'shape_size', 35)
+        )
+        
+        # Render input and output images
+        first_image = task.render("input")
+        final_image = task.render("output")
+        
+        # Generate video (optional)
+        video_path = None
+        if self.video_generator:
+            video_path = self._generate_video(first_image, final_image, task_id, task)
+        
+        # Get prompt
+        prompt = get_shape_matching_prompt(task.get_task_type())
+        
+        return TaskPair(
+            task_id=task_id,
+            domain=self.config.domain,
+            prompt=prompt,
+            first_image=first_image,
+            final_image=final_image,
+            ground_truth_video=video_path
+        )
+    
+    def _generate_video(
+        self,
+        first_image: Image.Image,
+        final_image: Image.Image,
+        task_id: str,
+        task: ShapeMatchingTask
+    ) -> Optional[str]:
+        """Generate animation video showing shapes moving to outlines."""
+        from pathlib import Path
+        import tempfile
+        
+        temp_dir = Path(tempfile.gettempdir()) / f"{self.config.domain}_videos"
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        video_path = temp_dir / f"{task_id}_ground_truth.mp4"
+        
+        # Create animation frames
+        frames = self._create_matching_animation_frames(task)
+        
+        result = self.video_generator.create_video_from_frames(frames, video_path)
+        return str(result) if result else None
+    
+    def _create_matching_animation_frames(
+        self,
+        task: ShapeMatchingTask,
+        hold_frames: int = 5,
+        transition_frames: int = 25
+    ) -> list:
+        """
+        Create animation frames showing shapes moving to their outlines.
+        """
+        frames = []
+        
+        # Hold initial frame
+        initial_frame = task.render("input")
+        for _ in range(hold_frames):
+            frames.append(initial_frame)
+        
+        # Transition frames - interpolate positions
+        for i in range(transition_frames):
+            progress = i / (transition_frames - 1) if transition_frames > 1 else 1.0
+            eased_progress = self._ease_in_out(progress)
+            
+            frame = self._render_interpolated_frame(task, eased_progress)
+            frames.append(frame)
+        
+        # Hold final frame
+        final_frame = task.render("output")
+        for _ in range(hold_frames):
+            frames.append(final_frame)
+        
+        return frames
+    
+    def _render_interpolated_frame(
+        self,
+        task: ShapeMatchingTask,
+        progress: float
+    ) -> Image.Image:
+        """Render a frame with shapes at interpolated positions."""
+        canvas = Image.new("RGB", (task.width, task.height), color=(255, 255, 255))
+        draw = ImageDraw.Draw(canvas)
+        
+        # Draw dividing line
+        draw.line([(task.width // 2, 20), (task.width // 2, task.height - 20)],
+                 fill=(200, 200, 200), width=2)
+        
+        # Draw target outlines
+        outline_color = (80, 80, 80)
+        for item in task.data:
+            task._draw_shape(draw, item["type"], item["target_pos"],
+                           item["size"], outline_color, outline_only=True, outline_width=3)
+        
+        # Draw shapes at interpolated positions
+        for item in task.data:
+            sx, sy = item["start_pos"]
+            tx, ty = item["target_pos"]
+            
+            cx = int(sx + (tx - sx) * progress)
+            cy = int(sy + (ty - sy) * progress)
+            
+            task._draw_shape(draw, item["type"], (cx, cy), item["size"], item["color"])
+        
+        return canvas
+    
+    def _ease_in_out(self, t: float) -> float:
+        """Ease-in-out function for smooth animation."""
+        if t < 0.5:
+            return 2 * t * t
+        else:
+            return 1 - pow(-2 * t + 2, 2) / 2

From 6893e5e7397b253597172157f98421e9509cc0fb Mon Sep 17 00:00:00 2001
From: Yizheng Jiao <jyizheng@gmail.com>
Date: Thu, 8 Jan 2026 21:10:21 +0000
Subject: [PATCH 2/2] Add O-4_shape_matching_data-generator

- Reorganized shape matching task following G-1 template structure
- Domain: shape_matching, task_id format: shape_matching_XXXX
- Implements 4 shape types: circle, square, triangle, star
- Clean structure: core/ and src/ separation
- Includes video generation with smooth animation
- Follows all vm-dataset coding standards from rules.txt
- 9 top-level entries as required
- Ready for production use
---
 O-4_shape_matching_data-generator/.gitignore  | 112 ++++++
 O-4_shape_matching_data-generator/LICENSE     |  21 ++
 .../PUSH_INSTRUCTIONS.md                      |  64 ++++
 O-4_shape_matching_data-generator/README.md   | 139 ++++++++
 .../core/__init__.py                          |  21 ++
 .../core/base_generator.py                    |  44 +++
 .../core/image_utils.py                       |  40 +++
 .../core/output_writer.py                     |  43 +++
 .../core/schemas.py                           |  17 +
 .../core/video_utils.py                       | 265 ++++++++++++++
 .../examples/generate.py                      |  88 +++++
 .../push_to_github.sh                         |  67 ++++
 .../requirements.txt                          |   7 +
 O-4_shape_matching_data-generator/setup.py    |  16 +
 .../src/__init__.py                           |   7 +
 .../src/config.py                             |  62 ++++
 .../src/generator.py                          | 322 ++++++++++++++++++
 .../src/prompts.py                            |  40 +++
 18 files changed, 1375 insertions(+)
 create mode 100644 O-4_shape_matching_data-generator/.gitignore
 create mode 100644 O-4_shape_matching_data-generator/LICENSE
 create mode 100644 O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md
 create mode 100644 O-4_shape_matching_data-generator/README.md
 create mode 100644 O-4_shape_matching_data-generator/core/__init__.py
 create mode 100644 O-4_shape_matching_data-generator/core/base_generator.py
 create mode 100644 O-4_shape_matching_data-generator/core/image_utils.py
 create mode 100644 O-4_shape_matching_data-generator/core/output_writer.py
 create mode 100644 O-4_shape_matching_data-generator/core/schemas.py
 create mode 100644 O-4_shape_matching_data-generator/core/video_utils.py
 create mode 100644 O-4_shape_matching_data-generator/examples/generate.py
 create mode 100755 O-4_shape_matching_data-generator/push_to_github.sh
 create mode 100644 O-4_shape_matching_data-generator/requirements.txt
 create mode 100644 O-4_shape_matching_data-generator/setup.py
 create mode 100644 O-4_shape_matching_data-generator/src/__init__.py
 create mode 100644 O-4_shape_matching_data-generator/src/config.py
 create mode 100644 O-4_shape_matching_data-generator/src/generator.py
 create mode 100644 O-4_shape_matching_data-generator/src/prompts.py

diff --git a/O-4_shape_matching_data-generator/.gitignore b/O-4_shape_matching_data-generator/.gitignore
new file mode 100644
index 0000000..3af48a0
--- /dev/null
+++ b/O-4_shape_matching_data-generator/.gitignore
@@ -0,0 +1,112 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.hypothesis/
+
+# Generated data (don't commit actual generated datasets)
+data/questions/*/
+data/outputs/*/
+data/evaluations/*/
+
+# Keep directory structure but not contents
+!data/questions/.gitkeep
+!data/outputs/.gitkeep
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+
+# Jupyter
+.ipynb_checkpoints/
+*.ipynb
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Distribution / packaging
+.Python
+*.manifest
+*.spec
+
+# Environments
+.env
+.env.local
+.venv
+
+# PyCharm
+.idea/
+
+# VS Code
+.vscode/
+
+# macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Windows
+Thumbs.db
+ehthumbs.db
+Desktop.ini
+$RECYCLE.BIN/
diff --git a/O-4_shape_matching_data-generator/LICENSE b/O-4_shape_matching_data-generator/LICENSE
new file mode 100644
index 0000000..bfb7224
--- /dev/null
+++ b/O-4_shape_matching_data-generator/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 VM Dataset Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md b/O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md
new file mode 100644
index 0000000..963eef3
--- /dev/null
+++ b/O-4_shape_matching_data-generator/PUSH_INSTRUCTIONS.md
@@ -0,0 +1,64 @@
+# 推送到GitHub仓库的步骤
+
+## 当前状态
+✅ Git仓库已初始化
+✅ 所有文件已提交到本地main分支
+✅ Commit ID: fddfd20
+
+## 推送步骤
+
+### 方式1: 推送到 vm-dataset 组织（推荐）
+
+1. 在浏览器中打开: https://github.com/organizations/vm-dataset/repositories/new
+
+2. 填写仓库信息：
+   - Repository name: `O-4_shape_matching_data-generator`
+   - Description: `Shape matching task data generator for visual reasoning dataset`
+   - Visibility: Public
+   - ⚠️ 不要勾选 "Add a README file"
+   - ⚠️ 不要勾选 "Add .gitignore"
+   - ⚠️ 不要勾选 "Choose a license"
+
+3. 点击 "Create repository"
+
+4. 在终端执行以下命令：
+   ```bash
+   cd /workspaces/template-data-generator/O-4_shape_matching_data-generator
+   git remote add origin https://github.com/vm-dataset/O-4_shape_matching_data-generator.git
+   git branch -M main
+   git push -u origin main
+   ```
+
+### 方式2: 推送到个人账户（临时方案）
+
+1. 在浏览器中打开: https://github.com/new
+
+2. 填写仓库信息：
+   - Repository name: `O-4_shape_matching_data-generator`
+   - Description: `Shape matching task data generator for visual reasoning dataset`
+   - Visibility: Public
+   - ⚠️ 不要勾选任何初始化选项
+
+3. 在终端执行：
+   ```bash
+   cd /workspaces/template-data-generator/O-4_shape_matching_data-generator
+   git remote add origin https://github.com/jyizheng/O-4_shape_matching_data-generator.git
+   git branch -M main
+   git push -u origin main
+   ```
+
+4. 之后可以通过Transfer功能转移到vm-dataset组织
+
+## 验证
+
+推送成功后，访问仓库URL确认：
+- 所有文件都已上传
+- README.md正确显示
+- 文件结构完整
+
+## 项目信息
+
+- Domain: shape_matching
+- Task ID格式: shape_matching_XXXX
+- 包含16个文件，1244行代码
+- 符合G-1模板和rules.txt规范
diff --git a/O-4_shape_matching_data-generator/README.md b/O-4_shape_matching_data-generator/README.md
new file mode 100644
index 0000000..57bcdc6
--- /dev/null
+++ b/O-4_shape_matching_data-generator/README.md
@@ -0,0 +1,139 @@
+# O-4 Shape Matching Data Generator 🔷
+
+A data generator for creating synthetic "Shape Matching" reasoning tasks. This generator creates datasets where colored shapes must be moved into their corresponding dark outlines.
+
+---
+
+## 🚀 Quick Start
+
+```bash
+# 1. Clone the repository
+git clone https://github.com/vm-dataset/O-4_shape_matching_data-generator.git
+cd O-4_shape_matching_data-generator
+
+# 2. Create and activate virtual environment
+python3 -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# 3. Install dependencies
+pip install --upgrade pip
+pip install -r requirements.txt
+pip install -e .
+
+# 4. Generate tasks
+python examples/generate.py --num-samples 50
+```
+
+---
+
+## 📁 Structure
+
+```
+O-4_shape_matching_data-generator/
+├── core/                    # Standard utilities
+│   ├── base_generator.py   # Abstract base class
+│   ├── schemas.py          # Pydantic models
+│   ├── image_utils.py      # Image helpers
+│   ├── video_utils.py      # Video generation
+│   └── output_writer.py    # File output
+├── src/                     # Shape matching task logic
+│   ├── generator.py        # Shape matching generator
+│   ├── prompts.py          # Task prompt templates
+│   └── config.py           # Task configuration
+├── examples/
+│   └── generate.py         # Entry point
+└── data/questions/         # Generated output
+```
+
+---
+
+## 📦 Output Format
+
+This generator produces:
+
+```
+data/questions/shape_matching_task/{task_id}/
+├── first_frame.png          # Initial state with shapes scattered (REQUIRED)
+├── final_frame.png          # Final state with shapes matching outlines (REQUIRED)
+├── prompt.txt               # Instructions (REQUIRED)
+└── ground_truth.mp4         # Solution video (OPTIONAL)
+```
+
+---
+
+## 🎯 Task Description
+
+This generator creates **shape matching tasks** with the following characteristics:
+
+1. **Initial Frame**: A scene containing:
+   - Colored shapes scattered on the left side (circle, square, triangle, star)
+   - Dark outline targets on the right side (matching the shapes)
+   - White background with a dividing line
+
+2. **Animation Process**: Each colored shape moves from its starting position to its matching outline
+
+3. **Final Frame**: All colored shapes are aligned with their corresponding outlines
+
+4. **Task Requirements**:
+   - Move each colorful shape into its corresponding dark outline
+   - Shapes must match their target outlines exactly
+
+### Task Specifications
+
+- **Domain**: `shape_matching`
+- **Image size**: 800×400 pixels
+- **Background**: Pure white with a dividing line
+- **FPS**: 30 frames per second
+- **Shapes**: circle / square / triangle / star (up to 4 shapes)
+- **Animation**: hold 1s at start → linear move 2s → hold 1s at end
+- **Target outlines**: Always visible on the right side
+
+### Prompt Format
+
+The prompt provides clear instructions for the task:
+
+```
+Move each colorful shape into its corresponding dark outline.
+```
+
+---
+
+## 🎨 Customization
+
+### Basic Usage
+
+```bash
+# Generate 100 samples
+python examples/generate.py --num-samples 100
+
+# Custom output directory
+python examples/generate.py --num-samples 50 --output data/my_shapes
+
+# Set random seed for reproducibility
+python examples/generate.py --num-samples 50 --seed 42
+
+# Disable video generation
+python examples/generate.py --num-samples 50 --no-videos
+```
+
+### Configuration
+
+Modify [src/config.py](src/config.py) to customize:
+
+- `domain`: Task domain name (default: `shape_matching`)
+- `image_size`: Image dimensions (default: `(800, 400)`)
+- `num_shapes`: Number of shapes in task (default: `4`, max: `4`)
+- `shape_size`: Size of shapes in pixels (default: `35`)
+- `video_fps`: Video frame rate (default: `30`)
+
+---
+
+## 📄 License
+
+MIT License - see LICENSE file for details.
+
+---
+
+## 🤝 Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
diff --git a/O-4_shape_matching_data-generator/core/__init__.py b/O-4_shape_matching_data-generator/core/__init__.py
new file mode 100644
index 0000000..523fda1
--- /dev/null
+++ b/O-4_shape_matching_data-generator/core/__init__.py
@@ -0,0 +1,21 @@
+"""
+Core utilities for template-data-generator.
+
+DO NOT MODIFY - This is framework code.
+Customize files in src/ for your task.
+"""
+
+from .base_generator import BaseGenerator, GenerationConfig
+from .schemas import TaskPair
+from .image_utils import ImageRenderer
+from .output_writer import OutputWriter
+from .video_utils import VideoGenerator
+
+__all__ = [
+    "BaseGenerator",
+    "GenerationConfig",
+    "TaskPair",
+    "ImageRenderer",
+    "OutputWriter",
+    "VideoGenerator",
+]
diff --git a/O-4_shape_matching_data-generator/core/base_generator.py b/O-4_shape_matching_data-generator/core/base_generator.py
new file mode 100644
index 0000000..3403569
--- /dev/null
+++ b/O-4_shape_matching_data-generator/core/base_generator.py
@@ -0,0 +1,44 @@
+"""Base generator class."""
+
+from abc import ABC, abstractmethod
+from typing import List, Optional
+from pathlib import Path
+from pydantic import BaseModel, Field
+from .schemas import TaskPair
+
+
+class GenerationConfig(BaseModel):
+    """Settings shared by every task generator; num_samples and domain have no default at this level."""
+    num_samples: int  # how many tasks generate_dataset() produces
+    domain: str  # prefix of each generated task_id
+    difficulty: Optional[str] = None
+    random_seed: Optional[int] = None  # when not None, BaseGenerator seeds random and numpy.random with it
+    output_dir: Path = Path("data/questions")
+    image_size: tuple[int, int] = (400, 400)  # builtin generic syntax: evaluating this annotation needs Python 3.9+
+
+
+class BaseGenerator(ABC):
+    """Abstract base for task generators; subclasses implement generate_task_pair()."""
+    
+    def __init__(self, config: GenerationConfig):  # stores the config and, if a seed is given, seeds the RNGs
+        self.config = config
+        if config.random_seed is not None:
+            import random  # both imports happen only on this branch
+            import numpy as np
+            random.seed(config.random_seed)  # process-global stdlib RNG
+            np.random.seed(config.random_seed)  # NumPy's global RNG
+    
+    @abstractmethod
+    def generate_task_pair(self, task_id: str) -> TaskPair:
+        """Build and return the single TaskPair identified by ``task_id`` (subclass hook)."""
+        pass
+    
+    def generate_dataset(self) -> List[TaskPair]:
+        """Generate ``config.num_samples`` tasks in order, printing one progress line per task."""
+        pairs = []
+        for i in range(self.config.num_samples):
+            task_id = f"{self.config.domain}_{i:04d}"  # domain plus a zero-padded 4-digit index
+            pair = self.generate_task_pair(task_id)
+            pairs.append(pair)
+            print(f"  Generated: {task_id}")
+        return pairs
diff --git a/O-4_shape_matching_data-generator/core/image_utils.py b/O-4_shape_matching_data-generator/core/image_utils.py
new file mode 100644
index 0000000..c4f9ad0
--- /dev/null
+++ b/O-4_shape_matching_data-generator/core/image_utils.py
@@ -0,0 +1,40 @@
+"""Image utilities."""
+
+from PIL import Image, ImageDraw
+from typing import Tuple
+
+
+class ImageRenderer:
+    """Small PIL helpers: blank canvases, grid lines, text, and RGB normalisation."""
+    
+    def __init__(self, image_size: Tuple[int, int] = (400, 400)):
+        self.image_size = image_size  # passed straight to Image.new() as the canvas size
+    
+    def create_blank_image(self, bg_color: Tuple[int, int, int] = (255, 255, 255)) -> Image.Image:
+        """Return a new RGB image of ``self.image_size`` filled with ``bg_color``."""
+        return Image.new('RGB', self.image_size, bg_color)
+    
+    def draw_grid(self, image: Image.Image, rows: int, cols: int) -> Image.Image:
+        """Draw a rows x cols grid of light-grey lines onto ``image`` in place and return it."""
+        draw = ImageDraw.Draw(image)
+        width, height = image.size
+        cell_w, cell_h = width / cols, height / rows  # float cell size; rows or cols of 0 raises ZeroDivisionError
+        
+        for i in range(cols + 1):  # cols + 1 vertical lines, from x = 0 through x = width
+            x = int(i * cell_w)
+            draw.line([(x, 0), (x, height)], fill=(200, 200, 200), width=2)
+        for i in range(rows + 1):  # rows + 1 horizontal lines, from y = 0 through y = height
+            y = int(i * cell_h)
+            draw.line([(0, y), (width, y)], fill=(200, 200, 200), width=2)
+        return image
+    
+    def draw_text(self, image: Image.Image, text: str, position: Tuple[int, int]) -> Image.Image:
+        """Draw ``text`` in black at ``position`` onto ``image`` in place and return it."""
+        draw = ImageDraw.Draw(image)
+        draw.text(position, text, fill=(0, 0, 0))
+        return image
+    
+    @staticmethod
+    def ensure_rgb(image: Image.Image) -> Image.Image:
+        """Return ``image`` itself if its mode is already RGB, otherwise the result of convert('RGB')."""
+        return image.convert('RGB') if image.mode != 'RGB' else image
diff --git a/O-4_shape_matching_data-generator/core/output_writer.py b/O-4_shape_matching_data-generator/core/output_writer.py
new file mode 100644
index 0000000..87f39fc
--- /dev/null
+++ b/O-4_shape_matching_data-generator/core/output_writer.py
@@ -0,0 +1,43 @@
+"""Output writer for standard format."""
+
+import shutil
+from pathlib import Path
+from typing import List
+from .schemas import TaskPair
+from .image_utils import ImageRenderer
+
+
+class OutputWriter:
+    """Writes tasks to standard folder structure."""
+    
+    def __init__(self, output_dir: Path):
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+    
+    def write_task_pair(self, task_pair: TaskPair) -> Path:
+        """Write single task to disk."""
+        task_dir = self.output_dir / f"{task_pair.domain}_task" / task_pair.task_id
+        task_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Write images
+        ImageRenderer.ensure_rgb(task_pair.first_image).save(task_dir / "first_frame.png")
+        
+        if task_pair.final_image:
+            ImageRenderer.ensure_rgb(task_pair.final_image).save(task_dir / "final_frame.png")
+        
+        # Write prompt
+        (task_dir / "prompt.txt").write_text(task_pair.prompt)
+        
+        # Write video if provided (preserve original extension)
+        if task_pair.ground_truth_video and Path(task_pair.ground_truth_video).exists():
+            video_src = Path(task_pair.ground_truth_video)
+            video_ext = video_src.suffix  # .mp4 or .avi
+            shutil.copy(video_src, task_dir / f"ground_truth{video_ext}")
+        
+        return task_dir
+    
+    def write_dataset(self, task_pairs: List[TaskPair]) -> Path:
+        """Write all tasks to disk."""
+        for pair in task_pairs:
+            self.write_task_pair(pair)
+        return self.output_dir
diff --git a/O-4_shape_matching_data-generator/core/schemas.py b/O-4_shape_matching_data-generator/core/schemas.py
new file mode 100644
index 0000000..868f9ee
--- /dev/null
+++ b/O-4_shape_matching_data-generator/core/schemas.py
@@ -0,0 +1,17 @@
+"""Pydantic schemas for task data."""
+
+from typing import Optional, Any
+from pydantic import BaseModel
+
+
+class TaskPair(BaseModel):
+    """One generated task: identifiers, prompt text, first/final images and an optional video path."""
+    task_id: str
+    domain: str
+    prompt: str
+    first_image: Any  # PIL Image
+    final_image: Optional[Any] = None  # PIL Image
+    ground_truth_video: Optional[str] = None  # Path to video (optional)
+    
+    class Config:  # NOTE(review): class-based Config is the pydantic v1 style; v2 prefers model_config — confirm target version
+        arbitrary_types_allowed = True
diff --git a/O-4_shape_matching_data-generator/core/video_utils.py b/O-4_shape_matching_data-generator/core/video_utils.py
new file mode 100644
index 0000000..d7a3f13
--- /dev/null
+++ b/O-4_shape_matching_data-generator/core/video_utils.py
@@ -0,0 +1,265 @@
+"""Video generation utilities - Generic framework code (DO NOT MODIFY)."""
+
+from pathlib import Path
+from typing import List, Tuple, Optional
+from PIL import Image
+
+# Check if cv2 is available
+import importlib.util
+
+CV2_AVAILABLE = importlib.util.find_spec("cv2") is not None
+
+if CV2_AVAILABLE:
+    import cv2
+    import numpy as np
+else:
+    cv2 = None
+    np = None
+    print("⚠️  Warning: opencv-python not installed. Video generation disabled.")
+    print("   Install with: pip install opencv-python==4.8.1.78")
+
+
+class VideoGenerator:
+    """
+    Generate videos from image sequences.
+    
+    This is a generic utility class - use it in your custom generator.
+    """
+    
+    def __init__(self, fps: int = 10, output_format: str = "mp4"):
+        """
+        Record the frame rate and pick the FourCC codec and file extension.
+        
+        Args:
+            fps: Frames per second
+            output_format: "mp4" selects mp4v/.mp4; any other value selects XVID/.avi
+        """
+        self.fps = fps
+        self.output_format = output_format
+        
+        # 'mp4v' is the MPEG-4 Part 2 FourCC (not H.264); non-mp4 formats fall back to XVID in .avi
+        if output_format == "mp4":
+            self.codec = 'mp4v'  # FourCC string later unpacked into cv2.VideoWriter_fourcc
+            self.extension = '.mp4'
+        else:
+            self.codec = 'XVID'
+            self.extension = '.avi'
+        
+        if not CV2_AVAILABLE:  # checked last, so the attributes above are already set when this raises
+            raise ImportError("opencv-python is required for video generation")
+    
+    @staticmethod
+    def is_available() -> bool:
+        """Return the module-level CV2_AVAILABLE flag (True when a cv2 module spec was found)."""
+        return CV2_AVAILABLE
+    
+    def create_video_from_frames(
+        self,
+        frames: List[Image.Image],
+        output_path: Path,
+        size: Optional[Tuple[int, int]] = None
+    ) -> Path:
+        """
+        Encode PIL frames into a video file with cv2.VideoWriter.
+        
+        Args:
+            frames: Non-empty list of PIL Images (ValueError is raised when empty)
+            output_path: Target path; its suffix is replaced with self.extension
+            size: Optional (width, height) tuple. If None, uses first frame size
+            
+        Returns:
+            Path to the video file, with the corrected extension
+        """
+        if not frames:
+            raise ValueError("No frames provided")
+        
+        # Get video size
+        if size is None:
+            size = frames[0].size
+        
+        width, height = size
+        
+        # Ensure correct extension
+        output_path = Path(output_path)
+        output_path = output_path.with_suffix(self.extension)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        
+        # Initialize video writer
+        fourcc = cv2.VideoWriter_fourcc(*self.codec)  # unpacks the 4-character codec string
+        
+        writer = cv2.VideoWriter(
+            str(output_path),
+            fourcc,
+            self.fps,
+            (width, height)
+        )
+        
+        # Write frames (NOTE(review): writer.isOpened() is never checked — presumably a failed open goes unnoticed; confirm)
+        for frame in frames:
+            # Ensure RGB and correct size
+            if frame.size != size:
+                frame = frame.resize(size, Image.Resampling.LANCZOS)
+            
+            # Convert PIL Image to OpenCV format (BGR)
+            frame_rgb = frame.convert('RGB')
+            frame_array = np.array(frame_rgb)
+            frame_bgr = cv2.cvtColor(frame_array, cv2.COLOR_RGB2BGR)
+            
+            writer.write(frame_bgr)
+        
+        writer.release()  # not in a finally block: skipped if an exception is raised in the loop above
+        return output_path
+    
+    def create_crossfade_video(
+        self,
+        start_image: Image.Image,
+        end_image: Image.Image,
+        output_path: Path,
+        hold_frames: int = 5,
+        transition_frames: int = 15
+    ) -> Optional[Path]:
+        """
+        Create a video that holds the start image, cross-fades to the end image, then holds it.
+        
+        Args:
+            start_image: Initial image
+            end_image: Final image
+            output_path: Path to save video
+            hold_frames: Number of frames held at the start and again at the end
+            transition_frames: Number of blended frames between the two holds
+            
+        Returns:
+            Path to video file, or None if cv2 not available
+        """
+        if not CV2_AVAILABLE:
+            return None
+        
+        frames = []
+        
+        # Hold initial position
+        for _ in range(hold_frames):
+            frames.append(start_image.copy())
+        
+        # Smooth cross-fade transition
+        start_rgba = start_image.convert('RGBA')
+        end_rgba = end_image.convert('RGBA')
+        
+        # Ensure same size
+        if start_rgba.size != end_rgba.size:
+            end_rgba = end_rgba.resize(start_rgba.size, Image.Resampling.LANCZOS)
+        
+        for i in range(transition_frames):
+            alpha = i / (transition_frames - 1) if transition_frames > 1 else 1.0  # 0.0 to 1.0 inclusive; guard avoids /0
+            blended = Image.blend(start_rgba, end_rgba, alpha)
+            frames.append(blended.convert('RGB'))
+        
+        # Hold final position
+        for _ in range(hold_frames):
+            frames.append(end_image.copy())  # the original end_image, not the resized RGBA copy
+        
+        return self.create_video_from_frames(frames, output_path)
+    
+    def create_sliding_fade_video(
+        self,
+        start_image: Image.Image,
+        end_image: Image.Image,
+        output_path: Path,
+        hold_frames: int = 5,
+        transition_frames: int = 15
+    ) -> Optional[Path]:
+        """
+        Create a cross-fade between two images whose "opacity" factor dips in the middle.
+        
+        NOTE(review): despite the name, no pixels are translated here. Each transition frame is
+        Image.blend(start, end, progress), then blended against a fully transparent image with a
+        factor that falls linearly from 1.0 to 0.2 (first half) and rises back to 1.0 (second half).
+        
+        Args:
+            start_image: Initial image
+            end_image: Final image
+            output_path: Path to save video
+            hold_frames: Frames to hold at start and end
+            transition_frames: Frames for transition
+            
+        Returns:
+            Path to video file, or None if cv2 not available
+        """
+        if not CV2_AVAILABLE:
+            return None
+        
+        frames = []
+        
+        # Hold initial position
+        for _ in range(hold_frames):
+            frames.append(start_image.copy())
+        
+        # Cross-fade transition with an opacity dip (fade out, then fade in)
+        start_rgba = start_image.convert('RGBA')
+        end_rgba = end_image.convert('RGBA')
+        
+        # Ensure same size
+        if start_rgba.size != end_rgba.size:
+            end_rgba = end_rgba.resize(start_rgba.size, Image.Resampling.LANCZOS)
+        
+        for i in range(transition_frames):
+            # Progress through transition (0 to 1)
+            progress = i / (transition_frames - 1) if transition_frames > 1 else 1.0
+            
+            # Fade curve: fade out in first half, fade in in second half
+            # Creates a dip in opacity in the middle
+            if progress < 0.5:
+                # Fading out: opacity goes from 1.0 to 0.2
+                opacity = 1.0 - (progress * 2) * 0.8
+            else:
+                # Fading in: opacity goes from 0.2 to 1.0
+                opacity = 0.2 + ((progress - 0.5) * 2) * 0.8
+            
+            # Per-pixel alpha blend of the two whole images (this is the only "motion")
+            blended = Image.blend(start_rgba, end_rgba, progress)
+            
+            # Blend against transparent black; presumably this darkens the RGB result once alpha is dropped — verify
+            transparent = Image.new('RGBA', blended.size, (0, 0, 0, 0))
+            faded = Image.blend(transparent, blended, opacity)
+            
+            frames.append(faded.convert('RGB'))
+        
+        # Hold final position
+        for _ in range(hold_frames):
+            frames.append(end_image.copy())
+        
+        return self.create_video_from_frames(frames, output_path)
+    
+    def interpolate_frames(
+        self,
+        start_frame: Image.Image,
+        end_frame: Image.Image,
+        num_intermediate: int = 10
+    ) -> List[Image.Image]:
+        """
+        Build the frame list start, num_intermediate alpha-blended frames, end.
+        
+        Args:
+            start_frame: Initial frame (returned as given, in its original mode)
+            end_frame: Final frame (resized to start_frame's size if they differ)
+            num_intermediate: Number of intermediate frames to generate
+            
+        Returns:
+            List of num_intermediate + 2 frames; all but the first are RGB
+        """
+        frames = [start_frame]  # appended before the RGBA conversion below, so it is not converted
+        
+        # Ensure same size and mode
+        if start_frame.size != end_frame.size:
+            end_frame = end_frame.resize(start_frame.size, Image.Resampling.LANCZOS)
+        
+        start_frame = start_frame.convert('RGBA')
+        end_frame = end_frame.convert('RGBA')
+        
+        # Generate intermediate frames
+        for i in range(1, num_intermediate + 1):
+            alpha = i / (num_intermediate + 1)  # strictly between 0 and 1, so neither endpoint is duplicated
+            blended = Image.blend(start_frame, end_frame, alpha)
+            frames.append(blended.convert('RGB'))
+        
+        frames.append(end_frame.convert('RGB'))
+        return frames
diff --git a/O-4_shape_matching_data-generator/examples/generate.py b/O-4_shape_matching_data-generator/examples/generate.py
new file mode 100644
index 0000000..6e159cb
--- /dev/null
+++ b/O-4_shape_matching_data-generator/examples/generate.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+"""
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                           TASK GENERATION SCRIPT                              ║
+║                                                                               ║
+║  Run this to generate your dataset.                                           ║
+║  Customize TaskConfig and TaskGenerator in src/ for your task.                ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+Usage:
+    python examples/generate.py --num-samples 100
+    python examples/generate.py --num-samples 100 --output data/my_task --seed 42
+"""
+
+import argparse
+from pathlib import Path
+import sys
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from core import OutputWriter
+from src import TaskGenerator, TaskConfig
+
+
+def main():  # CLI entry point: parse arguments, build TaskConfig, generate the tasks, write them to disk
+    parser = argparse.ArgumentParser(
+        description="Generate task dataset",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    python examples/generate.py --num-samples 10
+    python examples/generate.py --num-samples 100 --output data/output --seed 42
+        """
+    )
+    parser.add_argument(
+        "--num-samples",
+        type=int,
+        required=True,
+        help="Number of task samples to generate"
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="data/questions",
+        help="Output directory (default: data/questions)"
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for reproducibility"
+    )
+    parser.add_argument(
+        "--no-videos",
+        action="store_true",
+        help="Disable video generation"
+    )
+    
+    args = parser.parse_args()
+    
+    print(f"🎲 Generating {args.num_samples} tasks...")
+    
+    # ──────────────────────────────────────────────────────────────────────────
+    #  Configure your task here
+    #  Add any additional TaskConfig parameters as needed
+    # ──────────────────────────────────────────────────────────────────────────
+    
+    config = TaskConfig(
+        num_samples=args.num_samples,
+        random_seed=args.seed,
+        output_dir=Path(args.output),
+        generate_videos=not args.no_videos,  # the flag is negative, the config field is positive
+    )
+    
+    # Generate tasks
+    generator = TaskGenerator(config)
+    tasks = generator.generate_dataset()  # all tasks are generated first, then written below
+    
+    # Write to disk
+    writer = OutputWriter(Path(args.output))  # same directory that was passed as config.output_dir
+    writer.write_dataset(tasks)
+    
+    print(f"✅ Done! Generated {len(tasks)} tasks in {args.output}/{config.domain}_task/")
+
+if __name__ == "__main__":
+    main()
diff --git a/O-4_shape_matching_data-generator/push_to_github.sh b/O-4_shape_matching_data-generator/push_to_github.sh
new file mode 100755
index 0000000..4ab9159
--- /dev/null
+++ b/O-4_shape_matching_data-generator/push_to_github.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Interactive helper: points `origin` at the chosen GitHub repository and pushes the `main` branch.
+echo "📦 准备推送 O-4_shape_matching_data-generator 到 GitHub"
+echo ""
+echo "请选择推送目标："
+echo "1) vm-dataset/O-4_shape_matching_data-generator (推荐)"
+echo "2) jyizheng/O-4_shape_matching_data-generator (个人账户)"
+echo ""
+read -r -p "请输入选项 (1 或 2): " choice
+
+case "$choice" in
+  1)
+    REPO_URL="https://github.com/vm-dataset/O-4_shape_matching_data-generator.git"
+    echo ""
+    echo "⚠️  请先在浏览器中创建仓库："
+    echo "   https://github.com/organizations/vm-dataset/repositories/new"
+    echo ""
+    echo "仓库设置："
+    echo "  - Name: O-4_shape_matching_data-generator"
+    echo "  - Description: Shape matching task data generator for visual reasoning dataset"
+    echo "  - Public"
+    echo "  - 不要初始化任何文件"
+    echo ""
+    read -r -p "已创建？按Enter继续... "
+    ;;
+  2)
+    REPO_URL="https://github.com/jyizheng/O-4_shape_matching_data-generator.git"
+    echo ""
+    echo "⚠️  请先在浏览器中创建仓库："
+    echo "   https://github.com/new"
+    echo ""
+    echo "仓库设置："
+    echo "  - Name: O-4_shape_matching_data-generator"
+    echo "  - Description: Shape matching task data generator for visual reasoning dataset"
+    echo "  - Public"
+    echo "  - 不要初始化任何文件"
+    echo ""
+    read -r -p "已创建？按Enter继续... "
+    ;;
+  *)
+    echo "无效选项"
+    exit 1
+    ;;
+esac
+
+echo ""
+echo "🔗 配置远程仓库..."
+git remote add origin "$REPO_URL" 2>/dev/null || git remote set-url origin "$REPO_URL"
+
+echo "🌿 确保在main分支..."
+git branch -M main
+
+echo "🚀 推送到GitHub..."
+# Test the push's exit status directly instead of reading $? on a later line.
+if git push -u origin main; then
+    echo ""
+    echo "✅ 成功推送到 $REPO_URL"
+    echo ""
+    echo "访问仓库: ${REPO_URL%.git}"
+else
+    echo ""
+    echo "❌ 推送失败，请检查："
+    echo "   1. 是否已在GitHub上创建仓库"
+    echo "   2. 是否有推送权限"
+    echo "   3. 网络连接是否正常"
+    exit 1
+fi
diff --git a/O-4_shape_matching_data-generator/requirements.txt b/O-4_shape_matching_data-generator/requirements.txt
new file mode 100644
index 0000000..5aaf63f
--- /dev/null
+++ b/O-4_shape_matching_data-generator/requirements.txt
@@ -0,0 +1,7 @@
+# Core dependencies
+numpy==1.26.4
+Pillow==10.4.0
+pydantic==2.10.5
+
+# Video generation
+opencv-python==4.10.0.84
diff --git a/O-4_shape_matching_data-generator/setup.py b/O-4_shape_matching_data-generator/setup.py
new file mode 100644
index 0000000..2e8c717
--- /dev/null
+++ b/O-4_shape_matching_data-generator/setup.py
@@ -0,0 +1,16 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="shape-matching-task-generator",
+    version="0.1.0",
+    description="Shape matching task data generator",
+    author="VM Dataset Team",
+    packages=find_packages(),
+    install_requires=[
+        "numpy>=1.26.4",
+        "Pillow>=10.4.0",
+        "pydantic>=2.10.5",
+        "opencv-python>=4.10.0.84",
+    ],
+    python_requires=">=3.8",
+)
diff --git a/O-4_shape_matching_data-generator/src/__init__.py b/O-4_shape_matching_data-generator/src/__init__.py
new file mode 100644
index 0000000..0853015
--- /dev/null
+++ b/O-4_shape_matching_data-generator/src/__init__.py
@@ -0,0 +1,7 @@
+"""Shape matching task generator."""
+
+from .config import TaskConfig
+from .generator import TaskGenerator
+from .prompts import get_prompt
+
+__all__ = ["TaskConfig", "TaskGenerator", "get_prompt"]
diff --git a/O-4_shape_matching_data-generator/src/config.py b/O-4_shape_matching_data-generator/src/config.py
new file mode 100644
index 0000000..add91c6
--- /dev/null
+++ b/O-4_shape_matching_data-generator/src/config.py
@@ -0,0 +1,62 @@
+"""
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                           YOUR TASK CONFIGURATION                             ║
+║                                                                               ║
+║  CUSTOMIZE THIS FILE to define your task-specific settings.                   ║
+║  Inherits common settings from core.GenerationConfig                          ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+"""
+
+from pydantic import Field
+from core import GenerationConfig
+
+
+class TaskConfig(GenerationConfig):
+    """
+    Your task-specific configuration.
+    
+    CUSTOMIZE THIS CLASS to add your task's hyperparameters.
+    
+    Inherited from GenerationConfig:
+        - num_samples: int          # Number of samples to generate
+        - domain: str               # Task domain name
+        - difficulty: Optional[str] # Difficulty level
+        - random_seed: Optional[int] # For reproducibility
+        - output_dir: Path          # Where to save outputs
+        - image_size: tuple[int, int] # Image dimensions
+    """
+    
+    # ══════════════════════════════════════════════════════════════════════════
+    #  OVERRIDE DEFAULTS
+    # ══════════════════════════════════════════════════════════════════════════
+    
+    domain: str = Field(default="shape_matching")
+    image_size: tuple[int, int] = Field(default=(800, 400))
+    
+    # ══════════════════════════════════════════════════════════════════════════
+    #  VIDEO SETTINGS
+    # ══════════════════════════════════════════════════════════════════════════
+    
+    generate_videos: bool = Field(
+        default=True,
+        description="Whether to generate ground truth videos"
+    )
+    
+    video_fps: int = Field(
+        default=30,
+        description="Video frame rate"
+    )
+    
+    # ══════════════════════════════════════════════════════════════════════════
+    #  TASK-SPECIFIC SETTINGS
+    # ══════════════════════════════════════════════════════════════════════════
+    
+    num_shapes: int = Field(
+        default=4,
+        description="Number of shapes in the task (max 4)"
+    )
+    
+    shape_size: int = Field(
+        default=35,
+        description="Size (radius) of shapes in pixels"
+    )
diff --git a/O-4_shape_matching_data-generator/src/generator.py b/O-4_shape_matching_data-generator/src/generator.py
new file mode 100644
index 0000000..ca06a36
--- /dev/null
+++ b/O-4_shape_matching_data-generator/src/generator.py
@@ -0,0 +1,322 @@
+"""
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                      SHAPE MATCHING TASK GENERATOR                            ║
+║                                                                               ║
+║  Implements: Move shapes into their matching outlines                         ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+"""
+
+import random
+import math
+import tempfile
+from typing import Optional, Tuple
+from pathlib import Path
+from PIL import Image, ImageDraw
+
+from core import BaseGenerator, TaskPair
+from core.video_utils import VideoGenerator
+from .config import TaskConfig
+from .prompts import get_prompt
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+#  TASK CONSTANTS
+# ══════════════════════════════════════════════════════════════════════════════
+
+# Available shape types (the generator samples from this list without replacement)
+SHAPE_TYPES = ["circle", "square", "triangle", "star"]
+
+# Fill colors for the solid shapes, keyed by shape type (RGB tuples for PIL)
+SHAPE_COLORS = {
+    "circle": (100, 100, 255),    # Blue
+    "square": (100, 255, 100),    # Green
+    "triangle": (255, 200, 100),  # Yellow/Orange
+    "star": (255, 100, 255)       # Magenta (pink-purple)
+}
+
+# Animation parameters
+HOLD_DURATION = 1.0  # seconds to hold at start/end
+MOVE_DURATION = 2.0  # seconds to move
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+#  SHAPE DRAWING UTILITIES
+# ══════════════════════════════════════════════════════════════════════════════
+
+def draw_circle(draw: ImageDraw.Draw, center: Tuple[int, int], 
+                size: int, color, outline_only: bool = False, outline_width: int = 3):
+    """Draw a circle shape."""
+    cx, cy = center
+    bbox = [cx - size, cy - size, cx + size, cy + size]
+    
+    if outline_only:
+        draw.ellipse(bbox, outline=color, width=outline_width)
+    else:
+        draw.ellipse(bbox, fill=color, outline=(50, 50, 50), width=1)
+
+
+def draw_square(draw: ImageDraw.Draw, center: Tuple[int, int],
+                size: int, color, outline_only: bool = False, outline_width: int = 3):
+    """Draw a square shape."""
+    cx, cy = center
+    bbox = [cx - size, cy - size, cx + size, cy + size]
+    
+    if outline_only:
+        draw.rectangle(bbox, outline=color, width=outline_width)
+    else:
+        draw.rectangle(bbox, fill=color, outline=(50, 50, 50), width=1)
+
+
+def draw_triangle(draw: ImageDraw.Draw, center: Tuple[int, int],
+                  size: int, color, outline_only: bool = False, outline_width: int = 3):
+    """Draw an equilateral triangle pointing up."""
+    cx, cy = center
+    points = []
+    
+    for i in range(3):
+        angle = i * (2 * math.pi / 3) - (math.pi / 2)  # Start from top
+        x = int(cx + size * math.cos(angle))
+        y = int(cy + size * math.sin(angle))
+        points.append((x, y))
+    
+    if outline_only:
+        draw.polygon(points, outline=color, width=outline_width)
+    else:
+        draw.polygon(points, fill=color, outline=(50, 50, 50), width=1)
+
+
+def draw_star(draw: ImageDraw.Draw, center: Tuple[int, int],
+              size: int, color, outline_only: bool = False, outline_width: int = 3):
+    """Draw a 5-pointed star."""
+    cx, cy = center
+    points = []
+    outer_r = size
+    inner_r = size * 0.4
+    
+    for i in range(10):
+        angle = i * (2 * math.pi / 10) - (math.pi / 2)
+        r = outer_r if i % 2 == 0 else inner_r
+        x = int(cx + r * math.cos(angle))
+        y = int(cy + r * math.sin(angle))
+        points.append((x, y))
+    
+    if outline_only:
+        draw.polygon(points, outline=color, width=outline_width)
+    else:
+        draw.polygon(points, fill=color, outline=(50, 50, 50), width=1)
+
+
+def draw_shape(draw: ImageDraw.Draw, shape_type: str, 
+               center: Tuple[int, int], size: int, color,
+               outline_only: bool = False, outline_width: int = 3):
+    """Draw a shape based on type."""
+    if shape_type == "circle":
+        draw_circle(draw, center, size, color, outline_only, outline_width)
+    elif shape_type == "square":
+        draw_square(draw, center, size, color, outline_only, outline_width)
+    elif shape_type == "triangle":
+        draw_triangle(draw, center, size, color, outline_only, outline_width)
+    elif shape_type == "star":
+        draw_star(draw, center, size, color, outline_only, outline_width)
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+#  TASK GENERATOR
+# ══════════════════════════════════════════════════════════════════════════════
+
+class TaskGenerator(BaseGenerator):
+    """
+    Shape matching task generator.
+    
+    Creates tasks where colored shapes must be moved to match dark outlines.
+    """
+    
+    def __init__(self, config: TaskConfig):  # caches canvas/shape settings and sets up optional video output
+        super().__init__(config)
+        self.width, self.height = config.image_size
+        self.num_shapes = min(config.num_shapes, len(SHAPE_TYPES))  # clamped to the number of available shape types
+        self.shape_size = config.shape_size
+        
+        # Initialize video generator if enabled
+        self.video_generator = None  # stays None when videos are disabled or cv2 is unavailable
+        if config.generate_videos and VideoGenerator.is_available():
+            self.video_generator = VideoGenerator(fps=config.video_fps, output_format="mp4")
+    
+    def generate_task_pair(self, task_id: str) -> TaskPair:
+        """Build one task: random layout, first/final frames, optional video path, and prompt."""
+        
+        # Generate task data
+        task_data = self._generate_task_data()
+        
+        # Render images (same task_data, two states)
+        first_image = self._render_frame(task_data, state="input")
+        final_image = self._render_frame(task_data, state="output")
+        
+        # Generate video (optional)
+        video_path = None
+        if self.config.generate_videos and self.video_generator:  # video_generator is None unless __init__ enabled it
+            video_path = self._generate_video(task_id, task_data)
+        
+        # Get prompt
+        prompt = get_prompt("default")
+        
+        return TaskPair(
+            task_id=task_id,
+            domain=self.config.domain,
+            prompt=prompt,
+            first_image=first_image,
+            final_image=final_image,
+            ground_truth_video=video_path
+        )
+    
+    def _generate_task_data(self) -> dict:
+        """Return {"shapes": [...]}, one dict per shape with type, color, size, start_pos and target_pos."""
+        shapes = []
+        margin = 60  # fixed inset for start positions; NOTE(review): independent of shape_size — confirm intended
+        
+        # Select shapes for this task (sampled without replacement, so types are distinct)
+        selected_shapes = random.sample(SHAPE_TYPES, self.num_shapes)
+        
+        # Generate target positions on the right side (grid layout)
+        cols = 2
+        rows = (self.num_shapes + 1) // 2  # ceil(num_shapes / 2)
+        right_start_x = self.width // 2
+        cell_w = (self.width // 2) // cols
+        cell_h = self.height // rows  # raises ZeroDivisionError when num_shapes is 0
+        
+        slots = []
+        for r in range(rows):
+            for c in range(cols):
+                if len(slots) < self.num_shapes:  # an odd count leaves the last grid cell unused
+                    cx = right_start_x + c * cell_w + cell_w // 2
+                    cy = r * cell_h + cell_h // 2
+                    slots.append((cx, cy))  # centre of the grid cell
+        
+        random.shuffle(slots)  # randomises which shape gets which cell
+        
+        for i, shape_type in enumerate(selected_shapes):
+            # Target position (right side)
+            tx, ty = slots[i]
+            
+            # Start position (left side); NOTE(review): drawn independently per shape, so starts may overlap
+            sx = random.randint(margin, self.width // 2 - margin)
+            sy = random.randint(margin, self.height - margin)
+            
+            shapes.append({
+                "type": shape_type,
+                "color": SHAPE_COLORS.get(shape_type, (150, 150, 150)),
+                "size": self.shape_size,
+                "start_pos": (sx, sy),
+                "target_pos": (tx, ty)
+            })
+        
+        return {"shapes": shapes}
+    
+    def _render_frame(self, task_data: dict, state: str = "input") -> Image.Image:
+        """
+        Render one still frame: divider, target outlines, then the solid shapes.
+        
+        Args:
+            task_data: Dict with a "shapes" list (type, size, color, start_pos, target_pos)
+            state: "input" draws shapes at start_pos; any other value draws them at target_pos
+        """
+        # Create white background
+        canvas = Image.new("RGB", (self.width, self.height), color=(255, 255, 255))
+        draw = ImageDraw.Draw(canvas)
+        
+        # Light-grey vertical divider at mid-width, inset 20 px from top and bottom
+        draw.line([(self.width // 2, 20), (self.width // 2, self.height - 20)],
+                 fill=(200, 200, 200), width=2)
+        
+        # 1. Draw target outlines (drawn in every state, before the solid shapes)
+        outline_color = (80, 80, 80)
+        for shape in task_data["shapes"]:
+            draw_shape(draw, shape["type"], shape["target_pos"],
+                      shape["size"], outline_color, outline_only=True, outline_width=3)
+        
+        # 2. Draw solid shapes on top
+        for shape in task_data["shapes"]:
+            if state == "input":
+                # Shapes at start positions (left side)
+                pos = shape["start_pos"]
+            else:
+                # Shapes at target positions (matching outlines)
+                pos = shape["target_pos"]
+            
+            draw_shape(draw, shape["type"], pos, shape["size"], shape["color"])
+        
+        return canvas
+    
+    def _generate_video(self, task_id: str, task_data: dict) -> Optional[str]:
+        """Write the animation under the system temp dir; return its path as str, or None."""
+        temp_dir = Path(tempfile.gettempdir()) / f"{self.config.domain}_videos"
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        video_path = temp_dir / f"{task_id}_ground_truth.mp4"
+        
+        # Hold / move / hold frame sequence for this layout
+        frames = self._create_animation_frames(task_data)
+        # Requires self.video_generator to be set; a falsy result is returned as None
+        result = self.video_generator.create_video_from_frames(frames, video_path)
+        return str(result) if result else None
+    
+    def _create_animation_frames(self, task_data: dict) -> list:
+        """Build the frame list: hold the input frame, ease shapes to targets, hold the output."""
+        frames = []
+        fps = self.config.video_fps
+        # Frame counts are duration constant * fps, truncated to whole frames by int()
+        hold_frames = int(HOLD_DURATION * fps)
+        transition_frames = int(MOVE_DURATION * fps)
+        
+        # Hold initial frame (the same Image object is appended hold_frames times)
+        initial_frame = self._render_frame(task_data, state="input")
+        for _ in range(hold_frames):
+            frames.append(initial_frame)
+        
+        # Transition frames - progress runs from 0.0 to 1.0 inclusive, then is eased
+        for i in range(transition_frames):
+            progress = i / (transition_frames - 1) if transition_frames > 1 else 1.0
+            eased_progress = self._ease_in_out(progress)
+            
+            frame = self._render_interpolated_frame(task_data, eased_progress)
+            frames.append(frame)
+        
+        # Hold final frame
+        final_frame = self._render_frame(task_data, state="output")
+        for _ in range(hold_frames):
+            frames.append(final_frame)
+        
+        return frames
+    
+    def _render_interpolated_frame(self, task_data: dict, progress: float) -> Image.Image:
+        """Render a frame with each shape at start + (target - start) * progress."""
+        canvas = Image.new("RGB", (self.width, self.height), color=(255, 255, 255))
+        draw = ImageDraw.Draw(canvas)
+        
+        # Draw dividing line
+        draw.line([(self.width // 2, 20), (self.width // 2, self.height - 20)],
+                 fill=(200, 200, 200), width=2)
+        
+        # Draw target outlines
+        outline_color = (80, 80, 80)
+        for shape in task_data["shapes"]:
+            draw_shape(draw, shape["type"], shape["target_pos"],
+                      shape["size"], outline_color, outline_only=True, outline_width=3)
+        
+        # Draw shapes at interpolated positions
+        for shape in task_data["shapes"]:
+            sx, sy = shape["start_pos"]
+            tx, ty = shape["target_pos"]
+            # Linear interpolation per axis, truncated to integer pixel coordinates
+            cx = int(sx + (tx - sx) * progress)
+            cy = int(sy + (ty - sy) * progress)
+            
+            draw_shape(draw, shape["type"], (cx, cy), shape["size"], shape["color"])
+        
+        return canvas
+    
+    def _ease_in_out(self, t: float) -> float:
+        """Quadratic ease-in-out: 2*t^2 below 0.5, 1 - (2 - 2*t)^2 / 2 from 0.5 upward."""
+        if t < 0.5:
+            return 2 * t * t
+        else:
+            return 1 - pow(-2 * t + 2, 2) / 2
diff --git a/O-4_shape_matching_data-generator/src/prompts.py b/O-4_shape_matching_data-generator/src/prompts.py
new file mode 100644
index 0000000..8a5b169
--- /dev/null
+++ b/O-4_shape_matching_data-generator/src/prompts.py
@@ -0,0 +1,40 @@
+"""
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                           YOUR TASK PROMPTS                                   ║
+║                                                                               ║
+║  CUSTOMIZE THIS FILE to define prompts/instructions for your task.            ║
+║  Prompts are selected based on task type and returned to the model.           ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+"""
+
+import random
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+#  DEFINE YOUR PROMPTS
+# ══════════════════════════════════════════════════════════════════════════════
+# Maps a task type to its list of candidate prompt strings.
+PROMPTS = {
+    "default": [
+        "Move each colorful shape into its corresponding dark outline.",
+    ],
+}
+
+
+def get_prompt(task_type: str = "default") -> str:
+    """
+    Select a random prompt for the given task type.
+    
+    Args:
+        task_type: Key in PROMPTS; an unknown key falls back to the "default" list
+        
+    Returns:
+        One prompt string picked from that list with random.choice
+    """
+    prompts = PROMPTS.get(task_type, PROMPTS["default"])
+    return random.choice(prompts)
+
+
+def get_all_prompts(task_type: str = "default") -> list[str]:
+    """Return a copy of the prompts for task_type (unknown types fall back to "default")."""
+    return list(PROMPTS.get(task_type, PROMPTS["default"]))