Skip to content

生成视频模糊 #76

@zhouziyiaichifeiniu

Description

@zhouziyiaichifeiniu

你好,我使用nft/lora/wan22_t2v.yaml,wan22-i2v-5b生成的视频为模糊的。请问是为什么?

ef996912-d6b9-4ac0-8464-f18820c98f99.mp4
---
# Environment Configuration
launcher: "accelerate"  # Options: accelerate
config_file: config/accelerate_configs/multi_gpu.yaml  # Accelerate launch config file
num_processes: 8  # Number of processes to launch (overrides config file)
main_process_port: 29500  # Rendezvous port for the main process
mixed_precision: "bf16"  # Options: no, fp16, bf16

run_name: null  # Run name (auto: {model_type}_{finetune_type}_{trainer_type}_{timestamp})
project: "Flow-Factory-test-t2v2"  # Project name for logging
logging_backend: "wandb"  # Options: wandb, swanlab, tensorboard, none


# Data Configuration
data:
  dataset_dir: "dataset/pickscore"  # Path to dataset folder
  preprocessing_batch_size: 32  # Batch size for preprocessing
  dataloader_num_workers: 16  # Number of workers for DataLoader
  force_reprocess: false  # Force reprocessing of the dataset
  # NOTE(review): the stray comment "Cache directory for preprocessed datasets"
  # sat mis-indented at column 0 here; the key it described (likely a cache_dir
  # setting) appears to have been lost — restore it if the trainer expects one.
  max_dataset_size: 1000  # Limit the maximum number of samples in the dataset

# Model Configuration
model:
  finetune_type: 'lora'  # Options: full, lora
  lora_rank: 128  # LoRA rank (fixed: no space before colon)
  lora_alpha: 128  # LoRA alpha scaling factor
  target_components: 'transformer'  # Options: transformer, transformer_2, or ['transformer', 'transformer_2']
  target_modules: "default"  # LoRA target module selection
  # Wan-AI/Wan2.2-TI2V-5B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
  model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
  resume_path: null  # Path to load previous checkpoint/lora adapter
  resume_type: null  # Options: lora, full, state. Null to auto-detect based on `finetune_type`
  # attn_backend: '_flash_3_hub' # Use flash attention 3 backend.

# Logging / Checkpoint Configuration
log:
  # NOTE(review): a bare scalar "checkpoints and logs" sat at column 0 here,
  # which is a YAML parse error. It looks like the tail of a lost key comment
  # (likely a save directory for "checkpoints and logs") — restore that key
  # if the trainer expects one.
  save_freq: 20  # Save frequency in epochs (0 to disable)
  save_model_only: true  # Save only the model weights (not optimizer, scheduler, etc.)

# Training Configuration
train:
  # Trainer settings
  trainer_type: 'nft'  # Trainer algorithm
  advantage_aggregation: 'gdpo'  # Options: 'sum', 'gdpo'
  nft_beta: 0.1  # NFT beta coefficient — TODO confirm semantics against trainer docs
  # `Old` Policy settings
  off_policy: true  # Whether to use ema parameters for sampling off-policy data.
  ema_decay_schedule: "piecewise_linear"  # Decay schedule for EMA. Options: ['constant', 'power', 'linear', 'piecewise_linear', 'cosine', 'warmup_cosine']
  flat_steps: 0  # Steps before the decay ramp starts — presumably used by piecewise_linear; verify
  ramp_rate: 0.001  # Ramp rate of the EMA decay schedule
  ema_decay: 0.5  # EMA decay rate (0 to disable)
  ema_update_interval: 1  # EMA update interval (in epochs)
  ema_device: "cuda"  # Device to store EMA model (options: cpu, cuda)
  # Training Timestep distribution
  num_train_timesteps: 8  # Set null to all steps
  time_sampling_strategy: discrete  # Options: uniform, logit_normal, discrete, discrete_with_init, discrete_wo_init
  time_shift: 3.0  # Timestep shift applied to the sampling distribution
  ### Timestep range for discrete time sampling.
  ### For Wan2.2-T2V-A14B (boundary_ratio=0.875, 10 inference steps):
  ###   - transformer only:   0.3 (early steps, before boundary). float: [0, value], e.g., 0.3 → first 30% of timesteps
  ###   - transformer_2 only: [0.4, 0.9] (later steps, after boundary). [start, end]: e.g., [0.4, 0.9] → 40%-90% of trajectory
  timestep_range: 0.3
  # KL div
  kl_type: 'v-based'
  kl_beta: 0  # KL divergence beta, 0 to disable
  ref_param_device: 'cuda'  # Options: cpu, cuda
  # Clipping
  clip_range: 1.0e-4  # PPO/GRPO clipping range
  adv_clip_range: 5.0  # Advantage clipping range

  # Sampling Settings
  resolution: [384, 720]  # Can be int or [height, width]
  num_frames: 81  # Training frames
  num_inference_steps: 20  # Number of timesteps
  guidance_scale: 4.0  # Guidance scale for sampling
  guidance_scale_2: 3.0  # Guidance scale for sampling
  # Batch and sampling
  per_device_batch_size: 1  # Batch size per device
  group_size: 16  # Group size for GRPO sampling
  global_std: false  # Use global std for advantage normalization
  unique_sample_num_per_epoch: 48  # Unique samples per group
  gradient_step_per_epoch: 1  # Gradient steps per epoch

  # Optimization
  seed: 42  # Random seed (was duplicated later in this mapping; duplicate removed)
  learning_rate: 1.0e-4  # Initial learning rate
  adam_weight_decay: 1.0e-4  # AdamW weight decay
  adam_betas: [0.9, 0.999]  # AdamW betas
  adam_epsilon: 1.0e-8  # AdamW epsilon
  max_grad_norm: 1.0  # Max gradient norm for clipping

  # Gradient checkpointing
  enable_gradient_checkpointing: true  # Enable gradient checkpointing to save memory with extra compute

# Scheduler Configuration
scheduler:
  # Rollout dynamics selection — presumably chooses the sampler (SDE variants vs. deterministic ODE); verify against trainer docs.
  dynamics_type: "ODE"  # Options: Flow-SDE, Dance-SDE, CPS, ODE

# Evaluation settings
# NOTE(review): eval resolution (704x1280) is much higher than the training
# resolution configured above — confirm this mismatch is intended.
eval:
  resolution: [704, 1280]  # Evaluation resolution
  num_frames: 81 # Evaluation frames
  per_device_batch_size: 1  # Eval batch size
  guidance_scale: 4.0  # Guidance scale for sampling
  guidance_scale_2: 3.0  # Guidance scale for sampling
  num_inference_steps: 28  # Number of eval timesteps
  eval_freq: 20  # Eval frequency in epochs (0 to disable)
  seed: 42  # Eval seed (defaults to training seed)

# Reward Model Configuration
# List of reward models applied during training rollouts.
rewards:
  - name: "pick_score"  # Label for this reward entry
    reward_model: "PickScore"  # Reward model identifier
    batch_size: 16  # Scoring batch size
    device: "cuda"  # Device for the reward model
    dtype: bfloat16  # Reward model precision

# Optional Evaluation Reward Models
# Same schema as `rewards`, applied only during evaluation (larger batch).
eval_rewards:
  - name: "pick_score"  # Label for this reward entry
    reward_model: "PickScore"  # Reward model identifier
    batch_size: 32  # Scoring batch size
    device: "cuda"  # Device for the reward model
    dtype: bfloat16  # Reward model precision

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions