-
Notifications
You must be signed in to change notification settings - Fork 18
Open
Description
你好,我使用nft/lora/wan22_t2v.yaml,wan22-i2v-5b生成的视频为模糊的。请问是为什么?
(English: Hello, I used nft/lora/wan22_t2v.yaml, and the videos generated with Wan2.2-TI2V-5B are blurry. Could you tell me why?)
ef996912-d6b9-4ac0-8464-f18820c98f99.mp4
# Environment Configuration
launcher: "accelerate"  # Options: accelerate
config_file: config/accelerate_configs/multi_gpu.yaml
num_processes: 8  # Number of processes to launch (overrides config file)
main_process_port: 29500
mixed_precision: "bf16"  # Options: no, fp16, bf16
run_name: null  # Run name (auto: {model_type}_{finetune_type}_{trainer_type}_{timestamp})
project: "Flow-Factory-test-t2v2"  # Project name for logging
logging_backend: "wandb"  # Options: wandb, swanlab, tensorboard, none
# Data Configuration
data:
  dataset_dir: "dataset/pickscore"  # Path to dataset folder
  preprocessing_batch_size: 32  # Batch size for preprocessing
  dataloader_num_workers: 16  # Number of workers for DataLoader
  force_reprocess: false  # Force reprocessing of the dataset
  # Cache directory for preprocessed datasets
  max_dataset_size: 1000  # Limit the maximum number of samples in the dataset
# Model Configuration
model:
  finetune_type: 'lora'  # Options: full, lora
  lora_rank: 128
  lora_alpha: 128
  target_components: 'transformer'  # Options: transformer, transformer_2, or ['transformer', 'transformer_2']
  target_modules: "default"
  # Wan-AI/Wan2.2-TI2V-5B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
  model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
  resume_path: null  # Path to load previous checkpoint/lora adapter
  resume_type: null  # Options: lora, full, state. Null to auto-detect based on `finetune_type`
  # attn_backend: '_flash_3_hub'  # Use flash attention 3 backend.
# Logging Configuration
log:
  # Checkpoints and logs
  save_freq: 20  # Save frequency in epochs (0 to disable)
  save_model_only: true  # Save only the model weights (not optimizer, scheduler, etc.)
# Training Configuration
train:
  # Trainer settings
  trainer_type: 'nft'
  advantage_aggregation: 'gdpo'  # Options: 'sum', 'gdpo'
  nft_beta: 0.1
  # `Old` Policy settings
  off_policy: true  # Whether to use ema parameters for sampling off-policy data.
  ema_decay_schedule: "piecewise_linear"  # Decay schedule for EMA. Options: ['constant', 'power', 'linear', 'piecewise_linear', 'cosine', 'warmup_cosine']
  flat_steps: 0
  ramp_rate: 0.001
  ema_decay: 0.5  # EMA decay rate (0 to disable)
  ema_update_interval: 1  # EMA update interval (in epochs)
  ema_device: "cuda"  # Device to store EMA model (options: cpu, cuda)
  # Training Timestep distribution
  num_train_timesteps: 8  # Set null to all steps
  time_sampling_strategy: discrete  # Options: uniform, logit_normal, discrete, discrete_with_init, discrete_wo_init
  time_shift: 3.0
  ### Timestep range for discrete time sampling.
  ### For Wan2.2-T2V-A14B (boundary_ratio=0.875, 10 inference steps):
  ### - transformer only: 0.3 (early steps, before boundary). float: [0, value], e.g., 0.3 → first 30% of timesteps
  ### - transformer_2 only: [0.4, 0.9] (later steps, after boundary). [start, end]: e.g., [0.4, 0.9] → 40%-90% of trajectory
  timestep_range: 0.3
  # KL div
  kl_type: 'v-based'
  kl_beta: 0  # KL divergence beta, 0 to disable
  ref_param_device: 'cuda'  # Options: cpu, cuda
  # Clipping
  clip_range: 1.0e-4  # PPO/GRPO clipping range
  adv_clip_range: 5.0  # Advantage clipping range
  # Sampling Settings
  resolution: [384, 720]  # Can be int or [height, width]
  num_frames: 81  # Training frames
  num_inference_steps: 20  # Number of timesteps
  guidance_scale: 4.0  # Guidance scale for sampling
  guidance_scale_2: 3.0  # Guidance scale for sampling
  # Batch and sampling
  per_device_batch_size: 1  # Batch size per device
  group_size: 16  # Group size for GRPO sampling
  global_std: false  # Use global std for advantage normalization
  unique_sample_num_per_epoch: 48  # Unique samples per group
  gradient_step_per_epoch: 1  # Gradient steps per epoch
  # Optimization
  seed: 42  # Random seed
  learning_rate: 1.0e-4  # Initial learning rate
  adam_weight_decay: 1.0e-4  # AdamW weight decay
  adam_betas: [0.9, 0.999]  # AdamW betas
  adam_epsilon: 1.0e-8  # AdamW epsilon
  max_grad_norm: 1.0  # Max gradient norm for clipping
  # Gradient checkpointing
  enable_gradient_checkpointing: true  # Enable gradient checkpointing to save memory with extra compute
# Scheduler Configuration
scheduler:
  dynamics_type: "ODE"  # Options: Flow-SDE, Dance-SDE, CPS, ODE
# Evaluation settings
eval:
  resolution: [704, 1280]  # Evaluation resolution
  num_frames: 81  # Evaluation frames
  per_device_batch_size: 1  # Eval batch size
  guidance_scale: 4.0  # Guidance scale for sampling
  guidance_scale_2: 3.0  # Guidance scale for sampling
  num_inference_steps: 28  # Number of eval timesteps
  eval_freq: 20  # Eval frequency in epochs (0 to disable)
  seed: 42  # Eval seed (defaults to training seed)
# Reward Model Configuration
rewards:
  - name: "pick_score"
    reward_model: "PickScore"
    batch_size: 16
    device: "cuda"
    dtype: bfloat16
# Optional Evaluation Reward Models
eval_rewards:
  - name: "pick_score"
    reward_model: "PickScore"
    batch_size: 32
    device: "cuda"
    dtype: bfloat16
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels