Skip to content

训练到burn in step 1后map暴跌 #4

@hwh520

Description

@hwh520

作者大大新年好,我在跑端到端版本时,尝试换数据集,在burn in step1前map还在20%,迭代步数超过后就暴跌,我看论文上写要在第6epoch进入第二阶段,可是我无论怎么调burn in step1参数,他始终超过这个步数后就跌到0%。
以下是我的配置文件：

_base_ = [
    '../../configs/_base_/default_runtime.py'
]

norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) # add
debug = False

num_stages = 2

num_stages = 1

model = dict(
type='PointOBB',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=0,
add_extra_convs='on_input',
num_outs=4,
norm_cfg=norm_cfg
),

loss_diff_view=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0),  # SSC loss
crop_size = (800,800),
construct_view = True,   # rot/flp view
construct_resize = True, # resized view
weight_mode = 'dynamic_weight',
scale_classes = [0,1,2,3,4],
scale_classes2 = [0],

roi_head=dict(
    type='PointOBBHead',
    num_stages=num_stages,
    top_k=7,
    with_atten=False,

    loss_symmetry_ss=dict(
        type='SmoothL1Loss', loss_weight=0.5, beta=0.1),
    angle_coder=dict(
                type='PSCCoder',
                angle_version='le90',
                dual_freq=False,
                num_step=3,
                thr_mod=0),
    angle_version = 'le90',
    rotation_agnostic_classes=[],
    agnostic_resize_classes = [0,1,2,3,4],
    agnostic_resize_classes2 = [7],
    use_angle_loss = False, 
    add_angle_pred_begin = False, 
    not_use_rot_mil = False, 
    detach_angle_head = False,
    stacked_convs = 2,
    use_ssff = True,

    bbox_roi_extractor=dict(
        type='RotatedSingleRoIExtractor',
        roi_layer=dict(
            type='RoIAlignRotated',
            out_size=7,
            sample_num=2,
            clockwise=True),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='Shared2FCInstanceMILHead',
        num_stages=num_stages,
        with_loss_pseudo=False,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=5,
        num_ref_fcs=0,
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        reg_class_agnostic=True,
        loss_type='MIL',
        loss_mil1=dict(
            type='MILLoss',
            binary_ins=False,
            loss_weight=0.25,
            loss_type='binary_cross_entropy'),
        loss_mil2=dict(
            type='MILLoss',
            binary_ins=False,
            loss_weight=0.25,
            loss_type='gfocal_loss'),),
),

bbox_pred_head=dict(
    type='PointOBB_FCOS_Head',
    num_classes=5,
    in_channels=256,
    stacked_convs=4,
    feat_channels=256,
    strides=[4, 8, 16, 32],
    center_sampling=True,
    center_sample_radius=1.5,
    norm_on_bbox=True,
    centerness_on_reg=True,
    separate_angle=False,
    scale_angle=True,
    bbox_coder=dict(
        type='DistanceAnglePointCoder', angle_version='le90'),
    h_bbox_coder=dict(type='DistancePointBBoxCoder'),
    loss_cls=dict(
        type='FocalLoss',
        use_sigmoid=True,
        gamma=2.0,
        alpha=0.25,
        loss_weight=1.0),
    loss_bbox=dict(type='RotatedIoULoss', loss_weight=1.0),
    loss_angle=None,
    loss_centerness=dict(
        type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
),

# model training and testing settings
train_cfg=dict(
    base_proposal=dict(
        base_scales=[4, 8, 16, 24, 32, 48, 64, 72, 80, 96],
        base_ratios=[1 / 3, 1 / 2, 1 / 1.5, 1.0, 1.5, 2.0, 3.0],
        shake_ratio=None,
        cut_mode='symmetry',  # 'clamp',
        gen_num_neg=0),
    fine_proposal=dict(
        gen_proposal_mode='fix_gen',
        cut_mode=None,
        shake_ratio=[0.1],
        base_ratios=[1, 1.2, 1.3, 0.8, 0.7],
        iou_thr=0.3,
        gen_num_neg=500,
    ),
    rcnn=None,
    iter_count = 0,  
    burn_in_steps1 = 160000, 
    burn_in_steps2 = 190000,  
),
test_cfg=dict(
    rpn=None,
    rcnn=None,
    nms_pre=2000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(iou_thr=0.1),
    max_per_img=2000))

# dataset settings

dataset_type = 'DOTAPointDataset'

angle_version = 'le90'

img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(800, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5,version=angle_version) if not debug else dict(type='RandomFlip', flip_ratio=0.),

dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_ignore', 'gt_true_bboxes']),

]

test_pipeline = [
dict(type='LoadImageFromFile'),
# dict(type='LoadAnnotations', with_bbox=True),
dict(
type='MultiScaleFlipAug',
img_scale=(800, 800),
flip=False,
transforms=[
dict(type='Resize',img_scale=(800, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img']),
])
]

data_root = '/mnt/data/xiekaikai/dronevehiclecopy/'

data = dict(
samples_per_gpu=1,
workers_per_gpu=4,
shuffle=False if debug else None,
train=dict(
type=dataset_type,
version=angle_version,
ann_file = data_root + "trainval/trainval_1024_P2Bfmt_dota_rbox.json",
img_prefix = data_root + 'trainval/images/',
pipeline=train_pipeline,
filter_empty_gt=True
),
val=dict(
samples_per_gpu=1,
type=dataset_type,
ann_file = data_root + "trainval/trainval_1024_P2Bfmt_dota_rbox.json",
img_prefix = data_root + 'trainval/images/',
pipeline=test_pipeline,
test_mode=False,
),
test=dict(
type=dataset_type,
ann_file=data_root + "test/images/",
img_prefix=data_root + 'test/images/',
pipeline=test_pipeline))

check = dict(stop_while_nan=False)

# optimizer

optimizer = dict(type='SGD', lr=0.00125, momentum=0.9, weight_decay=0.0001)

optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

# learning policy

training_time = 2
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8*training_time, 11*training_time])
runner = dict(type='EpochBasedRunner', max_epochs=12*training_time)

checkpoint_config = dict(interval=1)

因为我们的GPU问题，我们把lr和batch_size都调到原来的四分之一，其他超参数基本不变，因为图片尺寸问题只更改了scale参数。请问这是什么问题，辛苦作者了

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions