179人参与 • 2024-08-01 • 打印机
论文链接:[2403.19967] rewrite the stars
github仓库:github - ma-xu/rewrite-the-stars: [cvpr 2024] rewrite the stars
CVPR 2024 论文《Rewrite the Stars》揭示了 star operation(逐元素乘法)在不加宽网络的情况下,将输入映射到高维非线性特征空间的能力。基于此,论文提出了 StarNet,它在紧凑的网络结构和较低的能耗下展示了令人印象深刻的性能和低延迟。
高维和非线性特征变换 (high-dimensional and non-linear feature transformation)
高效网络设计 (efficient network design)
多层次隐式特征扩展 (multi-layer implicit feature expansion)
计算复杂度与性能的平衡 (balance between computational complexity and performance)
特征表示的丰富性 (richness of feature representation)
简化网络设计 (simplified network design)
1. 在上文提到的仓库中下载imagenet/starnet.py
2. 修改starnet.py中的forward函数,并且添加out_indices参数使其能够输出不同stage的特征图
3. 将class starnet注册到mmyolo的模型注册表,并且在backbone所在包的__init__.py文件中导入该类、加入__all__
4. 修改配置文件,主要是调整yolov5 neck和head的输入输出通道数
"""
Implementation of proof-of-concept network: StarNet.
we make starnet as simple as possible [to show the key contribution of element-wise multiplication]:
- like no layer-scale in network design,
- and no ema during training,
- which would improve the performance further.
created by: xu ma (email: ma.xu1@northeastern.edu)
modified date: mar/29/2024
"""
from typing import List, Sequence, Union

import torch
import torch.nn as nn
from timm.models.layers import DropPath, trunc_normal_

# from timm.models.registry import register_model
from mmyolo.registry import MODELS
# Download locations of the released checkpoints, keyed by variant name.
# Only the s1-s4 variants have an entry here.
model_urls = {
    "starnet_s1": "https://github.com/ma-xu/rewrite-the-stars/releases/download/checkpoints_v1/starnet_s1.pth.tar",
    "starnet_s2": "https://github.com/ma-xu/rewrite-the-stars/releases/download/checkpoints_v1/starnet_s2.pth.tar",
    "starnet_s3": "https://github.com/ma-xu/rewrite-the-stars/releases/download/checkpoints_v1/starnet_s3.pth.tar",
    "starnet_s4": "https://github.com/ma-xu/rewrite-the-stars/releases/download/checkpoints_v1/starnet_s4.pth.tar",
}
class convbn(torch.nn.Sequential):
    """2-D convolution optionally followed by batch normalization.

    The layers are registered under the fixed child names ``conv`` and
    ``bn``; these names end up in ``state_dict`` keys, so they must not be
    renamed.

    Args:
        in_planes: Number of input channels of the convolution.
        out_planes: Number of output channels of the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        dilation: Convolution dilation.
        groups: Number of convolution groups.
        with_bn: When true, append a ``BatchNorm2d`` over ``out_planes``
            channels whose weight is initialised to 1 and bias to 0.
    """

    def __init__(self, in_planes, out_planes, kernel_size=1, stride=1,
                 padding=0, dilation=1, groups=1, with_bn=True):
        super().__init__()
        conv = torch.nn.Conv2d(in_planes, out_planes, kernel_size, stride,
                               padding, dilation, groups)
        self.add_module('conv', conv)
        if with_bn:
            self.add_module('bn', torch.nn.BatchNorm2d(out_planes))
            # Start the normalization layer as a plain scale-1 / shift-0 map.
            torch.nn.init.constant_(self.bn.weight, 1)
            torch.nn.init.constant_(self.bn.bias, 0)
class block(nn.Module):
    """Residual StarNet block built around an element-wise product.

    Pipeline: 7x7 depth-wise conv + BN -> two parallel 1x1 expansions
    (``f1``, ``f2``) -> ``ReLU6(f1) * f2`` -> 1x1 projection + BN (``g``)
    -> 7x7 depth-wise conv (``dwconv2``) -> optional drop-path -> residual
    add. The attribute names are part of the checkpoint key layout.

    Args:
        dim: Number of input and output channels.
        mlp_ratio: Channel expansion factor of the two 1x1 branches.
        drop_path: Drop-path rate; ``nn.Identity`` is used when it is not
            greater than zero.
    """

    def __init__(self, dim, mlp_ratio=3, drop_path=0.):
        super().__init__()
        hidden_dim = mlp_ratio * dim
        self.dwconv = convbn(dim, dim, 7, 1, (7 - 1) // 2, groups=dim, with_bn=True)
        self.f1 = convbn(dim, hidden_dim, 1, with_bn=False)
        self.f2 = convbn(dim, hidden_dim, 1, with_bn=False)
        self.g = convbn(hidden_dim, dim, 1, with_bn=True)
        self.dwconv2 = convbn(dim, dim, 7, 1, (7 - 1) // 2, groups=dim, with_bn=False)
        self.act = nn.ReLU6()
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        """Return ``x`` plus the (possibly dropped) star-branch output."""
        shortcut = x  # kept for the residual connection
        x = self.dwconv(x)
        gate, value = self.f1(x), self.f2(x)
        # The "star" operation: element-wise product of the two branches.
        x = self.act(gate) * value
        x = self.dwconv2(self.g(x))
        return shortcut + self.drop_path(x)
@MODELS.register_module()
class starnet(nn.Module):
    """StarNet feature extractor that returns per-stage feature maps.

    The stem is a stride-2 3x3 conv with 32 output channels. Every stage
    starts with another stride-2 3x3 conv, so stage ``i`` outputs
    ``base_dim * 2 ** i`` channels at a total stride of ``2 ** (i + 2)``
    relative to the input (stage 0 -> 4, stage 1 -> 8, stage 2 -> 16,
    stage 3 -> 32). The classification head (norm / pooling / linear) of
    the original classifier is intentionally not built here.

    NOTE(review): upstream spells this class ``StarNet``; the name is kept
    exactly as it appears in this file so the registry key and existing
    imports keep matching.

    Args:
        base_dim: Channel width of stage 0.
        out_indices: Indices of the stages whose outputs are returned.
        depths: Number of ``block`` modules in each stage.
        mlp_ratio: Expansion ratio passed to every ``block``.
        drop_path_rate: Maximum drop-path rate; rates increase linearly from
            0 to this value across all blocks.
        num_classes: Stored on the instance only; no classifier is built.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, base_dim=32, out_indices: Sequence[int] = (0, 1, 2),
                 depths: Sequence[int] = (3, 3, 12, 5), mlp_ratio=4,
                 drop_path_rate=0.0, num_classes=1000, **kwargs):
        super().__init__()
        self.num_classes = num_classes
        self.in_channel = 32
        self.out_indices = out_indices
        # Private copy: the default is immutable and a caller-supplied list
        # cannot be changed behind the model's back.
        self.depths = list(depths)
        # stem layer
        self.stem = nn.Sequential(
            convbn(3, self.in_channel, kernel_size=3, stride=2, padding=1),
            nn.ReLU6())
        # stochastic depth: one rate per block over the whole network
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))]
        # build stages
        self.stages = nn.ModuleList()
        cur = 0
        for i_layer, depth in enumerate(self.depths):
            embed_dim = base_dim * 2 ** i_layer
            down_sampler = convbn(self.in_channel, embed_dim, 3, 2, 1)
            self.in_channel = embed_dim
            stage_rates = dpr[cur:cur + depth]
            blocks = [block(self.in_channel, mlp_ratio, rate) for rate in stage_rates]
            cur += depth
            self.stages.append(nn.Sequential(down_sampler, *blocks))
        # The classification head (BatchNorm2d, AdaptiveAvgPool2d, Linear) and
        # the `self.apply(self._init_weights)` call are deliberately left out.

    def _init_weights(self, m):
        """Initialise one module; not invoked by ``__init__`` in this file.

        ``isinstance`` needs a tuple of types: ``A or B`` evaluates to ``A``
        and would silently skip the second type.
        """
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(self, x):
        """Run the stem and all stages; return the selected stage outputs.

        Returns:
            Tuple of the outputs of the stages listed in ``out_indices``,
            ordered by stage index.
        """
        x = self.stem(x)
        # Record the output of each requested stage.
        outs = []
        for i, stage in enumerate(self.stages):
            x = stage(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)
def _load_pretrained(model, name):
    """Load the released checkpoint ``model_urls[name]`` into ``model``.

    The checkpoints were saved from the full classifier, whose norm/head
    layers are not built by ``starnet`` in this file. A strict load would
    therefore fail on those unexpected keys, so loading is non-strict and
    only *missing* backbone weights are treated as an error.

    Raises:
        RuntimeError: If the checkpoint lacks weights the model needs.
    """
    url = model_urls[name]
    checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
    result = model.load_state_dict(checkpoint["state_dict"], strict=False)
    if result.missing_keys:
        raise RuntimeError(
            f"checkpoint for {name} is missing keys: {result.missing_keys}")
    return model


@MODELS.register_module()
def starnet_s1(pretrained=False, **kwargs):
    """Build StarNet-S1 (base_dim 24, depths [2, 2, 8, 3]).

    Args:
        pretrained: When true, download and load the released weights.
        **kwargs: Forwarded to ``starnet``.
    """
    model = starnet(24, (0, 1, 2), [2, 2, 8, 3], **kwargs)
    if pretrained:
        _load_pretrained(model, 'starnet_s1')
    return model


@MODELS.register_module()
def starnet_s2(pretrained=False, **kwargs):
    """Build StarNet-S2 (base_dim 32, depths [1, 2, 6, 2]).

    Args:
        pretrained: When true, download and load the released weights.
        **kwargs: Forwarded to ``starnet``.
    """
    model = starnet(32, (0, 1, 2), [1, 2, 6, 2], **kwargs)
    if pretrained:
        _load_pretrained(model, 'starnet_s2')
    return model


@MODELS.register_module()
def starnet_s3(pretrained=False, **kwargs):
    """Build StarNet-S3 (base_dim 32, depths [2, 2, 8, 4]).

    Args:
        pretrained: When true, download and load the released weights.
        **kwargs: Forwarded to ``starnet``.
    """
    model = starnet(32, (0, 1, 2), [2, 2, 8, 4], **kwargs)
    if pretrained:
        _load_pretrained(model, 'starnet_s3')
    return model


@MODELS.register_module()
def starnet_s4(pretrained=False, **kwargs):
    """Build StarNet-S4 (base_dim 32, depths [3, 3, 12, 5]).

    Args:
        pretrained: When true, download and load the released weights.
        **kwargs: Forwarded to ``starnet``.
    """
    model = starnet(32, (0, 1, 2), [3, 3, 12, 5], **kwargs)
    if pretrained:
        _load_pretrained(model, 'starnet_s4')
    return model
# Very small networks.
# `pretrained` is accepted for signature parity with the larger variants but
# is ignored: these builders never load weights.
@MODELS.register_module()
def starnet_s050(pretrained=False, **kwargs):
    """Build StarNet-S050 (base_dim 16, depths [1, 1, 3, 1], mlp_ratio 3)."""
    return starnet(16, (0, 1, 2), [1, 1, 3, 1], 3, **kwargs)


@MODELS.register_module()
def starnet_s100(pretrained=False, **kwargs):
    """Build StarNet-S100 (base_dim 20, depths [1, 2, 4, 1], mlp_ratio 4)."""
    return starnet(20, (0, 1, 2), [1, 2, 4, 1], 4, **kwargs)


@MODELS.register_module()
def starnet_s150(pretrained=False, **kwargs):
    """Build StarNet-S150 (base_dim 24, depths [1, 2, 4, 2], mlp_ratio 3)."""
    return starnet(24, (0, 1, 2), [1, 2, 4, 2], 3, **kwargs)
# Smoke test: build the default network and push one random 224x224 RGB
# batch through it. `outputs` is the tuple of selected stage feature maps.
if __name__ == '__main__':
    model = starnet()
    input_tensor = torch.randn(1, 3, 224, 224)
    outputs = model(input_tensor)
# Copyright (c) OpenMMLab. All rights reserved.
# Package __init__ of the backbones: importing a module here is what runs its
# registry decorator, so the StarNet import below makes it buildable by name.
from .base_backbone import BaseBackbone
from .csp_darknet import YOLOv5CSPDarknet, YOLOv8CSPDarknet, YOLOXCSPDarknet
from .csp_resnet import PPYOLOECSPResNet
from .cspnext import CSPNeXt
from .efficient_rep import YOLOv6CSPBep, YOLOv6EfficientRep
from .yolov7_backbone import YOLOv7Backbone
from .starnet import starnet

__all__ = [
    'YOLOv5CSPDarknet', 'BaseBackbone', 'YOLOv6EfficientRep', 'YOLOv6CSPBep',
    'YOLOXCSPDarknet', 'CSPNeXt', 'YOLOv7Backbone', 'PPYOLOECSPResNet',
    'YOLOv8CSPDarknet', 'starnet'
]
_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']

# ========================Frequently modified parameters======================
# -----data related-----
data_root = 'data/coco/'  # Root path of data
# Path of train annotation file
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/'  # Prefix of train image path
# Path of val annotation file
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/'  # Prefix of val image path

num_classes = 80  # Number of classes for classification
# Batch size of a single GPU during training
train_batch_size_per_gpu = 16
# Worker to pre-fetch data for each single GPU during training
train_num_workers = 8
# persistent_workers must be False if num_workers is 0
persistent_workers = True

# -----model related-----
# Basic size of multi-scale prior box
anchors = [
    [(10, 13), (16, 30), (33, 23)],  # P3/8
    [(30, 61), (62, 45), (59, 119)],  # P4/16
    [(116, 90), (156, 198), (373, 326)]  # P5/32
]

# -----train val related-----
# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
base_lr = 0.01
max_epochs = 300  # Maximum training epochs

model_test_cfg = dict(
    # The config of multi-label for multi-class prediction.
    multi_label=True,
    # The number of boxes before NMS
    nms_pre=30000,
    score_thr=0.001,  # Threshold to filter out boxes.
    nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold
    max_per_img=300)  # Max number of detections of each image

# ========================Possible modified parameters========================
# -----data related-----
img_scale = (640, 640)  # width, height
# Dataset type, this will be used to define the dataset
dataset_type = 'YOLOv5CocoDataset'
# Batch size of a single GPU during validation
val_batch_size_per_gpu = 1
# Worker to pre-fetch data for each single GPU during validation
val_num_workers = 2

# Config of batch shapes. Only on val.
# It means not used if batch_shapes_cfg is None.
batch_shapes_cfg = dict(
    type='BatchShapePolicy',
    batch_size=val_batch_size_per_gpu,
    img_size=img_scale[0],
    # The image scale of padding should be divided by pad_size_divisor
    size_divisor=32,
    # Additional paddings for pixel scale
    extra_pad_ratio=0.5)

# -----model related-----
# The scaling factor that controls the depth of the network structure
deepen_factor = 0.33
# The scaling factor that controls the width of the network structure
widen_factor = 0.5
# Strides of multi-scale prior box
strides = [8, 16, 32]
num_det_layers = 3  # The number of model output scales
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)  # Normalization config

# -----train val related-----
affine_scale = 0.5  # YOLOv5RandomAffine scaling ratio
loss_cls_weight = 0.5
loss_bbox_weight = 0.05
loss_obj_weight = 1.0
prior_match_thr = 4.  # Priori box matching threshold
# The obj loss weights of the three output layers
obj_level_weights = [4., 1., 0.4]
lr_factor = 0.01  # Learning rate scaling factor
weight_decay = 0.0005
# Save model checkpoint and validation intervals
save_checkpoint_intervals = 10
# The maximum checkpoints to keep.
max_keep_ckpts = 3
# Single-scale training is recommended to
# be turned on, which can speed up training.
env_cfg = dict(cudnn_benchmark=True)
'''
Reference table of StarNet variants.
name: base_dim, starnet_channel, depths, mlp_ratio
s1: 24, [48, 96, 192], [2, 2, 8, 3], 4
s2: 32, [64, 128, 256], [1, 2, 6, 2], 4
s3: 32, [64, 128, 256], [2, 2, 8, 4], 4
s4: 32, [64, 128, 256], [3, 3, 12, 5], 4
starnet_s050: 16, [32, 64, 128], [1, 1, 3, 1], 3
starnet_s100: 20, [40, 80, 160], [1, 2, 4, 1], 4
starnet_s150: 24, [48, 96, 192], [1, 2, 4, 2], 3
'''
# Channel list handed to the neck and head (each applies `widen_factor`).
starnet_channel = [48, 96, 192]
# NOTE(review): base_dim=24 below is labelled "s1", but these depths are the
# ones `starnet_s2` uses ([1, 2, 6, 2]); `starnet_s1` uses [2, 2, 8, 3].
# Confirm which variant is intended.
depths = [1, 2, 6, 2]

# ===============================Unmodified in most cases====================
model = dict(
    type='YOLODetector',
    data_preprocessor=dict(
        type='mmdet.DetDataPreprocessor',
        mean=[0., 0., 0.],
        std=[255., 255., 255.],
        bgr_to_rgb=True),
    backbone=dict(
        # StarNet-S1 width
        type='starnet',
        base_dim=24,
        # NOTE(review): in the backbone code stage i runs at total stride
        # 2 ** (i + 2), so stages (0, 1, 2) give strides 4/8/16 with
        # 24/48/96 channels, while `strides` declares 8/16/32. The channels
        # presumably only line up because the neck scales
        # `starnet_channel` by widen_factor=0.5 -- verify; stages
        # (1, 2, 3) with unscaled channels look like the intended pairing.
        out_indices=(0, 1, 2),
        depths=depths,
        mlp_ratio=4,
        num_classes=num_classes,
        # deepen_factor=deepen_factor,
        # widen_factor=widen_factor,
        # norm_cfg=norm_cfg,
        # act_cfg=dict(type='SiLU', inplace=True)
    ),
    neck=dict(
        type='YOLOv5PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=starnet_channel,
        out_channels=starnet_channel,
        num_csp_blocks=3,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='YOLOv5Head',
        head_module=dict(
            type='YOLOv5HeadModule',
            num_classes=num_classes,
            in_channels=starnet_channel,
            widen_factor=widen_factor,
            featmap_strides=strides,
            num_base_priors=3),
        prior_generator=dict(
            type='mmdet.YOLOAnchorGenerator',
            base_sizes=anchors,
            strides=strides),
        # scaled based on number of detection layers
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='mean',
            loss_weight=loss_cls_weight *
            (num_classes / 80 * 3 / num_det_layers)),
        # Change this entry to swap in a different IoU loss.
        # NOTE(review): `focal` is not an argument of the stock IoULoss as
        # far as this file shows; presumably a locally modified loss --
        # confirm before reuse.
        loss_bbox=dict(
            type='IoULoss',
            focal=True,
            iou_mode='ciou',
            bbox_format='xywh',
            eps=1e-7,
            reduction='mean',
            loss_weight=loss_bbox_weight * (3 / num_det_layers),
            return_iou=True),
        loss_obj=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='mean',
            loss_weight=loss_obj_weight *
            ((img_scale[0] / 640) ** 2 * 3 / num_det_layers)),
        prior_match_thr=prior_match_thr,
        obj_level_weights=obj_level_weights),
    test_cfg=model_test_cfg)
# Light photometric augmentations, each applied with probability 0.01.
albu_train_transforms = [
    dict(type='Blur', p=0.01),
    dict(type='MedianBlur', p=0.01),
    dict(type='ToGray', p=0.01),
    dict(type='CLAHE', p=0.01)
]

# Loading steps shared by the training pipeline and by Mosaic's sub-images.
pre_transform = [
    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
    dict(type='LoadAnnotations', with_bbox=True)
]

train_pipeline = [
    *pre_transform,
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        border_val=(114, 114, 114)),
    dict(
        type='mmdet.Albu',
        transforms=albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
        keymap={
            'img': 'image',
            'gt_bboxes': 'bboxes'
        }),
    dict(type='YOLOv5HSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                   'flip_direction'))
]

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    persistent_workers=persistent_workers,
    pin_memory=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=train_ann_file,
        data_prefix=dict(img=train_data_prefix),
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        pipeline=train_pipeline))
# Validation/test preprocessing: aspect-preserving resize, then letterbox
# padding to `img_scale` without up-scaling.
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=False,
        pad_val=dict(img=114)),
    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]

val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    persistent_workers=persistent_workers,
    pin_memory=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        test_mode=True,
        data_prefix=dict(img=val_data_prefix),
        ann_file=val_ann_file,
        pipeline=test_pipeline,
        batch_shapes_cfg=batch_shapes_cfg))

# Testing reuses the validation dataloader unchanged.
test_dataloader = val_dataloader
# No standalone scheduler: the learning-rate schedule is driven by the
# param-scheduler hook configured in `default_hooks` below.
param_scheduler = None
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=base_lr,
        momentum=0.937,
        weight_decay=weight_decay,
        nesterov=True,
        batch_size_per_gpu=train_batch_size_per_gpu),
    constructor='YOLOv5OptimizerConstructor')

default_hooks = dict(
    param_scheduler=dict(
        type='YOLOv5ParamSchedulerHook',
        scheduler_type='linear',
        lr_factor=lr_factor,
        max_epochs=max_epochs),
    checkpoint=dict(
        type='CheckpointHook',
        interval=save_checkpoint_intervals,
        save_best='auto',
        max_keep_ckpts=max_keep_ckpts))

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49)
]

val_evaluator = dict(
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file=data_root + val_ann_file,
    metric='bbox')
test_evaluator = val_evaluator

# Validation runs at the same interval at which checkpoints are saved.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=save_checkpoint_intervals)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
您想发表意见!!点此发布评论
版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。
发表评论