本文介绍SCNet模型,其核心是自校正卷积(SCConv),通过异构卷积和卷积核间通信扩大感受野,建立远程空间和通道依存关系,且不增参数。以10分类动物数据集测试,经数据增强后,对比SCNet101与ResNet101,前者分类效果更优,为卷积网络设计提供新思路。
☞☞☞AI 智能聊天, 问答助手, AI 智能搜索, 免费无限量使用 DeepSeek R1 模型☜☜☜
自校正卷积 SCConv(Self-Calibrated Convolutions)
# Notebook shell command (kept as a comment; not valid Python):
# !unzip -oq data/data110994/work.zip -d work/

import paddle

paddle.seed(8888)  # fix the RNG seed for reproducibility

import numpy as np
from typing import Callable

# Training configuration.
config_parameters = {
    "class_dim": 10,  # number of classes
    "target_path": "/home/aistudio/work/",
    'train_image_dir': '/home/aistudio/work/trainImages',
    'eval_image_dir': '/home/aistudio/work/evalImages',
    'epochs': 20,
    'batch_size': 64,
    'lr': 0.01,
}


class TowerDataset(paddle.io.Dataset):
    """Image-folder dataset for the 10-class animal task.

    Step 1: subclass ``paddle.io.Dataset``.
    """

    def __init__(self, transforms: Callable, mode: str = 'train'):
        """Step 2: constructor — pick the train or eval image folder.

        Args:
            transforms: callable applied to each raw image array.
            mode: 'train' or 'eval'; selects which DatasetFolder backs the dataset.
        """
        super(TowerDataset, self).__init__()
        self.mode = mode
        self.transforms = transforms
        train_image_dir = config_parameters['train_image_dir']
        eval_image_dir = config_parameters['eval_image_dir']
        train_data_folder = paddle.vision.DatasetFolder(train_image_dir)
        eval_data_folder = paddle.vision.DatasetFolder(eval_image_dir)
        if self.mode == 'train':
            self.data = train_data_folder
        elif self.mode == 'eval':
            self.data = eval_data_folder

    def __getitem__(self, index):
        """Step 3: return one (transformed image, label) pair for `index`."""
        data = np.array(self.data[index][0]).astype('float32')
        data = self.transforms(data)
        label = np.array([self.data[index][1]]).astype('int64')
        return data, label

    def __len__(self):
        """Step 4: total number of samples in the selected folder."""
        return len(self.data)


from paddle.vision import transforms as T

# Data augmentation: resize -> random rotation -> HWC->CHW -> scale to [0,1]
# -> per-channel standardization (mean/std measured on this dataset).
transform_train = T.Compose([
    T.Resize((256, 256)),
    # T.RandomVerticalFlip(10),
    # T.RandomHorizontalFlip(10),
    T.RandomRotation(10),
    T.Transpose(),
    T.Normalize(mean=[0, 0, 0],            # divide pixel values by 255 -> [0, 1]
                std=[255, 255, 255]),
    T.Normalize(mean=[0.50950350, 0.54632660, 0.57409690],  # subtract mean, divide by std:
                std=[0.26059777, 0.26041326, 0.29220656]),  # out[c] = (in[c] - mean[c]) / std[c]
])

transform_eval = T.Compose([
    T.Resize((256, 256)),
    T.Transpose(),
    T.Normalize(mean=[0, 0, 0],            # divide pixel values by 255 -> [0, 1]
                std=[255, 255, 255]),
    T.Normalize(mean=[0.50950350, 0.54632660, 0.57409690],  # subtract mean, divide by std
                std=[0.26059777, 0.26041326, 0.29220656]),
])

train_dataset = TowerDataset(mode='train', transforms=transform_train)
eval_dataset = TowerDataset(mode='eval', transforms=transform_eval)

# Asynchronous data loading.
train_loader = paddle.io.DataLoader(train_dataset,
                                    places=paddle.CUDAPlace(0),
                                    batch_size=16,
                                    shuffle=True,
                                    # num_workers=2,
                                    # use_shared_memory=True
                                    )
eval_loader = paddle.io.DataLoader(eval_dataset,
                                   places=paddle.CUDAPlace(0),
                                   batch_size=16,
                                   # num_workers=2,
                                   # use_shared_memory=True
                                   )
# BUG FIX: len(loader) is the number of BATCHES, not samples; the original
# printed 1309 / 328 (batch counts) under a sample-count label. Report the
# actual dataset sizes instead.
print('训练集样本量: {},验证集样本量: {}'.format(len(train_dataset), len(eval_dataset)))
代码中的另一种常规一些的实现方式,结构比较清晰,并且手动添加了一些注释,相对比较好理解import paddleimport paddle.nn as nnimport paddle.nn.functional as Fclass SCConv(nn.Layer):
def __init__(self, inplanes, planes, stride, padding, dilation, groups, pooling_r, norm_layer):
super(SCConv, self).__init__()
self.k2 = nn.Sequential(
nn.AvgPool2D(kernel_size=pooling_r, stride=pooling_r),
nn.Conv2D(inplanes, planes, kernel_size=3, stride=1,
padding=padding, dilation=dilation,
groups=groups, bias_attr=False),
norm_layer(planes),
)
self.k3 = nn.Sequential(
nn.Conv2D(inplanes, planes, kernel_size=3, stride=1,
padding=padding, dilation=dilation,
groups=groups, bias_attr=False),
norm_layer(planes),
)
self.k4 = nn.Sequential(
nn.Conv2D(inplanes, planes, kernel_size=3, stride=stride,
padding=padding, dilation=dilation,
groups=groups, bias_attr=False),
norm_layer(planes),
) def forward(self, x):
identity = x
out = F.sigmoid(paddle.add(identity, F.interpolate(self.k2(x), identity.shape[2:]))) # sigmoid(identity + k2)
out = paddle.multiply(self.k3(x), out) # k3 * sigmoid(identity + k2)
out = self.k4(out) # k4
return outclass SCBottleneck(nn.Layer):
"""SCNet SCBottleneck
"""
expansion = 4
pooling_r = 4 # down-sampling rate of the avg pooling layer in the K3 path of SC-Conv.
def __init__(self, inplanes, planes, stride=1, downsample=None,
cardinality=1, bottleneck_width=32,
avd=False, dilation=1, is_first=False,
norm_layer=None):
super(SCBottleneck, self).__init__()
group_width = int(planes * (bottleneck_width / 64.)) * cardinality
self.conv1_a = nn.Conv2D(inplanes, group_width, kernel_size=1, bias_attr=False)
self.bn1_a = norm_layer(group_width)
self.conv1_b = nn.Conv2D(inplanes, group_width, kernel_size=1, bias_attr=False)
self.bn1_b = norm_layer(group_width)
self.avd = avd and (stride > 1 or is_first) if self.avd:
self.avd_layer = nn.AvgPool2D(3, stride, padding=1)
stride = 1
self.k1 = nn.Sequential(
nn.Conv2D(
group_width, group_width, kernel_size=3, stride=stride,
padding=dilation, dilation=dilation,
groups=cardinality, bias_attr=False),
norm_layer(group_width),
)
self.scconv = SCConv(
group_width, group_width, stride=stride,
padding=dilation, dilation=dilation,
groups=cardinality, pooling_r=self.pooling_r, norm_layer=norm_layer)
self.conv3 = nn.Conv2D(
group_width * 2, planes * 4, kernel_size=1, bias_attr=False)
self.bn3 = norm_layer(planes*4)
self.relu = nn.ReLU()
self.downsample = downsample
self.dilation = dilation
self.stride = stride def forward(self, x):
residual = x
out_a= self.conv1_a(x)
out_a = self.bn1_a(out_a)
out_b = self.conv1_b(x)
out_b = self.bn1_b(out_b)
out_a = self.relu(out_a)
out_b = self.relu(out_b)
out_a = self.k1(out_a)
out_b = self.scconv(out_b)
out_a = self.relu(out_a)
out_b = self.relu(out_b) if self.avd:
out_a = self.avd_layer(out_a)
out_b = self.avd_layer(out_b)
out = self.conv3(paddle.concat([out_a, out_b], axis=1))
out = self.bn3(out) if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out) return outclass SCNet(nn.Layer):
""" SCNet Variants Definations
Parameters
----------
block : Block
Class for the residual block.
layers : list of int
Numbers of layers in each block.
classes : int, default 1000
Number of classificoncation classes.
dilated : bool, default False
Applying dilation strategy to pretrained SCNet yielding a stride-8 model.
deep_stem : bool, default False
Replace 7x7 conv in input stem with 3 3x3 conv.
avg_down : bool, default False
Use AvgPool instead of stride conv when
downsampling in the bottleneck.
norm_layer : object
Normalization layer used (default: :class:`paddle.nn.BatchNorm2D`).
Reference:
- He, Kaiming, et al. "Deep residual learning for image recognition."
Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
- Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
"""
def __init__(self, block, layers, groups=1, bottleneck_width=32,
num_classes=1000, dilated=False, dilation=1,
deep_stem=False, stem_width=64, avg_down=False,
avd=False, norm_layer=nn.BatchNorm2D):
self.cardinality = groups
self.bottleneck_width = bottleneck_width # ResNet-D params
self.inplanes = stem_width*2 if deep_stem else 64
self.avg_down = avg_down
self.avd = avd super(SCNet, self).__init__()
conv_layer = nn.Conv2D if deep_stem:
self.conv1 = nn.Sequential(
conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1, bias_attr=False),
norm_layer(stem_width),
nn.ReLU(),
conv_layer(stem_width, stem_width, kernel_size=3, stride=1, padding=1, bias_attr=False),
norm_layer(stem_width),
nn.ReLU(),
conv_layer(stem_width, stem_width*2, kernel_size=3, stride=1, padding=1, bias_attr=False),
) else:
self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
bias_attr=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU()
self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, is_first=False)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer) if dilated or dilation == 4:
self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
dilation=2, norm_layer=norm_layer)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
dilation=4, norm_layer=norm_layer) elif dilation==2:
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilation=1, norm_layer=norm_layer)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
dilation=2, norm_layer=norm_layer) else:
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
norm_layer=norm_layer)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
norm_layer=norm_layer)
self.avgpool = nn.AdaptiveAvgPool2D((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes) # for m in self.modules():
# if isinstance(m, nn.Conv2D):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
# elif isinstance(m, norm_layer):
# nn.init.constant_(m.weight, 1)
# nn.init.constant_(m.bias_attr, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None,
is_first=True):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
down_layers = [] if self.avg_down: if dilation == 1:
down_layers.append(nn.AvgPool2D(kernel_size=stride, stride=stride,
ceil_mode=True, count_include_pad=False)) else:
down_layers.append(nn.AvgPool2D(kernel_size=1, stride=1,
ceil_mode=True, count_include_pad=False))
down_layers.append(nn.Conv2D(self.inplanes, planes * block.expansion,
kernel_size=1, stride=1, bias_attr=False)) else:
down_layers.append(nn.Conv2D(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias_attr=False))
down_layers.append(norm_layer(planes * block.expansion))
downsample = nn.Sequential(*down_layers)
layers = [] if dilation == 1 or dilation == 2:
layers.append(block(self.inplanes, planes, stride, downsample=downsample,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd, dilation=1, is_first=is_first,
norm_layer=norm_layer)) elif dilation == 4:
layers.append(block(self.inplanes, planes, stride, downsample=downsample,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd, dilation=2, is_first=is_first,
norm_layer=norm_layer)) else: raise RuntimeError("=> unknown dilation size: {}".format(dilation))
self.inplanes = planes * block.expansion for i in range(1, blocks):
layers.append(block(self.inplanes, planes,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd, dilation=dilation,
norm_layer=norm_layer)) return nn.Sequential(*layers) def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.reshape([x.shape[0], -1])
x = self.fc(x) return xdef scnet50(pretrained=False, **kwargs):
"""Constructs a SCNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SCNet(SCBottleneck, [3, 4, 6, 3],
deep_stem=False, stem_width=32, avg_down=False,
avd=False, **kwargs) if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['scnet50'])) return modeldef scnet50_v1d(pretrained=False, **kwargs):
"""Constructs a SCNet-50_v1d model described in
`Bag of Tricks `_.
`ResNeSt: Split-Attention Networks `_.
Compared with default SCNet(SCNetv1b), SCNetv1d replaces the 7x7 conv
in the input stem with three 3x3 convs. And in the downsampling block,
a 3x3 avg_pool with stride 2 is added before conv, whose stride is
changed to 1.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SCNet(SCBottleneck, [3, 4, 6, 3],
deep_stem=True, stem_width=32, avg_down=True,
avd=True, **kwargs) if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['scnet50_v1d'])) return modeldef scnet101(pretrained=False, **kwargs):
"""Constructs a SCNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SCNet(SCBottleneck, [3, 4, 23, 3],
deep_stem=False, stem_width=64, avg_down=False,
avd=False, **kwargs) if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['scnet101'])) return modeldef scnet101_v1d(pretrained=False, **kwargs):
"""Constructs a SCNet-101_v1d model described in
`Bag of Tricks `_.
`ResNeSt: Split-Attention Networks `_.
Compared with default SCNet(SCNetv1b), SCNetv1d replaces the 7x7 conv
in the input stem with three 3x3 convs. And in the downsampling block,
a 3x3 avg_pool with stride 2 is added before conv, whose stride is
changed to 1.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SCNet(SCBottleneck, [3, 4, 23, 3],
deep_stem=True, stem_width=64, avg_down=True,
avd=True, **kwargs) if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['scnet101_v1d'])) return model
# Quick shape smoke test of the untrained network.
if __name__ == '__main__':
    images = paddle.rand([1, 3, 224, 224])
    model = scnet101(pretrained=False)
    a = model(images)
    print(a.shape)

# Build the 10-class SCNet-101 and wrap it in the high-level training API.
model = scnet101(num_classes=10, pretrained=False)
model = paddle.Model(model)
# Optimizer selection and training of the SCNet model.
class SaveBestModel(paddle.callbacks.Callback):
    """Callback that saves the model whenever eval accuracy beats the best seen."""

    def __init__(self, target=0.5, path='work/best_model2', verbose=0):
        self.target = target  # best accuracy so far (initial save threshold)
        self.epoch = None
        self.path = path

    def on_epoch_end(self, epoch, logs=None):
        self.epoch = epoch

    def on_eval_end(self, logs=None):
        if logs.get('acc') > self.target:
            self.target = logs.get('acc')
            self.model.save(self.path)
            print('best acc is {} at epoch {}'.format(self.target, self.epoch))


callback_visualdl = paddle.callbacks.VisualDL(log_dir='work/no_SA')
callback_savebestmodel = SaveBestModel(target=0.5, path='work/best_model1')
callbacks = [callback_visualdl, callback_savebestmodel]

base_lr = config_parameters['lr']
epochs = config_parameters['epochs']


def make_optimizer(parameters=None):
    """Momentum SGD with cosine-annealed learning rate and L2 weight decay."""
    momentum = 0.9
    learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
        learning_rate=base_lr, T_max=epochs, verbose=False)
    weight_decay = paddle.regularizer.L2Decay(0.0001)
    optimizer = paddle.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=momentum,
        weight_decay=weight_decay,
        parameters=parameters)
    return optimizer


optimizer = make_optimizer(model.parameters())
model.prepare(optimizer,
              paddle.nn.CrossEntropyLoss(),
              paddle.metric.Accuracy())
model.fit(train_loader,
          eval_loader,
          epochs=10,
          batch_size=1,  # NOTE(review): ignored — batching is done by the DataLoader
          callbacks=callbacks,
          verbose=1)     # log display format
# Baseline: ResNet-101 trained with the same recipe, for comparison with SCNet-101.
model_2 = paddle.vision.models.resnet101(num_classes=10, pretrained=False)
model_2 = paddle.Model(model_2)
model_2.summary((1, 3, 256, 256))


# Optimizer selection for the baseline.
# NOTE(review): duplicates SaveBestModel / make_optimizer defined earlier in
# the notebook; kept as-is to mirror the original cell structure.
class SaveBestModel(paddle.callbacks.Callback):
    """Callback that saves the model whenever eval accuracy beats the best seen."""

    def __init__(self, target=0.5, path='work/best_model2', verbose=0):
        self.target = target  # best accuracy so far (initial save threshold)
        self.epoch = None
        self.path = path

    def on_epoch_end(self, epoch, logs=None):
        self.epoch = epoch

    def on_eval_end(self, logs=None):
        if logs.get('acc') > self.target:
            self.target = logs.get('acc')
            self.model.save(self.path)
            print('best acc is {} at epoch {}'.format(self.target, self.epoch))


callback_visualdl = paddle.callbacks.VisualDL(log_dir='work/no_SA')
callback_savebestmodel = SaveBestModel(target=0.5, path='work/best_model2')
callbacks = [callback_visualdl, callback_savebestmodel]

base_lr = 0.01  # NOTE(review): hard-coded; config_parameters['lr'] has the same value
epochs = config_parameters['epochs']


def make_optimizer(parameters=None):
    """Momentum SGD with cosine-annealed learning rate and L2 weight decay."""
    momentum = 0.9
    learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
        learning_rate=base_lr, T_max=epochs, verbose=False)
    weight_decay = paddle.regularizer.L2Decay(0.0001)
    optimizer = paddle.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=momentum,
        weight_decay=weight_decay,
        parameters=parameters)
    return optimizer


optimizer = make_optimizer(model_2.parameters())
model_2.prepare(optimizer,
                paddle.nn.CrossEntropyLoss(),
                paddle.metric.Accuracy())

model_2.fit(train_loader,
            eval_loader,
            epochs=10,
            batch_size=1,  # NOTE(review): ignored — batching is done by the DataLoader
            callbacks=callbacks,
            verbose=1)     # log display format
# 可以通过
# 多个
# 都有
# 更好地
# 标准差
# 均值
# 异构
# 有效地
# 实现了
# ai
# 数据结构
# http
# cnn
# 架构
# red
# cos
# 异步加载
相关栏目:
【
Google疑问12 】
【
Facebook疑问10 】
【
网络优化91478 】
【
技术知识72672 】
【
云计算0 】
【
GEO优化84317 】
【
优选文章0 】
【
营销推广36048 】
【
网络运营41350 】
【
案例网站102563 】
【
AI智能45237 】
相关推荐:
AGI未来展望:DeepMind CEO的深度解读与行业洞察
Google NotebookLM:AI赋能的智能笔记与思维导图工具
GitHub Copilot终极指南:提升代码效率与质量
通义千问怎么设置常用功能快捷键_通义千问快捷键设置【步骤】
如何通过 DeepSeek 优化 Kubernetes 配置文件
如何用AI帮你创作节日贺卡文案?让祝福与众不同
使用 DeepSeek 进行网络协议栈分析与优化建议
Excel Copilot:AI驱动的数据分析革命,提升效率秘籍
教你用AI一键生成Excel VBA脚本,彻底告别重复操作
OpenAI Sora 2:AI视频生成新纪元
Feelin聊天网页版地址 Feelin AI官方网站首页
百度AI搜索如何开启无痕搜索_百度AI搜索无痕模式设置与隐私保护【攻略】
提升房地产业务:AI语音助手赋能房地产经纪公司
AI一键生成儿童绘本故事
AI任务管理器终极评测:找到最适合你的效率神器
Napkin AI:AI驱动的文本可视化工具,轻松创建思维导图
如何用AI一键生成名片设计 AI个人电子名片制作指南【教程】
怎么用ai生成配色方案 AI设计色彩搭配与灵感获取【技巧】
2025数据科学学习指南:技能、工具和学习路线图
ChatGPT怎么用一键生成读书笔记_ChatGPT笔记生成教程【攻略】
LALAL.AI教程:音视频人声分离、降噪终极指南
百度ai助手怎么设置不显示 百度ai助手界面净化设置
AI数据分析报告生成工具有哪些_一键生成可视化报告的AI工具推荐
AI电商网站搭建:CSV到WooCommerce全流程指南
Gemini怎样写描述型提示词_Gemini描述提示词编写【攻略】
去哪旅行ai抢票助手怎样添加备选车次_去哪旅行ai抢票助手备选车次设置与切换【攻略】
PixianAI抠图怎么修复瑕疵_PixianAI瑕疵修复与手动涂抹工具【步骤】
WorkPPT:AI驱动的PPT制作神器,效率提升不止10倍!
GravityWrite:AI驱动的内容创作,提升排名和效率
探索贝奥武夫:英雄史诗的起源、故事与文化意义
通义千问怎么用_通义千问使用方法详细指南【教程】
Gemini怎样连接Google账号_Gemini账号连接方法【方法】
ChatGPT怎样用提示词设上下文_ChatGPT上下文设置技巧【方法】
解读诗歌中的女性视角:Shelley Puhak 的作品解析
LogMeIn Resolve:IT 运维知识库的 AI 赋能实践
TechInternPath.ai:AI驱动的实习之路,助你梦想成真
轻松生成二维码:免费AI工具终极指南
利用AI赋能教育:学习方式的未来之路
都灵裹尸布之谜:AI揭示耶稣基督的真实面貌?
快速生成PPT工具怎么用_快速生成PPT工具使用方法详细指南【教程】
实测效率提升超35%!科大讯飞星火AIPC开启AI办公新纪元
探索古希腊之美:AI打造的绝|美女|神形象赏析
Elon Musk会解决X平台上的机器人问题吗?塔罗牌预测
Excel Copilot:AI驱动的强大新功能与实用案例解析
Google Gemini 对复杂物理解题过程的逐步解析
生成式AI革新客户服务:提升效率与个性化体验
如何通过 DeepSeek 进行深度神经网络超参数搜索
Miaoaotalk 猫语翻译器测评:宠物沟通新体验?
3步教你用AI将你的照片变成乐高积木风格
如何使用 Gemini 进行 Google Cloud 架构成本预估
2025-07-17
南京市珐之弘网络技术有限公司专注海外推广十年,是谷歌推广.Facebook广告全球合作伙伴,我们精英化的技术团队为企业提供谷歌海外推广+外贸网站建设+网站维护运营+Google SEO优化+社交营销为您提供一站式海外营销服务。