PyTorch Tips #39
Saving / loading models

Loading a pretrained checkpoint, keeping only the keys that match the current model:

import os
import torch

model_path = os.path.join(
    shufflenetv2_path, 'shufflenetv2_x1_69.402_88.374.pth.tar')
model = ShuffleNetV2()
if pretrained:
    print(f"=> loading model '{model_path}'")
    pretrained_dict = torch.load(model_path)
    model_dict = model.state_dict()
    # 1. filter out unnecessary keys
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                       if k in model_dict}
    # 2. overwrite entries in the existing state dict
    model_dict.update(pretrained_dict)
    # 3. load the new state dict
    model.load_state_dict(model_dict)

Saving and reloading during training:

# Save the model (weights and biases) after an epoch for later use
filename = ('%s/feature-current.pth' % check_root_feature)
torch.save(model.state_dict(), filename)
# Save the optimizer state
filename_opti = ('%s/opti-current.pth' % check_root_opti)
torch.save(optimizer_feature.state_dict(), filename_opti)

# Load the previous training result (weights and biases) to continue training
model.load_state_dict(
    torch.load(check_root_feature + '/feature-current.pth')
)
# Load the optimizer state
optimizer_feature.load_state_dict(
    torch.load(check_root_opti + '/opti-current.pth')
)

# Matching restore for a model saved with torch.save(model.state_dict(), filename)
fileroot = ('%s/feature-current.pth' % check_root_feature)
model.load_state_dict(torch.load(fileroot))
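A common variant (a minimal sketch; the file name is arbitrary and `model`, `optimizer`, and `epoch` are assumed to exist as in the snippets above) bundles everything needed to resume training into a single checkpoint dict:

import torch

# Save model, optimizer, and progress together
checkpoint = {
    'epoch': epoch,
    'model_state': model.state_dict(),
    'optimizer_state': optimizer.state_dict(),
}
torch.save(checkpoint, 'checkpoint-current.pth')

# Resume later
checkpoint = torch.load('checkpoint-current.pth')
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optimizer_state'])
start_epoch = checkpoint['epoch'] + 1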
Displaying data

# Have a look at one batch of training data
inputs, classes = next(iter(dataloaders['train']))
out = torchvision.utils.make_grid(inputs)
imshow(out, title=[class_names[x] for x in classes])
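imshow is not defined in the snippet above; a minimal sketch (assuming the standard ImageNet mean/std normalization used in most torchvision examples) could look like:

import numpy as np
import matplotlib.pyplot as plt

def imshow(inp, title=None):
    """Show a (C, H, W) tensor produced by make_grid as an image."""
    inp = inp.numpy().transpose((1, 2, 0))     # CHW -> HWC
    mean = np.array([0.485, 0.456, 0.406])     # undo the assumed normalization
    std = np.array([0.229, 0.224, 0.225])
    inp = np.clip(std * inp + mean, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)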
GPU memory optimization
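A few common ways to cut GPU memory use (a sketch only; `model`, `inputs`, `block`, and `x` are placeholders):

import torch
from torch.utils.checkpoint import checkpoint

# 1. Skip autograd bookkeeping when you only need forward passes
with torch.no_grad():
    output = model(inputs)

# 2. Drop references you no longer need, then release cached allocator blocks
del output
torch.cuda.empty_cache()

# 3. Trade compute for memory: activations of `block` are recomputed in the backward pass
out = checkpoint(block, x)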
TensorBoardX

# demo.py
import torch
import torchvision.utils as vutils
import numpy as np
import torchvision.models as models
from torchvision import datasets
from tensorboardX import SummaryWriter

resnet18 = models.resnet18(False)
writer = SummaryWriter()
sample_rate = 44100
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

for n_iter in range(100):
    dummy_s1 = torch.rand(1)
    dummy_s2 = torch.rand(1)
    # data grouping by `slash`
    writer.add_scalar('data/scalar1', dummy_s1[0], n_iter)
    writer.add_scalar('data/scalar2', dummy_s2[0], n_iter)
    writer.add_scalars('data/scalar_group', {'xsinx': n_iter * np.sin(n_iter),
                                             'xcosx': n_iter * np.cos(n_iter),
                                             'arctanx': np.arctan(n_iter)}, n_iter)
    dummy_img = torch.rand(32, 3, 64, 64)  # output from network
    if n_iter % 10 == 0:
        x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)
        dummy_audio = torch.zeros(sample_rate * 2)
        for i in range(x.size(0)):
            # amplitude of sound should be in [-1, 1]
            dummy_audio[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) / float(sample_rate))
        writer.add_audio('myAudio', dummy_audio, n_iter, sample_rate=sample_rate)
        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
        for name, param in resnet18.named_parameters():
            writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter)
        # needs tensorboard 0.4RC or later
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(100), n_iter)

dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]
features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")
writer.close()
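By default SummaryWriter() writes its event files under ./runs/; start the viewer with `tensorboard --logdir runs` to browse the logged scalars, images, histograms, and embeddings.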
Gradient clipping

Backpropagation can produce vanishing or exploding gradients (the partial derivatives shrink towards zero or blow up, so long-range dependencies stop being updated). The simplest, crudest remedy is to pick a threshold and clamp the gradients whenever they go past it:

optimizer.zero_grad()
loss, hidden = model(data, hidden, targets)
loss.backward()
# https://pytorch.org/docs/master/nn.html#clip-grad-norm
# torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=2)
# torch.nn.utils.clip_grad_value_(parameters, clip_value)
torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
optimizer.step()
Parameter initialization

On choosing an initialization function (the official docs list more options): https://blog.csdn.net/dss_dssssd/article/details/83959474
For ReLU activations the usual choice is He (Kaiming) initialization; PyTorch itself initializes conv layer parameters with Kaiming init. For a single-layer network, just call the init function on the layer right after building the model, as in the sketch below.
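A minimal sketch of Kaiming initialization (the layer sizes are arbitrary), both on a single layer and over a whole model via apply:

import torch.nn as nn
import torch.nn.init as init

# Single layer: initialize right after construction
conv = nn.Conv2d(3, 16, kernel_size=3, padding=1)
init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')
init.zeros_(conv.bias)

# Whole model: apply an init function to every submodule
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight, nonlinearity='relu')
        init.zeros_(m.bias)

model = nn.Sequential(
    nn.Conv2d(3, 16, 3), nn.ReLU(),
    nn.Flatten(), nn.Linear(16 * 30 * 30, 10),
)
model.apply(init_weights)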
Unfold & Fold

a = torch.randn([2, 3, 4, 4])
a
Out[6]:
tensor([[[[-0.5390, -0.4974, 0.4392, 0.0885],
[ 0.3316, 0.2863, 0.5387, 0.9645],
[-0.2879, -0.0852, 0.1790, 2.1958],
[ 0.2817, 0.4995, 0.6190, -0.5218]],
[[-0.1495, -0.9248, 1.9004, 0.2535],
[-0.9124, 0.7679, -0.2503, 0.0491],
[-1.0860, -0.0838, -0.8773, -1.4696],
[ 1.5713, -0.9741, -0.1584, 1.1361]],
[[-0.1027, 2.1711, 0.0953, -0.9208],
[-1.2121, -1.2770, 1.1427, -0.3149],
[-0.0458, -1.5204, -0.1037, 0.6764],
[ 0.3862, 0.6306, -1.0143, -0.1202]]],
[[[ 1.2074, -1.0920, 0.9833, 0.7729],
[-0.3728, -0.4250, -0.3600, -0.7940],
[ 0.6346, -0.4655, 0.9664, 0.4688],
[-1.0701, 0.0883, 0.2658, 0.0234]],
[[ 1.5879, 0.5017, 0.4271, 0.6949],
[ 0.4801, -0.0612, -1.4131, 0.0766],
[-1.0388, -0.7434, -0.1933, -0.7082],
[ 0.2480, 0.3196, -2.1165, -0.3998]],
[[-0.8971, -0.5938, -1.5611, 0.3487],
[ 1.0478, -0.3852, 0.1441, -2.2990],
[-0.7650, 0.4652, -1.0962, 1.5915],
[ 0.7840, 0.2409, 0.3218, 0.4186]]]])
import torch
import torch.nn as nn
unfold = nn.Unfold(2, 1, 0, 2)  # kernel_size=2, dilation=1, padding=0, stride=2
b = unfold(a)
b
Out[19]:
tensor([[[-0.5390, 0.4392, -0.2879, 0.1790],
[-0.4974, 0.0885, -0.0852, 2.1958],
[ 0.3316, 0.5387, 0.2817, 0.6190],
[ 0.2863, 0.9645, 0.4995, -0.5218],
[-0.1495, 1.9004, -1.0860, -0.8773],
[-0.9248, 0.2535, -0.0838, -1.4696],
[-0.9124, -0.2503, 1.5713, -0.1584],
[ 0.7679, 0.0491, -0.9741, 1.1361],
[-0.1027, 0.0953, -0.0458, -0.1037],
[ 2.1711, -0.9208, -1.5204, 0.6764],
[-1.2121, 1.1427, 0.3862, -1.0143],
[-1.2770, -0.3149, 0.6306, -0.1202]],
[[ 1.2074, 0.9833, 0.6346, 0.9664],
[-1.0920, 0.7729, -0.4655, 0.4688],
[-0.3728, -0.3600, -1.0701, 0.2658],
[-0.4250, -0.7940, 0.0883, 0.0234],
[ 1.5879, 0.4271, -1.0388, -0.1933],
[ 0.5017, 0.6949, -0.7434, -0.7082],
[ 0.4801, -1.4131, 0.2480, -2.1165],
[-0.0612, 0.0766, 0.3196, -0.3998],
[-0.8971, -1.5611, -0.7650, -1.0962],
[-0.5938, 0.3487, 0.4652, 1.5915],
[ 1.0478, 0.1441, 0.7840, 0.3218],
[-0.3852, -2.2990, 0.2409, 0.4186]]])
b.view(2, 12, 2, 2)
Out[18]:
tensor([[[[-0.5390, 0.4392],
[-0.2879, 0.1790]],
[[-0.4974, 0.0885],
[-0.0852, 2.1958]],
[[ 0.3316, 0.5387],
[ 0.2817, 0.6190]],
[[ 0.2863, 0.9645],
[ 0.4995, -0.5218]],
[[-0.1495, 1.9004],
[-1.0860, -0.8773]],
[[-0.9248, 0.2535],
[-0.0838, -1.4696]],
[[-0.9124, -0.2503],
[ 1.5713, -0.1584]],
[[ 0.7679, 0.0491],
[-0.9741, 1.1361]],
[[-0.1027, 0.0953],
[-0.0458, -0.1037]],
[[ 2.1711, -0.9208],
[-1.5204, 0.6764]],
[[-1.2121, 1.1427],
[ 0.3862, -1.0143]],
[[-1.2770, -0.3149],
[ 0.6306, -0.1202]]],
[[[ 1.2074, 0.9833],
[ 0.6346, 0.9664]],
[[-1.0920, 0.7729],
[-0.4655, 0.4688]],
[[-0.3728, -0.3600],
[-1.0701, 0.2658]],
[[-0.4250, -0.7940],
[ 0.0883, 0.0234]],
[[ 1.5879, 0.4271],
[-1.0388, -0.1933]],
[[ 0.5017, 0.6949],
[-0.7434, -0.7082]],
[[ 0.4801, -1.4131],
[ 0.2480, -2.1165]],
[[-0.0612, 0.0766],
[ 0.3196, -0.3998]],
[[-0.8971, -1.5611],
[-0.7650, -1.0962]],
[[-0.5938, 0.3487],
[ 0.4652, 1.5915]],
[[ 1.0478, 0.1441],
[ 0.7840, 0.3218]],
[[-0.3852, -2.2990],
[ 0.2409, 0.4186]]]])

As you can see here, view only reinterprets the shape of the unfolded output; it does not move any data. The examples below compare different view shapes on a small tensor temp:

temp.view(1, 2*2*2, 1, 1)
Out[16]:
tensor([[[[2.]],
[[7.]],
[[1.]],
[[3.]],
[[2.]],
[[0.]],
[[1.]],
[[1.]]]])
temp.view(1, 2*2, 2, 1)
Out[17]:
tensor([[[[2.],
[7.]],
[[1.],
[3.]],
[[2.],
[0.]],
[[1.],
[1.]]]])
temp.view(1, 2*2, 1, 2)
Out[18]:
tensor([[[[2., 7.]],
[[1., 3.]],
[[2., 0.]],
[[1., 1.]]]])
temp
Out[30]:
tensor([[[[3., 7., 0., 8.],
[5., 2., 2., 4.],
[1., 1., 4., 4.],
[8., 6., 8., 7.]],
[[3., 3., 4., 0.],
[5., 0., 0., 7.],
[3., 2., 8., 3.],
[8., 5., 8., 8.]]]])
temp.view(1, 2*4, 1, 4)
Out[28]:
tensor([[[[3., 7., 0., 8.]],
[[5., 2., 2., 4.]],
[[1., 1., 4., 4.]],
[[8., 6., 8., 7.]],
[[3., 3., 4., 0.]],
[[5., 0., 0., 7.]],
[[3., 2., 8., 3.]],
[[8., 5., 8., 8.]]]])
F.unfold(temp, (2, 2), 1, 0, 2)
Out[29]:
tensor([[[3., 0., 1., 4.],
[7., 8., 1., 4.],
[5., 2., 8., 8.],
[2., 4., 6., 7.],
[3., 4., 3., 8.],
[3., 0., 2., 3.],
[5., 0., 8., 8.],
[0., 7., 5., 8.]]])

unfold and view

import torch
import torch.nn.functional as F
a = torch.rand((1, 1, 4, 4))
a
Out[5]:
tensor([[[[0.6956, 0.2741, 0.7546, 0.6516],
[0.7810, 0.5884, 0.4314, 0.1446],
[0.4217, 0.5753, 0.0358, 0.3593],
[0.1191, 0.0768, 0.3927, 0.3685]]]])
b = F.unfold(a, 2, stride=2)
b
Out[10]:
tensor([[[0.6956, 0.7546, 0.4217, 0.0358],
[0.2741, 0.6516, 0.5753, 0.3593],
[0.7810, 0.4314, 0.1191, 0.3927],
[0.5884, 0.1446, 0.0768, 0.3685]]])
b.view(1, 4, 2, 2)
Out[9]:
tensor([[[[0.6956, 0.7546],
[0.4217, 0.0358]],
[[0.2741, 0.6516],
[0.5753, 0.3593]],
[[0.7810, 0.4314],
[0.1191, 0.3927]],
[[0.5884, 0.1446],
[0.0768, 0.3685]]]])
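To complement the unfold examples, a minimal fold sketch (with non-overlapping blocks fold reassembles the input exactly; overlapping blocks would be summed):

import torch
import torch.nn.functional as F

a = torch.rand(1, 1, 4, 4)
b = F.unfold(a, 2, stride=2)      # (1, 4, 4): 2*2 values per block, 4 block positions
a_back = F.fold(b, output_size=(4, 4), kernel_size=2, stride=2)
print(torch.allclose(a, a_back))  # True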
Matrix multiplication

torch.addmm: out = beta * M + alpha * (mat1 @ mat2)
    torch.addmm(beta=1, mat, alpha=1, mat1, mat2, out=None)
torch.mm: plain matrix multiply
    torch.mm(mat1, mat2, out=None)
torch.bmm: batched matrix multiply
>>> batch1 = torch.randn(10, 3, 4)
>>> batch2 = torch.randn(10, 4, 5)
>>> res = torch.bmm(batch1, batch2)
>>> res.size()
torch.Size([10, 3, 5])
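A quick check (just a sketch) that addmm matches the formula above:

import torch

M = torch.randn(3, 5)
mat1 = torch.randn(3, 4)
mat2 = torch.randn(4, 5)
out = torch.addmm(M, mat1, mat2, beta=1, alpha=1)
print(torch.allclose(out, 1 * M + 1 * (mat1 @ mat2)))  # True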
Shuffle Channels

Used mainly in ShuffleNet; it is an operation that reorders channels, picking specific channels out of each group and recombining them.

def shuffle_channels(x, groups):
    """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]

    The C channels are mixed across g groups: reshape C into (g, C/g),
    transpose it to (C/g, g), then flatten back into C channels.
    """
    N, C, H, W = x.size()
    # permute leaves the tensor non-contiguous, so contiguous() is needed before the final view
    return (x.view(N, groups, C // groups, H, W)
             .permute(0, 2, 1, 3, 4)
             .contiguous()
             .view(N, C, H, W))

(Author: 急流勇进.) Another implementation:
https://github.com/jaxony/ShuffleNet/blob/e9bf42f0cda8dda518cafffd515654cc04584e7a/model.py#L36

def channel_shuffle(x, groups):
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups
    # reshape
    x = x.view(batchsize, groups,
               channels_per_group, height, width)
    # transpose
    # - contiguous() required if transpose() is used before view().
    #   See https://github.com/pytorch/pytorch/issues/764
    x = torch.transpose(x, 1, 2).contiguous()
    # flatten
    x = x.view(batchsize, -1, height, width)
    return x
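A quick sanity check (a small sketch) of the reordering for C = 6, groups = 3:

import torch

x = torch.arange(6).view(1, 6, 1, 1).float()   # channels labelled 0..5
y = shuffle_channels(x, groups=3)
print(y.view(-1).tolist())  # [0.0, 2.0, 4.0, 1.0, 3.0, 5.0] -- one channel drawn from each group in turn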
Sub-pixel convolution (pixel shuffle)

nn.PixelShuffle(upscale_factor) performs the pixel rearrangement used by sub-pixel convolution: it only reorders features, moving elements from the channel dimension into spatial positions, with no other computation involved. Formula and illustration: https://blog.csdn.net/oLingFengYu/article/details/87728077
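A minimal sketch of the shape transformation:

import torch
import torch.nn as nn

ps = nn.PixelShuffle(upscale_factor=2)
x = torch.randn(1, 3 * 2 * 2, 8, 8)   # (N, C*r^2, H, W) with C=3, r=2
y = ps(x)
print(y.shape)                        # torch.Size([1, 3, 16, 16]) = (N, C, H*r, W*r)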
Loss function roundup
https://blog.csdn.net/zhangxb35/article/details/72464152?utm_source=itdadao&utm_medium=referral
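Two of the most commonly used losses (a sketch only; see the link above for the full survey):

import torch
import torch.nn as nn

# Classification: CrossEntropyLoss takes raw logits and integer class indices
logits = torch.randn(4, 10)
targets = torch.tensor([1, 0, 4, 9])
print(nn.CrossEntropyLoss()(logits, targets))

# Regression: mean squared error
pred = torch.randn(4, 1)
gt = torch.randn(4, 1)
print(nn.MSELoss()(pred, gt))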