Weight initialization in PyTorch
Discussion of weight initialization on the official forum
torch.nn.Module.apply(fn)
torch.nn.Module.apply(fn)
# Recursively applies fn to every submodule of the nn.Module (and to the module itself)
# Commonly used to initialize a model's parameters
# fn is a handle to the initialization function; it is called with each nn.Module
# (or instance of your own nn.Module subclass) in turn
# fn (Module -> None) - function to be applied to each submodule
# Returns: self
# Return type: Module

# Example:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        # m.weight.data holds the convolution kernels, m.bias.data the bias terms
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

netG = _netG(ngpu)        # instantiate the generator (from the DCGAN example)
netG.apply(weights_init)  # recursively applies weights_init to every submodule of netG
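To see apply() walk the module tree end to end, here is a minimal runnable sketch; the toy model is made up for this illustration, and it uses the torch.nn.init helpers instead of mutating .data directly:

import torch.nn as nn

def weights_init(m):
    # isinstance checks are more robust than matching the class-name string
    if isinstance(m, nn.Conv2d):
        nn.init.normal_(m.weight, 0.0, 0.02)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.normal_(m.weight, 1.0, 0.02)
        nn.init.zeros_(m.bias)

toy = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
    nn.BatchNorm2d(16),
    nn.ReLU(inplace=True),
)
toy.apply(weights_init)      # visits Conv2d, BatchNorm2d, ReLU, then the Sequential itself
print(toy[1].weight.mean())  # should print a value close to 1.0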
# -*- coding: utf-8 -*-
import math
import torch
import torch.nn as nn

# Initializing model parameters.
# Official forum thread: https://discuss.pytorch.org/t/weight-initilzation/157/3

# Method 1
# Define a standalone weights_init function whose argument m is a torch.nn.Module
# (or an instance of your own nn.Module subclass), then initialize with net.apply().
# m.__class__.__name__ returns the name of the module's class.
# https://github.com/pytorch/examples/blob/master/dcgan/main.py#L90-L96
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

netG = _netG(ngpu)        # instantiate the generator (from the DCGAN example)
netG.apply(weights_init)  # recursively applies weights_init to every submodule of netG
                          # ("function to be applied to each submodule")

# Method 2
# 1. Iterate over the network's layers with net.modules(), checking each layer's type.
# 2. Initialize the weight.data (a tensor) of the matching layers in place.
# Another initialization example, from the PyTorch Vision ResNet implementation:
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py#L112-L118
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # initialize the parameters
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
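# --- Aside (not in the original post): math.sqrt(2. / n) above is exactly
# He (Kaiming) normal initialization with fan-out scaling, which current
# PyTorch computes directly via torch.nn.init. A minimal sketch; the helper
# name init_resnet_style is made up here:
def init_resnet_style(net):
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            # std = sqrt(2 / (kernel_h * kernel_w * out_channels)), as in the loop above
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)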
# Method 3
# If you know the order and types of the network's parameters, you can read them
# out one by one and initialize them with the functions in torch.nn.init.
net = AlexNet(2)
params = list(net.parameters())  # alternates Conv2d kernel parameters and bias parameters
# Or, for a single layer:
conv1Params = list(net.conv1.parameters())
# conv1Params[0] is the kernel weights, conv1Params[1] the bias terms.
# Then initialize with torch.nn.init (the in-place, trailing-underscore names
# replace the deprecated normal/constant spellings; `tensor` stands for the
# parameter tensor picked out above):
torch.nn.init.normal_(tensor, mean=0, std=1)
torch.nn.init.constant_(tensor, 0)

# net.modules() yields modules recursively:
#   AlexNet, Sequential, Conv2d, ReLU, MaxPool2d, LRN, AvgPool3d, ..., Conv2d, ..., Linear
#   (here only Conv2d and Linear carry parameters)
# net.children() yields only the direct submodules:
#   Sequential, Sequential, Sequential, Sequential, Sequential, Sequential, Sequential, Linear

# The AlexNet definition used above; LRN is a custom local response normalization
# module defined elsewhere.
class AlexNet(nn.Module):

    def __init__(self, num_classes=2):  # two classes by default: cat and dog
        # super().__init__()  # Python 3
        super(AlexNet, self).__init__()
        # AlexNet: five convolutional layers followed by three fully connected layers
        # the five convolutional layers
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            LRN(local_size=5, bias=1, alpha=1e-4, beta=0.75, ACROSS_CHANNELS=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, groups=2, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            LRN(local_size=5, bias=1, alpha=1e-4, beta=0.75, ACROSS_CHANNELS=True)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        # the three fully connected layers
        # in forward(), the input must first be flattened from a 3D tensor to 1D with view()
        self.fc6 = nn.Sequential(
            nn.Linear(in_features=6 * 6 * 256, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout()
        )
        self.fc7 = nn.Sequential(
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout()
        )
        self.fc8 = nn.Linear(in_features=4096, out_features=num_classes)

    def forward(self, x):
        x = self.conv5(self.conv4(self.conv3(self.conv2(self.conv1(x)))))
        x = x.view(-1, 6 * 6 * 256)
        return self.fc8(self.fc7(self.fc6(x)))
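The modules()/children() distinction above is easy to check directly; a small runnable sketch with a made-up nested model:

import torch.nn as nn

tiny = nn.Sequential(
    nn.Sequential(nn.Conv2d(3, 8, kernel_size=3), nn.ReLU()),
    nn.Linear(8, 2),
)

# modules() recurses (and yields the model itself):
# ['Sequential', 'Sequential', 'Conv2d', 'ReLU', 'Linear']
print([type(m).__name__ for m in tiny.modules()])

# children() stops at the direct submodules:
# ['Sequential', 'Linear']
print([type(m).__name__ for m in tiny.children()])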
Supplementary: loading partial weights in PyTorch
A model downloaded from the web may differ from our own model by just a layer or so, and retraining all of the parameters in that case is unreasonable.
Instead, we can load the parameters whose names match and skip the ones that differ:
pretrained_dict = torch.load("model.pth")
model_dict = net.state_dict()
# keep only the entries whose keys also appear in our model
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
net.load_state_dict(model_dict)
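Name matching alone can still break when a layer keeps its name but changes shape (say, a classifier head with a different number of classes); a slightly safer variant of the same filter, offered here as a sketch rather than part of the original post, also compares tensor sizes:

pretrained_dict = torch.load("model.pth")
model_dict = net.state_dict()
# require matching names AND matching shapes, so a reshaped head is skipped too
pretrained_dict = {k: v for k, v in pretrained_dict.items()
                   if k in model_dict and v.size() == model_dict[k].size()}
model_dict.update(pretrained_dict)
net.load_state_dict(model_dict)

Alternatively, net.load_state_dict(pretrained_dict, strict=False) ignores missing and unexpected keys, but it still raises on shape mismatches, so the explicit size filter above remains useful.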
Original post: https://blog.csdn.net/tsq292978891/article/details/79382306