pytorch nn functional

Posted on 2019-05-08 | Updated on 2019-05-17 | Category: pytorch

torch.nn.functional

This package provides functional versions of many network operations.

convolution functions

conv2d

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

inputs = Variable(torch.randn(64,3,32,32))

filters1 = Variable(torch.randn(16,3,3,3))
output1 = F.conv2d(inputs,filters1)
print(output1.size())

filters2 = Variable(torch.randn(16,3,3,3))
output2 = F.conv2d(inputs,filters2,padding=1)
print(output2.size())

Output:

torch.Size([64, 16, 30, 30])
torch.Size([64, 16, 32, 32])
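The spatial output size follows the standard convolution formula (with the default stride $1$ and dilation $1$): $(H_{in} + 2\times padding - kernel\_size)/stride + 1$, so $32 - 3 + 1 = 30$ without padding and $32 + 2 - 3 + 1 = 32$ with padding=1.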

relu functions
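A minimal sketch of the functional relu, which applies max(x, 0) elementwise (the input shape below is arbitrary):

import torch
import torch.nn.functional as F

x = torch.randn(2, 3)
y = F.relu(x)  # elementwise max(x, 0)
print(y)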

pooling functions
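A minimal sketch of the functional pooling ops (shapes chosen arbitrarily):

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 32, 32)
print(F.max_pool2d(x, 2).size())  # torch.Size([1, 3, 16, 16])
print(F.avg_pool2d(x, 2).size())  # torch.Size([1, 3, 16, 16])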

dropout functions

Example

import torch
import torch.nn.functional as F

x = torch.randn(1, 28, 28)
y = F.dropout(x, 0.5, True)  # p=0.5, training=True
y = F.dropout2d(x, 0.5)      # zeroes whole channels; this overwrites the previous y

print(y)

Note the issue mentioned in [2]; it may already have been fixed, but keep it in mind.

linear functions
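A minimal sketch of F.linear, which computes $y = x W^T + b$; note that the weight has shape (out_features, in_features) (shapes chosen arbitrarily):

import torch
import torch.nn.functional as F

x = torch.randn(128, 20)
weight = torch.randn(30, 20)   # (out_features, in_features)
bias = torch.randn(30)
y = F.linear(x, weight, bias)
print(y.size())  # torch.Size([128, 30])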

loss functions
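A minimal sketch of two common functional losses (values chosen arbitrarily):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)          # unnormalized scores for 4 samples, 10 classes
target = torch.tensor([1, 0, 4, 9])  # class indices
print(F.cross_entropy(logits, target))  # combines log_softmax and nll_loss
print(F.mse_loss(torch.randn(3), torch.zeros(3)))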

References

1.https://pytorch.org/docs/stable/nn.html#torch-nn-functional
2.https://pytorch.org/docs/stable/nn.html#torch-nn-functional

pytorch nn

Posted on 2019-05-08 | Updated on 2019-06-07 | Category: pytorch

Parameter

One-sentence introduction

A subclass of torch.Tensor. When a variable declared with nn.Parameter() is assigned as an attribute of a Module, it is automatically added to the module's parameter list and can be accessed through the iterators returned by parameters() and related functions.

API

class Parameter(torch.Tensor):
    # data holds the weights; requires_grad indicates whether gradients are required
    def __new__(cls, data=None, requires_grad=True)

Code example

The following code implements the same functionality as nn.Conv2d, using nn.Parameter() to register manually created tensors as the module's parameters.

import torch
import torch.nn as nn
import torch.nn.functional as F


class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.cnn1_weight = nn.Parameter(torch.rand(16, 1, 5, 5))
        self.bias1_weight = nn.Parameter(torch.rand(16))

        self.cnn2_weight = nn.Parameter(torch.rand(32, 16, 5, 5))
        self.bias2_weight = nn.Parameter(torch.rand(32))

        # F.linear expects the weight to have shape (out_features, in_features)
        self.linear1_weight = nn.Parameter(torch.rand(10, 4 * 4 * 32))
        self.bias3_weight = nn.Parameter(torch.rand(10))

    def forward(self, x):
        out = F.conv2d(x, self.cnn1_weight, self.bias1_weight)
        out = F.relu(out)
        out = F.max_pool2d(out, 2)

        out = F.conv2d(out, self.cnn2_weight, self.bias2_weight)
        out = F.relu(out)
        out = F.max_pool2d(out, 2)

        out = out.view(out.size(0), -1)  # flatten before the linear layer
        out = F.linear(out, self.linear1_weight, self.bias3_weight)
        return out
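To check that the tensors created with nn.Parameter() were indeed registered, one can iterate over named_parameters() (a quick sketch using the CNN class above):

model = CNN()
for name, p in model.named_parameters():
    print(name, p.size())
# cnn1_weight torch.Size([16, 1, 5, 5])
# bias1_weight torch.Size([16])
# ... and so on for the remaining four parameters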

Container

Module

Module is the base class for all models.
It has the following commonly used functions and attributes.

Common functions

  • add_module(name, module) # add a child module to the current module
  • apply(fn) # call fn on every submodule (as returned by .children()) as well as on the module itself
  • _apply() # in-place
  • children() # returns an iterator over the direct child modules, not including the module itself (see the example below)
  • buffers(recurse=True) # returns an iterator over the module's buffers
  • cuda(device=None) # move the model parameters and buffers to the GPU
  • cpu() # move the model parameters and buffers to the CPU
  • double() # cast float parameters and buffers to double
  • float()
  • forward(*input) # forward pass
  • eval() # put the module in evaluation mode; it only affects modules that behave differently in training and evaluation, such as Dropout and BatchNorm (Dropout is usually active during training and turned off during testing)
  • train(mode=True) # put the module in training mode
  • load_state_dict(state_dict, strict=True) # load the model parameters
  • modules() # returns an iterator over all modules in the network, including the module itself (see the example below)
  • named_modules() # returns an iterator over modules together with their names
  • named_children() # returns an iterator over direct child modules together with their names
  • named_parameters() # returns an iterator over parameters together with their names
  • parameters() # returns an iterator over the model parameters
  • state_dict(destination=None, prefix='', keep_vars=False) # returns the state of the whole module, including parameters and buffers
  • zero_grad() # set the gradients of the model parameters to $0$
  • to(*args, **kwargs) # move the parameters and buffers, or change their dtype

Common attributes

  • self._backend = thnn_backend
  • self._parameters = OrderedDict()
  • self._buffers = OrderedDict()
  • self._backward_hooks = OrderedDict()
  • self._forward_hooks = OrderedDict()
  • self._forward_pre_hooks = OrderedDict()
  • self._state_dict_hooks = OrderedDict()
  • self._load_state_dict_pre_hooks = OrderedDict()
  • self._modules = OrderedDict()
  • self.training = True

Code example

apply
import torch.nn as nn

def init_weights(m):
    print(m)
    if type(m) == nn.Linear:
        m.weight.data.fill_(1.0)
        print(m.weight)

net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
net.apply(init_weights)
module

Code on modules(), children() and parameters() (see the sketch below).
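A small sketch of the difference between modules(), children() and parameters(), using a nested Sequential (the structure is arbitrary):

import torch.nn as nn

net = nn.Sequential(nn.Linear(2, 2), nn.Sequential(nn.Linear(2, 2), nn.ReLU()))
print(len(list(net.modules())))     # 5: net itself, the first Linear, the inner Sequential, its Linear and ReLU
print(len(list(net.children())))    # 2: only the direct sub-modules
print(len(list(net.parameters())))  # 4: weight and bias of each Linear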

Sequential
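Sequential runs its sub-modules in order; a minimal sketch (layer sizes chosen arbitrarily):

import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU()
)
x = torch.randn(1, 1, 28, 28)
print(model(x).size())  # torch.Size([1, 64, 20, 20])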

Convolution Layers

torch.nn.Conv2d

API

2D convolution.

torch.nn.Conv2d(
    in_channels,   # int, number of channels in the input image
    out_channels,  # int, number of channels produced by the convolution (i.e. the number of kernels)
    kernel_size,   # int or tuple, size of the convolution kernel
    stride=1,      # int or tuple, optional, stride of the convolution, default 1
    padding=0,     # int or tuple, optional, zero-padding added to each side, default 0
    dilation=1,    # int or tuple, optional, spacing between kernel elements, default 1
    groups=1,      # int, optional, number of blocked connections from input channels to output channels, default 1
    bias=True      # bool, optional, if True, adds a learnable bias to the output
)

Examples

Example 1

Process a $32\times 32\times 1$ image with $6$ filters of size $5\times 5$.

import torch

model = torch.nn.Conv2d(1, 6, 5)

input = torch.randn(16, 1, 32, 32)
output = model(input)
print(output.size())
# output: torch.Size([16, 6, 28, 28])
Example 2: stride and padding
import torch
import torch.nn as nn
from torch.autograd import Variable

inputs = Variable(torch.randn(64, 3, 32, 32))

m1 = nn.Conv2d(3, 16, 3)
print(m1)
# output: Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
output1 = m1(inputs)
print(output1.size())
# output: torch.Size([64, 16, 30, 30])

m2 = nn.Conv2d(3, 16, 3, padding=1)
print(m2)
# output: Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
output2 = m2(inputs)
print(output2.size())
# output: torch.Size([64, 16, 32, 32])

Pooling Layers

MaxPool2d

API

class torch.nn.MaxPool2d(
    kernel_size,
    stride=None,
    padding=0,
    dilation=1,
    return_indices=False,
    ceil_mode=False
)

By default, MaxPool2d's stride is equal to kernel_size.

Code example

import torch
from torch import nn
from torch.autograd import Variable
# maxpool2d

input = Variable(torch.randn(30,20,32,32))
print(input.size())
# output: torch.Size([30, 20, 32, 32])

m1 = nn.MaxPool2d(2)
output = m1(input)
print(output.size())
# output: torch.Size([30, 20, 16, 16])


m2 = nn.MaxPool2d(5)
print(m2)
# output: MaxPool2d (size=(5, 5), stride=(5, 5), dilation=(1, 1))

for param in m2.parameters():
    print(param)
# nothing is printed: MaxPool2d has no parameters

print(m2.state_dict().keys())
# output: []

output = m2(input)
print(output.size())
# output: torch.Size([30, 20, 6, 6])

Padding Layers

Linear layers

Linear

API

torch.nn.Linear(
    in_features,
    out_features,
    bias=True
)

Code example

import torch
import torch.nn as nn

m = nn.Linear(20, 30)
input = torch.randn(128, 20)
output = m(input)
print(output.size())
# output: torch.Size([128, 30])

Dropout layers

Dropout2d

API

Code example

import torch
import torch.nn as nn

m = nn.Dropout2d(0.3)
print(m)
inputs = torch.randn(1,28,28)
outputs = m(inputs)
print(outputs)

Output:

Dropout2d(p=0.3)
tensor([[[ 0.8535, 1.0314, 2.7904, 1.2136, 2.7561, -2.0429, 0.0772,
-1.9372, -0.0864, -1.4132, -0.1648, 0.2403, 0.5727, 0.8102,
0.4544, 0.1414, 0.1547, -0.9266, -0.6033, 0.5813, -1.3541,
-0.0536, 0.9574, 0.0554, 0.8368, 0.7633, -0.3377, -1.4293],
[ 0.0000, 0.0000, -0.0000, -0.0000, 0.0000, -0.0000, 0.0000,
0.0000, 0.0000, -0.0000, -0.0000, 0.0000, -0.0000, -0.0000,
0.0000, -0.0000, 0.0000, -0.0000, 0.0000, 0.0000, -0.0000,
-0.0000, 0.0000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
…
[ 0.6452, -0.6455, 0.2370, 0.1088, -0.5421, -0.5120, -2.2915,
0.2061, 1.6384, 2.2276, 2.4022, 0.2033, 0.6984, 0.1254,
1.1627, 1.0699, -2.1868, 1.1293, -0.7030, 0.0454, -1.5428,
-2.4052, -0.3204, -1.5984, 0.1282, 0.2127, -2.3506, -2.2395]]])

Notice that many entries of the output have been set to $0$.

Loss function
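A minimal sketch of a loss module, using nn.CrossEntropyLoss (values chosen arbitrarily):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10, requires_grad=True)  # stand-in for a network output
target = torch.tensor([1, 0, 4, 9])              # class indices
loss = criterion(logits, target)
loss.backward()
print(loss.item())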

pytorch Variable(torch.autograd.Variable)

Posted on 2019-05-08 | Updated on 2019-05-17 | Category: pytorch

Variable(class torch.autograd.Variable)

Declaring a tensor

  • torch.zeros
  • torch.ones
  • torch.rand
  • torch.full()
  • torch.empty()
  • torch.rand()
  • torch.randn()
  • torch.ones_like()
  • torch.zeros_like()
  • torch.randn_like()
  • torch.Tensor

Code example

Code link

import torch

torch.manual_seed(5)
x = torch.empty(5, 3)
print(torch.empty(5, 3)) # construct a 5x3 matrix, uninitialized
# tensor([[4.6179e-38, 4.5845e-41, 4.6179e-38],
# [4.5845e-41, 6.3010e-36, 6.3010e-36],
# [2.5204e-35, 6.3010e-36, 1.0082e-34],
# [6.3010e-36, 6.3010e-36, 6.6073e-30],
# [6.3010e-36, 6.3010e-36, 6.3010e-36]])

print(torch.rand(3, 4)) # construct a 3x4 matrix, uniform [0, 1)
# tensor([[0.8303, 0.1261, 0.9075, 0.8199],
# [0.9201, 0.1166, 0.1644, 0.7379],
# [0.0333, 0.9942, 0.6064, 0.5646]])

print(torch.randn(5, 3)) # construct a 5x3 matrix, normal distribution
# tensor([[-1.4017, -0.7626, 0.6312],
# [-0.8991, -0.5578, 0.6907],
# [ 0.2225, -0.6662, 0.6846],
# [ 0.5740, -0.5829, 0.7679],
# [ 0.5740, -0.5829, 0.7679]])

print(torch.randn(2, 3).type())
# torch.FloatTensor

print(torch.zeros(5, 3)) # construct a 5x3 matrix filled zeros
# tensor([[0., 0., 0.],
# [0., 0., 0.],
# [0., 0., 0.],
# [0., 0., 0.],
# [0., 0., 0.]])

print(torch.ones(5, 3)) # construct a 5x3 matrix filled ones
# tensor([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]])

print(torch.ones(5, 3, dtype=torch.long)) # construct a tensor with dtype=torch.long
# tensor([[1, 1, 1],
# [1, 1, 1],
# [1, 1, 1],
# [1, 1, 1],
# [1, 1, 1]])

print(torch.tensor([1,2,3])) # construct a tensor direct from data
# tensor([1, 2, 3])

print(x.new_ones(5,4)) # construct a tensor with the same properties as x
# tensor([[1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.]])


print(torch.full([4,3],9)) # construct a tensor with a value
# tensor([[9., 9., 9.],
# [9., 9., 9.],
# [9., 9., 9.],
# [9., 9., 9.]])

print(x.new_ones(5,4,dtype=torch.int)) # construct a tensor with the same property as x, and also can have the specified type.
# tensor([[1, 1, 1, 1],
# [1, 1, 1, 1],
# [1, 1, 1, 1],
# [1, 1, 1, 1],
# [1, 1, 1, 1]], dtype=torch.int32)

print(torch.randn_like(x,dtype=torch.float)) # construct a tensor with the same shape with x,
# tensor([[ 0.4699, -1.9540, -0.5587],
# [ 0.4295, -2.2643, -0.2017],
# [ 1.0677, 0.3246, -0.0684],
# [-0.9959, 1.1563, -0.3992],
# [ 1.2153, -0.8115, -0.8848]])

print(torch.ones_like(x))
# tensor([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]])

print(torch.zeros_like(x))
# tensor([[0., 0., 0.],
# [0., 0., 0.],
# [0., 0., 0.],
# [0., 0., 0.],
# [0., 0., 0.]])


print(torch.Tensor(3,4))
# tensor([[-3.8809e-21, 3.0948e-41, 2.3822e-44, 0.0000e+00],
# [ nan, 7.2251e+28, 1.3733e-14, 1.8888e+31],
# [ 4.9656e+28, 4.5439e+30, 7.1426e+22, 1.8759e+28]])

print(torch.Tensor(3,4).uniform_(0,1))
# tensor([[0.8437, 0.1399, 0.2239, 0.3462],
# [0.5668, 0.3059, 0.1890, 0.4087],
# [0.2560, 0.5138, 0.1299, 0.3750]])

print(torch.Tensor(3,4).normal_(0,1))
# tensor([[-0.5490, -0.0838, -0.1387, -0.5289],
# [-0.4919, -0.4646, -0.0588, 1.2624],
# [ 1.1935, 1.5696, -0.8977, -0.1139]])

print(torch.Tensor(3,4).fill_(5))
# tensor([[5., 5., 5., 5.],
# [5., 5., 5., 5.],
# [5., 5., 5., 5.]])

print(torch.arange(1, 3, 0.4))
# tensor([1.0000, 1.4000, 1.8000, 2.2000, 2.6000])

Various tensor operations

import torch
a = torch.ones(2,3)
b = torch.ones(2,3)

Addition

result = torch.empty(2, 3)
print(a + b)                 # method 1
c = torch.add(a, b)          # method 2
torch.add(a, b, out=result)  # method 3, write the result into a pre-allocated tensor
a.add(b)                     # method 4, returns a + b without modifying a
a.add_(b)                    # method 5, in-place: add b to a and store the result in a

Transpose

print(a.t())    # print the transpose of tensor a
print(a.t_())   # transpose a in place and assign the result back to a

Maximum along a dimension

torch.max(tensor,dim)
np.max(array,dim)
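Note that torch.max(tensor, dim) returns both the maximum values and their indices along that dimension, for example (values chosen arbitrarily):

a = torch.tensor([[1., 5., 2.], [4., 3., 6.]])
values, indices = torch.max(a, dim=1)
print(values)   # tensor([5., 6.])
print(indices)  # tensor([1, 2])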

The following is functionally similar to relu: it clamps values into a range.

torch.clamp(tensor, min, max, out=None)
np.maximum(x1, x2)  # elementwise maximum of x1 and x2 (their shapes must be broadcastable)

Converting between tensors and numpy

convert tensor to numpy

a = torch.ones(3,4)
b = a.numpy()

convert numpy to tensor

import numpy
a = numpy.ones((4, 3))
b = torch.from_numpy(a)

Variable and Tensor

Figure 1. Variable

Attributes

As shown in Figure 1, a Variable wraps a Tensor and has six attributes: data, grad, requires_grad, volatile, is_leaf and grad_fn. The raw tensor can be accessed through .data, and gradients with respect to the Variable are accumulated into .grad. Finally, the creator attribute, accessed through .grad_fn, tells us how the Variable was created: if it was created by the user, grad_fn is None; otherwise it records which Function created the Variable.
Variables whose grad_fn is None are called graph leaves.

Variable.shape  # check the Variable's size
Variable.size()

parameters

torch.autograd.Variable(data,requires_grad=False,volatile=False)

requires_grad: indicates whether backward() will ever need to be called on this Variable.

backward

backward(gradient=None, retain_graph=None, create_graph=None, retain_variables=None)
If the Variable is a scalar output, we do not need to specify gradient. But if the Variable is not a scalar and has multiple elements, we must pass a gradient matching the output; it can be a tensor or a Variable, and its values are the weights by which each output gradient is scaled, for example:

x = Variable(torch.Tensor([3,6,4]),requires_grad=True)
y = Variable(torch.Tensor([5,3,6]),requires_grad=True)
z = x+y
z.backward(gradient=torch.Tensor([0.1,1,10]))

Here $[0.1, 1, 10]$ means the ordinary gradients are multiplied by $0.1$, $1$ and $10$ respectively, and the results are accumulated on the leaf Variables.
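For the snippet above, since $z = x + y$, the gradient of each element of $z$ with respect to the corresponding element of $x$ (or $y$) is $1$, so after backward the accumulated gradients are just the weights that were passed in:

print(x.grad)  # tensor([ 0.1000,  1.0000, 10.0000])
print(y.grad)  # tensor([ 0.1000,  1.0000, 10.0000])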

detach()         # returns a new Variable detached from the computation graph
detach_()        # detaches the Variable from the graph in place
register_hook()  # registers a hook that is called whenever a gradient for this Variable is computed
retain_grad()    # keeps .grad for non-leaf Variables


pytorch Function(torch.autograd.Function)

Posted on 2019-05-08 | Updated on 2019-05-09 | Category: pytorch

Function (class torch.autograd.Function)

Usage

A Function generally defines a single operation and cannot store parameters, so it is suited to things like activation functions and pooling. It requires three methods: __init__(), forward(), and backward() (the last one you have to write yourself to define how gradients are computed).
A Module stores parameters and is suited to defining a layer, such as a linear layer or a conv layer, and also to defining a whole network.
The difference from a Module: a Module only needs __init__() and forward(); backward() does not have to be defined because it is handled by the automatic differentiation mechanism.

A Function defines just a function; its forward and backward depend only on the Function's inputs and outputs.

functions

import torch
from torch.autograd import Variable

class MyReLU(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    def forward(self, input):
        """
        In the forward pass we receive a Tensor containing the input and return a
        Tensor containing the output. You can cache arbitrary Tensors for use in the
        backward pass using the save_for_backward method.
        """
        self.save_for_backward(input)
        return input.clamp(min=0)

    def backward(self, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Construct an instance of our MyReLU class to use in our network
    relu = MyReLU()

    # Forward pass: compute predicted y using operations on Variables; we compute
    # ReLU using our custom autograd operation.
    y_pred = relu(x.mm(w1)).mm(w2)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.data[0])

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients after updating weights
    w1.grad.data.zero_()
    w2.grad.data.zero_()
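Note that the code above uses the legacy Function interface. In more recent PyTorch versions, forward and backward have to be static methods that take a context object, and the Function is invoked through .apply; a minimal sketch of that style:

import torch

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

x = torch.randn(5, requires_grad=True)
y = MyReLU.apply(x)
y.sum().backward()
print(x.grad)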

pytorch distributions

Posted on 2019-05-08 | Updated on 2019-10-31 | Category: pytorch

torch.distributions

This package is quite similar to gym.spaces: both provide a set of distributions to sample from.
Common ones include ExponentialFamily, Bernoulli, Binomial, Categorical, Exponential, Gamma, Independent, Laplace, Multinomial and MultivariateNormal. They are not described one by one here; see the gym notes.

Categorical

Corresponds to tf.multinomial in TensorFlow.
Class prototype:

CLASS torch.distributions.categorical.Categorical(probs=None, logits=None, validate_args=None)

The probs argument must be $1$- or $2$-dimensional, non-negative, and have a finite non-zero sum; it is then normalized to sum to $1$.
The class behaves like torch.multinomial: it samples from $\{0,\cdots, K-1\}$ with the probabilities given by probs, where $K$ is probs.size(-1), i.e. the last dimension; in the $2$-dimensional case the first dimension is treated as the batch.

A simple example:

import torch.distributions as diss
import torch

torch.manual_seed(5)

m = diss.Categorical(torch.tensor([0.25, 0.25, 0.25, 0.25]))
for _ in range(5):
    print(m.sample())

m = diss.Categorical(torch.tensor([[0.5, 0.25, 0.25], [0.25, 0.25, 0.5]]))
for _ in range(5):
    print(m.sample())

The output is as follows:

tensor(2)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor([2, 2])
tensor([1, 2])
tensor([0, 1])
tensor([0, 2])
tensor([0, 0])
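Besides sample(), Categorical also provides log_prob(), which is what is usually needed for policy-gradient methods; a small sketch continuing the example above:

m = diss.Categorical(torch.tensor([0.25, 0.25, 0.25, 0.25]))
action = m.sample()
print(m.log_prob(action))  # log(0.25) ≈ -1.3863 for any of the four actions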

For comparison, here is a gym.spaces.Discrete example:

from gym import spaces

# 1. Discrete
# values are taken from {0, 1, ..., n - 1}
dis = spaces.Discrete(5)
dis.seed(5)
for _ in range(5):
    print(dis.sample())

The output is:

3
0
1
0
4


pytorch multiprocessing

Posted on 2019-05-08 | Updated on 2019-11-12 | Category: pytorch

torch.multiprocessing

join

join() waits until the process it is called on has finished before execution continues.
See the official GitHub demo.

share_memory_()

Shares a tensor across multiple processes, as in the code below. This can be used to implement A3C.

import torch.multiprocessing as mp
import torch
import time
import os


def proc(sec, x):
    print(os.getpid(), " ", x)
    time.sleep(sec)
    print(os.getpid(), " ", x)
    x += sec
    print(str(os.getpid()) + " over. ", x)


if __name__ == '__main__':
    num_processes = 3
    processes = []
    x = torch.ones([3,])
    x.share_memory_()
    for rank in range(num_processes):
        p = mp.Process(target=proc, args=(rank + 1, x))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    print(x)

The output is as follows:

python share_memory.py
7739 tensor([1., 1., 1.])
7738 tensor([1., 1., 1.])
7737 tensor([1., 1., 1.])
7737 tensor([1., 1., 1.])
7737 over. tensor([2., 2., 2.])
7738 tensor([2., 2., 2.])
7738 over. tensor([4., 4., 4.])
7739 tensor([4., 4., 4.])
7739 over. tensor([7., 7., 7.])
tensor([7., 7., 7.])

We can see that process $7739$ initially receives the same $x$ as at the start, but after process $7738$ finishes updating $x$, the $x$ seen by $7739$ has already become the updated value. So my guess is that there is a lock on $x$ internally, guaranteeing that $x$ is accessed by only one process at a time.


pytorch common problems (updated occasionally)

Posted on 2019-05-08 | Updated on 2019-05-26 | Category: pytorch

Problem 1: CUDNN_STATUS_ARCH_MISMATCH

Error message

RuntimeError: CUDNN_STATUS_ARCH_MISMATCH

Cause

CUDNN doesn’t support CUDA arch 2.1 cards.
CUDNN requires Compute Capability 3.0, at least.
意思是GPU的加速能力不够,CUDNN只支持CUDA Capability 3.0以上的GPU加速,实验室主机是GT620的显卡,2.1的加速能力。
GPU对应的capability: https://developer.nvidia.com/cuda-gpus
所以,对于不能使用cudnn对cuda加速的显卡,我们可以设置cudnn加速为False,这个默认是为True的
torch.backends.cudnn.enabled=False
但是,由于显卡版本为2.1,太老了,没有二进制版本。所以,还是会报其他错误,因此,就别使用cpu进行加速啦。
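To check whether a particular card is affected, one can query its compute capability and only then disable cuDNN; a small sketch:

import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(major, minor)  # e.g. 2, 1 for the GT 620 mentioned above
    if major < 3:
        torch.backends.cudnn.enabled = False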

Check the CUDA version

~#:nvcc --version

References

1.https://pytorch.org/docs/stable/torch.html
2.https://pytorch.org/docs/stable/nn.html
3.http://pytorch.org/tutorials/beginner/pytorch_with_examples.html
4.https://discuss.pytorch.org/t/distributed-model-parallelism/10377
5.https://ptorch.com/news/40.html
6.https://discuss.pytorch.org/t/distributed-data-parallel-freezes-without-error-message/8009
7.https://discuss.pytorch.org/t/runtimeerror-cudnn-status-arch-mismatch/3580
8.https://discuss.pytorch.org/t/error-when-using-cudnn/577/7
10.https://pytorch.org/docs/stable/distributions.html#categorical

tensorflow assign

Posted on 2019-05-08 | Category: tensorflow

tf.assign

Brief explanation

op = x.assign(y)
This op assigns the value of y to x: after executing sess.run(op), x has the same value as y.

Code example

Code link

import tensorflow as tf

# declare two Variables
x1 = tf.Variable([3,4])
x2 = tf.Variable([9,1])

# y is the op that assigns x2 to x1
y = x1.assign(x2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    xx1 = sess.run(x1)
    # print x1
    print(xx1)
    # [3 4]

    xx2 = sess.run(x2)
    # print x2
    print(xx2)
    # [9 1]

    print(sess.run(x1))
    # [3 4]

    # run the assign op
    yy = sess.run(y)
    print(yy)
    # [9 1]

    # x1 now holds the value of x2
    print(sess.run(x1))
    # [9 1]
    print(sess.run(x2))
    # [9 1]

tensorflow Tensor

Posted on 2019-05-08 | Updated on 2019-05-23 | Category: tensorflow

tf.Tensor

Purpose

  • Serve as the input to another op; ops are connected through Tensors, forming the data flow.
  • The value of a Tensor can be obtained with t.eval().

Attributes

  • Data type: float32, int32, string, etc.
  • Shape

A tf.Tensor is usually produced by an op such as tf.add or tf.log; its value cannot be changed and it has no assign() method.

Rank

  • 0: scalar
  • 1: vector
  • 2: matrix
  • 3: 3rd-order tensor
  • n: nth-order tensor

Creating a rank-0 tensor

string_scalar = tf.Variable("Elephat", tf.string)
int_scalar = tf.Variable(414, tf.int16)
float_scalar = tf.Variable(3.2345, tf.float64)
# complex_scalar = tf.Variable(12.3 - 5j, tf.complex64)

Creating a rank-1 tensor

A list is needed as the initial value.

string_vec = tf.Variable(["Elephat"], tf.string)
int_vec = tf.Variable([414, 32], tf.int16)
float_vec = tf.Variable([3.2345, 32], tf.float64)
# complex_vec = tf.Variable([12.3 - 5j, 1 + j], tf.complex64)

Creating a rank-2 tensor

It must contain at least one row and one column.

bool_mat = tf.Variable([[True], [False]], tf.bool)
string_mat = tf.Variable([["Elephat"]], tf.string)
int_mat = tf.Variable([[414], [32]], tf.int16)
float_mat = tf.Variable([[3.2345, 32]], tf.float64)
# complex_mat = tf.Variable([[12.3 - 5j], [1 + j]], tf.complex64)

Getting the rank

tf.rank(tensor)

Slicing

A rank-0 scalar needs no index; it is itself a number.
For a rank-1 vector, you can pass one index to access an element.
For a rank-2 matrix, passing two indices returns a scalar, while passing one index returns a vector.
You can use : for a dimension to leave that dimension untouched.
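A small indexing sketch (values chosen arbitrarily):

mat = tf.constant([[1, 2, 3], [4, 5, 6]])
row = mat[1]      # rank-1 tensor, values [4, 5, 6]
elem = mat[1, 2]  # rank-0 tensor, value 6
col = mat[:, 1]   # rank-1 tensor, values [2, 5]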

Getting a Tensor's shape

  • tf.Tensor.shape
  • tf.shape(tensor) # returns the shape of the tensor
  • tf.Tensor.get_shape()

Changing a tensor's shape

api

tf.reshape(tensor, shape, name=None)

  • tensor: the input tensor to operate on
  • shape: the target shape after reshaping

Code example

# t = [1, 2, 3, 4, 5, 6, 7, 8, 9]
tf.reshape(t, [3, 3]) # [[1, 2, 3,], [4, 5, 6], [7, 8, 9]]

Adding a dimension

API

tf.expand_dims(input, axis=None, name=None, dim=None)

Code example

Code link

import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.int32, [None, 10])
y1 = tf.expand_dims(x, 0)
y2 = tf.expand_dims(x, 1)
y3 = tf.expand_dims(x, 2)
y4 = tf.expand_dims(x, -1)  # -1 means the last (new) dimension
# y5 = tf.expand_dims(x, 3) error

with tf.Session() as sess:
    inputs = np.random.rand(12, 10)
    r1, r2, r3, r4 = sess.run([y1, y2, y3, y4], feed_dict={x: inputs})
    print(r1.shape)
    print(r2.shape)
    print(r3.shape)
    print(r4.shape)

Changing the data type

API

tf.cast(x, dtype, name=None)

  • x # the data to convert
  • dtype # the target data type

Code example

x = tf.constant([1.8, 2.2], dtype=tf.float32)
tf.cast(x, tf.int32)

Evaluating tensors

tf.Tensor.eval() returns a numpy array with the same contents as the Tensor.

Code example

constant = tf.constant([1, 2, 3])
tensor = constant * constant
print(tensor.eval()) # note: eval() only works when there is an active (default) Session

Special types

  • tf.Variable is not the same as tf.Tensor; see the tf.Variable section below for details
  • tf.constant
  • tf.placeholder
  • tf.SparseTensor

tf.placeholder

API

Returns a Tensor.
tf.placeholder(dtype, shape=None, name=None)

  • dtype # data type
  • shape # shape

Code example

import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.float32, shape=(None, 1024))
y = tf.matmul(x, x)

sess = tf.Session()
# print(sess.run(y)) this will fail
rand_array = np.random.rand(1024, 1024)
print(sess.run(y, feed_dict={x: rand_array}))

tf.constant

api

tf.constant(values, dtype=None, shape=None, name='Const', verify_shape=False)
Returns a constant Tensor.

  • values # initial value
  • dtype # data type
  • shape # shape
  • name # optional
  • verify_shape

Code example

tensor = tf.constant([1, 2, 3, 4, 5, 6])
tensor = tf.constant(-1.0, shape=[3, 4])

tf.Variable

api

tf.Variable.__init__(initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, …)

Code example

sess = tf.Session()
tensor1 = tf.Variable([[1,2], [3,5]])
tensor2 = tf.Variable(tf.constant([[1,2], [3,5]]))
sess.run(tf.global_variables_initializer())
sess.run(tensor1)
sess.run(tensor2)

Creating constant Tensors

  • tf.ones(shape, dtype=tf.float32, name=None)

  • tf.zeros(shape, dtype=tf.float32, name=None)

  • tf.fill(shape, value, name=None)

  • tf.constant(value, dtype=None, shape=None, name='Const')

  • tf.ones_like(tensor, dtype=None, name=None)

  • tf.zeros_like(tensor, dtype=None, name=None)

  • tf.linspace()

Creating random Tensors

  • tf.random_uniform(shape, minval=0, maxval=None, dtype=tf.float32, seed=None, name=None)
    https://www.tensorflow.org/versions/r1.8/api_docs/python/tf/random_uniform
  • tf.random_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)
    normal distribution with mean mean and standard deviation stddev
    https://www.tensorflow.org/versions/r1.8/api_docs/python/tf/random_normal
  • tf.truncated_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)
    normal distribution with mean mean and standard deviation stddev, keeping only samples within [mean-2*stddev, mean+2*stddev]
  • tf.random_shuffle(value, seed=None, name=None)
    randomly shuffles value along its first dimension

Code example

Code link

import tensorflow as tf


sess = tf.Session()

x = tf.constant([[3, 4], [5, 8]])
print(sess.run(tf.constant([3,4])))
# [3 4]

print(sess.run(tf.ones_like(x)))
# [[1 1]
#  [1 1]]

print(sess.run(tf.zeros_like(x)))
# [[0 0]
#  [0 0]]

# print random samples from a normal distribution
print(sess.run(tf.random_normal([2,2])))
# [[-0.5188188   0.77538687]
#  [ 1.2343276  -0.58534193]]

# print random samples from a uniform [0, 1) distribution
print(sess.run(tf.random_uniform([2,2])))
# [[0.8851745  0.12824357]
#  [0.28489232 0.76961493]]

print(sess.run(tf.random_uniform([2,2], dtype=tf.int32, maxval=4)))
# [[0 2]
#  [2 1]]

print(sess.run(tf.ones([3, 4])))
# [[1. 1. 1. 1.]
#  [1. 1. 1. 1.]
#  [1. 1. 1. 1.]]

print(sess.run(tf.zeros([2,2])))
# [[0. 0.]
#  [0. 0.]]


tensorflow cnn demo

Posted on 2019-05-08 | Category: tensorflow

tf.nn.conv2d

Code example

Code link

import tensorflow as tf
import matplotlib.pyplot as plt


def conv(img):
    if len(img.shape) == 3:
        img = tf.reshape(img, [1] + img.get_shape().as_list())
    kernel = tf.random_normal([3, 3, 3, 1])
    img = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME')
    print(img.get_shape())
    return img

from skimage import data
# img = data.text()
img = data.astronaut()
print(img.shape)
plt.imshow(img)
plt.show()

# .eval() needs a default session
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=(img.shape))
result = tf.squeeze(conv(x)).eval(feed_dict={x: img})
plt.imshow(result)
plt.show()