import torch 
import numpy as np
<torch._C.Generator at 0x21c1651e190>
def describe(x):
    """Print a three-line summary of tensor ``x``: type string, shape, values."""
    print(f"Type: {x.type()}")
    print(f"Shape/Size: {x.shape}")
    print(f"Values: {x}")
# torch.Tensor(2, 3) allocates a 2x3 float tensor without initializing it,
# so the values printed are whatever happened to be in memory.
describe(torch.Tensor(2, 3))
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[2.0802e+23, 1.0431e-08, 2.7005e-06],
        [5.3698e-05, 1.3424e-05, 1.6765e+22]])
x = torch.rand(2, 3)
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0.0290, 0.4019, 0.2598],
        [0.3666, 0.0583, 0.7006]])
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0., 0., 0.],
        [0., 0., 0.]])
x = torch.ones(2, 3)
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[1., 1., 1.],
        [1., 1., 1.]])
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[5., 5., 5.],
        [5., 5., 5.]])
x = torch.Tensor(3, 4).fill_(5)
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[5., 5., 5., 5.],
        [5., 5., 5., 5.],
        [5., 5., 5., 5.]])
x = torch.Tensor([[1,2], [3,4]])
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 2])
Values: tensor([[1., 2.],
        [3., 4.]])
npy = np.random.rand(2, 3)
[[0.63238341 0.45281327 0.71481107]
 [0.91606157 0.10177937 0.28428342]]
Type: torch.DoubleTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0.6324, 0.4528, 0.7148],
        [0.9161, 0.1018, 0.2843]], dtype=torch.float64)
x = torch.arange(6).view(2, 3)
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5]])
x = torch.FloatTensor([[1, 2,3], [4, 5, 6]])
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[1., 2., 3.],
        [4., 5., 6.]])
x = x.long() # LongTensor
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[1, 2, 3],
        [4, 5, 6]])
x = x.float() # FloatTensor
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[1., 2., 3.],
        [4., 5., 6.]])
x = torch.randn(2,3)
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[-0.8545,  0.5098, -0.0821],
        [ 0.6607,  0.0785,  0.7884]])
describe(torch.add(x, x))
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[-1.7090,  1.0197, -0.1641],
        [ 1.3215,  0.1569,  1.5769]])
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[-1.7090,  1.0197, -0.1641],
        [ 1.3215,  0.1569,  1.5769]])
x = torch.arange(6)
Type: torch.LongTensor
Shape/Size: torch.Size([6])
Values: tensor([0, 1, 2, 3, 4, 5])
x = x.view(2, 3)
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5]])
describe(torch.sum(x, dim=0))  # sum over dim 0 (collapses the rows): one total per column
Type: torch.LongTensor
Shape/Size: torch.Size([3])
Values: tensor([3, 5, 7])
describe(torch.sum(x, dim=1))  # sum over dim 1 (collapses the columns): one total per row
Type: torch.LongTensor
Shape/Size: torch.Size([2])
Values: tensor([ 3, 12])
describe(torch.transpose(x, 0, 1))
Type: torch.LongTensor
Shape/Size: torch.Size([3, 2])
Values: tensor([[0, 3],
        [1, 4],
        [2, 5]])
import torch
x = torch.arange(6).view(2, 3)
describe(x[:1, :2])  # slice
describe(x[0, 1])  # index
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5]])
Type: torch.LongTensor
Shape/Size: torch.Size([1, 2])
Values: tensor([[0, 1]])
Type: torch.LongTensor
Shape/Size: torch.Size([])
Values: 1
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5]])
indices = torch.LongTensor([0, 2])
describe(torch.index_select(x, dim=1, index=indices))  # select according index and dim
Type: torch.LongTensor
Shape/Size: torch.Size([2, 2])
Values: tensor([[0, 2],
        [3, 5]])
indices = torch.LongTensor([0, 0])
describe(torch.index_select(x, dim=0, index=indices))  # select rows by index; duplicate indices are allowed
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [0, 1, 2]])
row_indices = torch.arange(2).long()
col_indices = torch.LongTensor([0,1])
describe(x[row_indices, col_indices])  # paired index tensors pick single elements (row i, col i); only ":" returns a slice
Type: torch.LongTensor
Shape/Size: torch.Size([2])
Values: tensor([0, 1])
Type: torch.LongTensor
Shape/Size: torch.Size([2])
Values: tensor([0, 4])
x = torch.LongTensor([[1, 2,3], [4, 5,6], [7, 8,9]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
# convert a Float Tensor to a Long Tensor
x = torch.FloatTensor([[1, 2,3], [4, 5,6], [7,8,9]])
x = x.long()
Type: torch.LongTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
# create a vector of incremental numbers
x = torch.arange(10)
Type: torch.LongTensor
Shape/Size: torch.Size([10])
Values: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x = torch.arange(0, 10).long()
Type: torch.LongTensor
Shape/Size: torch.Size([10])
Values: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x = torch.arange(20)
describe(x.view(2, 10))
describe(x.view(4, 5))
describe(x.view(10, 2))
describe(x.view(20, 1))
Type: torch.LongTensor
Shape/Size: torch.Size([1, 20])
Values: tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19]])
Type: torch.LongTensor
Shape/Size: torch.Size([2, 10])
Values: tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])
Type: torch.LongTensor
Shape/Size: torch.Size([4, 5])
Values: tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]])
Type: torch.LongTensor
Shape/Size: torch.Size([10, 2])
Values: tensor([[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]])
Type: torch.LongTensor
Shape/Size: torch.Size([20, 1])
Values: tensor([[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
# We can use view to add size-1 dimensions, which is useful when combining a tensor with other tensors: size-1 dimensions are expanded automatically to match the other operand. This is called broadcasting.
x = torch.arange(12).view(3,4)
y = torch.arange(4).view(1, 4)
z = torch.arange(3).view(3, 1)

Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
Type: torch.LongTensor
Shape/Size: torch.Size([1, 4])
Values: tensor([[0, 1, 2, 3]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 1])
Values: tensor([[0],
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  2,  4,  6],
        [ 4,  6,  8, 10],
        [ 8, 10, 12, 14]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 5,  6,  7,  8],
        [10, 11, 12, 13]])
x = torch.arange(12).view(3,4)
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
x = x.unsqueeze(dim=1)  # unsqueeze inserts a size-1 dimension at dim=1: shape [3, 4] becomes [3, 1, 4]
Type: torch.LongTensor
Shape/Size: torch.Size([3, 1, 4])
Values: tensor([[[ 0,  1,  2,  3]],

        [[ 4,  5,  6,  7]],

        [[ 8,  9, 10, 11]]])
x = x.squeeze()
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
x = torch.randn(3, 4)
describe(torch.add(x, x))

Type: torch.FloatTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 1.5385, -0.9757,  1.5769,  0.3840],
        [-0.6039, -0.5240, -0.4175,  0.7618],
        [ 0.5356,  1.5739, -0.4864, -0.6622]])
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 3.0771, -1.9515,  3.1539,  0.7680],
        [-1.2077, -1.0479, -0.8351,  1.5236],
        [ 1.0713,  3.1477, -0.9729, -1.3244]])
x = torch.arange(12).reshape(3, 4)
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  2,  4,  6],
        [ 8, 10, 12, 14],
        [16, 18, 20, 22]])
# operations that reduce a dimension
x = torch.arange(12).reshape(3, 4)
describe(x.sum(dim=0))  # sum over dim 0: one total per column
describe(x.sum(dim=1)) # sum over dim 1: one total per row
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
Type: torch.LongTensor
Shape/Size: torch.Size([4])
Values: tensor([12, 15, 18, 21])
Type: torch.LongTensor
Shape/Size: torch.Size([3])
Values: tensor([ 6, 22, 38])
# indexing, slicing, joining, and mutating
x = torch.arange(6).view(2, 3)
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5]])
describe(x[:2, :2])  # ":" is the slice operator: first two rows, first two columns
Type: torch.LongTensor
Shape/Size: torch.Size([2, 2])
Values: tensor([[0, 1],
        [3, 4]])
describe(x[0][1])  # indexing: row 0, then element 1 of that row
Type: torch.LongTensor
Shape/Size: torch.Size([])
Values: 1
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 8, 2],
        [3, 4, 5]])
# index_select picks a subset of a tensor along one dimension
x = torch.arange(9).view(3, 3)
indices = torch.LongTensor([0,2])
describe(torch.index_select(x, dim=0, index=indices))  # take a subset of the rows by index (e.g. part of a batch)
describe(torch.index_select(x, dim=1, index=indices))  # take a subset of the columns (e.g. part of the features)
Type: torch.LongTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [6, 7, 8]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 2])
Values: tensor([[0, 2],
        [3, 5],
        [6, 8]])
# use numpy-style advanced indexing
x = torch.arange(9).view(3, 3)
indices = torch.LongTensor([0,2])

describe(x[indices])  # indexes the first dimension by default; every other dimension is kept whole

describe(x[indices, :])  # same selection with the column slice spelled out

describe(x[:, indices])  # keep every row, pick columns 0 and 2
Type: torch.LongTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [6, 7, 8]])
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [6, 7, 8]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 2])
Values: tensor([[0, 2],
        [3, 5],
        [6, 8]])
# concatenate
x = torch.arange(6).view(2, 3)

describe(torch.cat([x, x], dim=0))  # joins along dim 0; the result is still 2-D

describe(torch.cat([x, x], dim=1))  # joins along dim 1; the result is still 2-D

describe(torch.stack([x, x]))  # stack creates a new dimension
Type: torch.LongTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5]])
Type: torch.LongTensor
Shape/Size: torch.Size([4, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5],
        [0, 1, 2],
        [3, 4, 5]])
Type: torch.LongTensor
Shape/Size: torch.Size([2, 6])
Values: tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])
Type: torch.LongTensor
Shape/Size: torch.Size([2, 2, 3])
Values: tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])
# concatenate three copies along dim=1 (the second dimension)
x = torch.arange(9).view(3,3)
new_x = torch.cat([x, x, x], dim=1)
Type: torch.LongTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 9])
Values: tensor([[0, 1, 2, 0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5, 3, 4, 5],
        [6, 7, 8, 6, 7, 8, 6, 7, 8]])
x = torch.arange(0, 12).view(3, 4)

Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
Type: torch.LongTensor
Shape/Size: torch.Size([4, 3])
Values: tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])

dimension swap

batch_size = 3  # 3 sentences
seq_size = 4  # length of each sentence (number of characters it contains)
feature_size = 5  # length of the vector for each character
x = torch.arange(batch_size*seq_size*feature_size).view(batch_size, seq_size, feature_size)
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4, 5])
Values: tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
describe(x.transpose(1,0))  # swaps the batch and seq dimensions; transpose exchanges exactly two dimensions
Type: torch.LongTensor
Shape/Size: torch.Size([4, 3, 5])
Values: tensor([[[ 0,  1,  2,  3,  4],
         [20, 21, 22, 23, 24],
         [40, 41, 42, 43, 44]],

        [[ 5,  6,  7,  8,  9],
         [25, 26, 27, 28, 29],
         [45, 46, 47, 48, 49]],

        [[10, 11, 12, 13, 14],
         [30, 31, 32, 33, 34],
         [50, 51, 52, 53, 54]],

        [[15, 16, 17, 18, 19],
         [35, 36, 37, 38, 39],
         [55, 56, 57, 58, 59]]])
batch_size = 3
seq_size = 4
feature_size = 5

x = torch.arange(batch_size * seq_size * feature_size).view(batch_size, seq_size, feature_size)
describe(x.permute(1,0,2))  # permute takes the new order of all dimensions: [3, 4, 5] -> [4, 3, 5]
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4, 5])
Values: tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
Type: torch.LongTensor
Shape/Size: torch.Size([4, 3, 5])
Values: tensor([[[ 0,  1,  2,  3,  4],
         [20, 21, 22, 23, 24],
         [40, 41, 42, 43, 44]],

        [[ 5,  6,  7,  8,  9],
         [25, 26, 27, 28, 29],
         [45, 46, 47, 48, 49]],

        [[10, 11, 12, 13, 14],
         [30, 31, 32, 33, 34],
         [50, 51, 52, 53, 54]],

        [[15, 16, 17, 18, 19],
         [35, 36, 37, 38, 39],
         [55, 56, 57, 58, 59]]])
batch_size = 3
seq_size = 4
feature_size = 5

x = torch.arange(batch_size * seq_size * feature_size).view(batch_size, seq_size, feature_size)
describe(x.permute(1,2,0))  # permute takes the new order of all dimensions: [3, 4, 5] -> [4, 5, 3]
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4, 5])
Values: tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
Type: torch.LongTensor
Shape/Size: torch.Size([4, 5, 3])
Values: tensor([[[ 0, 20, 40],
         [ 1, 21, 41],
         [ 2, 22, 42],
         [ 3, 23, 43],
         [ 4, 24, 44]],

        [[ 5, 25, 45],
         [ 6, 26, 46],
         [ 7, 27, 47],
         [ 8, 28, 48],
         [ 9, 29, 49]],

        [[10, 30, 50],
         [11, 31, 51],
         [12, 32, 52],
         [13, 33, 53],
         [14, 34, 54]],

        [[15, 35, 55],
         [16, 36, 56],
         [17, 37, 57],
         [18, 38, 58],
         [19, 39, 59]]])
# matrix multiplication
x1 = torch.arange(6).view(2,3).float()

x2 = torch.ones(3,2)
x2[:, 1] += 1  # leave the first dimension alone; add 1 to every entry at index 1 of the second dimension

describe(torch.mm(x1,x2))  # [2, 3] * [3, 2] -->[2, 2]
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 3])
Values: tensor([[0., 1., 2.],
        [3., 4., 5.]])
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 2])
Values: tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 2])
Values: tensor([[ 3.,  6.],
        [12., 24.]])
x = torch.arange(0, 12).view(3,4).float()

x2 = torch.ones(4, 2)
x2[:, 1] += 1


Type: torch.FloatTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
Type: torch.FloatTensor
Shape/Size: torch.Size([4, 2])
Values: tensor([[1., 2.],
        [1., 2.],
        [1., 2.],
        [1., 2.]])
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 2])
Values: tensor([[ 6., 12.],
        [22., 44.],
        [38., 76.]])
# Compute gradients

x = torch.tensor([2.0, 3.0], requires_grad=True)
z = 3 * x
describe(z)  # grad_fn=<MulBackward0>
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([6., 9.], grad_fn=<MulBackward0>)
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([2., 3.], requires_grad=True)
# loss = sum(3 * x), so d(loss)/dx is 3 for every element of x.
x = torch.tensor([2.0, 3.0], requires_grad=True)
z = 3*x

loss = z.sum()

# Backpropagate so that x.grad is populated; without this call x.grad is None.
loss.backward()

print("after loss.backward(), x.gread: {}".format(x.grad))
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([2., 3.], requires_grad=True)
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([6., 9.], grad_fn=<MulBackward0>)
Type: torch.FloatTensor
Shape/Size: torch.Size([])
Values: 15.0
after loss.backward(), x.gread: tensor([3., 3.])

Compute a conditional gradient

def f(x):
    """Return sin(x) if every element of ``x`` is positive, otherwise cos(x).

    The branch is chosen once for the whole tensor, so an input with mixed
    signs has cos applied to all of its elements.
    """
    if (x.data>0).all():
        return torch.sin(x)
    # At least one element is <= 0: fall back to cos for the whole tensor.
    return torch.cos(x)
x = torch.tensor([1.0], requires_grad=True)
y = f(x)
Type: torch.FloatTensor
Shape/Size: torch.Size([1])
Values: tensor([0.5403])
x =torch.tensor([1.0, 0.5], requires_grad=True)
y = f(x)
y.backward()  # raises RuntimeError: y has two elements, and an implicit gradient exists only for scalar outputs

RuntimeError                              Traceback (most recent call last)

Cell In[109], line 3
      1 x =torch.tensor([1.0, 0.5], requires_grad=True)
      2 y = f(x)
----> 3 y.backward()
      4 describe(x.grad)

File D:\17-anconda\Lib\site-packages\torch\_tensor.py:487, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
    477 if has_torch_function_unary(self):
    478     return handle_torch_function(
    479         Tensor.backward,
    480         (self,),
    485         inputs=inputs,
    486     )
--> 487 torch.autograd.backward(
    488     self, gradient, retain_graph, create_graph, inputs=inputs
    489 )

File D:\17-anconda\Lib\site-packages\torch\autograd\__init__.py:193, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    189 inputs = (inputs,) if isinstance(inputs, torch.Tensor) else \
    190     tuple(inputs) if inputs is not None else tuple()
    192 grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
--> 193 grad_tensors_ = _make_grads(tensors, grad_tensors_, is_grads_batched=False)
    194 if retain_graph is None:
    195     retain_graph = create_graph

File D:\17-anconda\Lib\site-packages\torch\autograd\__init__.py:88, in _make_grads(outputs, grads, is_grads_batched)
     86 if out.requires_grad:
     87     if out.numel() != 1:
---> 88         raise RuntimeError("grad can be implicitly created only for scalar outputs")
     89     new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
     90 else:

RuntimeError: grad can be implicitly created only for scalar outputs
x =torch.tensor([1.0, 0.5], requires_grad=True)
y = f(x)
y.sum().backward()  # backward() needs a scalar, so reduce the tensor with sum() first
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([0.5403, 0.8776])
x = torch.tensor([1.0, -1], requires_grad=True)  # this isn't right for this edge case:
y = f(x)
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([-0.8415,  0.8415])
def f2(x):
    """Elementwise blend: sin(x) where x > 0 and cos(x) elsewhere, via a 0/1 float mask."""
    positive = (x > 0).float()
    non_positive = 1 - positive
    return positive * torch.sin(x) + non_positive * torch.cos(x)
x  = torch.tensor([1.0, -1], requires_grad=True)
y = f2(x)
Type: torch.FloatTensor
Shape/Size: torch.Size([2])
Values: tensor([0.5403, 0.8415])
def describe_grad(x):
    """Print the gradient stored on tensor ``x`` and its ``grad_fn``.

    When ``x.grad`` is None only a short notice is printed; otherwise the
    gradient values and ``x.grad_fn`` are printed.
    """
    if x.grad is None:
        print("None gradient information")
    else:
        print("Gradient: \n{}".format(x.grad))
        print("Gradient func: {}".format(x.grad_fn))
x = torch.ones(2, 2, requires_grad=True)

y = (x+2)*(x+5) + 3

z = y.mean()
Type: torch.FloatTensor
Shape/Size: torch.Size([2, 2])
Values: tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
None gradient information
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
<AddBackward0 at 0x21c1ef22800>
x = torch.rand(3, 3)
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[0.5414, 0.6419, 0.2976],
        [0.7077, 0.4189, 0.0655],
        [0.8839, 0.8083, 0.7528]])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.rand(3, 3).to(device)
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[0.8988, 0.6839, 0.7658],
        [0.9149, 0.3993, 0.1100],
        [0.2541, 0.4333, 0.4451]])
cpu_device = torch.device("cpu")
y = torch.rand(3,3)
x + y
tensor([[1.3954, 1.4704, 1.4262],
        [1.0452, 0.7491, 0.4924],
        [1.0584, 0.7519, 0.7359]])
y = y.to(cpu_device)
x = x.to(cpu_device)
x + y
tensor([[1.3954, 1.4704, 1.4262],
        [1.0452, 0.7491, 0.4924],
        [1.0584, 0.7519, 0.7359]])
x = torch.arange(12).view(3,4)

# Keep the expanded tensor so the squeeze below acts on it; squeezing the
# original 3x4 tensor would be a no-op because it has no size-1 dimension.
expanded = torch.unsqueeze(x, dim=0)
describe(expanded)  # add a dimension of size 1 inserted at 0th axis

describe(torch.squeeze(expanded, dim=0)) # remove the extra dimension that was just added
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
Type: torch.LongTensor
Shape/Size: torch.Size([1, 3, 4])
Values: tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]])
Type: torch.LongTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
# Create a random tensor of shape 5x3 in the interval [3, 7)
x = torch.rand(5, 3)

y = 4*x + 3
Type: torch.FloatTensor
Shape/Size: torch.Size([5, 3])
Values: tensor([[0.5311, 0.6449, 0.7224],
        [0.4416, 0.3634, 0.8818],
        [0.9874, 0.7316, 0.2814],
        [0.0651, 0.0065, 0.5035],
        [0.3082, 0.3742, 0.4297]])
Type: torch.FloatTensor
Shape/Size: torch.Size([5, 3])
Values: tensor([[5.1244, 5.5798, 5.8897],
        [4.7664, 4.4535, 6.5273],
        [6.9496, 5.9264, 4.1257],
        [3.2603, 3.0260, 5.0138],
        [4.2326, 4.4967, 4.7188]])
x = torch.randn(3, 3)
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 3])
Values: tensor([[ 0.2874, -1.3728,  0.6144],
        [-0.2319,  0.2589, -0.4750],
        [-0.1330, -2.9222, -1.3649]])
x = torch.Tensor([6, 1, 3, 0, 3])

non_zero_indices = torch.nonzero(x).flatten() # indices of the non-zero entries
describe(x[non_zero_indices])  # look up the values at those indices
Type: torch.FloatTensor
Shape/Size: torch.Size([5])
Values: tensor([6., 1., 3., 0., 3.])
Type: torch.LongTensor
Shape/Size: torch.Size([4])
Values: tensor([0, 1, 2, 4])
Type: torch.FloatTensor
Shape/Size: torch.Size([4])
Values: tensor([6., 1., 3., 3.])
# Create a random tensor of size (3,1) and then horizontally stack 4 copies together.
x = torch.randn(3,1)
stack_x = torch.hstack([x, x,x,x])
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 1])
Values: tensor([[ 0.8261],
        [ 0.8656],
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 4])
Values: tensor([[ 0.8261,  0.8261,  0.8261,  0.8261],
        [ 0.8656,  0.8656,  0.8656,  0.8656],
        [-1.4082, -1.4082, -1.4082, -1.4082]])
# Return the batch matrix-matrix product of two 3 dimensional matrices (a=torch.rand(3,4,5), b=torch.rand(3,5,4)).
a = torch.rand(3,4,5)
b = torch.rand(3,5,4)
res = torch.bmm(a,b)
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 4, 4])
Values: tensor([[[1.2869, 1.2449, 0.9418, 0.9270],
         [1.7913, 1.9191, 1.5453, 1.6199],
         [1.2045, 1.8555, 1.2670, 1.7233],
         [0.9687, 1.1051, 0.8805, 0.8424]],

        [[0.7930, 1.7822, 1.9577, 1.3544],
         [0.5916, 1.3901, 1.4047, 1.3880],
         [0.5592, 0.9601, 0.9455, 1.3010],
         [0.7200, 1.5366, 1.9125, 1.0774]],

        [[1.3656, 1.6819, 1.1158, 1.4991],
         [0.7533, 0.6934, 0.8819, 0.7267],
         [0.8952, 1.3834, 1.3718, 1.3493],
         [1.2043, 1.3467, 0.7761, 1.3627]]])
# Return the batch matrix-matrix product of a 3D matrix and a 2D matrix (a=torch.rand(3,4,5), b=torch.rand(5,4)).
a = torch.rand(3, 4, 5)
b = torch.rand(5,4)

b_unsqueeze = torch.unsqueeze(b, dim=0).expand(3, -1, -1)

torch.bmm(a, b_unsqueeze)
Type: torch.FloatTensor
Shape/Size: torch.Size([5, 4])
Values: tensor([[0.0543, 0.1708, 0.9065, 0.9649],
        [0.2669, 0.8926, 0.4426, 0.3603],
        [0.4807, 0.3700, 0.6377, 0.5379],
        [0.4975, 0.2898, 0.8037, 0.1902],
        [0.4931, 0.8293, 0.2335, 0.1662]])
Type: torch.FloatTensor
Shape/Size: torch.Size([3, 5, 4])
Values: tensor([[[0.0543, 0.1708, 0.9065, 0.9649],
         [0.2669, 0.8926, 0.4426, 0.3603],
         [0.4807, 0.3700, 0.6377, 0.5379],
         [0.4975, 0.2898, 0.8037, 0.1902],
         [0.4931, 0.8293, 0.2335, 0.1662]],

        [[0.0543, 0.1708, 0.9065, 0.9649],
         [0.2669, 0.8926, 0.4426, 0.3603],
         [0.4807, 0.3700, 0.6377, 0.5379],
         [0.4975, 0.2898, 0.8037, 0.1902],
         [0.4931, 0.8293, 0.2335, 0.1662]],

        [[0.0543, 0.1708, 0.9065, 0.9649],
         [0.2669, 0.8926, 0.4426, 0.3603],
         [0.4807, 0.3700, 0.6377, 0.5379],
         [0.4975, 0.2898, 0.8037, 0.1902],
         [0.4931, 0.8293, 0.2335, 0.1662]]])

tensor([[[1.1268, 1.5896, 2.0374, 1.5656],
         [0.4025, 0.3609, 0.9874, 0.6378],
         [0.7274, 0.7392, 1.0097, 0.6233],
         [0.7233, 0.7789, 1.1886, 0.5549]],

        [[0.9892, 1.1173, 1.3630, 0.7361],
         [1.0926, 1.0460, 1.8304, 1.1643],
         [0.7384, 1.1785, 1.4863, 1.1581],
         [1.0424, 1.2183, 1.7038, 1.0011]],

        [[0.8521, 1.0331, 1.0507, 0.7509],
         [0.8426, 1.1495, 0.9975, 0.5238],
         [0.9287, 1.2934, 1.4680, 1.0944],
         [1.4050, 2.1400, 2.2296, 1.6783]]])

