您好,登錄后才能下訂單哦!
這篇文章主要介紹pytorch Dropout過(guò)擬合的示例分析,文中介紹的非常詳細(xì),具有一定的參考價(jià)值,感興趣的小伙伴們一定要看完!
import torch from torch.autograd import Variable import matplotlib.pyplot as plt torch.manual_seed(1) N_SAMPLES = 20 N_HIDDEN = 300 # training data x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1) y = x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1)) x, y = Variable(x), Variable(y) # test data test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1) test_y = test_x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1)) test_x = Variable(test_x, volatile=True) test_y = Variable(test_y, volatile=True) # show data # plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train') # plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test') # plt.legend(loc='upper left') # plt.ylim((-2.5, 2.5)) # plt.show() net_overfitting = torch.nn.Sequential( torch.nn.Linear(1, N_HIDDEN), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, N_HIDDEN), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, 1), ) net_dropped = torch.nn.Sequential( torch.nn.Linear(1, N_HIDDEN), torch.nn.Dropout(0.5), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, N_HIDDEN), torch.nn.Dropout(0.5), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, 1), ) print(net_overfitting) print(net_dropped) optimizer_ofit = torch.optim.Adam( net_overfitting.parameters(), lr = 0.01, ) optimizer_drop = torch.optim.Adam( net_dropped.parameters(), lr = 0.01, ) loss_func = torch.nn.MSELoss() plt.ion() for t in range(500): pred_ofit = net_overfitting(x) pred_drop = net_dropped(x) loss_ofit = loss_func(pred_ofit, y) loss_drop = loss_func(pred_drop, y) optimizer_ofit.zero_grad() optimizer_drop.zero_grad() loss_ofit.backward() loss_drop.backward() optimizer_ofit.step() optimizer_drop.step() if t % 10 == 0: net_overfitting.eval() net_dropped.eval() plt.cla() test_pred_ofit = net_overfitting(test_x) test_pred_drop = net_dropped(test_x) plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.3, label='train') plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.3, label='test') plt.plot(test_x.data.numpy(), test_pred_ofit.data.numpy(), 'r-', lw=3, label='overfitting') plt.plot(test_x.data.numpy(), test_pred_drop.data.numpy(), 'b--', lw=3, label='dropout(50%)') plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).data[0], fontdict={'size': 20, 'color': 'red'}) plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).data[0], fontdict={'size': 20, 'color': 'blue'}) plt.legend(loc='upper left'); plt.ylim((-2.5, 2.5));plt.pause(0.1) net_overfitting.train() net_dropped.train() plt.ioff() plt.show()
補(bǔ)充:pytorch避免過(guò)擬合-dropout丟棄法的實(shí)現(xiàn)
對(duì)于一個(gè)單隱藏層的多層感知機(jī),其中輸入個(gè)數(shù)為4,隱藏單元個(gè)數(shù)為5,且隱藏單元的計(jì)算表達(dá)式為:
%matplotlib inline import torch import torch.nn as nn import numpy as np def dropout(X, drop_prob): X = X.float() assert 0 <= drop_prob <= 1 keep_prob = 1 - drop_prob # 這種情況下把全部元素都丟棄 if keep_prob == 0: return torch.zeros_like(X) mask = (torch.rand(X.shape) < keep_prob).float() return mask * X / keep_prob
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256 W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True) b1 = torch.zeros(num_hiddens1, requires_grad=True) W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True) b2 = torch.zeros(num_hiddens2, requires_grad=True) W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True) b3 = torch.zeros(num_outputs, requires_grad=True) params = [W1, b1, W2, b2, W3, b3]
定義模型將全連接層和激活函數(shù)ReLU串起來(lái),并對(duì)每個(gè)激活函數(shù)的輸出使用丟棄法。
分別設(shè)置各個(gè)層的丟棄概率。通常的建議是把靠近輸入層的丟棄概率設(shè)得小一點(diǎn)。
在這個(gè)實(shí)驗(yàn)中,我們把第一個(gè)隱藏層的丟棄概率設(shè)為0.2,把第二個(gè)隱藏層的丟棄概率設(shè)為0.5。
我們可以通過(guò)參數(shù)is_training來(lái)判斷運(yùn)行模式為訓(xùn)練還是測(cè)試,并只在訓(xùn)練模式下使用丟棄法。
drop_prob1, drop_prob2 = 0.2, 0.5 def net(X, is_training=True): X = X.view(-1, num_inputs) H1 = (torch.matmul(X, W1) + b1).relu() if is_training: # 只在訓(xùn)練模型時(shí)使用丟棄法 H1 = dropout(H1, drop_prob1) # 在第一層全連接后添加丟棄層 H2 = (torch.matmul(H1, W2) + b2).relu() if is_training: H2 = dropout(H2, drop_prob2) # 在第二層全連接后添加丟棄層 return torch.matmul(H2, W3) + b3 def evaluate_accuracy(data_iter, net): acc_sum, n = 0.0, 0 for X, y in data_iter: if isinstance(net, torch.nn.Module): net.eval() # 評(píng)估模式, 這會(huì)關(guān)閉dropout acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() net.train() # 改回訓(xùn)練模式 else: # 自定義的模型 if('is_training' in net.__code__.co_varnames): # 如果有is_training這個(gè)參數(shù) # 將is_training設(shè)置成False acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() else: acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() n += y.shape[0] return acc_sum / n
num_epochs, lr, batch_size = 5, 100.0, 256 loss = torch.nn.CrossEntropyLoss() def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'): """Download the fashion mnist dataset and then load into memory.""" trans = [] if resize: trans.append(torchvision.transforms.Resize(size=resize)) trans.append(torchvision.transforms.ToTensor()) transform = torchvision.transforms.Compose(trans) mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform) mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform) if sys.platform.startswith('win'): num_workers = 0 # 0表示不用額外的進(jìn)程來(lái)加速讀取數(shù)據(jù) else: num_workers = 4 train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers) return train_iter, test_iter def train_ch4(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None): for epoch in range(num_epochs): train_l_sum, train_acc_sum, n = 0.0, 0.0, 0 for X, y in train_iter: y_hat = net(X) l = loss(y_hat, y).sum() # 梯度清零 if optimizer is not None: optimizer.zero_grad() elif params is not None and params[0].grad is not None: for param in params: param.grad.data.zero_() l.backward() if optimizer is None: sgd(params, lr, batch_size) else: optimizer.step() # “softmax回歸的簡(jiǎn)潔實(shí)現(xiàn)”一節(jié)將用到 train_l_sum += l.item() train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item() n += y.shape[0] test_acc = evaluate_accuracy(test_iter, net) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc)) train_iter, test_iter = load_data_fashion_mnist(batch_size) train_ch4(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
以上是“pytorch Dropout過(guò)擬合的示例分析”這篇文章的所有內(nèi)容,感謝各位的閱讀!希望分享的內(nèi)容對(duì)大家有幫助,更多相關(guān)知識(shí),歡迎關(guān)注億速云行業(yè)資訊頻道!
免責(zé)聲明:本站發(fā)布的內(nèi)容(圖片、視頻和文字)以原創(chuàng)、轉(zhuǎn)載和分享為主,文章觀點(diǎn)不代表本網(wǎng)站立場(chǎng),如果涉及侵權(quán)請(qǐng)聯(lián)系站長(zhǎng)郵箱:is@yisu.com進(jìn)行舉報(bào),并提供相關(guān)證據(jù),一經(jīng)查實(shí),將立刻刪除涉嫌侵權(quán)內(nèi)容。