当前位置:网站首页>Pytorch forecast house price
Predicting house prices with PyTorch
2022-06-26 05:07:00 【f0.0y】
import numpy as np
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l
import matplotlib.pyplot as plt
# Read the training set and the test set
train_data = pd.read_csv('../data/train.csv')
test_data = pd.read_csv('../data/test.csv')
# Data preprocessing. Drop the first column (the ID) of both tables and the
# last column of the training table (the target price), then stack the two so
# that standardization and one-hot encoding see exactly the same columns.
all_features = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))
# Columns whose dtype is not 'object' are treated as numeric features
features_index = all_features.dtypes[all_features.dtypes != 'object'].index
# Standardize every numeric column to zero mean and unit standard deviation
all_features[features_index] = all_features[features_index].apply(lambda x: (x - x.mean()) / x.std())
# After standardization the column mean is 0, so filling missing values (NaN)
# with 0 replaces them with the mean
all_features[features_index] = all_features[features_index].fillna(0)
# One-hot encode the remaining (object) columns; dummy_na=True gives missing
# values their own indicator column.  dtype=float keeps the indicator columns
# numeric: recent pandas releases emit bool indicators by default, which turns
# `.values` below into an object array that torch.tensor cannot convert.
all_features = pd.get_dummies(all_features, dummy_na=True, dtype=float)
print(all_features.shape)
# Number of rows that belong to the training set
train_count = train_data.shape[0]
print(train_count)
# The first train_count rows are the training features, the rest the test features
train_features = torch.tensor(all_features[:train_count].values, dtype=torch.float32)
test_features = torch.tensor(all_features[train_count:].values, dtype=torch.float32)
# Training labels as a column vector of shape (train_count, 1)
train_labels = torch.tensor(train_data.SalePrice.values.reshape(-1, 1), dtype=torch.float32)
# The training loss is the mean squared error
loss_function = nn.MSELoss()
# Multilayer perceptron used as the regression model
class Net(nn.Module):
    """Fully connected regressor: four ReLU-activated linear layers and a linear output.

    Args:
        in_features: number of input features per sample.
    """

    def __init__(self, in_features):
        super().__init__()
        # The attribute names become the state_dict keys, and the creation
        # order determines how the random initial weights are drawn.
        self.linear_relu1 = nn.Linear(in_features, 128)  # Input layer
        self.linear_relu2 = nn.Linear(128, 256)  # Hidden layer
        self.linear_relu3 = nn.Linear(256, 256)  # Hidden layer
        self.linear_relu4 = nn.Linear(256, 256)  # Hidden layer
        self.linear5 = nn.Linear(256, 1)  # Output layer

    def forward(self, x):
        """Return one prediction per input row (last dimension of size 1)."""
        hidden = nn.functional.relu(self.linear_relu1(x))
        hidden = nn.functional.relu(self.linear_relu2(hidden))
        hidden = nn.functional.relu(self.linear_relu3(hidden))
        hidden = nn.functional.relu(self.linear_relu4(hidden))
        # No activation on the output layer: this is a regression head
        return self.linear5(hidden)
# Build the model; its input width is the number of columns of the training feature tensor
multilayer_perceptrons_model = Net(train_features.shape[1])
# Evaluation metric: root-mean-squared error between log-predictions and log-labels
def log_rmse(net, features, labels):
    """Return the RMSE between log(prediction) and log(label) as a Python float.

    Args:
        net: callable mapping ``features`` to predictions shaped like ``labels``.
        features: input tensor passed to ``net``.
        labels: tensor of strictly positive target values.

    Returns:
        The root of the mean squared difference of the logarithms.
    """
    # This is evaluation only, so no autograd graph is recorded.
    with torch.no_grad():
        # Predictions below 1 are raised to 1 so the logarithm stays finite
        # and non-negative.
        clipped_preds = torch.clamp(net(features), min=1.0)
        # The metric is by definition a mean squared error, so it is computed
        # directly instead of through the module-level training loss object.
        squared_error = torch.nn.functional.mse_loss(
            torch.log(clipped_preds), torch.log(labels))
        return torch.sqrt(squared_error).item()
# Model training
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    """Train ``net`` with Adam and record the log-RMSE after every epoch.

    Args:
        net: model to optimize in place.
        train_features, train_labels: training tensors.
        test_features, test_labels: evaluation tensors; when ``test_labels``
            is None no evaluation curve is recorded.
        num_epochs: number of passes over the training data.
        learning_rate, weight_decay: Adam hyperparameters.
        batch_size: mini-batch size handed to the data loader.

    Returns:
        ``(train_ls, test_ls)``: per-epoch log-RMSE lists; ``test_ls`` is
        empty when ``test_labels`` is None.
    """
    train_history, test_history = [], []
    batches = d2l.load_array((train_features, train_labels), batch_size)
    # The optimizer is Adam
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate,
                            weight_decay=weight_decay)
    evaluate_on_test = test_labels is not None
    for _ in range(num_epochs):
        # One pass over all mini-batches
        for batch_X, batch_y in batches:
            adam.zero_grad()
            batch_loss = loss_function(net(batch_X), batch_y)
            batch_loss.backward()
            adam.step()
        # Metrics are taken once per epoch on the complete tensors
        train_history.append(log_rmse(net, train_features, train_labels))
        if evaluate_on_test:
            test_history.append(log_rmse(net, test_features, test_labels))
    return train_history, test_history
# Build the data for one round of K-fold cross-validation: the samples are cut
# into k consecutive folds, fold i is the validation set and the others are
# concatenated into the training set
def get_k_fold_data(k, i, X, y):
    """Return ``(X_train, y_train, X_valid, y_valid)`` for fold ``i`` of ``k``.

    Folds are consecutive slices of ``X.shape[0] // k`` samples.  The last fold
    additionally takes the ``X.shape[0] % k`` leftover samples so that every
    sample is used; when the sample count is divisible by ``k`` all folds have
    the same size.

    Args:
        k: number of folds, at least 2.
        i: index of the validation fold, ``0 <= i < k``.
        X: feature tensor, samples along dimension 0.
        y: label tensor, samples along dimension 0.

    Raises:
        ValueError: if ``k < 2`` or ``i`` is not a valid fold index.
    """
    if k <= 1:
        raise ValueError(f'k must be greater than 1, got {k}')
    if not 0 <= i < k:
        raise ValueError(f'fold index i must satisfy 0 <= i < {k}, got {i}')
    num_samples = X.shape[0]
    fold_size = num_samples // k
    start = i * fold_size
    # The last fold runs to the end so the remainder is not silently dropped
    stop = num_samples if i == k - 1 else start + fold_size
    X_valid, y_valid = X[start:stop], y[start:stop]
    # Everything before and after the validation slice, in original order
    X_train = torch.cat([X[:start], X[stop:]], 0)
    y_train = torch.cat([y[:start], y[stop:]], 0)
    return X_train, y_train, X_valid, y_valid
# Run K-fold cross-validation: train once per fold and average the final losses
def k_fold_train(k, X_train, y_train, num_epochs, learning_rate, weight_decay, batch_size):
    """Train the module-level model once per fold and plot the learning curves.

    Every fold starts from the weights the model had when this function was
    called.  Without that reset, later folds would be validated on samples the
    model had already been trained on in earlier folds, which makes the
    validation error look better than it is.

    Side effects: draws one subplot per fold on the current matplotlib figure,
    prints the final losses of every fold, and leaves
    ``multilayer_perceptrons_model`` holding the weights trained on the last
    fold's training split.

    Args:
        k: number of folds.
        X_train, y_train: full training features and labels.
        num_epochs, learning_rate, weight_decay, batch_size: passed to ``train``.

    Returns:
        ``(mean_train_log_rmse, mean_valid_log_rmse)`` over the k folds, each
        taken from the last epoch of the fold.
    """
    import copy
    import math

    # Snapshot of the starting weights, restored at the beginning of each fold
    initial_state = copy.deepcopy(multilayer_perceptrons_model.state_dict())
    # Both curves share this 1-based epoch axis
    epochs = list(range(1, num_epochs + 1))
    # Three plots per row; at least two rows so that k <= 6 keeps the 2 x 3 grid
    n_cols = 3
    n_rows = max(2, math.ceil(k / n_cols))
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        multilayer_perceptrons_model.load_state_dict(initial_state)
        fold_data = get_k_fold_data(k, i, X_train, y_train)
        # *fold_data unpacks the 4-tuple (train features, train labels,
        # validation features, validation labels) into positional arguments
        train_ls, valid_ls = train(multilayer_perceptrons_model, *fold_data,
                                   num_epochs, learning_rate, weight_decay, batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        # Draw the subplot of this fold
        plt.subplot(n_rows, n_cols, i + 1)
        train_line, = plt.plot(epochs, train_ls)
        valid_line, = plt.plot(epochs, valid_ls)
        plt.xlabel("epoch")
        plt.ylabel("rmse")
        plt.title('#{} fold result'.format(i + 1))
        plt.legend([train_line, valid_line], ['train', 'valid'], loc='best')
        plt.grid(False)
        print(f' fold {i + 1}, Training log rmse={float(train_ls[-1]):f}, '
              f' verification log rmse={float(valid_ls[-1]):f}')
    return train_l_sum / k, valid_l_sum / k
# Hyperparameters of the cross-validation run
k, num_epochs = 5, 100
lr, weight_decay = 1e-4, 0
batch_size = 64
# Cross-validated training on the complete training set
train_l, valid_l = k_fold_train(
    k=k,
    X_train=train_features,
    y_train=train_labels,
    num_epochs=num_epochs,
    learning_rate=lr,
    weight_decay=weight_decay,
    batch_size=batch_size,
)
print(f'{k}- Fold validation : Average training log rmse:{float(train_l):f}, '
      f' Average validation log rmse:{float(valid_l):f}')
# Persist the model parameters so they can be reloaded for export
torch.save(multilayer_perceptrons_model.state_dict(), 'model_param')
# Lay out the per-fold subplots and display the figure
plt.tight_layout()
plt.show()
Environment
OS: macOS 12.1
pytorch: 1.10.1
Calling the PyTorch model from C++
Model implementation
import torch
# Multilayer perceptron model, re-declared here so its parameters can be loaded and exported
# class Net(torch.jit.ScriptModule):
class Net(torch.nn.Module):
    """Fully connected network: four linear layers with ReLU, then a linear output layer.

    The attribute names below are the keys under which the layer parameters
    appear in the model's state_dict.
    """
    def __init__(self, in_features):
        # in_features: number of input features per sample
        super(Net, self).__init__()
        self.linear_relu1 = torch.nn.Linear(in_features, 128) # Input layer
        self.linear_relu2 = torch.nn.Linear(128, 256) # Hidden layer
        self.linear_relu3 = torch.nn.Linear(256, 256) # Hidden layer
        self.linear_relu4 = torch.nn.Linear(256, 256) # Hidden layer
        self.linear5 = torch.nn.Linear(256, 1) # Output layer
    def forward(self, input):
        # Each of the first four linear layers is followed by a ReLU;
        # the output layer has no activation.
        y_pred = self.linear_relu1(input)
        y_pred = torch.nn.functional.relu(y_pred)
        y_pred = self.linear_relu2(y_pred)
        y_pred = torch.nn.functional.relu(y_pred)
        y_pred = self.linear_relu3(y_pred)
        y_pred = torch.nn.functional.relu(y_pred)
        y_pred = self.linear_relu4(y_pred)
        y_pred = torch.nn.functional.relu(y_pred)
        y_pred = self.linear5(y_pred)
        return y_pred
# Load the pre-trained parameters first: the input width of the network is read
# from them instead of being hard-coded (it was 331 for the data used in this
# article), so the export keeps working when the number of features changes.
state_dict = torch.load('model_param', map_location='cpu')
# The weight of a Linear layer has shape (out_features, in_features)
in_features = state_dict['linear_relu1.weight'].shape[1]
# Model instance
my_model = Net(in_features)
# Compile the model to TorchScript so it can be loaded without Python
scripted_model = torch.jit.script(my_model)
print(scripted_model)
print(scripted_model.code)
# Copy the pre-trained parameters into the scripted model
scripted_model.load_state_dict(state_dict)
# Switch to evaluation (prediction) mode
scripted_model.eval()
# Serialize the scripted model to a file
scripted_model.save("model.pt")
Calling the model from C++
#include <torch/script.h>
#include <iostream>
using namespace std;
int main(int argc, const char* argv[])
{
if (argc != 2) {
cerr << "usage: example_app <path-to-exported-script-module>" << endl;
return -1;
}
auto model = torch::jit::load(argv[1]); // torch::jit::Module
cout << "load OK!" << endl;
return 0;
}
CMake
Create the following CMakeLists.txt file
# Build script for example_app, the C++ program that loads the exported model.
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(example)
# Locate the Torch (libtorch) CMake package; REQUIRED stops configuration if it is not found.
find_package(Torch REQUIRED)
# Print the libraries reported by the Torch package, for troubleshooting
message(STATUS "TORCH_LIBRARIES = ${TORCH_LIBRARIES}")
# Intel oneAPI MKL location, hard-coded for version 2022.0.0 -- adjust to the local installation.
set(mkl_include /opt/intel/oneapi/mkl/2022.0.0/include)
set(mkl_lib /opt/intel/oneapi/mkl/2022.0.0/lib)
# These directory settings must come before add_executable to apply to the target
include_directories(${mkl_include})
link_directories(${mkl_lib})
add_executable(example_app example_app.cpp)
# Link against Torch and the MKL ILP64 interface library (macOS .dylib name).
# NOTE(review): the author reports that the MKL setup is still not working -- verify before reuse.
target_link_libraries(example_app "${TORCH_LIBRARIES}" libmkl_intel_ilp64.dylib)
# Compile example_app as C++14
set_property(TARGET example_app PROPERTY CXX_STANDARD 14)
There are still problems with the MKL setup; to be fixed ...
reference :
边栏推荐
- 为什么许多shopify独立站卖家都在用聊天机器人?一分钟读懂行业秘密!
- Créateur de génie: cavalier solitaire, magnat de la technologie et ai | dix ans d'apprentissage profond
- Multipass Chinese document - remove instance
- ssh连win10报错:Permission denied (publickey,keyboard-interactive).
- Beidou navigation technology and industrial application of "chasing dreams in space and feeling for Beidou"
- Statsmodels Library -- linear regression model
- The beautiful scenery is natural, and the wonderful pen is obtained by chance -- how is the "wonderful pen" refined?
- Using requests library and re library to crawl web pages
- AD教程系列 | 4 - 创建集成库文件
- [quartz] read configuration from database to realize dynamic timing task
猜你喜欢
Codeforces Round #800 (Div. 2)
Classic theory: detailed explanation of three handshakes and four waves of TCP protocol
2.< tag-动态规划和常规问题>lt.343. 整数拆分
Wechat applet exits the applet (navigator and api--wx.exitminiprogram)
localStorage浏览器本地储存,解决游客不登录的情况下限制提交表单次数。
Zhongshanshan: engineers after being blasted will take off | ONEFLOW u
A beginner's entry is enough: develop mobile IM from zero
Yolov5 super parameter setting and data enhancement analysis
【Unity3D】刚体组件Rigidbody
torchvision_ Transform (image enhancement)
随机推荐
Keras actual combat cifar10 in tensorflow
PHP one sentence Trojan horse
Classic theory: detailed explanation of three handshakes and four waves of TCP protocol
Tensorflow and deep learning day 3
图像翻译/GAN:Unsupervised Image-to-Image Translation with Self-Attention Networks基于自我注意网络的无监督图像到图像的翻译
ModuleNotFoundError: No module named ‘numpy‘
5. <tag-栈和常规问题>补充: lt.946. 验证栈序列(同剑指 Offer 31. 栈的压入、弹出序列)
Technical past: tcp/ip protocol that has changed the world (precious pictures, caution for mobile phones)
Create a binary response variable using the cut sub box operation
The beautiful scenery is natural, and the wonderful pen is obtained by chance -- how is the "wonderful pen" refined?
【Latex】错误类型总结(持更)
Use to_ Numeric to numeric type
Why does the mobile IM based on TCP still need to keep the heartbeat alive?
天才制造者:独行侠、科技巨头和AI|深度学习崛起十年
PHP之一句话木马
Datetime data type ---now() gets the current time, datetime() creation date, performs mathematical operations, and to_ Datetime() converts to date type and extracts various parts of date
localStorage浏览器本地储存,解决游客不登录的情况下限制提交表单次数。
5. < tag stack and general problems > supplement: lt.946 Verify the stack sequence (the same as the push in and pop-up sequence of offer 31. stack)
ModuleNotFoundError: No module named ‘numpy‘
Solution to back-off restarting failed container