Home > Model selection and optimization

Model selection and optimization

2022-06-23 20:39:00 Mr. Dongye

Cross validation: split all the data into n equal folds.
The most commonly used choice is 10-fold cross validation.

Example:
4-fold cross validation (the data is split into 4 equal parts; each part serves as the validation set exactly once):

The final score reported is the mean accuracy over the 4 folds.


Grid search: hyper-parameter tuning.
Preset several hyper-parameter combinations for the model, evaluate each combination with cross validation, and select the best-scoring combination to build the final model.

API
from sklearn.model_selection import GridSearchCV

# coding=utf8
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Widen pandas console output so wide frames print on one line, and
# align ambiguous/East-Asian-width characters correctly.
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 500)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

# Tab-separated dating data set: three numeric features plus a class label.
df = pd.read_csv(
    r'E:\Python  machine learning \csv\datingTestSet.txt',
    sep='\t',
    header=None,
    names=['flight', 'icecream', 'game', 'type']
)

# Feature matrix; DataFrame.values already returns a numpy ndarray,
# so the former extra np.array() re-wrap was redundant and is removed.
df_value = df[['flight', 'icecream', 'game']].values

# test_size=0.25 keeps 25% of the rows as the validation/test split.
x_train, x_test, y_train, y_test = train_test_split(
    df_value, df['type'], test_size=0.25
)

# Preprocessing: standardize to zero mean and unit variance,
# i.e. X = (x - mean) / std.
# Fit the scaler on the training split only, then apply the SAME
# transform to the test split to avoid data leakage.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# coding=utf8
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Widen pandas console output so wide frames print on one line, and
# align ambiguous/East-Asian-width characters correctly.
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 500)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

# Tab-separated dating data set: three numeric features plus a class label.
df = pd.read_csv(
    r'E:\Python  machine learning \csv\datingTestSet.txt',
    sep='\t',
    header=None,
    names=['flight', 'icecream', 'game', 'type']
)

# Feature matrix; DataFrame.values already returns a numpy ndarray.
df_value = df[['flight', 'icecream', 'game']].values

# test_size=0.25 keeps 25% of the rows as the validation/test split.
x_train, x_test, y_train, y_test = train_test_split(
    df_value, df['type'], test_size=0.25
)

# Preprocessing: standardize to zero mean and unit variance,
# i.e. X = (x - mean) / std.
# BUG FIX: the original copy was truncated here, ending in a bare
# `x_train` expression — the fitted scaler was never applied.
# Fit on the training split, transform both splits consistently.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

Example
# coding=utf8
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Console display tweaks: many columns, wide lines, and proper
# alignment for East-Asian-width characters.
for option, value in [
    ('display.max_columns', 100),
    ('display.width', 500),
    ('display.unicode.ambiguous_as_wide', True),
    ('display.unicode.east_asian_width', True),
]:
    pd.set_option(option, value)

# Load the tab-separated dating data set (no header row in the file).
df = pd.read_csv(
    r'E:\Python  machine learning \csv\datingTestSet.txt',
    sep='\t',
    header=None,
    names=['flight', 'icecream', 'game', 'type']
)

# Extract the three numeric feature columns as an ndarray.
feature_columns = ['flight', 'icecream', 'game']
df_value = df[feature_columns].values
df_value = np.array(df_value)

# Hold out 25% of the samples for testing (test_size=0.25).
split = train_test_split(df_value, df['type'], test_size=0.25)
x_train, x_test, y_train, y_test = split

# Standardize features: subtract the mean, divide by the standard
# deviation (X = (x - mean) / std). The scaler's statistics come
# from the training split and are reused on the test split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# coding=utf8
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Widen pandas console output so wide frames print on one line, and
# align ambiguous/East-Asian-width characters correctly.
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 500)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

# Tab-separated dating data set: three numeric features plus a class label.
df = pd.read_csv(
    r'E:\Python  machine learning \csv\datingTestSet.txt',
    sep='\t',
    header=None,
    names=['flight', 'icecream', 'game', 'type']
)

# Feature matrix; DataFrame.values already returns a numpy ndarray.
df_value = df[['flight', 'icecream', 'game']].values

# test_size=0.25 keeps 25% of the rows as the validation/test split.
x_train, x_test, y_train, y_test = train_test_split(
    df_value, df['type'], test_size=0.25
)

# Preprocessing: standardize to zero mean and unit variance,
# i.e. X = (x - mean) / std.
# BUG FIX: the original copy was truncated here, ending in a bare
# `x_train` expression — the fitted scaler was never applied.
# Fit on the training split, transform both splits consistently.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
Grid search
# Use the K-nearest-neighbours classifier as the estimator to tune.
knn = KNeighborsClassifier()

# Candidate hyper-parameter values to search over.
param = {'n_neighbors': [3, 5, 10]}

# Use 2-fold cross validation for each candidate.
cv = 2

# Grid search: evaluate every parameter combination with cross
# validation, then refit the best estimator on the whole training set.
gc = GridSearchCV(knn, param_grid=param, cv=cv)
gc.fit(x_train, y_train)

# FIX: gc_s was previously computed but never used — report it too.
gc_s = gc.score(x_test, y_test)
print(gc_s)                # accuracy of the best model on the held-out test set
print(gc.best_score_)      # best mean cross-validation score
print(gc.best_estimator_)  # estimator refit with the best parameters
print(gc.cv_results_)      # detailed results for every parameter combination
Original site

Copyright notice
This article was written by [Mr. Dongye]. Please include a link to the original when reposting. Thanks.
https://yzsam.com/2021/12/202112291954434531.html