import collections
import itertools
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn import svm
from sklearn.model_selection import GridSearchCV, train_test_split
def read_libsvm_format(url):
    """Load a densely written LIBSVM-format file into a float DataFrame.

    Every line is expected to look like
    ``<target> <index>:<value> <index>:<value> ...`` with fields separated
    by single spaces.  The ``<index>`` part of each feature is discarded and
    only the ``<value>`` is kept, so a feature is identified by its position
    on the line, not by its LIBSVM index.

    Args:
        url: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object).

    Returns:
        A ``DataFrame`` of floats with one column per feature followed by a
        final ``'target'`` column holding the first field of each line.
        Rows with any missing field are dropped.
    """
    df = pd.read_csv(url, sep=' ', header=None)
    # Columns that are entirely empty (e.g. created by a trailing space on
    # every line) carry no data.
    df = df.dropna(axis=1, how='all')
    # Rows with any missing field cannot be parsed cell by cell below.
    df = df.dropna()
    target = df.iloc[:, 0].values
    # Keep only the part after the colon of every "index:value" cell.
    # Series.map is applied column by column because DataFrame.applymap is
    # deprecated in recent pandas releases.
    df = df.iloc[:, 1:].apply(
        lambda column: column.map(lambda cell: cell.split(':')[1])
    )
    df['target'] = target
    df = df.astype(float)
    return df
def report(reg, model):
    """Print the best score and the best parameter set of a fitted search.

    Args:
        reg: Object exposing ``best_score_`` and ``best_params_``.
        model: Human-readable model name inserted into both printed lines.
    """
    print('Best R^2 score of {} models: {}'.format(model, reg.best_score_))
    print('Parameters of the best {} model: {}'.format(model, reg.best_params_))
def regression(X, y, regressor, grid_params, name):
    """Grid-search ``regressor`` over ``grid_params`` and print the outcome.

    Args:
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Target values passed to ``GridSearchCV.fit``.
        regressor: Estimator to tune.
        grid_params: Parameter grid (dict or list of dicts) to search.
        name: Label used in the printed report.
    """
    search = GridSearchCV(
        estimator=regressor,
        param_grid=grid_params,
        cv=5,  # 5-fold cross-validation
        verbose=0,
        n_jobs=-1,  # use every available core
    )
    search.fit(X, y)
    report(search, name)
#breast = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/breast-cancer_scale' # class labels: 2, 4
#df = read_libsvm_format(breast)
#leukemia = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/leu.bz2' # class labels: -1, 1
#df = read_libsvm_format(leukemia)
# Regression data set used for both grid searches below.
body = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/bodyfat_scale'
df = read_libsvm_format(body)
# read_libsvm_format appends the target as the last column; every column
# before it is a feature.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Two SVR grids: ``coef0`` is searched only for the poly and sigmoid kernels.
svr_param_grid = [
    {
        'C': [1, 10, 100],
        'coef0': np.linspace(0, 1, 2),
        'epsilon': np.linspace(0, 1, 3),
        'kernel': ['poly', 'sigmoid'],
    },
    {
        'C': [1, 10, 100],
        'epsilon': np.linspace(0, 1, 3),
        'kernel': ['linear', 'rbf'],
    },
]
svr = svm.SVR(gamma='scale')
regression(X, y, svr, svr_param_grid, 'SVR')

# ``normalize`` is intentionally not searched: the parameter was removed from
# LinearRegression in scikit-learn 1.2, and listing it makes the grid search
# fail with an invalid-parameter error on current releases.
lr_param_grid = [
    {'fit_intercept': [True, False], 'copy_X': [True, False]},
]
lr = LinearRegression()
regression(X, y, lr, lr_param_grid, 'Linear Regression')