SVM Model

2023. 5. 9. 17:57

Support Vector Machine

Overview

from sklearn.datasets import make_classification, make_regression
from sklearn.svm import LinearSVC, LinearSVR, NuSVC, NuSVR, SVC, SVR

# Imbalanced three-class toy problem (60% / 30% / 10%) without label noise.
X, y = make_classification(
    n_samples=3000,
    n_features=10,
    n_classes=3,
    n_clusters_per_class=1,
    weights=[.6, .3, .1],
    flip_y=0,
)

# Each assignment replaces the previous one, so only the last estimator (SVC)
# is fitted below; the earlier lines just show the available alternatives.
# LinearSVC has no `kernel` argument. NuSVC and SVC accept
# kernel in {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}.
classifier = LinearSVC()
classifier = NuSVC(nu=.1, probability=True)
classifier = SVC(probability=True)

classifier.fit(X, y)
classifier.predict(X)
# classifier.predict_proba(X)  # class probabilities (needs probability=True)

# Noise-free regression toy problem: 5 of the 10 features are informative.
X, y = make_regression(
    n_samples=3000,
    n_features=10,
    n_informative=5,
    n_targets=1,
    bias=0.0,
    effective_rank=None,
    tail_strength=0.5,
    noise=0.0,
    shuffle=True,
    coef=False,
    random_state=None,
)

# As with the classifiers, only the last assignment (SVR) is actually used;
# the earlier lines list the alternative regressors.
regressor = LinearSVR()
regressor = NuSVR(nu=.1)
regressor = SVR()

regressor.fit(X, y)
regressor.predict(X)

Linear SVM

C-SVM

Task: Classification

Validation: SVC: binary classification

import joblib
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.preprocessing import PowerTransformer, QuantileTransformer, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import cross_val_score, cross_validate, GridSearchCV, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.metrics import roc_curve, precision_recall_curve, auc
from sklearn.svm import SVC


# Binary toy problem with a 60% / 40% class split and no label noise.
X, y = make_classification(
    n_samples=3000,
    n_features=10,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.6, 0.4],
    flip_y=0,
)

# Cross-validation scheme. random_state=None means the splits are not
# reproducible between runs; pass an int to fix them.
# Non-repeated alternative: StratifiedKFold(n_splits=5, shuffle=True, random_state=None)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=None)

# Scorer names that can be passed to `scoring`, kept for reference:
# binary_class_scoring = ['accuracy', 'balanced_accuracy', 'recall', 'average_precision', 'precision', 'f1', 'jaccard', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo']
# multi_class_scoring = ['accuracy', 'balanced_accuracy', 'recall_micro', 'recall_macro', 'recall_weighted', 'precision_micro', 'precision_macro', 'precision_weighted', 'f1_micro', 'f1_macro', 'f1_weighted', 'jaccard_micro', 'jaccard_macro', 'jaccard_weighted', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted']

# make_pipeline names the final step 'svc', hence the 'svc__' grid prefixes.
svc_pipeline = make_pipeline(
    PowerTransformer(method='yeo-johnson', standardize=True),
    Normalizer(),
    SVC(probability=True, gamma='scale', random_state=None),
)

# A single metric is optimised here; 'recall', 'precision' and 'f1' are the
# other candidates. The 'precomputed' kernel is deliberately not searched.
classifier = GridSearchCV(
    estimator=svc_pipeline,
    param_grid={
        'svc__C': [.1, .05, .01],
        'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    },
    scoring='accuracy',
    cv=cv,
    return_train_score=True,
)
classifier.fit(X, y)

# Persist the fitted search and read it back from disk.
joblib.dump(classifier, 'classifier.joblib')
classifier = joblib.load('classifier.joblib')
classifier.cv_results_


# Evaluation
def _cv_result_table(cv_results, suffix):
    """Collect the ``cv_results`` entries whose key ends with ``suffix``.

    Returns a DataFrame with one row per parameter candidate and two-level
    columns ``(suffix, key with '_<suffix>' removed)``, for example
    ``('test_score', 'rank')`` for the key ``'rank_test_score'``.

    Raises:
        ValueError: if no key ends with ``suffix`` (for instance
            ``'train_score'`` when train scores were not requested).
    """
    selected = {key: values for key, values in cv_results.items() if key.endswith(suffix)}
    if not selected:
        raise ValueError(f"cv_results has no keys ending with {suffix!r}")
    table = pd.DataFrame(selected)
    table.columns = pd.MultiIndex.from_tuples(
        [(suffix, key.replace('_' + suffix, '')) for key in selected]
    )
    return table


# NOTE: the '*_train_score' / '*_test_score' key names used below are the ones
# produced by a single-metric search, which is what the grid search above uses.
cv_results = classifier.cv_results_
train_scores = _cv_result_table(cv_results, 'train_score')
test_scores = _cv_result_table(cv_results, 'test_score')
time_scores = _cv_result_table(cv_results, 'time')

scores = pd.concat([train_scores, test_scores, time_scores], axis=1)
# Label every row with its hyper-parameter combination.
scores.index = pd.MultiIndex.from_frame(pd.DataFrame(cv_results['params']))
scores.sort_values(('test_score', 'rank'))

Preprocessing effect: SVC: binary classification

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.preprocessing import PowerTransformer, QuantileTransformer, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler
from sklearn.preprocessing import OneHotEncoder, Binarizer, KBinsDiscretizer, PolynomialFeatures, SplineTransformer

def scoring(classifier, X, y, preprocessor_name, task_type, random_state=None):
    """Cross-validate ``classifier`` and return its fold-averaged results.

    Args:
        classifier: estimator handed to ``cross_validate``.
        X: feature matrix.
        y: target values.
        preprocessor_name: label stored as the ``name`` of the returned Series.
        task_type: ``'binary'``, ``'multi'`` or ``'reg'``; selects the metric
            list and the cross-validation splitter.
        random_state: forwarded to the repeated K-fold splitter.

    Returns:
        pandas.Series holding the mean over all folds of every
        ``cross_validate`` output column, named ``preprocessor_name``.

    Raises:
        ValueError: if ``task_type`` is not one of the supported values.
    """
    metrics_by_task = {
        'binary': ['accuracy', 'balanced_accuracy', 'recall', 'average_precision', 'precision', 'f1', 'jaccard', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo'],
        'multi': ['accuracy', 'balanced_accuracy', 'recall_micro', 'recall_macro', 'recall_weighted', 'precision_micro', 'precision_macro', 'precision_weighted', 'f1_micro', 'f1_macro', 'f1_weighted', 'jaccard_micro', 'jaccard_macro', 'jaccard_weighted', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'],
        'reg': ['r2', 'explained_variance', 'max_error', 'neg_mean_squared_error', 'neg_mean_absolute_error', 'neg_root_mean_squared_error', 'neg_median_absolute_error', 'neg_mean_absolute_percentage_error'],
    }
    # Fail fast with a clear message instead of hitting an unbound local later.
    if task_type not in metrics_by_task:
        raise ValueError(
            f"unknown task_type {task_type!r}; expected one of {sorted(metrics_by_task)}"
        )

    from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold, RepeatedKFold

    # Classification tasks use stratified folds; regression uses plain K-fold.
    splitter = RepeatedKFold if task_type == 'reg' else RepeatedStratifiedKFold
    cv = splitter(n_splits=5, n_repeats=3, random_state=random_state)

    fold_results = cross_validate(
        classifier, X, y,
        cv=cv,
        scoring=metrics_by_task[task_type],
        return_train_score=True,
    )
    scores = pd.DataFrame(fold_results).mean()
    scores.name = preprocessor_name
    return scores

def scoring_summary(scores):
    """Regroup flat score columns under a two-level ``(metric, group)`` index.

    Columns starting with ``'train'`` or ``'test'`` lose that first
    ``_``-separated token and are tagged ``'train'`` / ``'test'``; columns
    ending with ``'time'`` lose their last token and are tagged ``'time'``.
    For example ``'test_f1_macro'`` becomes ``('f1_macro', 'test')`` and
    ``'fit_time'`` becomes ``('fit', 'time')``.

    Args:
        scores: DataFrame with string column labels; it is not modified.

    Returns:
        A new DataFrame with the train, test and time groups concatenated in
        that order. A group with no matching columns is left out.

    Raises:
        ValueError: if no column belongs to any of the three groups.
    """
    # (group tag, membership test, label with the group token stripped)
    column_groups = (
        ('train', lambda c: c.startswith('train'), lambda c: '_'.join(c.split('_')[1:])),
        ('test', lambda c: c.startswith('test'), lambda c: '_'.join(c.split('_')[1:])),
        ('time', lambda c: c.endswith('time'), lambda c: '_'.join(c.split('_')[:-1])),
    )

    parts = []
    for group, belongs, short_label in column_groups:
        selected = [column for column in scores.columns if belongs(column)]
        if not selected:
            # MultiIndex.from_tuples rejects an empty list, so skip the group.
            continue
        part = scores[selected]
        part.columns = pd.MultiIndex.from_tuples(
            [(group, short_label(column)) for column in selected]
        )
        parts.append(part)

    if not parts:
        raise ValueError("scores has no 'train*', 'test*' or '*time' columns to summarize")

    # Put the metric name on the outer level so one metric selects all groups.
    return pd.concat(parts, axis=1).swaplevel(0, 1, axis=1)

# Needed to keep every preprocessor inside the cross-validation loop.
from sklearn.pipeline import make_pipeline

random_state = None
task_type = 'binary'
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, weights=[0.9, 0.1], flip_y=0, random_state=random_state)

params = dict(probability=True, kernel='rbf')

# Preprocessors to compare, keyed by the row label used in the result table.
preprocessors = {
    # transform of measure
    'PCA': PCA(n_components=None),
    'FactorAnalysis': FactorAnalysis(n_components=None, rotation='varimax'),
    # NOTE(review): fitted on a training fold smaller than the default
    # n_quantiles, this step may emit a warning — confirm and set n_quantiles if noisy.
    'QuantileTransformer': QuantileTransformer(output_distribution='normal'),
    'PowerTransform': PowerTransformer(method='yeo-johnson', standardize=True),
    'Normalizer': Normalizer(),
    'StandardScaler': StandardScaler(),
    'MinMaxScaler': MinMaxScaler(),
    'MaxAbsScaler': MaxAbsScaler(),
    'RobustScaler': RobustScaler(),
    'KBinsDiscretizer': KBinsDiscretizer(n_bins=[3] * X.shape[1], encode='ordinal'),
    # transform of sigma-algebra
    'Binarizer': Binarizer(threshold=0),
    'PolynomialFeatures': PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
    'SplineTransformer': SplineTransformer(degree=2, n_knots=3),
}

# baseline: the bare SVC on the untransformed features
scores = [scoring(SVC(**params), X, y, preprocessor_name='baseline', task_type=task_type, random_state=random_state)]

# Each preprocessor is wrapped in a pipeline so it is fitted on the training
# part of every fold only. Transforming X up front would let the held-out
# folds influence the fitted transformer and leak into the test scores.
for preprocessor_name, preprocessor in preprocessors.items():
    model = make_pipeline(preprocessor, SVC(**params))
    scores.append(scoring(model, X, y, preprocessor_name=preprocessor_name, task_type=task_type, random_state=random_state))
scores = pd.concat(scores, axis=1).T

# summary
scores = scoring_summary(scores)
scores[['accuracy', 'recall', 'precision', 'f1']]

One-class SVM

Nu-SVM

Reference

'artificial intelligence > machine learning' 카테고리의 다른 글

K-mean Clustering Model (0)	2023.05.09
Perceptron and Neural Network Model (0)	2023.05.09
K-Nearest and Radius Neighbor Model (0)	2023.05.09
Ensemble Model (0)	2023.05.09
Decision Tree, Random Forest and Extra Tree (0)	2023.05.09

All-Together

SVM Model

Support Vector Machine

Overview

Linear SVM

C-SVM

Task: Classification

One-class SVM

Nu-SVM

Reference

'artificial intelligence > machine learning' 카테고리의 다른 글

+ Recent posts

티스토리툴바