Sanjiv R. Das
%pylab inline
import pandas as pd
from ipypublish import nb_setup
Populating the interactive namespace from numpy and matplotlib
The goal of the SVM is to map a set of entities, each described by an input vector $X=\{x_1,x_2,\ldots,x_n\}$ of dimension $n$, i.e., $X \in R^n$, into one of $m$ categories $Y=\{y_1,y_2,\ldots,y_m\}$, such that the $n$-dimensional $X$-space is divided by hyperplanes that achieve maximal separation between the classes in $Y$. A hyperplane is the set of points ${\bf x}$ satisfying the equation
$$ {\bf w} \cdot {\bf x} = b $$where $b$ is a scalar constant, and ${\bf w} \in R^n$ is the normal vector to the hyperplane, i.e., the vector at right angles to the plane. The distance between this hyperplane and ${\bf w} \cdot {\bf x} = 0$ is given by $b/||{\bf w}||$, where $||{\bf w}||$ is the norm of vector ${\bf w}$.
nb_setup.images_hconcat(["DSTMAA_images/svm.png"], width=600)
$H_3$ is the best separating hyperplane.
#Example of hyperplane geometry
w1 = 1; w2 = 2
b1 = 10
#Plot hyperplane in x1, x2 space
x1 = linspace(-3,3,100)
x2 = (b1-w1*x1)/w2
plot(x1,x2)
#Create hyperplane 2
b2 = 8
x2 = (b2-w1*x1)/w2
plot(x1,x2)
grid()
#Compute distance to hyperplane 2
print('Distance between two hyperplanes =',abs(b1-b2)/sqrt(w1**2+w2**2))
Distance between two hyperplanes = 0.8944271909999159
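The same geometry gives the distance from an arbitrary point ${\bf x}_0$ to the hyperplane, $|{\bf w} \cdot {\bf x}_0 - b|/||{\bf w}||$, which reduces to $b/||{\bf w}||$ when ${\bf x}_0$ is the origin. A quick numerical check of this formula (a sketch using plain NumPy rather than the pylab namespace, with the same $w$ and $b$ as above):

```python
import numpy as np

#Hyperplane w.x = b with w = (1, 2), b = 10, as in the plot above
w = np.array([1.0, 2.0])
b = 10.0

#Distance from a point x0 to the hyperplane: |w.x0 - b| / ||w||
x0 = np.array([0.0, 0.0])
d = abs(w @ x0 - b) / np.linalg.norm(w)
print(d)   #for the origin this equals b/||w|| = 10/sqrt(5)
```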
#PREDICTION ON TEST DATA
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import confusion_matrix
ncaa = pd.read_csv("DSTMAA_data/ncaa.txt",sep='\t')
yy = append(list(ones(32)), list(zeros(32)))
ncaa["y"] = yy
ncaa.head()
|   | No NAME | GMS | PTS | REB | AST | TO | A/T | STL | BLK | PF | FG | FT | 3P | y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1. NorthCarolina | 6 | 84.2 | 41.5 | 17.8 | 12.8 | 1.39 | 6.7 | 3.8 | 16.7 | 0.514 | 0.664 | 0.417 | 1.0 |
| 1 | 2. Illinois | 6 | 74.5 | 34.0 | 19.0 | 10.2 | 1.87 | 8.0 | 1.7 | 16.5 | 0.457 | 0.753 | 0.361 | 1.0 |
| 2 | 3. Louisville | 5 | 77.4 | 35.4 | 13.6 | 11.0 | 1.24 | 5.4 | 4.2 | 16.6 | 0.479 | 0.702 | 0.376 | 1.0 |
| 3 | 4. MichiganState | 5 | 80.8 | 37.8 | 13.0 | 12.6 | 1.03 | 8.4 | 2.4 | 19.8 | 0.445 | 0.783 | 0.329 | 1.0 |
| 4 | 5. Arizona | 4 | 79.8 | 35.0 | 15.8 | 14.5 | 1.09 | 6.0 | 6.5 | 13.3 | 0.542 | 0.759 | 0.397 | 1.0 |
#CREATE FEATURES
y = ncaa['y']
X = ncaa.iloc[:,2:13]
X.head()
|   | PTS | REB | AST | TO | A/T | STL | BLK | PF | FG | FT | 3P |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 84.2 | 41.5 | 17.8 | 12.8 | 1.39 | 6.7 | 3.8 | 16.7 | 0.514 | 0.664 | 0.417 |
| 1 | 74.5 | 34.0 | 19.0 | 10.2 | 1.87 | 8.0 | 1.7 | 16.5 | 0.457 | 0.753 | 0.361 |
| 2 | 77.4 | 35.4 | 13.6 | 11.0 | 1.24 | 5.4 | 4.2 | 16.6 | 0.479 | 0.702 | 0.376 |
| 3 | 80.8 | 37.8 | 13.0 | 12.6 | 1.03 | 8.4 | 2.4 | 19.8 | 0.445 | 0.783 | 0.329 |
| 4 | 79.8 | 35.0 | 15.8 | 14.5 | 1.09 | 6.0 | 6.5 | 13.3 | 0.542 | 0.759 | 0.397 |
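Because the SVM works with distances (or kernel values) in feature space, features on very different scales can dominate the fit; here PTS is near 80 while FG is near 0.5. A minimal sketch of standardizing the features with scikit-learn's `StandardScaler` (an optional preprocessing step, not applied in the fit that follows; the small frame below is a stand-in with the same mix of scales as the NCAA features):

```python
import pandas as pd
from sklearn.preprocessing import StandardScaler

#Stand-in frame mixing large-scale and small-scale features
Xdemo = pd.DataFrame({'PTS': [84.2, 74.5, 77.4], 'FG': [0.514, 0.457, 0.479]})

#Standardize each column to mean 0, standard deviation 1
Xs = StandardScaler().fit_transform(Xdemo)
print(Xs.mean(axis=0), Xs.std(axis=0))   #approximately [0, 0] and [1, 1]
```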
#FIT MODEL
from sklearn import svm
model = svm.SVC(gamma='scale')   #set gamma explicitly to avoid the FutureWarning about the default change in scikit-learn 0.22
model.fit(X,y)
ypred = model.predict(X)
#CONFUSION MATRIX
cm = confusion_matrix(y, ypred)
cm
array([[32,  0],
       [ 0, 32]])
#ACCURACY
accuracy_score(y,ypred)
1.0
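An in-sample accuracy of 1.0 says little about generalization, since the model is scored on the same data it was fit on. One way to estimate out-of-sample performance is cross-validation; a sketch using `cross_val_score` on synthetic two-class data of the same shape as the NCAA set (the fold scores for the real data would depend on the split):

```python
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification

#Synthetic 64-observation, 11-feature, two-class data set
Xc, yc = make_classification(n_samples=64, n_features=11, random_state=42)

#Mean accuracy over 5 held-out folds
scores = cross_val_score(svm.SVC(gamma='scale'), Xc, yc, cv=5)
print(scores.mean())
```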
#CLASSIFICATION REPORT
print(classification_report(y, ypred))
                  precision    recall  f1-score   support

             0.0       1.00      1.00      1.00        32
             1.0       1.00      1.00      1.00        32

       micro avg       1.00      1.00      1.00        64
       macro avg       1.00      1.00      1.00        64
    weighted avg       1.00      1.00      1.00        64