In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
# K-Nearest Neighbors (KNN)
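KNN predicts the class of a query point by majority vote among its k nearest training points. Before reaching for scikit-learn, here is a minimal NumPy sketch of that decision rule (illustrative only; the function and variable names are my own):

import numpy as np
from collections import Counter

def knn_predict(X_train, y_train, x_query, k=5):
    # Euclidean distance from the query point to every training point
    dists = np.sqrt(((X_train - x_query) ** 2).sum(axis=1))
    # indices of the k closest training points
    nearest = np.argsort(dists)[:k]
    # majority vote among their labels
    return Counter(y_train[nearest].tolist()).most_common(1)[0][0]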
In [3]:
from sklearn import datasets
In [4]:
iris=datasets.load_iris()
In [5]:
print(iris.DESCR)
.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

    :Summary Statistics:

    ============== ==== ==== ======= ===== ====================
                    Min  Max   Mean    SD   Class Correlation
    ============== ==== ==== ======= ===== ====================
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)
    ============== ==== ==== ======= ===== ====================

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.

This is perhaps the best known database to be found in the pattern recognition
literature. Fisher's paper is a classic in the field and is referenced
frequently to this day. (See Duda & Hart, for example.) The data set contains
3 classes of 50 instances each, where each class refers to a type of iris
plant. One class is linearly separable from the other 2; the latter are NOT
linearly separable from each other.

.. topic:: References

    - Fisher, R.A. "The use of multiple measurements in taxonomic problems"
      Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
      Mathematical Statistics" (John Wiley, NY, 1950).
    - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene
      Analysis. (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
    - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
      Structure and Classification Rule for Recognition in Partially Exposed
      Environments". IEEE Transactions on Pattern Analysis and Machine
      Intelligence, Vol. PAMI-2, No. 1, 67-71.
    - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions
      on Information Theory, May 1972, 431-433.
    - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al.'s AUTOCLASS II
      conceptual clustering system finds 3 classes in the data.
    - Many, many more ...
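Since pandas is already imported, a quick way to eyeball the raw features before modelling (a small illustrative addition, not part of the original run):

df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
print(df.head())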
In [6]:
x=iris.data
In [8]:
y=iris.target
In [9]:
from sklearn.neighbors import KNeighborsClassifier
In [13]:
clf=KNeighborsClassifier()
In [14]:
clf.fit(x,y)
Out[14]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
In [15]:
prediction = clf.predict([[1, 4, 6, 2]])  # sepal length, sepal width, petal length, petal width
print(prediction)
[1]
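To see which training samples produced that vote, the fitted classifier exposes a kneighbors method (illustrative):

dist, idx = clf.kneighbors([[1, 4, 6, 2]])
print(idx)     # positions of the 5 nearest training points
print(y[idx])  # their labels; the majority class is the prediction above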
In [ ]:
In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
from sklearn import datasets
In [3]:
iris=datasets.load_iris()
In [4]:
print(iris.DESCR)
(output identical to the Iris dataset description printed above)
In [5]:
x=iris.data
y=iris.target
In [6]:
from sklearn.model_selection import train_test_split
In [12]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=42)
In [13]:
from sklearn.neighbors import KNeighborsClassifier
In [14]:
clf=KNeighborsClassifier()
In [15]:
clf.fit(x_train,y_train)
Out[15]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
In [16]:
y_preds=clf.predict(x_test)
In [17]:
print(y_preds)
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
In [18]:
from sklearn import metrics
In [19]:
metrics.accuracy_score(y_test,y_preds)
Out[19]:
1.0
In [20]:
from sklearn.metrics import confusion_matrix
In [21]:
confusion_matrix(y_test,y_preds)
Out[21]:
array([[19,  0,  0],
       [ 0, 13,  0],
       [ 0,  0, 13]], dtype=int64)
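For per-class precision and recall alongside the confusion matrix, scikit-learn's classification_report is handy:

from sklearn.metrics import classification_report
print(classification_report(y_test, y_preds, target_names=iris.target_names))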
In [22]:
# In the next example we will also specify how many neighbors to use
In [ ]:
In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
from sklearn import datasets
In [3]:
wine=datasets.load_wine()
In [4]:
print(wine.DESCR)
.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
        - Alcohol
        - Malic acid
        - Ash
        - Alcalinity of ash
        - Magnesium
        - Total phenols
        - Flavanoids
        - Nonflavanoid phenols
        - Proanthocyanins
        - Color intensity
        - Hue
        - OD280/OD315 of diluted wines
        - Proline
        - class:
            - class_0
            - class_1
            - class_2

    :Summary Statistics:

    ============================= ==== ===== ======= =====
                                   Min   Max   Mean    SD
    ============================= ==== ===== ======= =====
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0.98  3.88    2.29  0.63
    Flavanoids:                   0.34  5.08    2.03  1.00
    Nonflavanoid Phenols:         0.13  0.66    0.36  0.12
    Proanthocyanins:              0.41  3.58    1.59  0.57
    Colour Intensity:              1.3  13.0     5.1   2.3
    Hue:                          0.48  1.71    0.96  0.23
    OD280/OD315 of diluted wines: 1.27  4.00    2.61  0.71
    Proline:                       278  1680     746   315
    ============================= ==== ===== ======= =====

    :Missing Attribute Values: None
    :Class Distribution: class_0 (59), class_1 (71), class_2 (48)
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :Date: July, 1988

This is a copy of the UCI ML Wine recognition datasets.
https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data

The data is the result of a chemical analysis of wines grown in the same
region in Italy by three different cultivators. There are thirteen different
measurements taken for different constituents found in the three types of
wine.

Original Owners:

Forina, M. et al, PARVUS - An Extendible Package for Data Exploration,
Classification and Correlation. Institute of Pharmaceutical and Food Analysis
and Technologies, Via Brigata Salerno, 16147 Genoa, Italy.

Citation:

Lichman, M. (2013). UCI Machine Learning Repository
[https://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.

.. topic:: References

    (1) S. Aeberhard, D. Coomans and O. de Vel,
    "Comparison of Classifiers in High Dimensional Settings",
    Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
    Mathematics and Statistics, James Cook University of North Queensland.
    (Also submitted to Technometrics.)

    The data was used with many others for comparing various classifiers.
    The classes are separable, though only RDA has achieved 100% correct
    classification.
    (RDA: 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
    (All results using the leave-one-out technique.)

    (2) S. Aeberhard, D. Coomans and O. de Vel,
    "THE CLASSIFICATION PERFORMANCE OF RDA",
    Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
    Mathematics and Statistics, James Cook University of North Queensland.
    (Also submitted to Journal of Chemometrics.)
In [5]:
x=wine.data
y=wine.target
In [6]:
from sklearn.model_selection import train_test_split
In [7]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=42,stratify=y)
In [8]:
from sklearn.neighbors import KNeighborsClassifier
In [9]:
clf=KNeighborsClassifier(n_neighbors=3)
clf.fit(x_train,y_train)
Out[9]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
In [10]:
y_predict=clf.predict(x_test)
In [11]:
from sklearn import metrics
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.6851851851851852
In [12]:
clf=KNeighborsClassifier(n_neighbors=5)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.7222222222222222
In [13]:
clf=KNeighborsClassifier(n_neighbors=7)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.7407407407407407
In [14]:
clf=KNeighborsClassifier(n_neighbors=9)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.7222222222222222
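The four cells above can be collapsed into a single loop over k (same data and models, just a tidier way to run the sweep):

for k in [3, 5, 7, 9]:
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(x_train, y_train)
    print("k =", k, "Accuracy =",
          metrics.accuracy_score(y_test, clf.predict(x_test)))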
In [15]:
x
Out[15]:
array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])
In [16]:
# Looking at the output above, the features span very different ranges, so
# distance calculations (and any plot) will be dominated by the large-valued
# columns. We therefore standardize each feature using the formula
# (Xi - Xmean) / (standard deviation of the feature), where Xi is a value in
# the column/feature, Xmean is the mean of all values in that column/feature,
# and the denominator is that column's standard deviation.
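That formula can be checked by hand with NumPy (a small illustrative verification, assuming x still holds the raw wine features loaded above):

# column-wise standardization: (Xi - Xmean) / std for each feature
x_manual = (x - x.mean(axis=0)) / x.std(axis=0)
# after the StandardScaler cell below runs, np.allclose(x_manual, x_scaled)
# should be True (StandardScaler also uses the population standard deviation)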
In [17]:
from sklearn.preprocessing
import StandardScaler
In [18]:
scaler=StandardScaler()
x_scaled=scaler.fit_transform(x)
x_scaled
Out[18]:
array([[ 1.51861254, -0.5622498 ,  0.23205254, ...,  0.36217728,
         1.84791957,  1.01300893],
       [ 0.24628963, -0.49941338, -0.82799632, ...,  0.40605066,
         1.1134493 ,  0.96524152],
       [ 0.19687903,  0.02123125,  1.10933436, ...,  0.31830389,
         0.78858745,  1.39514818],
       ...,
       [ 0.33275817,  1.74474449, -0.38935541, ..., -1.61212515,
        -1.48544548,  0.28057537],
       [ 0.20923168,  0.22769377,  0.01273209, ..., -1.56825176,
        -1.40069891,  0.29649784],
       [ 1.39508604,  1.58316512,  1.36520822, ..., -1.52437837,
        -1.42894777, -0.59516041]])
In [19]:
x_train,x_test,y_train,y_test=train_test_split(x_scaled,y,test_size=0.30,random_state=42,stratify=y)
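One caveat worth noting: the scaler above was fitted on the full dataset before splitting, so statistics from the test rows leak into training. A leakage-free pattern (a sketch under my own variable names, not what the original notebook does) is to scale inside a pipeline fitted on the training fold only:

from sklearn.pipeline import make_pipeline

# split the *raw* features, then let the pipeline fit the scaler on the
# training fold alone
xr_train, xr_test, yr_train, yr_test = train_test_split(
    x, y, test_size=0.30, random_state=42, stratify=y)
pipe = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=7))
pipe.fit(xr_train, yr_train)
print("Accuracy=", pipe.score(xr_test, yr_test))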
In [20]:
clf=KNeighborsClassifier(n_neighbors=7)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.9444444444444444
In [21]:
clf=KNeighborsClassifier(n_neighbors=9)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.9629629629629629
In [22]:
clf=KNeighborsClassifier(n_neighbors=11)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.9629629629629629
In [24]:
clf=KNeighborsClassifier(n_neighbors=23)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.9814814814814815
In [25]:
from sklearn.model_selection import cross_val_score
In [26]:
neighbors = list(range(1, 50, 2))
cv_scores = []
for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, x_scaled, y, scoring='accuracy')
    cv_scores.append(scores.mean())
C:\Users\AbhishekSingh\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py:1978:
FutureWarning: The default value of cv will change from 3 to 5 in version 0.22.
Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
(the same warning is emitted once per value of k; the repeated copies are omitted)
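As the warning says, passing cv explicitly silences it. For example (note that cv=5 may shift the scores slightly compared with the default of 3 folds used above):

scores = cross_val_score(knn, x_scaled, y, cv=5, scoring='accuracy')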
In [27]:
cv_scores
Out[27]:
[0.9329501915708813,
0.9329501915708813,
0.9438697318007664,
0.9327586206896553,
0.9273946360153257,
0.9440613026819924,
0.9496168582375479,
0.9553639846743295,
0.9551724137931035,
0.9551724137931035,
0.9551724137931035,
0.9551724137931035,
0.9662835249042145,
0.960727969348659,
0.960727969348659,
0.9551724137931035,
0.9440613026819924,
0.9496168582375479,
0.9496168582375479,
0.9496168582375479,
0.9551724137931035,
0.949808429118774,
0.949808429118774,
0.9331417624521072,
0.9331417624521072]
In [29]:
MSE = [1 - x for x in cv_scores]   # misclassification error (1 - accuracy) for each k
MSE
Out[29]:
[0.06704980842911867,
0.06704980842911867,
0.05613026819923361,
0.06724137931034468,
0.07260536398467432,
0.0559386973180076,
0.050383141762452066,
0.04463601532567052,
0.04482758620689653,
0.04482758620689653,
0.04482758620689653,
0.04482758620689653,
0.03371647509578546,
0.039272030651340994,
0.039272030651340994,
0.04482758620689653,
0.0559386973180076,
0.050383141762452066,
0.050383141762452066,
0.050383141762452066,
0.04482758620689653,
0.050191570881226055,
0.050191570881226055,
0.06685823754789277,
0.06685823754789277]
In [34]:
optimal_k = neighbors[MSE.index(min(MSE))]   # k with the lowest cross-validated error
print(optimal_k)
25
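The same search can also be delegated to GridSearchCV, which runs the cross-validation loop internally (a sketch; cv=5 is an assumption here, so the selected k may differ slightly from the manual result above):

from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(KNeighborsClassifier(),
                    param_grid={'n_neighbors': neighbors},
                    scoring='accuracy', cv=5)
grid.fit(x_scaled, y)
print(grid.best_params_)   # best k found by 5-fold cross-validation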
In [35]:
x_train,x_test,y_train,y_test=train_test_split(x_scaled,y,test_size=0.30,random_state=42,stratify=y)
clf=KNeighborsClassifier(n_neighbors=25)
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test)
print("Accuracy=",metrics.accuracy_score(y_test,y_predict))
Accuracy= 0.9814814814814815
In [37]:
plt.plot(neighbors, MSE)
plt.xlabel('Number of neighbors (k)')
plt.ylabel('Error')
plt.show()
In [ ]: