Saturday, December 28, 2019

Machine Learning day 7

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
#random forest regressor on the diabetes dataset
In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn import datasets,metrics
from sklearn.model_selection import train_test_split
In [4]:
diabetes=datasets.load_diabetes()
In [5]:
print(diabetes.DESCR)
.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - Age
      - Sex
      - Body mass index
      - Average blood pressure
      - S1
      - S2
      - S3
      - S4
      - S5
      - S6

Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).

Source URL:
https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html

For more information see:
Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499.
(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)
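The scaling note above is easy to verify; a minimal sanity check (my addition, not a cell from the original notebook), using the already-loaded diabetes object:

# Per the DESCR note, each feature column's sum of squares should be ~1.
print(np.sum(diabetes.data ** 2, axis=0))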
In [6]:
x=diabetes.data
y=diabetes.target
In [7]:
df=pd.DataFrame(x,columns=diabetes.feature_names)
df['target']=y
df.head()
Out[7]:
        age       sex       bmi        bp        s1        s2        s3        s4        s5        s6  target
0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401 -0.002592  0.019908 -0.017646   151.0
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412 -0.039493 -0.068330 -0.092204    75.0
2  0.085299  0.050680  0.044451 -0.005671 -0.045599 -0.034194 -0.032356 -0.002592  0.002864 -0.025930   141.0
3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038  0.034309  0.022692 -0.009362   206.0
4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142 -0.002592 -0.031991 -0.046641   135.0
In [8]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=101)
In [9]:
regressor=RandomForestRegressor(random_state=101)
In [10]:
regressor.fit(x_train,y_train)
C:\Users\AbhishekSingh\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
Out[10]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=10,
                      n_jobs=None, oob_score=False, random_state=101, verbose=0,
                      warm_start=False)
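The FutureWarning appears because n_estimators was left at its old default of 10. One way to silence it (and usually get a stronger forest) is to pass the value explicitly; a sketch, noting that all the outputs below were produced with the 10-tree default:

# Matches the n_estimators=100 default that arrives in scikit-learn 0.22.
regressor = RandomForestRegressor(n_estimators=100, random_state=101)
regressor.fit(x_train, y_train)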
In [11]:
y_prediction=regressor.predict(x_test)
In [12]:
y_prediction
Out[12]:
array([ 74.8,  86.9, 224.5, 117.4, 201.2, 172.7, 245.7, 160.6, 174.6,
       123.6,  73.9, 138.8,  71.7, 222.9, 122.8, 244.7,  98.8, 164.6,
       290.3,  85.1, 182.7, 102.1, 248.1,  82.4, 119.1, 142.3, 180.7,
       150.9, 178.9, 117. , 131.1,  70.8, 164.7, 129. ,  88. , 251.5,
        84.7, 184.8,  74.4, 161.7, 104.5,  88.1, 119.6, 290.6, 267.4,
        75.7, 103.1, 176.6, 150.5, 196.9, 117.9, 139.4, 127.3, 122.9,
        95.9, 169.1,  89.7, 179.3, 126.1, 159.6,  94.6,  87.6,  70.6,
       293.2, 150.3, 115.7, 105.8, 118.2,  69.3, 220.7, 169.3, 224.5,
       300.8, 207.3, 182.6, 251.5, 204.9, 204.3, 264.4,  94.7, 102.5,
       132.2, 127.4,  79.3,  94.2,  89.5, 190.6, 109.2, 163.3])
In [13]:
new_prediction=regressor.predict([[-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204]])
In [14]:
new_prediction
Out[14]:
array([63.4])
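A random forest's prediction is just the mean of its trees' predictions, so we can peek at how much the individual trees disagree on this new sample. This is an illustrative extra, assuming the regressor fitted above:

# Each fitted tree lives in regressor.estimators_; the forest averages them.
sample = [[-0.001882, -0.044642, -0.051474, -0.026328, -0.008449,
           -0.019163, 0.074412, -0.039493, -0.068330, -0.092204]]
tree_preds = [tree.predict(sample)[0] for tree in regressor.estimators_]
print(np.mean(tree_preds), np.std(tree_preds))  # mean should equal 63.4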
In [15]:
metrics.mean_squared_error(y_test,y_prediction)
Out[15]:
3523.512359550562
In [16]:
np.sqrt(metrics.mean_squared_error(y_test,y_prediction))
Out[16]:
59.3591809204824
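MSE and RMSE are scale-dependent, so it can help to look at a couple of complementary metrics as well; both are available in sklearn.metrics:

# Mean absolute error: average magnitude of the prediction error.
print(metrics.mean_absolute_error(y_test, y_prediction))
# R^2: fraction of target variance explained (1.0 is a perfect fit).
print(metrics.r2_score(y_test, y_prediction))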
In [17]:
#now we will use random forest as a classifier
In [18]:
from sklearn.ensemble import RandomForestClassifier
In [19]:
iris=datasets.load_iris()
In [20]:
x1=iris.data
y1=iris.target
In [23]:
df2=pd.DataFrame(x1,columns=iris.feature_names)
df2['target']=y1
df2.head()
Out[23]:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  target
0                5.1               3.5                1.4               0.2       0
1                4.9               3.0                1.4               0.2       0
2                4.7               3.2                1.3               0.2       0
3                4.6               3.1                1.5               0.2       0
4                5.0               3.6                1.4               0.2       0
In [24]:
x1_train,x1_test,y1_train,y1_test=train_test_split(x1,y1,test_size=0.20,random_state=101)
In [25]:
clf=RandomForestClassifier()
In [26]:
clf.fit(x1_train,y1_train)
C:\Users\AbhishekSingh\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
Out[26]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
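Same FutureWarning as with the regressor, and note that no random_state was passed this time, so the accuracy and confusion matrix below may vary slightly between runs. A reproducible, warning-free variant (the outputs below used the defaults):

# Fix the tree count and the seed so results are repeatable.
clf = RandomForestClassifier(n_estimators=100, random_state=101)
clf.fit(x1_train, y1_train)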
In [27]:
y_pred=clf.predict(x1_test)
In [28]:
metrics.accuracy_score(y1_test,y_pred)
Out[28]:
0.9333333333333333
In [29]:
metrics.confusion_matrix(y1_test,y_pred)
Out[29]:
array([[10,  0,  0],
       [ 0, 12,  0],
       [ 0,  2,  6]], dtype=int64)
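Reading the matrix: rows are true classes and columns are predicted classes, so all 10 setosa and 12 versicolor test samples were classified correctly, while 2 virginica samples were mislabelled as versicolor. For per-class precision and recall, classification_report does the bookkeeping for us:

# Per-class precision, recall and F1 on the iris test split.
print(metrics.classification_report(y1_test, y_pred,
                                    target_names=iris.target_names))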
In [30]:
y_pred
Out[30]:
array([0, 0, 0, 1, 1, 2, 1, 1, 2, 0, 2, 0, 0, 2, 2, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 2, 0, 0])
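Random forests also report how much each feature contributed to the splits via the feature_importances_ attribute; a short look at the fitted classifier:

# Impurity-based importance of each iris feature (scores sum to 1).
for name, score in zip(iris.feature_names, clf.feature_importances_):
    print(name, round(score, 3))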