Saturday, December 28, 2019

Machine Learning day 7

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
#random forest regressor on the diabetes dataset
In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn import datasets,metrics
from sklearn.model_selection import train_test_split
In [4]:
diabetes=datasets.load_diabetes()
In [5]:
print(diabetes.DESCR)
.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - Age
      - Sex
      - Body mass index
      - Average blood pressure
      - S1
      - S2
      - S3
      - S4
      - S5
      - S6

Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).

Source URL:
https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html

For more information see:
Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499.
(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)
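The scaling note above is easy to verify; a minimal sanity check (my addition, not a cell from the original notebook), using the already-loaded diabetes object:

# Per the DESCR note, each feature column's sum of squares should be ~1.
print(np.sum(diabetes.data ** 2, axis=0))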
In [6]:
x=diabetes.data
y=diabetes.target
In [7]:
df=pd.DataFrame(x,columns=diabetes.feature_names)
df['target']=y
df.head()
Out[7]:
        age       sex       bmi        bp        s1        s2        s3        s4        s5        s6  target
0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401 -0.002592  0.019908 -0.017646   151.0
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412 -0.039493 -0.068330 -0.092204    75.0
2  0.085299  0.050680  0.044451 -0.005671 -0.045599 -0.034194 -0.032356 -0.002592  0.002864 -0.025930   141.0
3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038  0.034309  0.022692 -0.009362   206.0
4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142 -0.002592 -0.031991 -0.046641   135.0
In [8]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=101)
In [9]:
regressor=RandomForestRegressor(random_state=101)
In [10]:
regressor.fit(x_train,y_train)
C:\Users\AbhishekSingh\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
Out[10]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=10,
                      n_jobs=None, oob_score=False, random_state=101, verbose=0,
                      warm_start=False)
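The FutureWarning appears because n_estimators was left at its old default of 10. One way to silence it (and usually get a stronger forest) is to pass the value explicitly; a sketch, noting that all the outputs below were produced with the 10-tree default:

# Matches the n_estimators=100 default that arrives in scikit-learn 0.22.
regressor = RandomForestRegressor(n_estimators=100, random_state=101)
regressor.fit(x_train, y_train)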
In [11]:
y_prediction=regressor.predict(x_test)
In [12]:
y_prediction
Out[12]:
array([ 74.8,  86.9, 224.5, 117.4, 201.2, 172.7, 245.7, 160.6, 174.6,
       123.6,  73.9, 138.8,  71.7, 222.9, 122.8, 244.7,  98.8, 164.6,
       290.3,  85.1, 182.7, 102.1, 248.1,  82.4, 119.1, 142.3, 180.7,
       150.9, 178.9, 117. , 131.1,  70.8, 164.7, 129. ,  88. , 251.5,
        84.7, 184.8,  74.4, 161.7, 104.5,  88.1, 119.6, 290.6, 267.4,
        75.7, 103.1, 176.6, 150.5, 196.9, 117.9, 139.4, 127.3, 122.9,
        95.9, 169.1,  89.7, 179.3, 126.1, 159.6,  94.6,  87.6,  70.6,
       293.2, 150.3, 115.7, 105.8, 118.2,  69.3, 220.7, 169.3, 224.5,
       300.8, 207.3, 182.6, 251.5, 204.9, 204.3, 264.4,  94.7, 102.5,
       132.2, 127.4,  79.3,  94.2,  89.5, 190.6, 109.2, 163.3])
In [13]:
new_prediction=regressor.predict([[-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204]])
In [14]:
new_prediction
Out[14]:
array([63.4])
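A random forest's prediction is just the mean of its trees' predictions, so we can peek at how much the individual trees disagree on this new sample. This is an illustrative extra, assuming the regressor fitted above:

# Each fitted tree lives in regressor.estimators_; the forest averages them.
sample = [[-0.001882, -0.044642, -0.051474, -0.026328, -0.008449,
           -0.019163, 0.074412, -0.039493, -0.068330, -0.092204]]
tree_preds = [tree.predict(sample)[0] for tree in regressor.estimators_]
print(np.mean(tree_preds), np.std(tree_preds))  # mean should equal 63.4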
In [15]:
metrics.mean_squared_error(y_test,y_prediction)
Out[15]:
3523.512359550562
In [16]:
np.sqrt(metrics.mean_squared_error(y_test,y_prediction))
Out[16]:
59.3591809204824
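MSE and RMSE are scale-dependent, so it can help to look at a couple of complementary metrics as well; both are available in sklearn.metrics:

# Mean absolute error: average magnitude of the prediction error.
print(metrics.mean_absolute_error(y_test, y_prediction))
# R^2: fraction of target variance explained (1.0 is a perfect fit).
print(metrics.r2_score(y_test, y_prediction))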
In [17]:
#now we will use random forest as a classifier
In [18]:
from sklearn.ensemble import RandomForestClassifier
In [19]:
iris=datasets.load_iris()
In [20]:
x1=iris.data
y1=iris.target
In [23]:
df2=pd.DataFrame(x1,columns=iris.feature_names)
df2['target']=y1
df2.head()
Out[23]:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  target
0                5.1               3.5                1.4               0.2       0
1                4.9               3.0                1.4               0.2       0
2                4.7               3.2                1.3               0.2       0
3                4.6               3.1                1.5               0.2       0
4                5.0               3.6                1.4               0.2       0
In [24]:
x1_train,x1_test,y1_train,y1_test=train_test_split(x1,y1,test_size=0.20,random_state=101)
In [25]:
clf=RandomForestClassifier()
In [26]:
clf.fit(x1_train,y1_train)
C:\Users\AbhishekSingh\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
Out[26]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
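Same FutureWarning as with the regressor, and note that no random_state was passed this time, so the accuracy and confusion matrix below may vary slightly between runs. A reproducible, warning-free variant (the outputs below used the defaults):

# Fix the tree count and the seed so results are repeatable.
clf = RandomForestClassifier(n_estimators=100, random_state=101)
clf.fit(x1_train, y1_train)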
In [27]:
y_pred=clf.predict(x1_test)
In [28]:
metrics.accuracy_score(y1_test,y_pred)
Out[28]:
0.9333333333333333
In [29]:
metrics.confusion_matrix(y1_test,y_pred)
Out[29]:
array([[10,  0,  0],
       [ 0, 12,  0],
       [ 0,  2,  6]], dtype=int64)
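Reading the matrix: rows are true classes and columns are predicted classes, so all 10 setosa and 12 versicolor test samples were classified correctly, while 2 virginica samples were mislabelled as versicolor. For per-class precision and recall, classification_report does the bookkeeping for us:

# Per-class precision, recall and F1 on the iris test split.
print(metrics.classification_report(y1_test, y_pred,
                                    target_names=iris.target_names))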
In [30]:
y_pred
Out[30]:
array([0, 0, 0, 1, 1, 2, 1, 1, 2, 0, 2, 0, 0, 2, 2, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 2, 0, 0])
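Random forests also report how much each feature contributed to the splits via the feature_importances_ attribute; a short look at the fitted classifier:

# Impurity-based importance of each iris feature (scores sum to 1).
for name, score in zip(iris.feature_names, clf.feature_importances_):
    print(name, round(score, 3))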