In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
df=pd.read_csv('USA_Housing.csv')
df
Out[2]:
In [4]:
df.info()
In [5]:
df.drop("Address",axis=1,inplace=True)
In [7]:
df.head()
Out[7]:
In [8]:
#now we will find correlation between different columns
df.corr()
Out[8]:
In [9]:
sns.heatmap(df.corr())
Out[9]:
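In [ ]:
#a small optional tweak (a sketch, not from the original notebook): annotating the heatmap with the
#actual correlation values makes it easier to judge which columns add little information
plt.figure(figsize=(8,6))
sns.heatmap(df.corr(),annot=True,cmap='coolwarm')
plt.show()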
In [11]:
df.drop('Avg. Area Number of Bedrooms',axis=1,inplace=True)
In [12]:
df.head()
Out[12]:
In [13]:
df.columns
Out[13]:
In [16]:
x=df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
'Area Population']]
y=df[['Price']]
In [17]:
#only the last expression in a cell is displayed, so the output below shows y.head()
x.head()
y.head()
Out[17]:
In [19]:
from sklearn.model_selection import train_test_split
In [20]:
#train_test_split is a helper function for splitting the data into training and test sets
In [22]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=42)
In [23]:
#we need to divide the data into a training set and a test set. test_size=0.30 means 30% of the rows
#are kept aside for the test set
#random_state=42 fixes the random seed, so the same split is produced every time the cell is run
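In [ ]:
#quick sanity check (a sketch, not from the original notebook): with test_size=0.30 roughly 70% of
#the rows should end up in the training set and 30% in the test set
print(x_train.shape,x_test.shape)
print(y_train.shape,y_test.shape)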
In [24]:
x_train.info()
In [25]:
y_train.info()
In [26]:
y_train
Out[26]:
In [27]:
#re-running the split with the same random_state gives back exactly the same rows, as the y_train below confirms
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=42)
In [28]:
y_train
Out[28]:
In [29]:
from sklearn.linear_model import LinearRegression
In [30]:
model=LinearRegression()
In [31]:
model.fit(x_train,y_train)
Out[31]:
In [32]:
y_predict=model.predict(x_test)
In [33]:
y_predict
Out[33]:
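In [ ]:
#a short evaluation sketch (not from the original notebook): compare the predictions against the
#held-out prices with two standard regression metrics
from sklearn.metrics import r2_score,mean_squared_error
print(r2_score(y_test,y_predict))
print(mean_squared_error(y_test,y_predict))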
In [34]:
model.coef_
Out[34]:
In [35]:
model.intercept_
Out[35]:
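In [ ]:
#a small sketch (not from the original notebook) pairing each coefficient with its feature name,
#so the model can be read as Price = intercept + sum(coefficient * feature); assumes x.columns is
#still in the order used for fitting
pd.DataFrame({'coefficient':model.coef_.ravel()},index=x.columns)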
In [47]:
#Now we will do logistic regression. It predicts the probability that an observation belongs to a class
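In [ ]:
#illustration only (not from the original notebook): logistic regression passes a linear combination
#of the features through the sigmoid function, so every output lands between 0 and 1 and can be read
#as a probability
def sigmoid(z):
    return 1/(1+np.exp(-z))
sigmoid(np.array([-4.0,-1.0,0.0,1.0,4.0]))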
In [ ]:
In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
df=pd.read_csv('titanic_train.csv')
In [3]:
df
Out[3]:
In [4]:
df.info()
In [5]:
#now we have to convert the object (text) columns into numbers and fill the missing values
In [6]:
df['Survived'].value_counts()
Out[6]:
In [7]:
df.drop(['PassengerId','Name','Ticket'],axis=1,inplace=True)
In [8]:
df.head()
Out[8]:
In [9]:
df.isnull()
Out[9]:
In [10]:
sns.heatmap(df.isnull())
Out[10]:
In [11]:
df['Cabin'].isnull().value_counts()
Out[11]:
In [12]:
#the command above shows that 687 rows have no Cabin value, so the column is dropped next
In [13]:
df.drop('Cabin',axis=1,inplace=True)
In [14]:
df.info()
In [15]:
# we will make boxplots to see how Age varies with the other columns, so we can pick sensible values for the missing ages
In [16]:
sns.boxplot(x='Sex',y='Age',data=df)
Out[16]:
In [17]:
sns.boxplot(x='Embarked',y='Age',data=df)
Out[17]:
In [18]:
sns.boxplot(x='Pclass',y='Age',data=df)
Out[18]:
In [19]:
#now we will get the mean of the ages
In [20]:
df['Age'].mean()
Out[20]:
In [21]:
df[df['Pclass']==1]['Age'].mean()
Out[21]:
In [22]:
df[df['Pclass']==2]['Age'].mean()
Out[22]:
In [23]:
df[df['Pclass']==3]['Age'].mean()
Out[23]:
In [24]:
#now we will fill the null values in the Age column, using the per-class means found above
In [25]:
def imput_age(cols):
    #fill a missing age with the approximate mean age of the passenger's class
    age=cols['Age']
    pclass=cols['Pclass']
    if pd.isnull(age):
        if pclass==1:
            return 38
        elif pclass==2:
            return 29
        else:
            return 25
    else:
        return age
In [26]:
df['Age']=df[['Age','Pclass']].apply(imput_age,axis=1)
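In [ ]:
#a more concise alternative (a sketch, not from the original notebook): fill the missing ages with
#the mean age of each passenger class computed directly from the data
#df['Age']=df['Age'].fillna(df.groupby('Pclass')['Age'].transform('mean'))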
In [27]:
sns.heatmap(df.isnull())
Out[27]:
In [28]:
df.head()
Out[28]:
In [29]:
#the model needs numeric input, so we convert the Sex and Embarked columns into numbers
In [30]:
df['Sex'].value_counts()
Out[30]:
In [31]:
gen={'male':0,'female':1}
df['Sex']=df['Sex'].map(gen)
In [32]:
df.head()
Out[32]:
In [33]:
port={'S':0,'C':1,'Q':2}
In [34]:
df['Embarked']=df['Embarked'].map(port)
In [35]:
df.head()
Out[35]:
In [36]:
df.columns
Out[36]:
In [37]:
x=df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
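In [ ]:
#note: the feature set above leaves Embarked out. The standard titanic_train.csv usually has a couple
#of missing Embarked values, which the map above leaves as NaN; if you want to add Embarked as a
#feature, fill those first (a sketch, assuming 0 i.e. 'S' is the most common port)
#df['Embarked']=df['Embarked'].fillna(0)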
In [38]:
x.head()
Out[38]:
In [39]:
y=df['Survived']
In [40]:
y.head()
Out[40]:
In [41]:
from sklearn.model_selection import train_test_split
In [42]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=101)
In [43]:
from sklearn.linear_model import LogisticRegression
In [44]:
model=LogisticRegression()
In [45]:
model.fit(x_train,y_train)
Out[45]:
In [46]:
y_predict=model.predict(x_test)
In [47]:
print(y_predict)
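In [ ]:
#since logistic regression works with probabilities (see the earlier note), predict_proba exposes them
#directly; each row holds P(class 0) and P(class 1) - a sketch, not from the original notebook
model.predict_proba(x_test)[:5]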
In [48]:
from sklearn import metrics
In [49]:
metrics.accuracy_score(y_test,y_predict)
Out[49]:
In [50]:
from sklearn.metrics import confusion_matrix
In [51]:
confusion_matrix(y_test,y_predict)
Out[51]:
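In [ ]:
#a short follow-up sketch (not from the original notebook): classification_report summarises
#precision, recall and f1-score per class, complementing the confusion matrix above
from sklearn.metrics import classification_report
print(classification_report(y_test,y_predict))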
In [ ]: