Wednesday, November 20, 2019

Learning Data Science - Day 4

In [1]:
import numpy as np
import pandas as pd
In [3]:
# this is day 4 of data science
In [5]:
df=pd.DataFrame(np.random.rand(5,4),index=["a",'b','c','d','e'],columns=["si.no","Name","Marks","Grade"])
df
Out[5]:
si.noNameMarksGrade
a0.6036900.3930970.9855300.758059
b0.3759730.5922220.6697190.440668
c0.5142340.3922540.3783210.494321
d0.9952590.3122830.9114440.867670
e0.0694780.4105210.4770980.601133
In [6]:
# If you want to print only one column
In [8]:
df['Name']
Out[8]:
a    0.393097
b    0.592222
c    0.392254
d    0.312283
e    0.410521
Name: Name, dtype: float64
In [9]:
#If you want to print 2 columns
In [10]:
df[["si.no","Name"]]
Out[10]:
si.noName
a0.6036900.393097
b0.3759730.592222
c0.5142340.392254
d0.9952590.312283
e0.0694780.410521
In [11]:
# Note the double brackets above: the inner pair is a Python list of column names, so to select more than one column you pass a list inside the indexing brackets
In [12]:
#If you want to print a row
In [13]:
df.loc['b']
Out[13]:
si.no    0.375973
Name     0.592222
Marks    0.669719
Grade    0.440668
Name: b, dtype: float64
In [14]:
# The .loc indexer selects rows by their index label
In [15]:
# The example below has double brackets: to select more than one row, pass a list of index labels to .loc
In [16]:
df.loc[['a','c']]
Out[16]:
si.noNameMarksGrade
a0.6036900.3930970.9855300.758059
c0.5142340.3922540.3783210.494321
In [17]:
# If you want to print only one column for multiple rows
In [18]:
df.loc[['a','c']]["Name"]
Out[18]:
a    0.393097
c    0.392254
Name: Name, dtype: float64
In [19]:
# The example below has double brackets in both places: to select more than one column for multiple rows, pass a list of row labels to .loc and then a list of column names
In [20]:
df.loc[['a','c']][["Name","Grade"]]
Out[20]:
NameGrade
a0.3930970.758059
c0.3922540.494321
In [21]:
df.loc[['a']]["Name"]
Out[21]:
a    0.393097
Name: Name, dtype: float64
In [22]:
# To select rows and columns by integer position (instead of by label), use iloc
In [24]:
df.iloc[1:3,2:4]
Out[24]:
MarksGrade
b0.6697190.440668
c0.3783210.494321
In [25]:
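# In the above example, 1:3 selects rows by position; the end of a slice is exclusive, so it takes positions 1 and 2 (rows b and c). Likewise, 2:4 takes the columns at positions 2 and 3 (Marks and Grade)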
In [27]:
df.iloc[:,0:1]
Out[27]:
si.no
a0.603690
b0.375973
c0.514234
d0.995259
e0.069478
In [29]:
df.drop("Marks",axis=1)
Out[29]:
si.noNameGrade
a0.6036900.3930970.758059
b0.3759730.5922220.440668
c0.5142340.3922540.494321
d0.9952590.3122830.867670
e0.0694780.4105210.601133
In [30]:
# In the above example we are dropping the column Marks, so we use axis=1; this returns a new DataFrame and leaves df itself unchanged
In [31]:
df
Out[31]:
si.noNameMarksGrade
a0.6036900.3930970.9855300.758059
b0.3759730.5922220.6697190.440668
c0.5142340.3922540.3783210.494321
d0.9952590.3122830.9114440.867670
e0.0694780.4105210.4770980.601133
In [32]:
df.drop("Marks",axis=1,inplace=True)
In [33]:
df
Out[33]:
si.noNameGrade
a0.6036900.3930970.758059
b0.3759730.5922220.440668
c0.5142340.3922540.494321
d0.9952590.3122830.867670
e0.0694780.4105210.601133
In [34]:
# If you need to permanently drop Marks from df, pass the argument inplace=True
In [35]:
df.drop('d',axis=0,inplace=True)
In [36]:
df
Out[36]:
si.noNameGrade
a0.6036900.3930970.758059
b0.3759730.5922220.440668
c0.5142340.3922540.494321
e0.0694780.4105210.601133
In [37]:
# If we need to add a new column to the table above
In [38]:
df["City"]=[11,22,33,44]
df
Out[38]:
si.noNameGradeCity
a0.6036900.3930970.75805911
b0.3759730.5922220.44066822
c0.5142340.3922540.49432133
e0.0694780.4105210.60113344
In [39]:
df["id"]=df["Grade"]+df["City"]
In [40]:
df
Out[40]:
si.noNameGradeCityid
a0.6036900.3930970.7580591111.758059
b0.3759730.5922220.4406682222.440668
c0.5142340.3922540.4943213333.494321
e0.0694780.4105210.6011334444.601133
In [41]:
# to add a row
In [42]:
df.loc["f"]=[10,20,30,40,50]
df
Out[42]:
si.noNameGradeCityid
a0.6036900.3930970.7580591111.758059
b0.3759730.5922220.4406682222.440668
c0.5142340.3922540.4943213333.494321
e0.0694780.4105210.6011334444.601133
f10.00000020.00000030.0000004050.000000
In [43]:
df[df>.5]
Out[43]:
si.noNameGradeCityid
a0.603690NaN0.7580591111.758059
bNaN0.592222NaN2222.440668
c0.514234NaNNaN3333.494321
eNaNNaN0.6011334444.601133
f10.00000020.00000030.0000004050.000000
In [47]:
df1=pd.DataFrame({'A':[1,2,np.nan],'B':[5,np.nan,np.nan],'C':[1,2,3]})
df1
Out[47]:
ABC
01.05.01
12.0NaN2
2NaNNaN3
In [48]:
df1
Out[48]:
ABC
01.05.01
12.0NaN2
2NaNNaN3
In [49]:
# How to fill these NaN (Not a Number) values
In [50]:
df1.fillna("Nitin")
Out[50]:
ABC
0151
12Nitin2
2NitinNitin3
In [51]:
# To fill NaN values in a single column
In [52]:
df1["A"].fillna("ABC")
Out[52]:
0      1
1      2
2    ABC
Name: A, dtype: object
In [53]:
# To fill NaN values with the mean of the remaining numbers in the column
In [54]:
df1["A"].fillna(df1["A"].mean())
Out[54]:
0    1.0
1    2.0
2    1.5
Name: A, dtype: float64
In [55]:
# To write the DataFrame to a CSV file
In [56]:
df.to_csv("Testcsv1.csv")
In [57]:
#To read a csv file
In [58]:
df2=pd.read_csv('student_grades.csv')
In [59]:
df2
Out[59]:
StudentGrade
0John Smith80
1Jane Smith75
2John Doe65
3Jane Doe90
In [60]:
df2.head()
Out[60]:
StudentGrade
0John Smith80
1Jane Smith75
2John Doe65
3Jane Doe90
In [61]:
df2.head(8)
Out[61]:
StudentGrade
0John Smith80
1Jane Smith75
2John Doe65
3Jane Doe90
In [62]:
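# In the above 2 examples: if you need a specific number of rows to be printed, use the head command. It shows the first 5 rows by default; this file has only 4 rows, so head() and head(8) both display all of them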
In [63]:
df2.tail()
Out[63]:
StudentGrade
0John Smith80
1Jane Smith75
2John Doe65
3Jane Doe90
In [64]:
# The above example gives you the last 5 entries of your sheet (here all 4 rows, since the file has only 4)
In [65]:
df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
Student    4 non-null object
Grade      4 non-null int64
dtypes: int64(1), object(1)
memory usage: 144.0+ bytes
In [ ]:
 

No comments:

Post a Comment

Featured Post

Ichimoku cloud

Here is how you read an Ichimoku cloud: 1) Blue Conversion line: It measures the short-term trend. It also shows minor support or resistance. Its ve...