import numpy as np
import pandas as pd

# Day 4 of learning data science: selecting, dropping, adding and filling data with pandas

# Build a 5x4 table of uniform random floats in [0, 1); rows are labelled a-e.
df = pd.DataFrame(
    data=np.random.rand(5, 4),
    columns=["si.no", "Name", "Marks", "Grade"],
    index=list("abcde"),
)
df  # bare expression: a notebook cell displays the whole frame

# A single column label inside one pair of brackets selects that column.

df["Name"]

a    0.393097
b    0.592222
c    0.392254
d    0.312283
e    0.410521
Name: Name, dtype: float64

# To select two columns, pass a list of column labels

df[["si.no","Name"]]

# Note the two pairs of brackets above: the inner pair is a list of labels,
# which is needed whenever more than one column is selected

# To select a row by its index label, use .loc

df.loc['b']

si.no    0.375973
Name     0.592222
Marks    0.669719
Grade    0.440668
Name: b, dtype: float64

# .loc selects rows by their index labels

# In the example below there are two pairs of brackets: pass a list of labels
# whenever more than one row is selected

df.loc[['a','c']]

# To select only one column for multiple rows, give .loc the row labels and the
# column label together. A single .loc[rows, column] call avoids chained
# indexing (df.loc[...][...]), which builds an intermediate frame first.

df.loc[['a', 'c'], "Name"]

a    0.393097
c    0.392254
Name: Name, dtype: float64

# More than one column for multiple rows: pass a list of row labels and a list
# of column labels to a single .loc call instead of chaining two lookups

df.loc[['a', 'c'], ["Name", "Grade"]]

# A one-element row list combined with a single column label returns a Series

df.loc[['a'], "Name"]

a    0.393097
Name: Name, dtype: float64

# .iloc selects by integer position (0-based) instead of by label

df.iloc[1:3,2:4]

# In the example above, 1:3 selects row positions 1 and 2 (rows b and c) because
# the end of a slice is excluded; 2:4 likewise selects column positions 2 and 3

# A bare ':' keeps every row; 0:1 keeps only the first column
df.iloc[:,0:1]

# drop returns a new frame without the given label; df itself is not modified here
df.drop("Marks",axis=1)

# In the example above we are deleting the column Marks, so we use axis=1

df

# The call below passes inplace=True, so the column is removed from df itself
df.drop("Marks",axis=1,inplace=True)

df

# To drop Marks permanently, pass inplace=True as shown above

# axis=0 drops a row by its index label, here row d
df.drop('d',axis=0,inplace=True)

df

# To add a new column, assign a list with one value per row
# (four rows remain after row d was dropped)

df["City"]=[11,22,33,44]
df

# A new column can also be computed element-wise from existing columns
df["id"]=df["Grade"]+df["City"]

df

# To add a row, assign one value per column to a new index label with .loc

df.loc["f"]=[10,20,30,40,50]
df

# Boolean mask: values greater than 0.5 are kept, everything else is shown as NaN
df[df>.5]

# A small frame with missing values (NaN) to practise filling them.
df1 = pd.DataFrame(
    {
        "A": [1, 2, np.nan],
        "B": [5, np.nan, np.nan],
        "C": [1, 2, 3],
    }
)
df1

df1

# Replace every NaN in the frame with a fixed value; a new frame is returned.

df1.fillna(value="Nitin")

# Replace the NaN values of a single column only.

df1["A"].fillna(value="ABC")

0      1
1      2
2    ABC
Name: A, dtype: object

# To fill NaN values with the mean of the column's non-missing values

df1["A"].fillna(df1["A"].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

# To write the frame to a CSV file

df.to_csv("Testcsv1.csv")

# To read a CSV file into a new frame

df2=pd.read_csv('student_grades.csv')

df2

df2.head()

df2.head(8)

# In the two examples above, head shows rows from the top of the frame;
# pass a number to choose how many rows are shown

df2.tail()

# The example above shows the last 5 rows of the frame

# info prints a summary: index range, columns, non-null counts, dtypes and memory usage
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
Student    4 non-null object
Grade      4 non-null int64
dtypes: int64(1), object(1)
memory usage: 144.0+ bytes

	si.no
a	0.603690
b	0.375973
c	0.514234
d	0.995259
e	0.069478

	si.no	Name	Grade	City	id
a	0.603690	NaN	0.758059	11	11.758059
b	NaN	0.592222	NaN	22	22.440668
c	0.514234	NaN	NaN	33	33.494321
e	NaN	NaN	0.601133	44	44.601133
f	10.000000	20.000000	30.000000	40	50.000000

	A	B	C
0	1.0	5.0	1
1	2.0	NaN	2
2	NaN	NaN	3

	A	B	C
0	1.0	5.0	1
1	2.0	NaN	2
2	NaN	NaN	3

	Student	Grade
0	John Smith	80
1	Jane Smith	75
2	John Doe	65
3	Jane Doe	90

Knowledge Hub

Wednesday, November 20, 2019

Learning Data Science - Day 4

No comments:

Post a Comment

Featured Post

Ichimoku cloud

Search This Blog

	si.no	Name	Marks	Grade
a	0.603690	0.393097	0.985530	0.758059
b	0.375973	0.592222	0.669719	0.440668
c	0.514234	0.392254	0.378321	0.494321
d	0.995259	0.312283	0.911444	0.867670
e	0.069478	0.410521	0.477098	0.601133

	A	B	C
0	1	5	1
1	2	Nitin	2
2	Nitin	Nitin	3