Friday, December 13, 2019

Machine learning - day 3



In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
df=pd.read_csv('USA_Housing.csv')
df
Out[2]:
Avg. Area IncomeAvg. Area House AgeAvg. Area Number of RoomsAvg. Area Number of BedroomsArea PopulationPriceAddress
079545.458575.6828617.0091884.0923086.800501.059034e+06208 Michael Ferry Apt. 674\nLaurabury, NE 3701...
179248.642456.0029006.7308213.0940173.072171.505891e+06188 Johnson Views Suite 079\nLake Kathleen, CA...
261287.067185.8658908.5127275.1336882.159401.058988e+069127 Elizabeth Stravenue\nDanieltown, WI 06482...
363345.240057.1882365.5867293.2634310.242831.260617e+06USS Barnett\nFPO AP 44820
459982.197235.0405557.8393884.2326354.109476.309435e+05USNS Raymond\nFPO AE 09386
580175.754164.9884086.1045124.0426748.428421.068138e+0606039 Jennifer Islands Apt. 443\nTracyport, KS...
664698.463436.0253368.1477603.4160828.249091.502056e+064759 Daniel Shoals Suite 442\nNguyenburgh, CO ...
778394.339286.9897806.6204782.4236516.358971.573937e+06972 Joyce Viaduct\nLake William, TN 17778-6483
859927.660815.3621266.3931212.3029387.396007.988695e+05USS Gilbert\nFPO AA 20957
981885.927184.4236728.1676886.1040149.965751.545155e+06Unit 9446 Box 0958\nDPO AE 97025
1080527.472088.0935135.0427474.1047224.359841.707046e+066368 John Motorway Suite 700\nJanetbury, NM 26854
1150593.695504.4965137.4676274.4934343.991896.637324e+05911 Castillo Park Apt. 717\nDavisborough, PW 7...
1239033.809247.6717557.2500293.1039220.361471.042814e+06209 Natasha Stream Suite 961\nHuffmanland, NE ...
1373163.663446.9195355.9931882.2732326.123141.291332e+06829 Welch Track Apt. 992\nNorth John, AR 26532...
1469391.380185.3447768.4064184.3735521.294031.402818e+06PSC 5330, Box 4420\nAPO AP 08302
1573091.866755.4431568.5175134.0123929.524051.306675e+062278 Shannon View\nNorth Carriemouth, NM 84617
1679706.963065.0678908.2197713.1239717.813581.556787e+06064 Hayley Unions\nNicholsborough, HI 44161-1887
1761929.077024.7885505.0970104.3024595.901505.284852e+055498 Rachel Locks\nNew Gregoryshire, PW 54755
1863508.194305.9471657.1877745.1235719.653051.019426e+06Unit 7424 Box 2786\nDPO AE 71255
1962085.276405.7394117.0918085.4944922.106701.030591e+0619696 Benjamin Cape\nStephentown, ME 36952-4733
2086294.999096.6274578.0118984.0747560.775342.146925e+06030 Larry Park Suite 665\nThomashaven, HI 8794...
2160835.089985.5512226.5171752.1045574.741669.292476e+05USNS Brown\nFPO AP 85833
2264490.650274.2103235.4780884.3140358.960117.188872e+0595198 Ortiz Key\nPort Sara, TN 24541-2855
2360697.351546.1704847.1505376.3428140.967097.439998e+059003 Jay Plains Suite 838\nLake Elizabeth, IN ...
2459748.855495.3393407.7486824.2327809.986548.957371e+0524282 Paul Valley\nWest Perry, MI 03169-5806
2556974.476548.2875627.3128804.3340694.869511.453975e+0661938 Brady Falls\nLewisfort, DE 61227
2682173.626084.0185256.9926992.0338853.918071.125693e+063599 Ramirez Springs\nJacksonhaven, AZ 72798
2764626.880985.4433606.9887544.0027784.742289.754295e+05073 Christopher Falls Suite 882\nWest Cynthia,...
2890499.057456.3843594.2421913.0433970.164991.240764e+066531 Chase Prairie Apt. 245\nSusanshire, MN 22365
2959323.792106.9778288.2736974.0737520.657731.577018e+0617124 Johnson Squares\nLake Robertfurt, AL 618...
........................
497055980.204817.0145105.4587892.1143968.687051.120943e+062558 King Trail\nEast Catherinebury, MP 23625-...
497173491.134435.7844304.4259593.3730800.541061.111307e+066043 Stevens Stream\nWest Kimberlymouth, ME 49723
497283695.272387.6435077.1272195.0533113.759061.736402e+0633465 Hernandez Forest Apt. 692\nPort Ashleyfo...
497378743.759276.5836856.5956834.0724381.144541.340770e+06805 David Knoll Apt. 216\nMccarthyview, GU 74316
497470720.296466.4118015.0481283.0119114.019258.013486e+0514742 Lopez Ridge Apt. 889\nJessicatown, CA 28254
497554037.580888.4717656.9660723.2728696.170861.324382e+066278 Jenkins Harbors Apt. 807\nNew Yvettehaven...
497675046.313795.3511697.7978255.2334107.888621.340344e+0655823 Stuart Fields\nNunezstad, NM 03601
497775980.438846.5831055.9148923.2340394.593491.518478e+061831 Escobar Plain Suite 171\nMartinezberg, OH...
497880393.339508.8997135.6529744.0439547.932491.910585e+0602084 Rivera Lock\nHallville, NJ 32367-9579
497982224.695015.4340878.3757083.1257166.867511.823498e+064679 Turner Tunnel\nRosariobury, CT 68552-4766
498075664.024485.7892036.4153122.0254724.251271.406865e+060476 Jessica Shoals\nMelissamouth, DE 39609-2777
498171663.871296.1507457.3119076.3324109.778061.203850e+061316 Tony Inlet Suite 235\nWest Jimmy, SC 72946
498258800.908775.9765077.3040516.4337426.709751.020096e+06109 Lee Wall Apt. 315\nLunamouth, AZ 05121-3634
498369655.183957.7211006.0777954.2932902.355581.194357e+0639174 Jessica Mission Apt. 539\nWest Cindyboro...
498462623.359835.0716246.7710153.3350985.971201.211900e+069894 Greg Ridge\nNorth Tiffanyhaven, ID 66602-...
498575117.042956.0362756.5381112.2243976.031061.378938e+06PSC 7442, Box 6234\nAPO AP 13017
498671060.406015.7188397.2227304.3434814.585591.260241e+065611 Matthew Avenue\nLake Kevin, FM 72963-8891
498765729.222336.2377876.8604753.1225573.854291.197073e+06641 Lisa Parkways Suite 552\nWest Amandaside, ...
498867637.840677.0566735.7744093.0543846.531341.275143e+066066 Sanders Court Apt. 914\nSouth Alexis, FM ...
498947965.406905.6946387.3633275.4046071.947348.852050e+0519960 Scott Street\nPort Brenda, MO 02292-8651
499052723.876565.4522378.1245716.3914802.088444.795006e+0586727 Kelly Plaza\nLake Veronica, IL 04474
499174102.191895.6578417.6839933.1324041.270591.263721e+062871 John Lodge\nAmychester, GU 61734-5597
499287499.125746.4034734.8360914.0240815.199681.568701e+06Unit 2096 Box 9559\nDPO AE 80983-8797
499369639.140905.0075107.7783756.0554056.128431.381831e+065259 David Causeway Apt. 975\nSouth Alexstad, ...
499473060.846235.2936826.3122534.1622695.695489.053549e+055224 Lamb Passage\nNancystad, GA 16579
499560567.944147.8303626.1373563.4622837.361031.060194e+06USNS Williams\nFPO AP 30153-7653
499678491.275436.9991356.5767634.0225616.115491.482618e+06PSC 9258, Box 8489\nAPO AA 42991-3352
499763390.686897.2505914.8050812.1333266.145491.030730e+064215 Tracy Garden Suite 076\nJoshualand, VA 01...
499868001.331245.5343887.1301445.4442625.620161.198657e+06USS Wallace\nFPO AE 73316
499965510.581805.9923056.7923364.0746501.283801.298950e+0637778 George Ridges Apt. 509\nEast Holly, NV 2...
5000 rows × 7 columns
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
Avg. Area Income                5000 non-null float64
Avg. Area House Age             5000 non-null float64
Avg. Area Number of Rooms       5000 non-null float64
Avg. Area Number of Bedrooms    5000 non-null float64
Area Population                 5000 non-null float64
Price                           5000 non-null float64
Address                         5000 non-null object
dtypes: float64(6), object(1)
memory usage: 273.5+ KB
In [5]:
df.drop("Address",axis=1,inplace=True)
In [7]:
df.head()
Out[7]:
Avg. Area IncomeAvg. Area House AgeAvg. Area Number of RoomsAvg. Area Number of BedroomsArea PopulationPrice
079545.458575.6828617.0091884.0923086.800501.059034e+06
179248.642456.0029006.7308213.0940173.072171.505891e+06
261287.067185.8658908.5127275.1336882.159401.058988e+06
363345.240057.1882365.5867293.2634310.242831.260617e+06
459982.197235.0405557.8393884.2326354.109476.309435e+05
In [8]:
#now we will find correlation between different columns
df.corr()
Out[8]:
Avg. Area IncomeAvg. Area House AgeAvg. Area Number of RoomsAvg. Area Number of BedroomsArea PopulationPrice
Avg. Area Income1.000000-0.002007-0.0110320.019788-0.0162340.639734
Avg. Area House Age-0.0020071.000000-0.0094280.006149-0.0187430.452543
Avg. Area Number of Rooms-0.011032-0.0094281.0000000.4626950.0020400.335664
Avg. Area Number of Bedrooms0.0197880.0061490.4626951.000000-0.0221680.171071
Area Population-0.016234-0.0187430.002040-0.0221681.0000000.408556
Price0.6397340.4525430.3356640.1710710.4085561.000000
In [9]:
sns.heatmap(df.corr())
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x2976190c898>
In [11]:
df.drop('Avg. Area Number of Bedrooms',axis=1,inplace=True)
In [12]:
df.head()
Out[12]:
Avg. Area IncomeAvg. Area House AgeAvg. Area Number of RoomsArea PopulationPrice
079545.458575.6828617.00918823086.800501.059034e+06
179248.642456.0029006.73082140173.072171.505891e+06
261287.067185.8658908.51272736882.159401.058988e+06
363345.240057.1882365.58672934310.242831.260617e+06
459982.197235.0405557.83938826354.109476.309435e+05
In [13]:
df.columns
Out[13]:
Index(['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
       'Area Population', 'Price'],
      dtype='object')
In [16]:
x=df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
       'Area Population']]
y=df[['Price']]
In [17]:
# Only the value of a cell's last expression is rendered, so the x.head()
# result is discarded here and just y.head() appears in the output below.
x.head()
y.head()
Out[17]:
Price
01.059034e+06
11.505891e+06
21.058988e+06
31.260617e+06
46.309435e+05
In [19]:
from sklearn.model_selection import train_test_split
In [20]:
#train_test_split is a helper function (not a model) that randomly splits the data into a training set and a test set
In [22]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=42)
In [23]:
#we need to divide the data into a training set and a test set. test_size=0.30 is the fraction of rows held out for testing;
#here it is 30%, so the remaining 70% is used for training.
#random_state=42 fixes the random seed, so the same rows are picked every time the split is run
In [24]:
x_train.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3500 entries, 1840 to 860
Data columns (total 4 columns):
Avg. Area Income             3500 non-null float64
Avg. Area House Age          3500 non-null float64
Avg. Area Number of Rooms    3500 non-null float64
Area Population              3500 non-null float64
dtypes: float64(4)
memory usage: 136.7 KB
In [25]:
y_train.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3500 entries, 1840 to 860
Data columns (total 1 columns):
Price    3500 non-null float64
dtypes: float64(1)
memory usage: 54.7 KB
In [26]:
y_train
Out[26]:
Price
18408.814461e+05
21151.375771e+06
44371.137069e+06
11469.289500e+05
24861.392084e+06
441.153871e+06
21541.409039e+06
15269.554459e+05
48552.998630e+05
13781.172133e+06
17651.069851e+06
30121.594748e+06
4131.008713e+06
42841.196254e+06
14071.292287e+06
16587.230900e+05
8011.281741e+06
12881.526915e+06
21821.429383e+06
25881.986811e+06
26151.746087e+06
23181.155753e+06
2562.298379e+06
45417.427053e+05
17299.456146e+05
11515.307647e+05
41441.126085e+06
6471.919693e+06
29081.240504e+06
17021.273631e+06
......
46581.227660e+06
30056.509392e+05
27341.822988e+06
1891.400105e+06
18061.168445e+06
9751.433221e+06
27476.951522e+05
20471.587585e+06
25586.068631e+05
10821.237361e+06
4741.137059e+06
29041.596343e+06
48431.167627e+06
41177.672148e+05
33851.880179e+06
45551.157759e+06
11841.591188e+06
24336.316565e+05
23911.226067e+06
7698.812731e+05
16851.090805e+06
1301.064686e+06
29191.860649e+06
31711.480675e+06
34447.980739e+05
44261.023944e+06
4661.223101e+06
30921.318598e+06
37721.708631e+06
8601.060898e+06
3500 rows × 1 columns
In [27]:
# Same call, same arguments as the earlier split cell: because random_state is fixed,
# this reproduces exactly the same partition (compare the two y_train outputs).
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=42)
In [28]:
y_train
Out[28]:
Price
18408.814461e+05
21151.375771e+06
44371.137069e+06
11469.289500e+05
24861.392084e+06
441.153871e+06
21541.409039e+06
15269.554459e+05
48552.998630e+05
13781.172133e+06
17651.069851e+06
30121.594748e+06
4131.008713e+06
42841.196254e+06
14071.292287e+06
16587.230900e+05
8011.281741e+06
12881.526915e+06
21821.429383e+06
25881.986811e+06
26151.746087e+06
23181.155753e+06
2562.298379e+06
45417.427053e+05
17299.456146e+05
11515.307647e+05
41441.126085e+06
6471.919693e+06
29081.240504e+06
17021.273631e+06
......
46581.227660e+06
30056.509392e+05
27341.822988e+06
1891.400105e+06
18061.168445e+06
9751.433221e+06
27476.951522e+05
20471.587585e+06
25586.068631e+05
10821.237361e+06
4741.137059e+06
29041.596343e+06
48431.167627e+06
41177.672148e+05
33851.880179e+06
45551.157759e+06
11841.591188e+06
24336.316565e+05
23911.226067e+06
7698.812731e+05
16851.090805e+06
1301.064686e+06
29191.860649e+06
31711.480675e+06
34447.980739e+05
44261.023944e+06
4661.223101e+06
30921.318598e+06
37721.708631e+06
8601.060898e+06
3500 rows × 1 columns
In [29]:
from sklearn.linear_model import LinearRegression
In [30]:
model=LinearRegression()
In [31]:
model.fit(x_train,y_train)
Out[31]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
In [32]:
y_predict=model.predict(x_test)
In [33]:
y_predict
Out[33]:
array([[1309493.18923727],
       [1238962.66596298],
       [1248121.55591489],
       ...,
       [1459381.97707876],
       [1482240.08195154],
       [1050989.28897342]])
In [34]:
model.coef_
Out[34]:
array([[2.16341933e+01, 1.65621287e+05, 1.21162110e+05, 1.52136132e+01]])
In [35]:
model.intercept_
Out[35]:
array([-2639242.60887411])
In [47]:
#Now we will do logistic regression. It estimates the probability that a sample belongs to a class
In [ ]:
 In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
In [2]:
df=pd.read_csv('titanic_train.csv')
In [3]:
df
Out[3]:
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
5603Moran, Mr. JamesmaleNaN003308778.4583NaNQ
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNS
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNC
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.0500NaNS
131403Andersson, Mr. Anders Johanmale39.01534708231.2750NaNS
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.8542NaNS
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.0000NaNS
161703Rice, Master. Eugenemale2.04138265229.1250NaNQ
171812Williams, Mr. Charles EugenemaleNaN0024437313.0000NaNS
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.0000NaNS
192013Masselmani, Mrs. FatimafemaleNaN0026497.2250NaNC
202102Fynney, Mr. Joseph Jmale35.00023986526.0000NaNS
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S
222313McGowan, Miss. Anna "Annie"female15.0003309238.0292NaNQ
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S
242503Palsson, Miss. Torborg Danirafemale8.03134990921.0750NaNS
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.3875NaNS
262703Emir, Mr. Farred ChehabmaleNaN0026317.2250NaNC
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S
282913O'Dwyer, Miss. Ellen "Nellie"femaleNaN003309597.8792NaNQ
293003Todoroff, Mr. LaliomaleNaN003492167.8958NaNS
.......................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.5000NaNS
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S
86386403Sage, Miss. Dorothy Edith "Dolly"femaleNaN82CA. 234369.5500NaNS
86486502Gill, Mr. John Williammale24.00023386613.0000NaNS
86586612Bystrom, Mrs. (Karolina)female42.00023685213.0000NaNS
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.8583NaNC
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S
86886903van Melkebeke, Mr. PhilemonmaleNaN003457779.5000NaNS
86987013Johnson, Master. Harold Theodormale4.01134774211.1333NaNS
87087103Balkic, Mr. Cerinmale26.0003492487.8958NaNS
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S
87387403Vander Cruyssen, Mr. Victormale47.0003457659.0000NaNS
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.0000NaNC
87587613Najib, Miss. Adele Kiamie "Jane"female15.00026677.2250NaNC
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.8458NaNS
87787803Petroff, Mr. Nedeliomale19.0003492127.8958NaNS
87887903Laleff, Mr. KristomaleNaN003492177.8958NaNS
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.0000NaNS
88188203Markun, Mr. Johannmale33.0003492577.8958NaNS
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.5167NaNS
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.5000NaNS
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.0500NaNS
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.1250NaNQ
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88888903Johnston, Miss. Catherine Helen "Carrie"femaleNaN12W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ
891 rows × 12 columns
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
In [5]:
#now we have to convert the object (text) columns into numbers and fill in the null values
In [6]:
df['Survived'].value_counts()
Out[6]:
0    549
1    342
Name: Survived, dtype: int64
In [7]:
df.drop(['PassengerId','Name','Ticket'],axis=1,inplace=True)
In [8]:
df.head()
Out[8]:
SurvivedPclassSexAgeSibSpParchFareCabinEmbarked
003male22.0107.2500NaNS
111female38.01071.2833C85C
213female26.0007.9250NaNS
311female35.01053.1000C123S
403male35.0008.0500NaNS
In [9]:
df.isnull()
Out[9]:
SurvivedPclassSexAgeSibSpParchFareCabinEmbarked
0FalseFalseFalseFalseFalseFalseFalseTrueFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalseFalseTrueFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseTrueFalse
5FalseFalseFalseTrueFalseFalseFalseTrueFalse
6FalseFalseFalseFalseFalseFalseFalseFalseFalse
7FalseFalseFalseFalseFalseFalseFalseTrueFalse
8FalseFalseFalseFalseFalseFalseFalseTrueFalse
9FalseFalseFalseFalseFalseFalseFalseTrueFalse
10FalseFalseFalseFalseFalseFalseFalseFalseFalse
11FalseFalseFalseFalseFalseFalseFalseFalseFalse
12FalseFalseFalseFalseFalseFalseFalseTrueFalse
13FalseFalseFalseFalseFalseFalseFalseTrueFalse
14FalseFalseFalseFalseFalseFalseFalseTrueFalse
15FalseFalseFalseFalseFalseFalseFalseTrueFalse
16FalseFalseFalseFalseFalseFalseFalseTrueFalse
17FalseFalseFalseTrueFalseFalseFalseTrueFalse
18FalseFalseFalseFalseFalseFalseFalseTrueFalse
19FalseFalseFalseTrueFalseFalseFalseTrueFalse
20FalseFalseFalseFalseFalseFalseFalseTrueFalse
21FalseFalseFalseFalseFalseFalseFalseFalseFalse
22FalseFalseFalseFalseFalseFalseFalseTrueFalse
23FalseFalseFalseFalseFalseFalseFalseFalseFalse
24FalseFalseFalseFalseFalseFalseFalseTrueFalse
25FalseFalseFalseFalseFalseFalseFalseTrueFalse
26FalseFalseFalseTrueFalseFalseFalseTrueFalse
27FalseFalseFalseFalseFalseFalseFalseFalseFalse
28FalseFalseFalseTrueFalseFalseFalseTrueFalse
29FalseFalseFalseTrueFalseFalseFalseTrueFalse
..............................
861FalseFalseFalseFalseFalseFalseFalseTrueFalse
862FalseFalseFalseFalseFalseFalseFalseFalseFalse
863FalseFalseFalseTrueFalseFalseFalseTrueFalse
864FalseFalseFalseFalseFalseFalseFalseTrueFalse
865FalseFalseFalseFalseFalseFalseFalseTrueFalse
866FalseFalseFalseFalseFalseFalseFalseTrueFalse
867FalseFalseFalseFalseFalseFalseFalseFalseFalse
868FalseFalseFalseTrueFalseFalseFalseTrueFalse
869FalseFalseFalseFalseFalseFalseFalseTrueFalse
870FalseFalseFalseFalseFalseFalseFalseTrueFalse
871FalseFalseFalseFalseFalseFalseFalseFalseFalse
872FalseFalseFalseFalseFalseFalseFalseFalseFalse
873FalseFalseFalseFalseFalseFalseFalseTrueFalse
874FalseFalseFalseFalseFalseFalseFalseTrueFalse
875FalseFalseFalseFalseFalseFalseFalseTrueFalse
876FalseFalseFalseFalseFalseFalseFalseTrueFalse
877FalseFalseFalseFalseFalseFalseFalseTrueFalse
878FalseFalseFalseTrueFalseFalseFalseTrueFalse
879FalseFalseFalseFalseFalseFalseFalseFalseFalse
880FalseFalseFalseFalseFalseFalseFalseTrueFalse
881FalseFalseFalseFalseFalseFalseFalseTrueFalse
882FalseFalseFalseFalseFalseFalseFalseTrueFalse
883FalseFalseFalseFalseFalseFalseFalseTrueFalse
884FalseFalseFalseFalseFalseFalseFalseTrueFalse
885FalseFalseFalseFalseFalseFalseFalseTrueFalse
886FalseFalseFalseFalseFalseFalseFalseTrueFalse
887FalseFalseFalseFalseFalseFalseFalseFalseFalse
888FalseFalseFalseTrueFalseFalseFalseTrueFalse
889FalseFalseFalseFalseFalseFalseFalseFalseFalse
890FalseFalseFalseFalseFalseFalseFalseTrueFalse
891 rows × 9 columns
In [10]:
sns.heatmap(df.isnull())
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x19b36985630>
In [11]:
df['Cabin'].isnull().value_counts()
Out[11]:
True     687
False    204
Name: Cabin, dtype: int64
In [12]:
#the output above shows that 687 of the 891 Cabin values are missing
In [13]:
df.drop('Cabin',axis=1,inplace=True)
In [14]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 8 columns):
Survived    891 non-null int64
Pclass      891 non-null int64
Sex         891 non-null object
Age         714 non-null float64
SibSp       891 non-null int64
Parch       891 non-null int64
Fare        891 non-null float64
Embarked    889 non-null object
dtypes: float64(2), int64(4), object(2)
memory usage: 55.8+ KB
In [15]:
# we will make boxplots to see how Age is distributed across groups, so we can decide how to fill in the missing ages
In [16]:
sns.boxplot(x='Sex',y='Age',data=df)
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x19b36d37828>
In [17]:
sns.boxplot(x='Embarked',y='Age',data=df)
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x19b36de9128>
In [18]:
sns.boxplot(x='Pclass',y='Age',data=df)
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x19b36e7b208>
In [19]:
#now we will get the mean of the ages, overall and for each passenger class
In [20]:
df['Age'].mean()
Out[20]:
29.69911764705882
In [21]:
df[df['Pclass']==1]['Age'].mean()
Out[21]:
38.233440860215055
In [22]:
df[df['Pclass']==2]['Age'].mean()
Out[22]:
29.87763005780347
In [23]:
df[df['Pclass']==3]['Age'].mean()
Out[23]:
25.14061971830986
In [24]:
#now we will fill the null values in Age using the mean age of the passenger's class (rounded down to a whole year)
In [25]:
def imput_age(cols):
    """Return a passenger's age, filling a missing value from the passenger class.

    Written to be used row-wise, e.g.
    ``df[['Age','Pclass']].apply(imput_age,axis=1)``.

    Parameters
    ----------
    cols : pandas.Series or sequence
        Values in positional order: age first, passenger class second.
        Any further values are ignored.

    Returns
    -------
    The age unchanged when it is present; otherwise 38 for class 1, 29 for
    class 2 and 25 for any other class (the per-class mean ages computed
    earlier in the notebook, truncated to whole years).
    """
    # Unpack by position instead of cols[0] / cols[1]: the row handed over by
    # DataFrame.apply is labelled 'Age'/'Pclass', and pandas deprecates
    # integer keys being treated as positions on a label-indexed Series.
    age,pclass,*_=cols
    if not pd.isnull(age):
        return age
    if pclass==1:
        return 38
    if pclass==2:
        return 29
    return 25
In [26]:
df['Age']=df[['Age','Pclass']].apply(imput_age,axis=1)
In [27]:
sns.heatmap(df.isnull())
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x19b36f331d0>
In [28]:
df.head()
Out[28]:
SurvivedPclassSexAgeSibSpParchFareEmbarked
003male22.0107.2500S
111female38.01071.2833C
213female26.0007.9250S
311female35.01053.1000S
403male35.0008.0500S
In [29]:
#the model needs numeric inputs, so we convert Embarked and Sex into numbers
In [30]:
df['Sex'].value_counts()
Out[30]:
male      577
female    314
Name: Sex, dtype: int64
In [31]:
# Encode Sex as an integer feature: male -> 0, female -> 1.
# NOTE: Series.map returns NaN for values that are not keys of the dict, so run
# this cell only once per fresh load of df (a second run would blank the column).
gen={'male':0,'female':1}
df['Sex']=df['Sex'].map(gen)
In [32]:
df.head()
Out[32]:
SurvivedPclassSexAgeSibSpParchFareEmbarked
003022.0107.2500S
111138.01071.2833C
213126.0007.9250S
311135.01053.1000S
403035.0008.0500S
In [33]:
port={'S':0,'C':1,'Q':2}
In [34]:
# Embarked has 2 missing values (889 of 891 non-null in df.info() above); map() leaves
# them as NaN, which is why the encoded column is displayed as floats (0.0, 1.0, ...).
df['Embarked']=df['Embarked'].map(port)
In [35]:
df.head()
Out[35]:
SurvivedPclassSexAgeSibSpParchFareEmbarked
003022.0107.25000.0
111138.01071.28331.0
213126.0007.92500.0
311135.01053.10000.0
403035.0008.05000.0
In [36]:
df.columns
Out[36]:
Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked'],
      dtype='object')
In [37]:
# Feature matrix for the classifier ('Survived' is the target; 'Embarked' is left out).
x=df[['Pclass','Sex','Age','SibSp','Parch','Fare']]
In [38]:
x.head()
Out[38]:
PclassSexAgeSibSpParchFare
03022.0107.2500
11138.01071.2833
23126.0007.9250
31135.01053.1000
43035.0008.0500
In [39]:
y=df['Survived']
In [40]:
y.head()
Out[40]:
0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64
In [41]:
from sklearn.model_selection import train_test_split
In [42]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=101)
In [43]:
from sklearn.linear_model import LogisticRegression
In [44]:
model=LogisticRegression()
In [45]:
model.fit(x_train,y_train)
C:\Users\AbhishekSingh\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
Out[45]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)
In [46]:
y_predict=model.predict(x_test)
In [47]:
print(y_predict)
[0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 1 0
 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 1 1 1 0 1 0 0 0
 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 1 1 1 1 1 0 0 0 0 1 0 0 1 1 1 1 1 0 0
 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0
 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 0 1 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 1 1
 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 1 1 0
 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1
 0 0 1 0 0 1 0 0 1]
In [48]:
from sklearn import metrics
In [49]:
metrics.accuracy_score(y_test,y_predict)
Out[49]:
0.7723880597014925
In [50]:
from sklearn.metrics import confusion_matrix
In [51]:
confusion_matrix(y_test,y_predict)
Out[51]:
array([[133,  21],
       [ 40,  74]], dtype=int64)
In [ ]:
 

No comments:

Post a Comment

Featured Post

Ichimoku cloud

Here how you read a ichimoku cloud 1) Blue Converse line: It measures short term trend. it also shows minor support or resistance. Its ve...