Practice 1.2 – Python Pandas Cookbook by Alfred Essa

import pandas as pd
import datetime as dt
# Build the 'MM-DD' labels for 1-10 September 2013.
start = dt.datetime(2013, 9, 1)
end = dt.datetime(2013, 9, 11)  # exclusive upper bound, so the last label is 09-10
step = dt.timedelta(days=1)
# Offset from `start` rather than advancing it in a loop, so `start` still
# holds the first day afterwards. `(end - start).days` equals the number of
# steps because `step` is exactly one day.
dates = [(start + offset * step).strftime('%m-%d')
         for offset in range((end - start).days)]
dates
['09-01',
'09-02',
'09-03',
'09-04',
'09-05',
'09-06',
'09-07',
'09-08',
'09-09',
'09-10']
# Column data for the DataFrame: the shared date labels plus one list of ten
# values per city. Each list has the same length as `dates`.
d = {
    'Date': dates,
    'Tokyo': [3, 4, 5, 4, 6, 3, 32, 2, 3, 13],
    'Paris': [45, 2, 4, 5, 46, 4, 7, 85, 12, 9],
    'Mumbai': [23, 32, 12, 45, 3, 6, 7, 8, 1, 9],
}
d
{'Date': ['09-01',
'09-02',
'09-03',
'09-04',
'09-05',
'09-06',
'09-07',
'09-08',
'09-09',
'09-10'],
'Mumbai': [23, 32, 12, 45, 3, 6, 7, 8, 1, 9],
'Paris': [45, 2, 4, 5, 46, 4, 7, 85, 12, 9],
'Tokyo': [3, 4, 5, 4, 6, 3, 32, 2, 3, 13]}
Creating a DataFrame from a dictionary of equal-length lists
# Each key of `d` becomes a column; the list stored under that key supplies
# the column's values, one per row.
temp = pd.DataFrame(d)
temp
Date Mumbai Paris Tokyo
0 09-01 23 45 3
1 09-02 32 2 4
2 09-03 12 4 5
3 09-04 45 5 4
4 09-05 3 46 6
5 09-06 6 4 3
6 09-07 7 7 32
7 09-08 8 85 2
8 09-09 1 12 3
9 09-10 9 9 13
# Select a single column by its label; the result is a Series named 'Tokyo'.
temp['Tokyo']
0     3
1     4
2     5
3     4
4     6
5     3
6    32
7     2
8     3
9    13
Name: Tokyo, dtype: int64
# Use the 'Date' column as the row index. The result is rebound to `temp`
# so the Date-indexed frame replaces the integer-indexed one.
temp = temp.set_index('Date')
temp
Mumbai Paris Tokyo
Date
09-01 23 45 3
09-02 32 2 4
09-03 12 4 5
09-04 45 5 4
09-05 3 46 6
09-06 6 4 3
09-07 7 7 32
09-08 8 85 2
09-09 1 12 3
09-10 9 9 13
import os
# Show the current working directory, i.e. where relative file paths resolve.
os.getcwd()
'C:\\Anaconda'
# Load the TB outcomes CSV into a DataFrame.
# NOTE(review): the path is absolute and machine-specific (it points into the
# working directory reported by os.getcwd() above) — adjust it for your setup.
tb = pd.read_csv('C:/Anaconda/TB_outcomes.csv')
# Preview the first five rows to check the file loaded as expected.
tb.head()
country iso2 iso3 iso_numeric g_whoregion year rep_meth new_sp_coh new_sp_cur new_sp_cmplt mdr_coh mdr_succ mdr_fail mdr_died mdr_lost xdr_coh xdr_succ xdr_fail xdr_died xdr_lost
0 Afghanistan AF AFG 4 EMR 1994 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 Afghanistan AF AFG 4 EMR 1995 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 Afghanistan AF AFG 4 EMR 1996 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 Afghanistan AF AFG 4 EMR 1997 100 2001 786 108 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 Afghanistan AF AFG 4 EMR 1998 100 2913 772 199 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 72 columns

# Preview the last five rows of the table.
tb.tail()
country iso2 iso3 iso_numeric g_whoregion year rep_meth new_sp_coh new_sp_cur new_sp_cmplt mdr_coh mdr_succ mdr_fail mdr_died mdr_lost xdr_coh xdr_succ xdr_fail xdr_died xdr_lost
4052 Zimbabwe ZW ZWE 716 AFR 2008 100 10370 6973 734 0 NaN NaN NaN NaN 0 NaN NaN NaN NaN
4053 Zimbabwe ZW ZWE 716 AFR 2009 100 10195 7131 868 1 1 0 0 0 0 0 0 0 0
4054 Zimbabwe ZW ZWE 716 AFR 2010 100 11654 8377 1116 6 4 0 2 0 0 0 0 0 0
4055 Zimbabwe ZW ZWE 716 AFR 2011 NaN 12596 9208 995 70 57 0 9 2 0 0 0 0 0
4056 Zimbabwe ZW ZWE 716 AFR 2012 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 72 columns

To get unique values

# Distinct values of the 'country' column, returned as a NumPy array.
tb['country'].unique()
array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia',
       'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia (Plurinational State of)',
       'Bonaire, Saint Eustatius and Saba', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei Darussalam',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia',
       'Cameroon', 'Canada', 'Cayman Islands', 'Central African Republic',
       'Chad', 'Chile', 'China', 'China, Hong Kong SAR',
       'China, Macao SAR', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
       'Costa Rica', "C\xc3\xb4te d'Ivoire", 'Croatia', 'Cuba',
       'Cura\xc3\xa7ao', 'Cyprus', 'Czech Republic',
       "Democratic People's Republic of Korea",
       'Democratic Republic of the Congo', 'Denmark', 'Djibouti',
       'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Fiji',
       'Finland', 'France', 'French Polynesia', 'Gabon', 'Gambia',
       'Georgia', 'Germany', 'Ghana', 'Greece', 'Greenland', 'Grenada',
       'Guam', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti',
       'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia',
       'Iran (Islamic Republic of)', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati',
       'Kuwait', 'Kyrgyzstan', "Lao People's Democratic Republic",
       'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Lithuania',
       'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives',
       'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius',
       'Mexico', 'Micronesia (Federated States of)', 'Monaco', 'Mongolia',
       'Montenegro', 'Montserrat', 'Morocco', 'Mozambique', 'Myanmar',
       'Namibia', 'Nauru', 'Nepal', 'Netherlands Antilles', 'Netherlands',
       'New Caledonia', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria',
       'Niue', 'Northern Mariana Islands', 'Norway', 'Oman', 'Pakistan',
       'Palau', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru',
       'Philippines', 'Poland', 'Portugal', 'Puerto Rico', 'Qatar',
       'Republic of Korea', 'Republic of Moldova', 'Romania',
       'Russian Federation', 'Rwanda', 'Saint Kitts and Nevis',
       'Saint Lucia', 'Saint Vincent and the Grenadines', 'Samoa',
       'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal',
       'Serbia & Montenegro', 'Serbia', 'Seychelles', 'Sierra Leone',
       'Singapore', 'Sint Maarten (Dutch part)', 'Slovakia', 'Slovenia',
       'Solomon Islands', 'Somalia', 'South Africa', 'South Sudan',
       'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Swaziland', 'Sweden',
       'Switzerland', 'Syrian Arab Republic', 'Tajikistan', 'Thailand',
       'The Former Yugoslav Republic of Macedonia', 'Timor-Leste', 'Togo',
       'Tokelau', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey',
       'Turkmenistan', 'Turks and Caicos Islands', 'Tuvalu', 'Uganda',
       'Ukraine', 'United Arab Emirates',
       'United Kingdom of Great Britain and Northern Ireland',
       'United Republic of Tanzania', 'United States of America',
       'Uruguay', 'US Virgin Islands', 'Uzbekistan', 'Vanuatu',
       'Venezuela (Bolivarian Republic of)', 'Viet Nam',
       'Wallis and Futuna Islands', 'West Bank and Gaza Strip', 'Yemen',
       'Zambia', 'Zimbabwe'], dtype=object)

Counting how many times each unique value occurs

# Count how many rows each country has, most frequent first. Bracket access
# matches tb['country'].unique() above and also works for column names that
# are not valid Python attribute names.
tb['country'].value_counts()
Botswana                            19
Bolivia (Plurinational State of)    19
Greenland                           19
Armenia                             19
China                               19
Togo                                19
Mongolia                            19
Saint Kitts and Nevis               19
Cuba                                19
Benin                               19
Cook Islands                        19
Malawi                              19
Norway                              19
Nauru                               19
Solomon Islands                     19
...
US Virgin Islands                    19
China, Hong Kong SAR                 19
Denmark                              19
Philippines                          19
Canada                               19
China, Macao SAR                     19
Netherlands Antilles                 15
Timor-Leste                          11
Serbia & Montenegro                  10
Montenegro                            8
Serbia                                8
Bonaire, Saint Eustatius and Saba     4
Sint Maarten (Dutch part)             4
Curaçao                               4
South Sudan                           3
Length: 219, dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; non-numeric columns such as 'country' are left out.
tb.describe()
iso_numeric year rep_meth new_sp_coh new_sp_cur new_sp_cmplt new_sp_died new_sp_fail new_sp_def c_new_sp_tsr mdr_coh mdr_succ mdr_fail mdr_died mdr_lost xdr_coh xdr_succ xdr_fail xdr_died xdr_lost
count 4057.000000 4057.000000 3037.000000 3053.000000 2944.000000 2943.00000 2993.000000 2876.000000 2955.000000 3004.000000 1050.000000 1017.000000 959.000000 1000.000000 987.000000 562.000000 525.000000 524.000000 525.000000 524.000000
mean 433.592310 2003.042149 100.271320 10867.512611 7897.903533 963.62827 430.973939 184.123088 613.043655 75.767643 139.985714 71.208456 14.385819 22.544000 22.217832 6.181495 1.390476 0.837786 2.230476 0.776718
std 254.908076 5.485677 0.647391 45621.976594 37520.862855 3325.39556 1615.996031 812.662201 2386.874910 16.305073 726.653931 342.387797 106.821966 138.383012 113.607426 48.815990 9.570645 5.019886 20.085652 5.790293
min 4.000000 1994.000000 100.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 212.000000 1998.000000 100.000000 124.000000 66.750000 13.00000 7.000000 0.000000 4.000000 69.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 430.000000 2003.000000 100.000000 1229.000000 721.500000 124.00000 60.000000 15.000000 90.000000 79.000000 6.000000 3.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
75% 646.000000 2008.000000 100.000000 5366.000000 3401.500000 580.50000 257.000000 99.000000 393.000000 87.000000 43.000000 24.000000 1.000000 6.000000 4.000000 0.000000 0.000000 0.000000 0.000000 0.000000
max 894.000000 2012.000000 102.000000 642321.000000 544731.000000 64938.00000 27005.000000 12505.000000 35469.000000 100.000000 15896.000000 5895.000000 2916.000000 3037.000000 2344.000000 751.000000 116.000000 64.000000 305.000000 94.000000

8 rows × 68 columns

 

Advertisements