Preliminary data analysis and refinement of the research question to focus on Sub-Saharan Africa
After carefully reviewing the GapMinder dataset, I decided to focus my research on countries in Sub-Saharan Africa instead. To accomplish this, I added a new column, 'continent', to gapminder.csv and created a new dataframe, 'sub_sahara_africa', by subsetting the data to include only countries in the aforementioned region.
Python Code
import pandas
import os
# Report the working directory; the CSV is looked up in ./data beneath it.
print(os.getcwd())
DATA_PATH = os.path.join(os.getcwd(), "data")
DATA_FILE = os.path.join(DATA_PATH, "gapminder.csv")
econ_data = pandas.read_csv(DATA_FILE, low_memory=False)

# Render floats through the '%f' format whenever pandas prints them.
pandas.set_option('display.float_format', lambda x: '%f' % x)

# Coerce the variables of interest to numeric; with errors='coerce' any value
# that cannot be parsed becomes NaN instead of raising.
econ_data = econ_data.assign(**{
    column: pandas.to_numeric(econ_data[column], errors='coerce')
    for column in ('incomeperperson', 'relectricperperson', 'urbanrate')
})

print('***** Extrating Gapminder Data for variables of interest*****')

# Keep only the identifying columns plus the three variables under study.
relevant_data = econ_data[[
    'country',
    'continent',
    'incomeperperson',
    'relectricperperson',
    'urbanrate',
]]

# Independent snapshot of the full, converted dataframe.
econ2_data = econ_data.copy()

# Extract the Sub-Saharan Africa rows. This relies on the 'continent' column
# that was added to the gapminder data by hand.
sub_sahara_africa = relevant_data.loc[relevant_data['continent'] == 'Africa']
print(sub_sahara_africa)
print('***** Per Capita Electricity Consumption and relative frequencies*****')

# For each variable: absolute counts first, then proportions of the whole.
# dropna=False counts missing values as well; sort=False skips the
# sort-by-frequency step.

# Urbanisation rate.
urban_rate = sub_sahara_africa['urbanrate'].value_counts(
    sort=False, dropna=False)
print(urban_rate)
rfreq_urban_rate = sub_sahara_africa['urbanrate'].value_counts(
    normalize=True, sort=False, dropna=False)
print(rfreq_urban_rate)

# Residential electricity consumption per person.
per_capita_electric = sub_sahara_africa['relectricperperson'].value_counts(
    sort=False, dropna=False)
print(per_capita_electric)
rfreq_per_capita_electric = (
    sub_sahara_africa['relectricperperson'].value_counts(
        normalize=True, sort=False, dropna=False)
)
print(rfreq_per_capita_electric)

# Income per person.
per_capita_gdp = sub_sahara_africa['incomeperperson'].value_counts(
    sort=False, dropna=False)
print(per_capita_gdp)
rfreq_per_capita_gdp = sub_sahara_africa['incomeperperson'].value_counts(
    normalize=True, sort=False, dropna=False)
print(rfreq_per_capita_gdp)
# Work on an independent copy so the derived category columns are added
# without touching sub_sahara_africa.
sub2_sahara_africa = sub_sahara_africa.copy()

# --- Income: three quantile-based strata -----------------------------------
print('Split income data into 3 strata - LowGDP, MediumGDP, HighGDP')
sub2_sahara_africa['percapitagdp'] = pandas.qcut(
    sub2_sahara_africa['incomeperperson'],
    3,
    labels=["LowGDP", "MediumGDP", "HighGDP"],
)
print(sub2_sahara_africa)
# Frequency and relative frequency of the income strata (missing excluded).
per_capita_gdp_bin = sub2_sahara_africa['percapitagdp'].value_counts(
    sort=False, dropna=True)
print(per_capita_gdp_bin)
rfreq_per_capita_gdp_bin = sub2_sahara_africa['percapitagdp'].value_counts(
    normalize=True, sort=False, dropna=True)
print(rfreq_per_capita_gdp_bin)

# --- Electricity use: three quantile-based categories ----------------------
print('Split relectricperperson data into 3 categories '
      '- LowkWh, MediumkWh, HighkWh')
sub2_sahara_africa['percapitakWh'] = pandas.qcut(
    sub2_sahara_africa['relectricperperson'],
    3,
    labels=["LowkWh", "MediumkWh", "HighkWh"],
)
print(sub2_sahara_africa)
per_kWh_use = sub2_sahara_africa['percapitakWh'].value_counts(
    sort=False, dropna=True)
print(per_kWh_use)
rfreq_per_kWh_use = sub2_sahara_africa['percapitakWh'].value_counts(
    normalize=True, sort=False, dropna=True)
print(rfreq_per_kWh_use)

# --- Urbanisation: three quantile-based categories -------------------------
print('Split urbanrate data into 3 categories - rural, town, urban')
sub2_sahara_africa['areatype'] = pandas.qcut(
    sub2_sahara_africa['urbanrate'],
    3,
    labels=["rural", "town", "urban"],
)
print(sub2_sahara_africa)
areatype_count = sub2_sahara_africa['areatype'].value_counts(
    sort=False, dropna=True)
print(areatype_count)
rfreq_areatype_count = sub2_sahara_africa['areatype'].value_counts(
    normalize=True, sort=False, dropna=True)
print(rfreq_areatype_count)
# Aggregating data across groups: number of rows and mean value per band.
# The aggregations are named by string ('size', 'mean') because numpy is never
# imported in this script, so numpy.size / numpy.mean raised NameError.
# Each result is printed: a bare expression shows nothing when the file is run
# as a script.
# observed=True keeps only the category (and category/country) combinations
# that actually occur in the data; it needs a pandas release whose groupby
# accepts the `observed` argument.
print(sub2_sahara_africa.groupby('percapitagdp', observed=True)
      .agg({'incomeperperson': ['size', 'mean']}))
print(sub2_sahara_africa.groupby('percapitakWh', observed=True)
      .agg({'relectricperperson': ['size', 'mean']}))
print(sub2_sahara_africa.groupby('areatype', observed=True)
      .agg({'urbanrate': ['size', 'mean']}))

# Aggregating per capita income groups across country
by_gdp = sub2_sahara_africa.groupby(['percapitagdp', 'country'],
                                    observed=True)
print(by_gdp['incomeperperson'].mean())

# Aggregating per capita electric consumption groups across country
by_kWh = sub2_sahara_africa.groupby(['percapitakWh', 'country'],
                                    observed=True)
print(by_kWh['relectricperperson'].mean())

# Generate Summary statistics for Sub Sahara Africa
print(sub_sahara_africa.describe())
import os
# Report the working directory; the CSV is looked up in ./data beneath it.
print(os.getcwd())
DATA_PATH = os.path.join(os.getcwd(), "data")
DATA_FILE = os.path.join(DATA_PATH, "gapminder.csv")
econ_data = pandas.read_csv(DATA_FILE, low_memory=False)

# Render floats through the '%f' format whenever pandas prints them.
pandas.set_option('display.float_format', lambda x: '%f' % x)

# Coerce the variables of interest to numeric; with errors='coerce' any value
# that cannot be parsed becomes NaN instead of raising.
econ_data = econ_data.assign(**{
    column: pandas.to_numeric(econ_data[column], errors='coerce')
    for column in ('incomeperperson', 'relectricperperson', 'urbanrate')
})

print('***** Extrating Gapminder Data for variables of interest*****')

# Keep only the identifying columns plus the three variables under study.
relevant_data = econ_data[[
    'country',
    'continent',
    'incomeperperson',
    'relectricperperson',
    'urbanrate',
]]

# Independent snapshot of the full, converted dataframe.
econ2_data = econ_data.copy()

# Extract the Sub-Saharan Africa rows. This relies on the 'continent' column
# that was added to the gapminder data by hand.
sub_sahara_africa = relevant_data.loc[relevant_data['continent'] == 'Africa']
print(sub_sahara_africa)
print('***** Per Capita Electricity Consumption and relative frequencies*****')

# For each variable: absolute counts first, then proportions of the whole.
# dropna=False counts missing values as well; sort=False skips the
# sort-by-frequency step.

# Urbanisation rate.
urban_rate = sub_sahara_africa['urbanrate'].value_counts(
    sort=False, dropna=False)
print(urban_rate)
rfreq_urban_rate = sub_sahara_africa['urbanrate'].value_counts(
    normalize=True, sort=False, dropna=False)
print(rfreq_urban_rate)

# Residential electricity consumption per person.
per_capita_electric = sub_sahara_africa['relectricperperson'].value_counts(
    sort=False, dropna=False)
print(per_capita_electric)
rfreq_per_capita_electric = (
    sub_sahara_africa['relectricperperson'].value_counts(
        normalize=True, sort=False, dropna=False)
)
print(rfreq_per_capita_electric)

# Income per person.
per_capita_gdp = sub_sahara_africa['incomeperperson'].value_counts(
    sort=False, dropna=False)
print(per_capita_gdp)
rfreq_per_capita_gdp = sub_sahara_africa['incomeperperson'].value_counts(
    normalize=True, sort=False, dropna=False)
print(rfreq_per_capita_gdp)
# Work on an independent copy so the derived category columns are added
# without touching sub_sahara_africa.
sub2_sahara_africa = sub_sahara_africa.copy()

# --- Income: three quantile-based strata -----------------------------------
print('Split income data into 3 strata - LowGDP, MediumGDP, HighGDP')
sub2_sahara_africa['percapitagdp'] = pandas.qcut(
    sub2_sahara_africa['incomeperperson'],
    3,
    labels=["LowGDP", "MediumGDP", "HighGDP"],
)
print(sub2_sahara_africa)
# Frequency and relative frequency of the income strata (missing excluded).
per_capita_gdp_bin = sub2_sahara_africa['percapitagdp'].value_counts(
    sort=False, dropna=True)
print(per_capita_gdp_bin)
rfreq_per_capita_gdp_bin = sub2_sahara_africa['percapitagdp'].value_counts(
    normalize=True, sort=False, dropna=True)
print(rfreq_per_capita_gdp_bin)

# --- Electricity use: three quantile-based categories ----------------------
print('Split relectricperperson data into 3 categories '
      '- LowkWh, MediumkWh, HighkWh')
sub2_sahara_africa['percapitakWh'] = pandas.qcut(
    sub2_sahara_africa['relectricperperson'],
    3,
    labels=["LowkWh", "MediumkWh", "HighkWh"],
)
print(sub2_sahara_africa)
per_kWh_use = sub2_sahara_africa['percapitakWh'].value_counts(
    sort=False, dropna=True)
print(per_kWh_use)
rfreq_per_kWh_use = sub2_sahara_africa['percapitakWh'].value_counts(
    normalize=True, sort=False, dropna=True)
print(rfreq_per_kWh_use)

# --- Urbanisation: three quantile-based categories -------------------------
print('Split urbanrate data into 3 categories - rural, town, urban')
sub2_sahara_africa['areatype'] = pandas.qcut(
    sub2_sahara_africa['urbanrate'],
    3,
    labels=["rural", "town", "urban"],
)
print(sub2_sahara_africa)
areatype_count = sub2_sahara_africa['areatype'].value_counts(
    sort=False, dropna=True)
print(areatype_count)
rfreq_areatype_count = sub2_sahara_africa['areatype'].value_counts(
    normalize=True, sort=False, dropna=True)
print(rfreq_areatype_count)
# Aggregating data across groups: number of rows and mean value per band.
# The aggregations are named by string ('size', 'mean') because numpy is never
# imported in this script, so numpy.size / numpy.mean raised NameError.
# Each result is printed: a bare expression shows nothing when the file is run
# as a script.
# observed=True keeps only the category (and category/country) combinations
# that actually occur in the data; it needs a pandas release whose groupby
# accepts the `observed` argument.
print(sub2_sahara_africa.groupby('percapitagdp', observed=True)
      .agg({'incomeperperson': ['size', 'mean']}))
print(sub2_sahara_africa.groupby('percapitakWh', observed=True)
      .agg({'relectricperperson': ['size', 'mean']}))
print(sub2_sahara_africa.groupby('areatype', observed=True)
      .agg({'urbanrate': ['size', 'mean']}))

# Aggregating per capita income groups across country
by_gdp = sub2_sahara_africa.groupby(['percapitagdp', 'country'],
                                    observed=True)
print(by_gdp['incomeperperson'].mean())

# Aggregating per capita electric consumption groups across country
by_kWh = sub2_sahara_africa.groupby(['percapitakWh', 'country'],
                                    observed=True)
print(by_kWh['relectricperperson'].mean())

# Aggregating urbanrate groups across country.
# by_areatype is built before it is used, and the grouping and the mean are
# separate statements (they were fused onto a single line, a syntax error).
by_areatype = sub2_sahara_africa.groupby(['areatype', 'country'],
                                         observed=True)
print(by_areatype['urbanrate'].mean())
Dataframe Output
***** Extrating Gapminder Data for variables of interest*****
country incomeperperson relectricperperson urbanrate
4 Angola 1381.004268 172.999227 56.70
19 Benin 377.039699 38.222943 41.20
24 Botswana 4189.436587 454.795705 59.58
28 Burkina Faso 276.200413 NaN 19.56
31 Cameroon 713.639303 59.551245 56.76
33 Cape Verde 1959.844472 NaN 59.62
35 Central African Rep. 239.518749 NaN 38.58
36 Chad 275.884287 NaN 26.68
42 Congo, Rep. 1253.292015 56.372450 61.34
45 Cote d'Ivoire 591.067944 70.387444 48.78
51 Djibouti 895.318340 NaN 87.30
57 Equatorial Guinea 8654.536845 NaN 39.38
58 Eritrea 131.796207 20.288131 20.72
60 Ethiopia 220.891248 15.056236 17.00
66 Gabon 4180.765821 537.104738 85.04
67 Gambia 354.599726 NaN 56.42
70 Ghana 358.979540 97.246492 50.02
78 Guinea 411.501447 NaN 34.44
79 Guinea-Bissau 161.317137 NaN 29.84
97 Kenya 468.696044 41.180003 21.60
106 Lesotho 495.734247 NaN 25.46
107 Liberia 155.033231 NaN 60.14
114 Madagascar 242.677534 NaN 29.52
115 Malawi 184.141797 NaN 18.80
118 Mali 269.892881 NaN 32.18
122 Mauritania 609.131206 NaN 41.00
123 Mauritius 5182.143721 NaN 42.48
131 Mozambique 389.763634 31.386838 36.84
133 Namibia 2667.246710 0.000000 36.84
141 Niger 180.083376 NaN 16.54
142 Nigeria 544.599477 74.064241 48.36
160 Rwanda 338.266391 NaN 18.34
166 Sao Tome and Principe NaN NaN 60.56
168 Senegal 561.708585 55.794744 42.38
171 Seychelles 8614.120219 NaN 54.34
172 Sierra Leone 268.331790 NaN 37.76
177 Somalia NaN NaN 36.52
178 South Africa 3745.649852 920.137600 60.74
181 Sudan 523.950151 50.892101 43.44
183 Swaziland 1810.230533 NaN 24.94
189 Tanzania 456.385712 38.634503 25.52
192 Togo 285.224449 66.238522 42.00
199 Uganda 377.421113 NaN 12.98
211 Zambia 432.226337 168.623031 35.42
212 Zimbabwe 320.771890 297.883200 37.34
# Generate summary statistics for Sub-Saharan Africa: count, mean, std, min,
# quartiles and max of each numeric column (the table that follows).
sub_sahara_africa.describe()
Summary Statistics
incomeperperson relectricperperson urbanrate
count 43.000000 21.000000 45.000000
mean 1296.513138 155.564733 40.688889
std 2046.959164 226.257267 17.250389
min 131.796207 0.000000 12.980000
25% 276.042350 38.634503 26.680000
50% 432.226337 59.551245 38.580000
75% 1074.305177 168.623031 54.340000
max 8654.536845 920.137600 87.300000
The hypothesis stated earlier was that per capita electricity consumption (relectricperperson) can be used as a proxy for economic growth (incomeperperson) in Sub-Saharan Africa.
A review of the summary statistics suggests that countries with higher per capita electricity consumption also tend to have higher per capita income and hence better economic outcomes.
On average, 4 in 10 of the population in Sub Sahara Africa live in urban centers.
Per capita electricity consumption across Sub-Saharan Africa averaged 155.56 kWh in 2008, and income per capita averaged $1,296.51 in constant 2000 USD.
Frequency and Relative Frequency Distributions
Income Classification Frequency
LowGDP 15
MediumGDP 14
HighGDP 14
Income Classification Relative Frequency
LowGDP 0.340909
MediumGDP 0.318182
HighGDP 0.318182
Per Capita Electricity Consumption Frequency
LowkWh 8
MediumkWh 7
HighkWh 7
Per Capita Electricity Consumption Relative Frequency
LowkWh 0.181818
MediumkWh 0.159091
HighkWh 0.159091
Locality Classification Frequency
rural 15
town 14
urban 15
Locality Classification Relative Frequency
rural 0.340909
town 0.318182
urban 0.340909
Aggregating Per Capita Income over Income groups
incomeperperson
size mean
percapitagdp
LowGDP 15.000000 221.036056
MediumGDP 14.000000 435.062293
HighGDP 14.000000 2702.379115
Aggregating Per Capita Electric Consumption over Electric Consumption bands
relectricperperson
size mean
percapitakWh
LowkWh 8.000000 26.934737
MediumkWh 7.000000 61.900107
HighkWh 7.000000 378.398571
Aggregating Urbanization rate over urban type
urbanrate
size mean
areatype
rural 15.000000 22.645333
town 14.000000 38.118571
urban 15.000000 58.448000
Aggregating per capita income groups across country
percapitagdp country mean
LowGDP Burkina Faso 276.200413
Central African Rep. 239.518749
Chad 275.884287
Congo, Dem. Rep. 103.775857
Eritrea 131.796207
Ethiopia 220.891248
Guinea-Bissau 161.317137
Liberia 155.033231
Madagascar 242.677534
Malawi 184.141797
Mali 269.892881
Niger 180.083376
Sierra Leone 268.331790
Togo 285.224449
Zimbabwe 320.771890
MediumGDP Benin 377.039699
Gambia 354.599726
Ghana 358.979540
Guinea 411.501447
Kenya 468.696044
Lesotho 495.734247
Mozambique 389.763634
Nigeria 544.599477
Rwanda 338.266391
Senegal 561.708585
Sudan 523.950151
Tanzania 456.385712
Uganda 377.421113
Zambia 432.226337
HighGDP Angola 1381.004268
Botswana 4189.436587
Cameroon 713.639303
Cape Verde 1959.844472
Congo, Rep. 1253.292015
Cote d'Ivoire 591.067944
Djibouti 895.318340
Equatorial Guinea 8654.536845
Gabon 4180.765821
Mauritania 609.131206
Mauritius 5182.143721
Namibia 2667.246710
South Africa 3745.649852
Swaziland 1810.230533
Aggregating per capita electric consumption groups across country
Out[172]:
percapitakWh country mean
LowkWh Benin 38.222943
Congo, Dem. Rep. 30.709244
Eritrea 20.288131
Ethiopia 15.056236
Kenya 41.180003
Mozambique 31.386838
Namibia 0.000000
Tanzania 38.634503
MediumkWh Cameroon 59.551245
Congo, Rep. 56.372450
Cote d'Ivoire 70.387444
Nigeria 74.064241
Senegal 55.794744
Sudan 50.892101
Togo 66.238522
HighkWh Angola 172.999227
Botswana 454.795705
Gabon 537.104738
Ghana 97.246492
South Africa 920.137600
Zambia 168.623030
Zimbabwe 297.883200
Aggregating urbanrate groups across country
areatype country mean
rural Burkina Faso 19.560000
Chad 26.680000
Eritrea 20.720000
Ethiopia 17.000000
Guinea-Bissau 29.840000
Kenya 21.600000
Lesotho 25.460000
Madagascar 29.520000
Malawi 18.800000
Mali 32.180000
Niger 16.540000
Rwanda 18.340000
Swaziland 24.940000
Tanzania 25.520000
Uganda 12.980000
town Benin 41.200000
Central African Rep. 38.580000
Congo, Dem. Rep. 33.960000
Equatorial Guinea 39.380000
Guinea 34.440000
Mauritania 41.000000
Mozambique 36.840000
Namibia 36.840000
Senegal 42.380000
Sierra Leone 37.760000
Somalia 36.520000
Togo 42.000000
Zambia 35.420000
Zimbabwe 37.340000
urban Angola 56.700000
Botswana 59.580000
Cameroon 56.760000
Cape Verde 59.620000
Congo, Rep. 61.340000
Cote d'Ivoire 48.780000
Djibouti 87.300000
Gabon 85.040000
Gambia 56.420000
Ghana 50.020000
Liberia 60.140000
Mauritius 42.480000
Nigeria 48.360000
South Africa 60.740000
Sudan 43.440000
Summary of Analysis
After generating a myriad of summary statistics, one can deduce the following:
- Countries in Sub Sahara Africa with large per capita electricity consumption tend to have higher per capita GDP and are more urbanized.
No comments:
Post a Comment