From 5ba46f32e8cb8ffe056a91f7597cde84ce3f91a0 Mon Sep 17 00:00:00 2001 From: mmcky Date: Tue, 12 Mar 2024 12:34:40 +1100 Subject: [PATCH] review of executable version in jupyter lab --- .../usa-gini-nwealth-tincome-lincome.csv | 40 ++++++------- lectures/inequality.md | 58 ++++++++++++++----- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv b/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv index 85f233ab..4bf8d779 100644 --- a/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv +++ b/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv @@ -1,21 +1,21 @@ year,n_wealth,t_income,l_income -1950,0.8257332034366359,0.44248654139458704,0.5342948198773421 -1953,0.8059487586599332,0.42645440609359414,0.5158978980963693 -1956,0.8121790488050623,0.4442694287339929,0.5349293526208143 -1959,0.7952068741637921,0.4374934807706162,0.5213985948309414 -1962,0.8086945076579385,0.4435843103853643,0.5345127915054336 -1965,0.7904149225687938,0.4376371546666339,0.748786002088776 -1968,0.7982885066993525,0.4208620794438893,0.5242396427381537 -1971,0.7911574835420264,0.4233344246090261,0.5576454812313487 -1977,0.7571418922185211,0.46187678800902404,0.5704448110072055 -1983,0.7494335400643021,0.43934561846446935,0.5662220844385908 -1989,0.7715705301674326,0.5115249581654199,0.6013995687471441 -1992,0.75081266140553,0.47406506720767994,0.5983592657979562 -1995,0.7569492388110272,0.48965523558400526,0.596977951671689 -1998,0.7603291991801175,0.49117441585168564,0.5774462841723361 -2001,0.7816118750507013,0.5239092994681116,0.6042739644967291 -2004,0.7700355469522365,0.4884350383903243,0.5981432201792726 -2007,0.7821413776486991,0.5197156312086196,0.6263452195753233 -2010,0.8250825295193426,0.5195972120145639,0.6453653328291923 -2013,0.8227698931835287,0.5314001749843371,0.6498682917772659 
-2016,0.8342975903562232,0.5541400068900836,0.6706846793375284 +1950,0.825733203436636,0.44248654139458754,0.5342948198773422 +1953,0.8059487586599333,0.42645440609359464,0.5158978980963698 +1956,0.8121790488050616,0.4442694287339925,0.5349293526208134 +1959,0.7952068741637915,0.43749348077061606,0.5213985948309418 +1962,0.8086945076579374,0.4435843103853642,0.5345127915054336 +1965,0.7904149225687952,0.43763715466663355,0.7487860020887757 +1968,0.7982885066993517,0.42086207944388965,0.5242396427381534 +1971,0.7911574835420259,0.42333442460902565,0.5576454812313468 +1977,0.7571418922185198,0.46187678800902515,0.5704448110072063 +1983,0.7494335400643009,0.43934561846446973,0.5662220844385935 +1989,0.7715705301674317,0.5115249581654214,0.6013995687471423 +1992,0.7508126614055307,0.47406506720767516,0.5983592657979556 +1995,0.7569492388110264,0.48965523558400864,0.5969779516716902 +1998,0.7603291991801189,0.49117441585169025,0.5774462841723348 +2001,0.7816118750507017,0.5239092994681133,0.604273964496734 +2004,0.7700355469522374,0.48843503839032487,0.5981432201792718 +2007,0.7821413776486984,0.5197156312086194,0.6263452195753234 +2010,0.8250825295193427,0.5195972120145639,0.6453653328291896 +2013,0.8227698931835266,0.5314001749843371,0.6498682917772642 +2016,0.8342975903562223,0.5541400068900839,0.6706846793375303 diff --git a/lectures/inequality.md b/lectures/inequality.md index 1e66f39a..af15ced3 100644 --- a/lectures/inequality.md +++ b/lectures/inequality.md @@ -77,6 +77,7 @@ import numpy as np import matplotlib.pyplot as plt import random as rd import wbgapi as wb +import plotly.express as px ``` ## The Lorenz curve @@ -596,7 +597,6 @@ mystnb: caption: Gini coefficients (USA) with trend name: gini_usa_trend --- - x = data_usa.dropna().index.values y = data_usa.dropna().values plt.scatter(x,y) @@ -619,7 +619,7 @@ As we have discussed the Gini coefficient can also be computed over different di We can use the data collected above {ref}`survey of consumer finances ` 
to look at the gini coefficient when using income when compared to wealth data. -We can compute the Gini coefficient for net wealth, total income, and labour income over many years. +We can compute the Gini coefficient for net wealth, total income, and labour income over many years. ```{code-cell} ipython3 df_income_wealth.year.describe() @@ -677,7 +677,7 @@ Let's plot the Gini coefficients for net wealth, labor income and total income. Looking at each data series we see an outlier in gini coefficient computed for 1965 for `labour income`. -We will smooth our data and take an average of the data either side of it for the time being. +We will smooth our data and take an average of the data either side of it for the time being. ```{code-cell} ipython3 ginis["l_income"][1965] = (ginis["l_income"][1962] + ginis["l_income"][1968]) / 2 @@ -722,7 +722,7 @@ ax.legend() plt.show() ``` -Now we can compare net wealth and labour income. +Now we can compare net wealth and labour income. ```{code-cell} ipython3 fig, ax = plt.subplots() @@ -758,6 +758,21 @@ Let us compare three western economies: USA, United Kingdom, and Norway data[['USA','GBR', 'NOR']].plot(ylabel='gini coefficient') ``` +We see that Norway has a shorter time series, so let us take a closer look at the underlying data. + +```{code-cell} ipython3 +data[['NOR']].dropna().head(n=5) +``` + +The data for Norway in this dataset goes back to 1979, but there are gaps in the time series and matplotlib is not showing those data points. + +We can use `dataframe.ffill()` to copy and bring forward the last known value in a series to fill in these gaps. + +```{code-cell} ipython3 +data['NOR'] = data['NOR'].ffill() +data[['USA','GBR', 'NOR']].plot(ylabel='gini coefficient') +``` + From this plot we can observe that the USA has a higher gini coefficient (i.e. higher income inequality) when compared to the UK and Norway. 
Norway has the lowest gini coefficient over the three economies from the year 2003, and it is consistently substantially lower than the USA. @@ -781,19 +796,13 @@ plot_data.index.names = ['country', 'year'] plot_data.columns = ['gini'] ``` -Looking at the first 5 rows of data - -```{code-cell} ipython3 -plot_data.head(n=5) -``` - Now we can get the gdp per capita data into a shape that can be merged with `plot_data` ```{code-cell} ipython3 pgdppc = pd.DataFrame(gdppc.unstack()) pgdppc.index.names = ['country', 'year'] pgdppc.columns = ['gdppc'] -plot_data = pdata.merge(pgdppc, left_index=True, right_index=True) +plot_data = plot_data.merge(pgdppc, left_index=True, right_index=True) plot_data.reset_index(inplace=True) ``` @@ -803,11 +812,32 @@ We will transform the year column to remove the 'YR' text and return an integer. plot_data.year = plot_data.year.map(lambda x: int(x.replace('YR',''))) ``` -Now using plotly to build a plot with gdp per capita on the y-axis and the gini coefficient on the x-axis. +Now we use plotly to build a plot with gdp per capita on the y-axis and the gini coefficient on the x-axis. ```{code-cell} ipython3 -import plotly.express as px -fig = px.line(plot_data, x="gini", y="gdppc", color="country", text="year", height=800) +min_year = plot_data.year.min() +max_year = plot_data.year.max() +``` + +```{note} +The time series for all three countries start and stop in different years. We will add a year mask to the data to +improve clarity in the chart, including the different end years associated with each country's time series. 
+``` + +```{code-cell} ipython3 +labels = [1979, 1986, 1991, 1995, 2000, 2020, 2021, 2022] + list(range(min_year,max_year,5)) +plot_data.year = plot_data.year.map(lambda x: x if x in labels else None) +``` + +```{code-cell} ipython3 +fig = px.line(plot_data, + x = "gini", + y = "gdppc", + color = "country", + text = "year", + height = 800, + labels = {"gini" : "Gini coefficient", "gdppc" : "GDP per capita"} + ) fig.update_traces(textposition="bottom right") fig.show() ```