Sustainable retirement age¶

AIM: assuming that the current situation of working / retired people is sustainable, and keeping that as reference, what is the retirement age for the whole century, considering the forecast model of the population I have built in my previous Notebook?

import io
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio


# Notebook-wide plotting setup: let DataFrame.plot() produce plotly figures and
# pick the plotly renderer used when figures are shown.
pd.options.plotting.backend = "plotly"
pio.renderers.default = 'vscode+notebook'

# Historical population table (from Notebook #14). The first CSV column becomes
# the index; the column labels are converted with int() so that years can be
# used as integers further down.
population_csv = "../data/pop_by_age_year.csv"
dfp = pd.read_csv(population_csv, index_col=0)
dfp = dfp.rename(columns=int)

# Effective retirement age by year, for men and women (one row per year, 1974-2023).
# Digitized from source: https://www.linkedin.com/posts/federica-nerini-71222a2b0_a-che-et%C3%A0-si-va-in-pensione-in-italia-activity-7459644370825228288-lWXf
_retirement_age_csv = """\
year,male,female
1974,62.1,59.7
1975,62.0,60.0
1976,62.0,60.4
1977,61.2,60.8
1978,60.9,60.7
1979,60.7,60.5
1980,60.8,60.4
1981,60.7,60.2
1982,60.9,61.0
1983,61.2,60.5
1984,61.1,60.2
1985,61.2,60.3
1986,60.9,60.0
1987,61.0,60.3
1988,60.5,60.1
1989,60.6,58.7
1990,60.7,58.2
1991,60.5,58.0
1992,59.5,57.5
1993,58.8,59.0
1994,58.5,58.8
1995,58.2,58.7
1996,58.2,58.5
1997,58.4,58.1
1998,58.3,57.9
1999,58.8,57.3
2000,59.2,57.2
2001,59.0,58.2
2002,59.5,58.9
2003,59.9,58.0
2004,60.0,58.1
2005,59.9,57.9
2006,60.2,57.8
2007,60.4,57.8
2008,60.5,57.8
2009,60.0,57.9
2010,59.7,58.6
2011,60.1,58.9
2012,60.5,60.1
2013,60.5,60.2
2014,60.8,59.8
2015,60.9,60.2
2016,61.2,60.7
2017,61.5,60.7
2018,62.3,61.0
2019,62.8,61.9
2020,62.6,61.5
2021,62.3,61.5
2022,62.7,61.8
2023,63.1,62.2
"""

# Parse the embedded CSV text into a DataFrame with columns year, male, female.
retirement_age = pd.read_csv(io.StringIO(_retirement_age_csv))

# Combined indicator: row-wise mean of the male and female retirement ages.
sex_columns = ["male", "female"]
retirement_age["average"] = retirement_age[sex_columns].mean(axis=1)

# Plot the three series against the year, then set title/width and display.
retirement_fig = retirement_age.plot(x="year", y=[*sex_columns, "average"])
retirement_fig.update_layout(
    title="Effective retirement age in Italy (source: OCSE)",
    width=800,
)
retirement_fig.show()

Visualize the past distribution of working / retired people, to spot some ratio to keep as reference for the future.

# Historical ratio between people of working age and people of retirement age,
# one value per year of the population table `dfp` (ages on the index, years
# on the columns).
#
# Assumptions:
#   * the age at which people start working rises linearly from 18 in the
#     first year of the dataset to 22 in the last one;
#   * the retirement age follows the digitized `retirement_age` series; years
#     not covered by it fall back to 61 (before 1980) or 64 (otherwise).
first_year = dfp.columns[0]
last_year = dfp.columns[-1]
print("First and last year in the dataset:", first_year, last_year)

past_retirement_lookup = retirement_age.set_index("year")["average"].to_dict()

working_age = {
    y: 18 + 4 * (y - first_year) / (last_year - first_year)
    for y in dfp.columns
}
retirem_age = {
    y: past_retirement_lookup.get(y, 61 if y < 1980 else 64)
    for y in dfp.columns
}

ratio_column = "working-age / retirement-age ratio"
# Age labels of the population table; the comparisons below need them to be
# numeric (the previous float label slicing relied on the same property).
hist_ages = dfp.index

wr_df = pd.DataFrame(index=dfp.columns)
wr_df[ratio_column] = np.nan
for y in wr_df.index:
    wa = working_age[y]
    ra = retirem_age[y]
    population = dfp[y]
    # Half-open split: working = [wa, ra), retired = [ra, +inf).
    # Label slices (`.loc[wa:ra]` and `.loc[ra:]`) include both endpoints, so
    # whenever `ra` is exactly an age label (e.g. the 61/64 fallbacks) that
    # cohort was counted in BOTH groups. Explicit masks put it in the retired
    # group only, for every year alike.
    working = population[(hist_ages >= wa) & (hist_ages < ra)].sum()
    retired = population[hist_ages >= ra].sum()
    wr_df.loc[y, ratio_column] = working / retired

wr_df.plot().update_layout(
    xaxis_title="Year",
    yaxis_title="Working-age / Retirement-age ratio",
    showlegend=False,
    height=500,
    width=800,
).show()

First and last year in the dataset: 1952 2026

# Reference ratio for the projections: average of the historical ratio over
# the recent years only (from `starting_year_mean` on), where it looks stable.
# The mean is rounded to two decimals.
starting_year_mean = 2000
ratio_series = wr_df["working-age / retirement-age ratio"]
recent_ratios = ratio_series[ratio_series.index >= starting_year_mean]
wr_ratio = recent_ratios.mean().round(2)
print(f"Mean working-age/retirement ratio since {starting_year_mean}:", wr_ratio)

Mean working-age/retirement ratio since 2000: 2.06

# Projected population in long format: the first CSV column is used as index;
# after the first remaining column, every further column is one scenario.
dfs = pd.read_csv("../data/pop_by_age_year_proj.csv", index_col=0)

# Reshape each scenario into the usual age-x-year table (ages on the index,
# years on the columns), keyed by scenario name.
long_projection = dfs.reset_index()
dfp_projs = {}
for scenario in dfs.columns[1:]:
    year_by_age = long_projection[["year", "AGE", scenario]].pivot(
        index="year", columns="AGE", values=scenario
    )
    dfp_projs[scenario] = year_by_age.T

Note that I'm not considering unemployment: I assume it is constant, so what matters is the ratio between working-age and retirement-age people, not whether working-age people are actually employed.

I will now compute the projected retirement age that is necessary to maintain the average working/retirement ratio computed above (2.06).

# Re-express the historical series by birth cohort: the birth year of people
# retiring in a given year is that year minus the average retirement age,
# truncated to an integer. Rows are then ordered by that birth year.
born_year_retiring = (retirement_age["year"] - retirement_age["average"]).astype(int)
past_retirement_cohort = retirement_age.assign(Born_year_retiring=born_year_retiring)
past_retirement_cohort = past_retirement_cohort.sort_values("Born_year_retiring")
past_retirement_cohort

# For every projection scenario and every year 2002-2100, search the retirement
# age needed to keep the working-age / retirement-age ratio at the reference
# `wr_ratio`, then plot it by year of birth next to the historical effective
# retirement age.

fig = go.Figure()
wa = 22  # Assumption: people will start working at 22 on average
for scenario_label, dfpproj in dfp_projs.items():
    # Age labels of the projection table; the comparisons below need them to be
    # numeric (the previous integer label slicing relied on the same property).
    proj_ages = dfpproj.index
    raproj_df = pd.DataFrame(index=range(2002, 2100 + 1))
    raproj_df["Sustainable retirement age"] = np.nan
    for y in raproj_df.index:
        population = dfpproj[y]
        # Test increasing retirement ages (ra) until the ratio exceeds wr_ratio.
        for ra in range(50, 100):
            # Half-open split: working = [wa, ra), retired = [ra, +inf).
            # `.loc[wa:ra]` and `.loc[ra:]` both include label `ra`, which
            # counted that cohort as working AND retired and inflated the ratio.
            working = population[(proj_ages >= wa) & (proj_ages < ra)].sum()
            retired = population[proj_ages >= ra].sum()
            if working / retired > wr_ratio:
                # `ra` is the first tested age above the target; the value
                # recorded is the age just below it (49 if the first candidate
                # already exceeds the target).
                raproj_df.loc[y, "Sustainable retirement age"] = ra - 1
                break

    raproj_df["Born year retiring"] = raproj_df.index - raproj_df["Sustainable retirement age"]
    raproj_df = (
        raproj_df
        # Years where no tested age reaches the target stay NaN; drop them so
        # the integer cast below cannot fail.
        .dropna(subset=["Sustainable retirement age"])
        .reset_index()
        .rename(columns={"index": "Year of Retirement"})
        .astype(int)
    )
    fig.add_trace(
        go.Scatter(
            x=raproj_df["Born year retiring"],
            y=raproj_df["Sustainable retirement age"],
            mode='lines',
            text=[
                f'With scenario `{scenario_label}` <br>someone born in {born} will retire in {year} at {age}yo'
                for born, year, age in zip(
                    raproj_df["Born year retiring"],
                    raproj_df["Year of Retirement"],
                    raproj_df["Sustainable retirement age"],
                )
            ],
            hoverinfo='text',
            name=scenario_label,
        )
    )

# Historical effective retirement age, by birth cohort, as a black reference line.
fig.add_trace(
    go.Scatter(
        x=past_retirement_cohort["Born_year_retiring"],
        y=past_retirement_cohort["average"],
        mode='lines',
        text=[
            f'Past data <br>someone born in {born} retired on average in {year} at {age:.0f}yo'
            for born, year, age in zip(
                past_retirement_cohort["Born_year_retiring"],
                past_retirement_cohort["year"],
                past_retirement_cohort["average"],
            )
        ],
        hoverinfo='text',
        name="Past effective retirement age",
        line=dict(color='black', width=2),
    )
)
fig.update_layout(
    xaxis_title="Year of Birth",
    yaxis_title="Retirement Age",
    legend_title="Scenarios",
    showlegend=True,
    margin=dict(l=0, r=15, t=20, b=0),
    width=780,
    height=320,
)
print(f"Sustainable retirement age to maintain the same ratio of working/retired people of {wr_ratio:.2f}:")
fig.write_html("../images_output/sust_retirement_age.html")
fig.show()

Sustainable retirement age to maintain the same ratio of working/retired people of 2.06:

Conclusions¶

I made some assumptions on the age at which people started to work and retired in the past
I took the average wr_ratio of working-age/retirement-age people since 2000, which seems stabilized, as reference for the future
Based on my projection of the future Italian population, being born in 1990, I expect to retire at 68-73 years old (2058-2064) depending on the scenario
Whoever was born after 1980 cannot expect to retire before 68 years old, with a range of up to 5 more years depending on the scenario

NOTE: all previous assumptions are based solely on demographics, not considering wage evolution. A severe blind spot of this model is that the lower salaries of the newer generations may need a higher wr_ratio to be sustainable, which would lead to an even higher retirement age.

Follow-up¶

Consider unemployment: this is a self-balancing secondary effect, as fewer working-age people might be offset by lower unemployment
Run a sensitivity test on the "reference" ratio of working-age/retirement-age people as it is a very handwavy estimate
Get more data on effective historical start age (historical retirement age added on 2026-06-01)
Run a sensitivity test on the age at which people will start to work
Compute the life expectancy after retirement: is it fair w.r.t. those who retired in the past years?

	year	male	female	average	Born_year_retiring
0	1974	62.1	59.7	60.90	1913
1	1975	62.0	60.0	61.00	1914
2	1976	62.0	60.4	61.20	1914
3	1977	61.2	60.8	61.00	1916
4	1978	60.9	60.7	60.80	1917
5	1979	60.7	60.5	60.60	1918
6	1980	60.8	60.4	60.60	1919
7	1981	60.7	60.2	60.45	1920
8	1982	60.9	61.0	60.95	1921
9	1983	61.2	60.5	60.85	1922
10	1984	61.1	60.2	60.65	1923
11	1985	61.2	60.3	60.75	1924
12	1986	60.9	60.0	60.45	1925
13	1987	61.0	60.3	60.65	1926
14	1988	60.5	60.1	60.30	1927
15	1989	60.6	58.7	59.65	1929
16	1990	60.7	58.2	59.45	1930
17	1991	60.5	58.0	59.25	1931
18	1992	59.5	57.5	58.50	1933
19	1993	58.8	59.0	58.90	1934
20	1994	58.5	58.8	58.65	1935
21	1995	58.2	58.7	58.45	1936
22	1996	58.2	58.5	58.35	1937
23	1997	58.4	58.1	58.25	1938
24	1998	58.3	57.9	58.10	1939
25	1999	58.8	57.3	58.05	1940
26	2000	59.2	57.2	58.20	1941
27	2001	59.0	58.2	58.60	1942
28	2002	59.5	58.9	59.20	1942
29	2003	59.9	58.0	58.95	1944
30	2004	60.0	58.1	59.05	1944
31	2005	59.9	57.9	58.90	1946
32	2006	60.2	57.8	59.00	1947
33	2007	60.4	57.8	59.10	1947
34	2008	60.5	57.8	59.15	1948
35	2009	60.0	57.9	58.95	1950
36	2010	59.7	58.6	59.15	1950
37	2011	60.1	58.9	59.50	1951
38	2012	60.5	60.1	60.30	1951
39	2013	60.5	60.2	60.35	1952
40	2014	60.8	59.8	60.30	1953
41	2015	60.9	60.2	60.55	1954
42	2016	61.2	60.7	60.95	1955
43	2017	61.5	60.7	61.10	1955
44	2018	62.3	61.0	61.65	1956
45	2019	62.8	61.9	62.35	1956
46	2020	62.6	61.5	62.05	1957
47	2021	62.3	61.5	61.90	1959
48	2022	62.7	61.8	62.25	1959
49	2023	63.1	62.2	62.65	1960