Modelling the future resident population¶

AIM: extrapolate from the past data, the future resident population in Italy by age distribution, till year 2100.

I'm assuming a model like:

$ pop(a=0,y) = birthprop( pop(a=18:50,y-1), fr_{avg}(y-1) ) $

$ pop(a,y) = p(a-1,y-1) \cdot [1 - dp(a, y-1)] + mb(a, y-1) $

$ pop(a=100^{+}, y) = p(a=99,y-1) \cdot [1 - dp(99, y-1)] + p(a=100,y-1) \cdot dp_{100^+}(a, y-1) $

Where:

$pop(a, y)$ is the resident population of age $a$ in year $y$
$birthprop$ is a function returning the number of births given a population in the fertile age range (18-50 yo) and an average fertility rate $fr_{avg}$
$dp(a, y)$ is the death probability for age $a$ in year $y$
$mb(a, y)$ is the Net Migration Balance, positive for more people immigrating than emigrating
$dp_{100^+}(a, y)$ is an effective death probability for people older than 100 years old combined

Moreover:

I will make 3 scenarios for the fertility rate (growing, constant, decreasing)
I will make 3 scenarios for the Net Migration Balance: no more migration from next year, constant migration, 2x migration from next year. The reference migration balance is the average of the last 20 years.
I will combine these 3x3=9 scenarios and see the results

import numpy as np
import pandas as pd
import json
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as pc
import plotly.io as pio
from copy import deepcopy
import sdmx
import warnings 

client = sdmx.Client("ISTAT")

pio.renderers.default = 'vscode+notebook'
pd.options.plotting.backend = "plotly"
warnings.filterwarnings("ignore", category=FutureWarning)

def get_colors(n, cmap_name="rainbow"):
    """Get colors for px colors_discrete argument, given the number of colors needed, n."""
    cmap = matplotlib.colormaps[cmap_name]
    colors = [cmap(i) for i in np.linspace(0, 1, n)]  # Generate colors
    colors_str = [f"rgba({int(color[0]*250)}, {int(color[1]*250)}, {int(color[2]*250)}, 1.0)" for color in colors]
    return colors_str

# convert default plotly colors to matplotlib
plotly_colors = pc.qualitative.Plotly
matplotlib_colors = []
for color in plotly_colors:
    r, g, b = pc.hex_to_rgb(color)
    matplotlib_colors.append((r/255, g/255, b/255))

# Recover data from previous notebooks
dfp=pd.read_csv("../data/pop_by_age_year.csv", index_col=0).rename(columns=int)
dfdpj=pd.read_csv("../data/deathprob_by_age_year_proj.csv", index_col=0).rename(columns=int)
dfmb = pd.read_csv("../data/average_migration_balance.csv", index_col=0).squeeze()

# Reference years I will be using for the analysis
start_year = dfp.columns[0] # 2002
last_year = dfp.columns[-1] # 2023 (or later if updated)
end_year = 2100

print(f"First-last observation: Jan/{start_year} - Jan/{last_year}")
print(f"First-last prediction:  Jan/{last_year+1} - Jan/{end_year}")

First-last observation: Jan/1952 - Jan/2026
First-last prediction:  Jan/2027 - Jan/2100

# Create coefficients for the migration balance
migration_coeff = {}
for year in range(last_year+1, end_year+1):
    migration_coeff[year] = {
        "double ": 2,
        "average": 1,
        "zero   ": 0,
        #f"↑ 5x in {end_year}": 1 + (5-1) / (end_year - last_year) * (year - last_year), # old scenario
    }

# Values from previous analysis on fertility (see Notebook #40)
fdata = json.load(open("../data/fertility_fitted.json"))
print("Fertility coefficients:")
display(fdata)

def get_newborns_next_year(dfp, year, fertility_rate, fdata):
    """Fom population and fertility rate for a given year,
    get the number of newborns for the NEXT year (fist January of year+1).
    """
    def constrained_sigmoid(x0, k):
        """Get sigmoid function, which is a cumsum of a distribution."""
        x = np.arange(17, 50+1)
        y = 1 / (1 + np.exp(-k * (x - x0)))
        y = y - y[0]
        y = y / y[-1]
        return y
    pop = dfp[year]
    mothers_potential = pop.loc[18:50].to_numpy() * fdata["assumed_women_ratio"]
    x0 = np.polyval(fdata["sigmoid_x0"], year)
    distrib = np.diff(constrained_sigmoid(x0, fdata["sigmoid_k"]))
    newborns = sum(mothers_potential * distrib) * fertility_rate
    return int(newborns)

fertility_rate = {}
m_coeff_scenario_1 = (2 - fdata["last_observed_value"]) / (end_year - fdata["last_observed_year"])
m_coeff_scenario_3 = (0 - fdata["last_observed_value"]) / (end_year - fdata["last_observed_year"])
for year in range(fdata["last_observed_year"], end_year+1):
    fertility_rate[year] = {
        f"↑ 2 in {end_year}": fdata["last_observed_value"] + m_coeff_scenario_1 * (year - fdata["last_observed_year"]), # going to 2
        "eq. to last": fdata["last_observed_value"], # constant
        f"↓ 0 in {end_year}": fdata["last_observed_value"] + m_coeff_scenario_3 * (year - fdata["last_observed_year"]), # going to 0
    }

Fertility coefficients:

{'last_observed_year': 2024,
 'last_observed_value': 1.1526289356194974,
 'sigmoid_x0': [0.09490182479041313, -160.47494571102223],
 'sigmoid_k': -0.296,
 'assumed_women_ratio': 0.5}

Projection of the resident population till 2100¶

dfpj_long_all = None
scenarios = [] # trying to sort them by descending end_year population
for migration_scenario in migration_coeff[end_year]:
    for fertility_scenario in fertility_rate[end_year]:
        scenario = f"migr {migration_scenario} & fert {fertility_scenario}"
        scenarios.append(scenario)
        print("Simulating:", scenario)
        dfpj = dfp.copy()
        for year in range(last_year+1, end_year+1):
            dfpj.loc[0, year] = get_newborns_next_year(dfpj, year-1, fertility_rate[year-1][fertility_scenario], fdata)
            dfpj.loc[1:, year] = dfpj.loc[0:99, year-1].values * ( 1 - dfdpj.loc[0:99, year-1]).values
            dfpj.loc[100, year] += dfpj.loc[100, year-1] * (1-0.4)
            
            # add migration balance
            dfpj.loc[1:80, year] += dfmb.values * migration_coeff[year][migration_scenario]

        dfpj_long = dfpj.reset_index().melt(id_vars="AGE").rename(columns={"variable": "year", "value": "population"})
        if dfpj_long_all is None:
            dfpj_long_all = deepcopy(dfpj_long).rename(columns={"population": scenario})
        else:
            dfpj_long_all[scenario] = dfpj_long["population"]
            
dfpj_long_all = dfpj_long_all.astype(int) # Integers are ok for years, ages, and population (no need for decimal precision!)
dfpj_long_all.to_csv("../data/pop_by_age_year_proj.csv", index=False)
display(dfpj_long_all)

Simulating: migr double  & fert ↑ 2 in 2100
Simulating: migr double  & fert eq. to last
Simulating: migr double  & fert ↓ 0 in 2100
Simulating: migr average & fert ↑ 2 in 2100
Simulating: migr average & fert eq. to last
Simulating: migr average & fert ↓ 0 in 2100
Simulating: migr zero    & fert ↑ 2 in 2100
Simulating: migr zero    & fert eq. to last
Simulating: migr zero    & fert ↓ 0 in 2100

fig = px.line(
    dfpj_long_all, 
    x="AGE", 
    y=dfpj_long_all.columns, 
    animation_frame="year",
    markers=False,
)
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Population by age group",
    yaxis_range=[0, 1e6],
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=10, b=10),
    width=780,
    height=420,
)
fig.add_vrect(x0=17, x1=50, fillcolor="LightSalmon", opacity=0.5, layer="below", line_width=0)

# Initialize the animation at last_year
nframe_last_year = last_year - start_year
frame_last_year = fig.frames[nframe_last_year]
for extra in frame_last_year.data[len(fig.data):]:
    fig.add_trace(extra)
for i, tr in enumerate(frame_last_year.data):
    payload = tr.to_plotly_json() if hasattr(tr, "to_plotly_json") else dict(tr)
    fig.data[i].update(**payload)
if "sliders" in fig.layout and fig.layout.sliders:
    fig.layout.sliders[0].active = nframe_last_year

fig["layout"].pop("updatemenus") # optional, drop animation buttons
fig.write_html("../images_output/pop_by_age_proj.html", auto_play=False)
fig.show()

# Get official projections from ISTAT, to compare with our model
df3 = sdmx.to_pandas(
    client.data(
        resource_id="165_889_DF_DCIS_PREVDEM1_1", 
        key={
            "FREQ": "A",
            "REF_AREA": "IT",    
            "AGE": [],
            "DATA_TYPE": "JAN",
            "AGE": "TOTAL",
            "SEX": "9",
        })
    ).reset_index()
df3p = df3.pivot(index="TIME_PERIOD", columns="FORECAST_INTERVAL", values="value").astype(int)
display(df3p)

fig = dfpj_long_all.groupby("year").sum().drop(columns="AGE").plot()
#fig.add_vrect(x0=start_year, x1=last_year, fillcolor="Green", opacity=0.1, layer="below", line_width=0)
fig.update_layout(
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=20, b=10),
    xaxis_title=None,
    yaxis_title="Total Population",
    width=780,
    height=320,
)
fig.add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJMED"], 
    mode="lines", 
    name="(ISTAT Projection ±80 percentile)",
    line=dict(color="rgba(0,0,0,0.7)")
)).add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJLOW80"], 
    mode="lines",
    showlegend=False,
    fill=None,
    line=dict(color="rgba(128,128,128,0.3)")
)).add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJUPP80"], 
    mode="lines",
    showlegend=False,
    fill="tonexty",
    line=dict(color="rgba(128,128,128,0.3)"),
    fillcolor="rgba(128,128,128,0.3)"
))

fig.write_html("../images_output/pop_tot_proj.html")
fig

Conclusions¶

My model total population prediction is in line with the official ISTAT projectons
Depending on the scenario, the 2100 Italian population can go from 17M to 80M

Follow-up¶

Analyse better the composition of the projected population

	AGE	year	migr double & fert ↑ 2 in 2100	migr double & fert eq. to last	migr double & fert ↓ 0 in 2100	migr average & fert ↑ 2 in 2100	migr average & fert eq. to last	migr average & fert ↓ 0 in 2100	migr zero & fert ↑ 2 in 2100	migr zero & fert eq. to last	migr zero & fert ↓ 0 in 2100
0	0	1952	817477	817477	817477	817477	817477	817477	817477	817477	817477
1	1	1952	838469	838469	838469	838469	838469	838469	838469	838469	838469
2	2	1952	851858	851858	851858	851858	851858	851858	851858	851858	851858
3	3	1952	907767	907767	907767	907767	907767	907767	907767	907767	907767
4	4	1952	901277	901277	901277	901277	901277	901277	901277	901277	901277
...	...	...	...	...	...	...	...	...	...	...	...
15044	96	2100	174107	174107	174107	141485	141485	141485	108864	108864	108864
15045	97	2100	136070	136070	136070	111111	111111	111111	86151	86151	86151
15046	98	2100	105341	105341	105341	86703	86703	86703	68065	68065	68065
15047	99	2100	80566	80566	80566	66999	66999	66999	53432	53432	53432
15048	100	2100	141258	141258	141258	119439	119439	119439	97619	97619	97619

FORECAST_INTERVAL	PROJLOW50	PROJLOW80	PROJLOW90	PROJMED	PROJUPP50	PROJUPP80	PROJUPP90
TIME_PERIOD
2024	58971230	58971230	58971230	58971230	58971230	58971230	58971230
2025	58923943	58914768	58909429	58934357	58944652	58953629	58960077
2026	58854860	58833930	58821919	58878791	58902152	58922558	58936786
2027	58765562	58730546	58710458	58805555	58844547	58878580	58902664
2028	58657522	58605357	58576941	58715886	58772892	58822879	58858714
2029	58531618	58461826	58423265	58611066	58688715	58756265	58804177
2030	58389475	58300483	58250587	58492891	58592493	58681329	58740407
2031	58235971	58125234	58062959	58364091	58487641	58599089	58673137
2032	58070047	57936609	57861505	58225538	58374231	58509889	58598970
2033	57895201	57737372	57649206	58079819	58256322	58416402	58523284
2034	57711446	57528258	57424946	57927615	58133651	58317693	58445185
2035	57518708	57309121	57188955	57768649	58006357	58218662	58361792
2036	57319032	57079512	56942533	57602112	57872422	58117941	58274767
2037	57113561	56844469	56689018	57430814	57735868	58011449	58189779
2038	56903024	56595404	56422500	57255668	57596836	57905486	58108169
2039	56685746	56347018	56151673	57076174	57453312	57799140	58018052
2040	56462446	56092608	55871877	56891831	57304174	57688553	57924494
2041	56234503	55826061	55586102	56702463	57151160	57573882	57830366
2042	55998773	55557389	55289635	56508176	56996715	57461529	57741962
2043	55755909	55278994	54984273	56307911	56840217	57342740	57651532
2044	55504835	54989606	54665292	56100711	56678406	57221254	57558691
2045	55239734	54687958	54333959	55885863	56508884	57092852	57458853
2046	54964191	54377495	53991969	55661710	56333518	56958891	57338212
2047	54680093	54059743	53644805	55427009	56147484	56811452	57221533
2048	54396478	53723043	53276010	55180907	55949210	56654241	57091737
2049	54094200	53380022	52900105	54922959	55737310	56492214	56952179
2050	53767151	53016989	52502347	54652334	55514069	56321481	56811063
2051	53439054	52628238	52089212	54369402	55280678	56132228	56651854
2052	53093257	52228295	51659720	54072977	55033114	55934354	56499514
2053	52739298	51812334	51224879	53764559	54777712	55721568	56315772
2054	52362038	51389236	50776400	53444984	54500962	55497615	56138371
2055	51971955	50961045	50288538	53114482	54217486	55286450	55942624
2056	51589002	50516350	49820433	52775392	53930306	55049000	55746468
2057	51187202	50066752	49327479	52428222	53632843	54815527	55562126
2058	50779205	49603619	48851294	52076010	53344951	54577434	55343299
2059	50363329	49149357	48362820	51720190	53034640	54311861	55123964
2060	49945966	48687462	47866607	51362773	52740311	54066999	54896747
2061	49524184	48220113	47368590	51005747	52441564	53817669	54730447
2062	49123838	47757096	46862060	50651078	52151002	53588410	54540109
2063	48712356	47291594	46389879	50301094	51861156	53346830	54300371
2064	48309489	46834289	45891209	49957914	51569154	53131089	54108042
2065	47915256	46387565	45394934	49623325	51292696	52928974	53934816
2066	47535249	45937256	44934512	49298413	51031589	52737701	53803945
2067	47148256	45510195	44460621	48983716	50800367	52569529	53643147
2068	46766731	45065603	43982005	48679753	50585458	52398356	53527996
2069	46406696	44633906	43554234	48386864	50372365	52250315	53408746
2070	46066729	44215674	43081281	48106343	50163868	52098240	53314196
2071	45724650	43833379	42642146	47838096	49966563	51977335	53227873
2072	45394497	43432097	42201225	47579866	49803410	51874865	53139453
2073	45064424	43060638	41807565	47333408	49617715	51756471	53089167
2074	44754783	42693503	41409851	47097166	49452449	51668822	53020899
2075	44454833	42369061	40967996	46869406	49313521	51622385	52983523
2076	44131565	42030478	40582987	46649822	49161739	51556891	52914775
2077	43826365	41668037	40181602	46436446	49016289	51512882	52914773
2078	43539170	41307618	39828913	46227080	48889316	51448857	52875681
2079	43244031	40917708	39426741	46022894	48776044	51401464	52897130
2080	42983784	40535400	39019247	45820186	48654446	51355491	52847412