← Home

Modelling the future resident population

AIM: extrapolate from the past data, the future resident population in Italy by age distribution, till year 2100.

I'm assuming a model like:

$ pop(a=0,y) = birthprop( pop(a=18:50,y-1), fr_{avg}(y-1) ) $

$ pop(a,y) = p(a-1,y-1) \cdot [1 - dp(a, y-1)] + mb(a, y-1) $

$ pop(a=100^{+}, y) = p(a=99,y-1) \cdot [1 - dp(99, y-1)] + p(a=100,y-1) \cdot dp_{100^+}(a, y-1) $

Where:

  • $pop(a, y)$ is the resident population of age $a$ in year $y$
  • $birthprop$ is a function returning the number of births given a population in the fertile age range (18-50 yo) and an average fertility rate $fr_{avg}$
  • $dp(a, y)$ is the death probability for age $a$ in year $y$
  • $mb(a, y)$ is the Net Migration Balance, positive for more people immigrating than emigrating
  • $dp_{100^+}(a, y)$ is an effective death probability for people older than 100 years old combined

Moreover:

  • I will make 3 scenarios for the fertility rate (growing, constant, decreasing)
  • I will make 3 scenarios for the Net Migration Balance: no more migration from next year, constant migration, 2x migration from next year. The reference migration balance is the average of the last 20 years.
  • I will combine these 3x3=9 scenarios and see the results
In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as pc
import plotly.io as pio
from pandas_datareader import wb # https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#world-bank Italian data are probably copied from ISTAT anyway
from copy import deepcopy
import warnings

pio.renderers.default = 'vscode+notebook'
pd.options.plotting.backend = "plotly"
warnings.filterwarnings("ignore", category=FutureWarning)

def get_colors(n, cmap_name="rainbow"):
    """Get colors for px colors_discrete argument, given the number of colors needed, n."""
    cmap = matplotlib.colormaps[cmap_name]
    colors = [cmap(i) for i in np.linspace(0, 1, n)]  # Generate colors
    colors_str = [f"rgba({int(color[0]*250)}, {int(color[1]*250)}, {int(color[2]*250)}, 1.0)" for color in colors]
    return colors_str

# convert default plotly colors to matplotlib
plotly_colors = pc.qualitative.Plotly
matplotlib_colors = []
for color in plotly_colors:
    r, g, b = pc.hex_to_rgb(color)
    matplotlib_colors.append((r/255, g/255, b/255))
In [2]:
# Recover data from previous notebooks
dfp=pd.read_csv("../data/pop_by_age_year.csv", index_col=0).rename(columns=int)
dfdpj=pd.read_csv("../data/deathprob_by_age_year_proj.csv", index_col=0).rename(columns=int)
dfmb = pd.read_csv("../data/average_migration_balance.csv", index_col=0).squeeze()
In [3]:
# Reference years I will be using for the analysis
start_year = dfp.columns[0] # 2002
last_year = dfp.columns[-1] # 2023 (or later if updated)
end_year = 2100

print(f"First-last observation: Jan/{start_year} - Jan/{last_year}")
print(f"First-last prediction:  Jan/{last_year+1} - Jan/{end_year}")
First-last observation: Jan/2002 - Jan/2025
First-last prediction:  Jan/2026 - Jan/2100
In [4]:
# Create coefficients for the migration balance
migration_coeff = {}
for year in range(last_year+1, end_year+1):
    migration_coeff[year] = {
        "double ": 2,
        "average": 1,
        "zero   ": 0,
        #f"↑ 5x in {end_year}": 1 + (5-1) / (end_year - last_year) * (year - last_year), # old scenario
    }
In [5]:
# Values from previous analysis on fertility (see Notebook #40)
fdata = json.load(open("../data/fertility_fitted.json"))
print("Fertility coefficients:")
display(fdata)

def get_newborns_next_year(dfp, year, fertility_rate, fdata):
    """Fom population and fertility rate for a given year,
    get the number of newborns for the NEXT year (fist January of year+1).
    """
    def constrained_sigmoid(x0, k):
        """Get sigmoid function, which is a cumsum of a distribution."""
        x = np.arange(17, 50+1)
        y = 1 / (1 + np.exp(-k * (x - x0)))
        y = y - y[0]
        y = y / y[-1]
        return y
    pop = dfp[year]
    mothers_potential = pop.loc[18:50].to_numpy() * fdata["assumed_women_ratio"]
    x0 = np.polyval(fdata["sigmoid_x0"], year)
    distrib = np.diff(constrained_sigmoid(x0, fdata["sigmoid_k"]))
    newborns = sum(mothers_potential * distrib) * fertility_rate
    return int(newborns)

fertility_rate = {}
m_coeff_scenario_1 = (2 - fdata["last_observed_value"]) / (end_year - fdata["last_observed_year"])
m_coeff_scenario_3 = (0 - fdata["last_observed_value"]) / (end_year - fdata["last_observed_year"])
for year in range(fdata["last_observed_year"], end_year+1):
    fertility_rate[year] = {
        f"↑ 2 in {end_year}": fdata["last_observed_value"] + m_coeff_scenario_1 * (year - fdata["last_observed_year"]), # going to 2
        "eq. to last": fdata["last_observed_value"], # constant
        f"↓ 0 in {end_year}": fdata["last_observed_value"] + m_coeff_scenario_3 * (year - fdata["last_observed_year"]), # going to 0
    }
Fertility coefficients:
{'last_observed_year': 2024,
 'last_observed_value': 1.1526289356194974,
 'sigmoid_x0': [0.09458307745702403, -159.90026614075148],
 'sigmoid_k': -0.296,
 'assumed_women_ratio': 0.5}

Projection of the resident population till 2100

In [6]:
dfpj_long_all = None
scenarios = [] # trying to sort them by descending end_year population
for migration_scenario in migration_coeff[end_year]:
    for fertility_scenario in fertility_rate[end_year]:
        scenario = f"migr {migration_scenario} & fert {fertility_scenario}"
        scenarios.append(scenario)
        print("Simulating:", scenario)
        dfpj = dfp.copy()
        for year in range(last_year+1, end_year+1):
            dfpj.loc[0, year] = get_newborns_next_year(dfpj, year-1, fertility_rate[year-1][fertility_scenario], fdata)
            dfpj.loc[1:, year] = dfpj.loc[0:99, year-1].values * ( 1 - dfdpj.loc[0:99, year-1]).values
            dfpj.loc[100, year] += dfpj.loc[100, year-1] * (1-0.4)
            
            # add migration balance
            dfpj.loc[1:80, year] += dfmb.values * migration_coeff[year][migration_scenario]

        dfpj_long = dfpj.reset_index().melt(id_vars="age").rename(columns={"variable": "year", "value": "population"})
        if dfpj_long_all is None:
            dfpj_long_all = deepcopy(dfpj_long).rename(columns={"population": scenario})
        else:
            dfpj_long_all[scenario] = dfpj_long["population"]
            
dfpj_long_all = dfpj_long_all.astype(int) # Integers are ok for years, ages, and population (no need for decimal precision!)
dfpj_long_all.to_csv("../data/pop_by_age_year_proj.csv", index=False)
display(dfpj_long_all)
Simulating: migr double  & fert ↑ 2 in 2100
Simulating: migr double  & fert eq. to last
Simulating: migr double  & fert ↓ 0 in 2100
Simulating: migr average & fert ↑ 2 in 2100
Simulating: migr average & fert eq. to last
Simulating: migr average & fert ↓ 0 in 2100
Simulating: migr zero    & fert ↑ 2 in 2100
Simulating: migr zero    & fert eq. to last
Simulating: migr zero    & fert ↓ 0 in 2100
age year migr double & fert ↑ 2 in 2100 migr double & fert eq. to last migr double & fert ↓ 0 in 2100 migr average & fert ↑ 2 in 2100 migr average & fert eq. to last migr average & fert ↓ 0 in 2100 migr zero & fert ↑ 2 in 2100 migr zero & fert eq. to last migr zero & fert ↓ 0 in 2100
0 0 2002 523007 523007 523007 523007 523007 523007 523007 523007 523007
1 1 2002 529233 529233 529233 529233 529233 529233 529233 529233 529233
2 2 2002 528131 528131 528131 528131 528131 528131 528131 528131 528131
3 3 2002 518790 518790 518790 518790 518790 518790 518790 518790 518790
4 4 2002 515957 515957 515957 515957 515957 515957 515957 515957 515957
... ... ... ... ... ... ... ... ... ... ... ...
9994 96 2100 173908 173908 173908 140465 140465 140465 107022 107022 107022
9995 97 2100 136345 136345 136345 110614 110614 110614 84884 84884 84884
9996 98 2100 105724 105724 105724 86434 86434 86434 67145 67145 67145
9997 99 2100 80799 80799 80799 66714 66714 66714 52629 52629 52629
9998 100 2100 141896 141896 141896 119120 119120 119120 96344 96344 96344

9999 rows × 11 columns

In [7]:
fig = px.line(
    dfpj_long_all, 
    x="age", 
    y=dfpj_long_all.columns, 
    animation_frame="year",
    markers=False,
)
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Population by age group",
    yaxis_range=[0, 1e6],
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=10, b=10),
    width=780,
    height=420,
)
fig.add_vrect(x0=17, x1=50, fillcolor="LightSalmon", opacity=0.5, layer="below", line_width=0)
fig["layout"].pop("updatemenus") # optional, drop animation buttons
fig.write_html("../images_output/pop_by_age_proj.html", auto_play=False)
fig.show()
In [8]:
fig = dfpj_long_all.groupby("year").sum().drop(columns="age").plot()
fig.add_vrect(x0=start_year, x1=last_year, fillcolor="Green", opacity=0.1, layer="below", line_width=0)
fig.update_layout(
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=20, b=10),
    xaxis_title=None,
    yaxis_title="Total Population",
    width=780,
    height=320,
)
fig
In [9]:
# Get historical data back to 1960 from the World Bank
df_wb = wb.download(indicator='SP.POP.TOTL', country=['IT'], start=1900, end=2100) # https://data.worldbank.org/indicator/SP.POP.TOTL
ss_wb = (
    df_wb
    .reset_index()
    .astype({"year": int})
    .set_index("year")
    .sort_index()
    .rename(columns={"SP.POP.TOTL": "population"})
    ["population"]
)
ss_wb
Out[9]:
year
1960    50199700
1961    50536350
1962    50879450
1963    51252000
1964    51675350
          ...   
2019    59729081
2020    59438851
2021    59133173
2022    59013667
2023    58993475
Name: population, Length: 64, dtype: int64
In [10]:
# Get forecast from ISTAT
from istatapi import discovery, retrieval
ds = discovery.DataSet(dataflow_identifier="DCIS_PREVDEM1") 
ds.set_filters(
    freq="A",
    itter107="IT", 
    eta="TOTAL",
    sesso="9",
    tipo_inddem="JAN"
)
df3 = retrieval.get_data(ds)
#df3.loc[:, lambda dfx: (~dfx.isna()).any(axis=0)]
df3p = df3.pivot(index="TIME_PERIOD", columns="INT_PREV", values="OBS_VALUE")
display(df3p)
INT_PREV PROJLOW50 PROJLOW80 PROJLOW90 PROJMED PROJUPP50 PROJUPP80 PROJUPP90
TIME_PERIOD
2023-01-01 58997201 58997201 58997201 58997201 58997201 58997201 58997201
2024-01-01 58983512 58977497 58973977 58990232 58996772 59002850 59007149
2025-01-01 58946379 58931724 58923319 58962775 58978871 58993691 59004207
2026-01-01 58886837 58860903 58845703 58915808 58944062 58969761 58988129
2027-01-01 58806069 58766768 58743339 58850343 58893107 58932357 58959933
2028-01-01 58705684 58651125 58618817 58768307 58828187 58882531 58921003
2029-01-01 58588417 58515600 58474010 58670689 58749507 58821938 58871983
2030-01-01 58454553 58362601 58308529 58558680 58658369 58750726 58813585
2031-01-01 58307452 58193981 58129821 58435207 58558469 58672430 58748292
2032-01-01 58148921 58010693 57936744 58302404 58450225 58586687 58679008
2033-01-01 57978880 57817162 57727406 58160911 58335791 58495555 58604716
2034-01-01 57800818 57613738 57509426 58013112 58215358 58400612 58530321
2035-01-01 57614925 57400841 57284621 57860265 58093051 58304906 58451217
2036-01-01 57425078 57178872 57049375 57701835 57967796 58207842 58374609
2037-01-01 57225569 56952292 56800989 57538066 57840400 58112534 58296746
2038-01-01 57017662 56714143 56542854 57368952 57705044 58011123 58214694
2039-01-01 56807821 56471694 56277305 57195738 57568269 57912897 58135966
2040-01-01 56593591 56224248 56004931 57018231 57429808 57809023 58050754
2041-01-01 56369748 55966454 55723247 56835500 57282161 57702400 57960566
2042-01-01 56141627 55698618 55435786 56646675 57134471 57593672 57868921
2043-01-01 55900750 55423307 55129465 56450970 56981366 57484456 57782402
2044-01-01 55651378 55135709 54817797 56247828 56827709 57357410 57694835
2045-01-01 55395387 54846049 54493249 56036639 56661780 57237638 57592548
2046-01-01 55124382 54540201 54150347 55815826 56486037 57101229 57486198
2047-01-01 54839845 54223245 53798311 55584227 56300907 56961581 57373009
2048-01-01 54553563 53893379 53446312 55341064 56106957 56814411 57247261
2049-01-01 54253228 53544578 53062606 55085954 55899593 56644658 57125818
2050-01-01 53939818 53186053 52673183 54818130 55674853 56469679 56969733
2051-01-01 53608464 52809028 52255320 54538014 55444045 56283060 56813929
2052-01-01 53268506 52403183 51829989 54244466 55197882 56079912 56661896
2053-01-01 52912109 51998553 51394912 53939027 54941519 55878583 56493070
2054-01-01 52542625 51564758 50954288 53622569 54673983 55656952 56311536
2055-01-01 52158619 51137552 50487392 53295352 54389123 55446822 56129862
2056-01-01 51777871 50694207 50009034 52959738 54109566 55216056 55936691
2057-01-01 51375686 50258853 49519000 52616244 53818887 55004243 55734809
2058-01-01 50972069 49801096 49053716 52267909 53527940 54763623 55523656
2059-01-01 50563740 49349177 48583148 51916153 53227695 54511058 55328265
2060-01-01 50142724 48889167 48098201 51562962 52938634 54267442 55107283
2061-01-01 49733708 48433726 47585149 51210282 52646427 54026041 54929071
2062-01-01 49330186 47973761 47084137 50860009 52359158 53807776 54758964
2063-01-01 48930377 47511510 46603445 50514374 52057776 53572019 54529384
2064-01-01 48530710 47049523 46105949 50175406 51791621 53347827 54328390
2065-01-01 48152662 46613075 45632931 49844820 51517093 53151040 54177310
2066-01-01 47760617 46155824 45147336 49523642 51263086 52968706 54041784
2067-01-01 47379910 45711508 44686104 49212372 51041340 52803681 53902920
2068-01-01 47003326 45293774 44208657 48911498 50821986 52629723 53776421
2069-01-01 46640700 44872119 43776714 48621354 50614043 52483857 53653866
2070-01-01 46317302 44460075 43304177 48343247 50412553 52335210 53562022
2071-01-01 45958344 44060323 42884404 48077095 50208873 52218903 53492079
2072-01-01 45641398 43677497 42452000 47820661 50043549 52101152 53405677
2073-01-01 45311008 43296127 42055237 47575732 49868254 52009894 53354956
2074-01-01 44997516 42938735 41663721 47340770 49724037 51912953 53290909
2075-01-01 44712139 42605647 41232325 47114061 49560437 51868282 53258122
2076-01-01 44388022 42245801 40829117 46895327 49418448 51814235 53180786
2077-01-01 44086743 41902266 40436190 46682619 49295219 51770327 53166018
2078-01-01 43806333 41542835 40071210 46473761 49156320 51716143 53132165
2079-01-01 43499333 41155842 39648012 46269940 49017866 51653312 53151332
2080-01-01 43218585 40782984 39276886 46067470 48905418 51607440 53082687
In [11]:
fig1 = deepcopy(fig)
fig1.add_trace(
    go.Scatter(
        x=ss_wb.index,
        y=ss_wb.values,
        mode="lines",
        name="(historical data from World Bank)",
        line=dict(color="gray", width=2, dash="dot")
    )
)
fig1.add_trace(go.Scatter(
    x=df3p.index.year, 
    y=df3p["PROJMED"], 
    mode="lines", 
    name="(ISTAT Projection ±80 percentile)",
    line=dict(color="rgba(0,0,0,0.5)")
)).add_trace(go.Scatter(
    x=df3p.index.year, 
    y=df3p["PROJLOW80"], 
    mode="lines",
    showlegend=False,
    fill=None,
    line=dict(color="rgba(128,128,128,0.2)")
)).add_trace(go.Scatter(
    x=df3p.index.year, 
    y=df3p["PROJUPP80"], 
    mode="lines",
    showlegend=False,
    fill="tonexty",
    line=dict(color="rgba(128,128,128,0.2)"),
    fillcolor="rgba(128,128,128,0.2)"
))

fig1.write_html("../images_output/pop_tot_proj.html")
fig1

Conclusions

  • TODO

Follow-up

  • TODO
← Home