← Home

Modelling the future resident population

AIM: extrapolate from the past data, the future resident population in Italy by age distribution, till year 2100.

I'm assuming a model like:

$ pop(a=0,y) = birthprop( pop(a=18:50,y-1), fr_{avg}(y-1) ) $

$ pop(a,y) = p(a-1,y-1) \cdot [1 - dp(a, y-1)] + mb(a, y-1) $

$ pop(a=100^{+}, y) = p(a=99,y-1) \cdot [1 - dp(99, y-1)] + p(a=100,y-1) \cdot dp_{100^+}(a, y-1) $

Where:

  • $pop(a, y)$ is the resident population of age $a$ in year $y$
  • $birthprop$ is a function returning the number of births given a population in the fertile age range (18-50 yo) and an average fertility rate $fr_{avg}$
  • $dp(a, y)$ is the death probability for age $a$ in year $y$
  • $mb(a, y)$ is the Net Migration Balance, positive for more people immigrating than emigrating
  • $dp_{100^+}(a, y)$ is an effective death probability for people older than 100 years old combined

Moreover:

  • I will make 3 scenarios for the fertility rate (growing, constant, decreasing)
  • I will make 3 scenarios for the Net Migration Balance: no more migration from next year, constant migration, 2x migration from next year. The reference migration balance is the average of the last 20 years.
  • I will combine these 3x3=9 scenarios and see the results
In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as pc
import plotly.io as pio
from copy import deepcopy
import sdmx
import warnings 

client = sdmx.Client("ISTAT")

pio.renderers.default = 'vscode+notebook'
pd.options.plotting.backend = "plotly"
warnings.filterwarnings("ignore", category=FutureWarning)

def get_colors(n, cmap_name="rainbow"):
    """Get colors for px colors_discrete argument, given the number of colors needed, n."""
    cmap = matplotlib.colormaps[cmap_name]
    colors = [cmap(i) for i in np.linspace(0, 1, n)]  # Generate colors
    colors_str = [f"rgba({int(color[0]*250)}, {int(color[1]*250)}, {int(color[2]*250)}, 1.0)" for color in colors]
    return colors_str

# convert default plotly colors to matplotlib
plotly_colors = pc.qualitative.Plotly
matplotlib_colors = []
for color in plotly_colors:
    r, g, b = pc.hex_to_rgb(color)
    matplotlib_colors.append((r/255, g/255, b/255))
In [2]:
# Recover data from previous notebooks
dfp=pd.read_csv("../data/pop_by_age_year.csv", index_col=0).rename(columns=int)
dfdpj=pd.read_csv("../data/deathprob_by_age_year_proj.csv", index_col=0).rename(columns=int)
dfmb = pd.read_csv("../data/average_migration_balance.csv", index_col=0).squeeze()
In [3]:
# Reference years I will be using for the analysis
start_year = dfp.columns[0] # 2002
last_year = dfp.columns[-1] # 2023 (or later if updated)
end_year = 2100

print(f"First-last observation: Jan/{start_year} - Jan/{last_year}")
print(f"First-last prediction:  Jan/{last_year+1} - Jan/{end_year}")
First-last observation: Jan/1952 - Jan/2026
First-last prediction:  Jan/2027 - Jan/2100
In [4]:
# Create coefficients for the migration balance
migration_coeff = {}
for year in range(last_year+1, end_year+1):
    migration_coeff[year] = {
        "double ": 2,
        "average": 1,
        "zero   ": 0,
        #f"↑ 5x in {end_year}": 1 + (5-1) / (end_year - last_year) * (year - last_year), # old scenario
    }
In [5]:
# Values from previous analysis on fertility (see Notebook #40)
fdata = json.load(open("../data/fertility_fitted.json"))
print("Fertility coefficients:")
display(fdata)

def get_newborns_next_year(dfp, year, fertility_rate, fdata):
    """Fom population and fertility rate for a given year,
    get the number of newborns for the NEXT year (fist January of year+1).
    """
    def constrained_sigmoid(x0, k):
        """Get sigmoid function, which is a cumsum of a distribution."""
        x = np.arange(17, 50+1)
        y = 1 / (1 + np.exp(-k * (x - x0)))
        y = y - y[0]
        y = y / y[-1]
        return y
    pop = dfp[year]
    mothers_potential = pop.loc[18:50].to_numpy() * fdata["assumed_women_ratio"]
    x0 = np.polyval(fdata["sigmoid_x0"], year)
    distrib = np.diff(constrained_sigmoid(x0, fdata["sigmoid_k"]))
    newborns = sum(mothers_potential * distrib) * fertility_rate
    return int(newborns)

fertility_rate = {}
m_coeff_scenario_1 = (2 - fdata["last_observed_value"]) / (end_year - fdata["last_observed_year"])
m_coeff_scenario_3 = (0 - fdata["last_observed_value"]) / (end_year - fdata["last_observed_year"])
for year in range(fdata["last_observed_year"], end_year+1):
    fertility_rate[year] = {
        f"↑ 2 in {end_year}": fdata["last_observed_value"] + m_coeff_scenario_1 * (year - fdata["last_observed_year"]), # going to 2
        "eq. to last": fdata["last_observed_value"], # constant
        f"↓ 0 in {end_year}": fdata["last_observed_value"] + m_coeff_scenario_3 * (year - fdata["last_observed_year"]), # going to 0
    }
Fertility coefficients:
{'last_observed_year': 2024,
 'last_observed_value': 1.1526289356194974,
 'sigmoid_x0': [0.09490182479041313, -160.47494571102223],
 'sigmoid_k': -0.296,
 'assumed_women_ratio': 0.5}

Projection of the resident population till 2100

In [6]:
dfpj_long_all = None
scenarios = [] # trying to sort them by descending end_year population
for migration_scenario in migration_coeff[end_year]:
    for fertility_scenario in fertility_rate[end_year]:
        scenario = f"migr {migration_scenario} & fert {fertility_scenario}"
        scenarios.append(scenario)
        print("Simulating:", scenario)
        dfpj = dfp.copy()
        for year in range(last_year+1, end_year+1):
            dfpj.loc[0, year] = get_newborns_next_year(dfpj, year-1, fertility_rate[year-1][fertility_scenario], fdata)
            dfpj.loc[1:, year] = dfpj.loc[0:99, year-1].values * ( 1 - dfdpj.loc[0:99, year-1]).values
            dfpj.loc[100, year] += dfpj.loc[100, year-1] * (1-0.4)
            
            # add migration balance
            dfpj.loc[1:80, year] += dfmb.values * migration_coeff[year][migration_scenario]

        dfpj_long = dfpj.reset_index().melt(id_vars="AGE").rename(columns={"variable": "year", "value": "population"})
        if dfpj_long_all is None:
            dfpj_long_all = deepcopy(dfpj_long).rename(columns={"population": scenario})
        else:
            dfpj_long_all[scenario] = dfpj_long["population"]
            
dfpj_long_all = dfpj_long_all.astype(int) # Integers are ok for years, ages, and population (no need for decimal precision!)
dfpj_long_all.to_csv("../data/pop_by_age_year_proj.csv", index=False)
display(dfpj_long_all)
Simulating: migr double  & fert ↑ 2 in 2100
Simulating: migr double  & fert eq. to last
Simulating: migr double  & fert ↓ 0 in 2100
Simulating: migr average & fert ↑ 2 in 2100
Simulating: migr average & fert eq. to last
Simulating: migr average & fert ↓ 0 in 2100
Simulating: migr zero    & fert ↑ 2 in 2100
Simulating: migr zero    & fert eq. to last
Simulating: migr zero    & fert ↓ 0 in 2100
AGE year migr double  & fert ↑ 2 in 2100 migr double  & fert eq. to last migr double  & fert ↓ 0 in 2100 migr average & fert ↑ 2 in 2100 migr average & fert eq. to last migr average & fert ↓ 0 in 2100 migr zero    & fert ↑ 2 in 2100 migr zero    & fert eq. to last migr zero    & fert ↓ 0 in 2100
0 0 1952 817477 817477 817477 817477 817477 817477 817477 817477 817477
1 1 1952 838469 838469 838469 838469 838469 838469 838469 838469 838469
2 2 1952 851858 851858 851858 851858 851858 851858 851858 851858 851858
3 3 1952 907767 907767 907767 907767 907767 907767 907767 907767 907767
4 4 1952 901277 901277 901277 901277 901277 901277 901277 901277 901277
... ... ... ... ... ... ... ... ... ... ... ...
15044 96 2100 174107 174107 174107 141485 141485 141485 108864 108864 108864
15045 97 2100 136070 136070 136070 111111 111111 111111 86151 86151 86151
15046 98 2100 105341 105341 105341 86703 86703 86703 68065 68065 68065
15047 99 2100 80566 80566 80566 66999 66999 66999 53432 53432 53432
15048 100 2100 141258 141258 141258 119439 119439 119439 97619 97619 97619

15049 rows × 11 columns

In [ ]:
 
In [7]:
fig = px.line(
    dfpj_long_all, 
    x="AGE", 
    y=dfpj_long_all.columns, 
    animation_frame="year",
    markers=False,
)
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Population by age group",
    yaxis_range=[0, 1e6],
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=10, b=10),
    width=780,
    height=420,
)
fig.add_vrect(x0=17, x1=50, fillcolor="LightSalmon", opacity=0.5, layer="below", line_width=0)

# Initialize the animation at last_year
nframe_last_year = last_year - start_year
frame_last_year = fig.frames[nframe_last_year]
for extra in frame_last_year.data[len(fig.data):]:
    fig.add_trace(extra)
for i, tr in enumerate(frame_last_year.data):
    payload = tr.to_plotly_json() if hasattr(tr, "to_plotly_json") else dict(tr)
    fig.data[i].update(**payload)
if "sliders" in fig.layout and fig.layout.sliders:
    fig.layout.sliders[0].active = nframe_last_year

fig["layout"].pop("updatemenus") # optional, drop animation buttons
fig.write_html("../images_output/pop_by_age_proj.html", auto_play=False)
fig.show()
In [8]:
# Get official projections from ISTAT, to compare with our model
df3 = sdmx.to_pandas(
    client.data(
        resource_id="165_889_DF_DCIS_PREVDEM1_1", 
        key={
            "FREQ": "A",
            "REF_AREA": "IT",    
            "AGE": [],
            "DATA_TYPE": "JAN",
            "AGE": "TOTAL",
            "SEX": "9",
        })
    ).reset_index()
df3p = df3.pivot(index="TIME_PERIOD", columns="FORECAST_INTERVAL", values="value").astype(int)
display(df3p)
FORECAST_INTERVAL PROJLOW50 PROJLOW80 PROJLOW90 PROJMED PROJUPP50 PROJUPP80 PROJUPP90
TIME_PERIOD
2024 58971230 58971230 58971230 58971230 58971230 58971230 58971230
2025 58923943 58914768 58909429 58934357 58944652 58953629 58960077
2026 58854860 58833930 58821919 58878791 58902152 58922558 58936786
2027 58765562 58730546 58710458 58805555 58844547 58878580 58902664
2028 58657522 58605357 58576941 58715886 58772892 58822879 58858714
2029 58531618 58461826 58423265 58611066 58688715 58756265 58804177
2030 58389475 58300483 58250587 58492891 58592493 58681329 58740407
2031 58235971 58125234 58062959 58364091 58487641 58599089 58673137
2032 58070047 57936609 57861505 58225538 58374231 58509889 58598970
2033 57895201 57737372 57649206 58079819 58256322 58416402 58523284
2034 57711446 57528258 57424946 57927615 58133651 58317693 58445185
2035 57518708 57309121 57188955 57768649 58006357 58218662 58361792
2036 57319032 57079512 56942533 57602112 57872422 58117941 58274767
2037 57113561 56844469 56689018 57430814 57735868 58011449 58189779
2038 56903024 56595404 56422500 57255668 57596836 57905486 58108169
2039 56685746 56347018 56151673 57076174 57453312 57799140 58018052
2040 56462446 56092608 55871877 56891831 57304174 57688553 57924494
2041 56234503 55826061 55586102 56702463 57151160 57573882 57830366
2042 55998773 55557389 55289635 56508176 56996715 57461529 57741962
2043 55755909 55278994 54984273 56307911 56840217 57342740 57651532
2044 55504835 54989606 54665292 56100711 56678406 57221254 57558691
2045 55239734 54687958 54333959 55885863 56508884 57092852 57458853
2046 54964191 54377495 53991969 55661710 56333518 56958891 57338212
2047 54680093 54059743 53644805 55427009 56147484 56811452 57221533
2048 54396478 53723043 53276010 55180907 55949210 56654241 57091737
2049 54094200 53380022 52900105 54922959 55737310 56492214 56952179
2050 53767151 53016989 52502347 54652334 55514069 56321481 56811063
2051 53439054 52628238 52089212 54369402 55280678 56132228 56651854
2052 53093257 52228295 51659720 54072977 55033114 55934354 56499514
2053 52739298 51812334 51224879 53764559 54777712 55721568 56315772
2054 52362038 51389236 50776400 53444984 54500962 55497615 56138371
2055 51971955 50961045 50288538 53114482 54217486 55286450 55942624
2056 51589002 50516350 49820433 52775392 53930306 55049000 55746468
2057 51187202 50066752 49327479 52428222 53632843 54815527 55562126
2058 50779205 49603619 48851294 52076010 53344951 54577434 55343299
2059 50363329 49149357 48362820 51720190 53034640 54311861 55123964
2060 49945966 48687462 47866607 51362773 52740311 54066999 54896747
2061 49524184 48220113 47368590 51005747 52441564 53817669 54730447
2062 49123838 47757096 46862060 50651078 52151002 53588410 54540109
2063 48712356 47291594 46389879 50301094 51861156 53346830 54300371
2064 48309489 46834289 45891209 49957914 51569154 53131089 54108042
2065 47915256 46387565 45394934 49623325 51292696 52928974 53934816
2066 47535249 45937256 44934512 49298413 51031589 52737701 53803945
2067 47148256 45510195 44460621 48983716 50800367 52569529 53643147
2068 46766731 45065603 43982005 48679753 50585458 52398356 53527996
2069 46406696 44633906 43554234 48386864 50372365 52250315 53408746
2070 46066729 44215674 43081281 48106343 50163868 52098240 53314196
2071 45724650 43833379 42642146 47838096 49966563 51977335 53227873
2072 45394497 43432097 42201225 47579866 49803410 51874865 53139453
2073 45064424 43060638 41807565 47333408 49617715 51756471 53089167
2074 44754783 42693503 41409851 47097166 49452449 51668822 53020899
2075 44454833 42369061 40967996 46869406 49313521 51622385 52983523
2076 44131565 42030478 40582987 46649822 49161739 51556891 52914775
2077 43826365 41668037 40181602 46436446 49016289 51512882 52914773
2078 43539170 41307618 39828913 46227080 48889316 51448857 52875681
2079 43244031 40917708 39426741 46022894 48776044 51401464 52897130
2080 42983784 40535400 39019247 45820186 48654446 51355491 52847412
In [9]:
fig = dfpj_long_all.groupby("year").sum().drop(columns="AGE").plot()
#fig.add_vrect(x0=start_year, x1=last_year, fillcolor="Green", opacity=0.1, layer="below", line_width=0)
fig.update_layout(
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=20, b=10),
    xaxis_title=None,
    yaxis_title="Total Population",
    width=780,
    height=320,
)
fig.add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJMED"], 
    mode="lines", 
    name="(ISTAT Projection ±80 percentile)",
    line=dict(color="rgba(0,0,0,0.7)")
)).add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJLOW80"], 
    mode="lines",
    showlegend=False,
    fill=None,
    line=dict(color="rgba(128,128,128,0.3)")
)).add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJUPP80"], 
    mode="lines",
    showlegend=False,
    fill="tonexty",
    line=dict(color="rgba(128,128,128,0.3)"),
    fillcolor="rgba(128,128,128,0.3)"
))

fig.write_html("../images_output/pop_tot_proj.html")
fig

Conclusions

  • My model total population prediction is in line with the official ISTAT projectons
  • Depending on the scenario, the 2100 Italian population can go from 17M to 80M

Follow-up

  • Analyse better the composition of the projected population
← Home