← Home

Analyse the future resident population¶

AIM: now that I have the projected population by age up to 2100, make some plots to understand what the future population will look like.

InΒ [1]:
import numpy as np
import pandas as pd
import json
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import plotly.colors as pc
import plotly.io as pio

import warnings

# Select the plotly renderers used to show figures inline ('+' combines several renderers)
pio.renderers.default = 'vscode+notebook'
# Make DataFrame.plot() / Series.plot() draw with plotly rather than pandas' default backend
pd.options.plotting.backend = "plotly"
# Hide FutureWarning messages so they do not clutter the cell outputs
warnings.filterwarnings("ignore", category=FutureWarning)

def get_colors(n, cmap_name="rainbow"):
    """Get colors for px colors_discrete argument, given the number of colors needed, n.

    Parameters
    ----------
    n : int
        Number of colors to sample, evenly spaced over the whole colormap.
    cmap_name : str, default "rainbow"
        Key of the colormap in ``matplotlib.colormaps``.

    Returns
    -------
    list of str
        ``n`` strings of the form ``"rgba(R, G, B, 1.0)"`` with integer
        channels in 0-255; the alpha is always written as 1.0.
    """
    cmap = matplotlib.colormaps[cmap_name]
    colors = [cmap(i) for i in np.linspace(0, 1, n)]  # Generate colors
    # The colormap returns channels as floats in [0, 1]; CSS-style rgba() wants 0-255,
    # so scale by 255 (not 250) and round to the nearest integer level.
    colors_str = [
        f"rgba({int(round(color[0]*255))}, {int(round(color[1]*255))}, {int(round(color[2]*255))}, 1.0)"
        for color in colors
    ]
    return colors_str

# Translate plotly's default qualitative palette (hex strings) into
# matplotlib-style RGB tuples, i.e. each channel divided by 255
plotly_colors = pc.qualitative.Plotly
matplotlib_colors = [
    tuple(channel/255 for channel in pc.hex_to_rgb(hex_color))
    for hex_color in plotly_colors
]
InΒ [2]:
# Recover data from previous notebooks

# Observed population table from Notebook#40: the first CSV column becomes the index
# and the column labels are converted to int
dfp = pd.read_csv("../data/pop_by_age_year.csv", index_col=0)
dfp = dfp.rename(columns=int)

# Projected population from Notebook#70, kept exactly as stored in the CSV
dfpj_long_all = pd.read_csv("../data/pop_by_age_year_proj.csv")
InΒ [3]:
# The observed period is read from the first and last column of the historical table
# (1952 and 2026 in the run printed below); projections go on until end_year
start_year, last_year = dfp.columns[0], dfp.columns[-1]
end_year = 2100

print(f"First-last observation: Jan/{start_year} - Jan/{last_year}")
print(f"First-last prediction:  Jan/{last_year+1} - Jan/{end_year}")

# A scenario is any column of the projection table whose name starts with "mig"
scenarios = list(filter(lambda name: name.startswith("mig"), dfpj_long_all.columns))
print("\nScenarios:")
display(scenarios)
First-last observation: Jan/1952 - Jan/2026
First-last prediction:  Jan/2027 - Jan/2100

Scenarios:
['migr double  & fert ↑ 2 in 2100',
 'migr double  & fert eq. to last',
 'migr double  & fert ↓ 0 in 2100',
 'migr average & fert ↑ 2 in 2100',
 'migr average & fert eq. to last',
 'migr average & fert ↓ 0 in 2100',
 'migr zero    & fert ↑ 2 in 2100',
 'migr zero    & fert eq. to last',
 'migr zero    & fert ↓ 0 in 2100']
InΒ [4]:
# Reshape to long format (one row per year, age and scenario), which is easier to plot.
# Every column from the third one onwards is melted — assumes the first two columns
# are "year" and "AGE" (TODO confirm if the CSV layout changes)
df_plot = pd.melt(
    dfpj_long_all,
    id_vars=["year", "AGE"],
    value_vars=dfpj_long_all.columns[2:],
    var_name="scenario",
    value_name="population",
)
df_plot
Out[4]:
year AGE scenario population
0 1952 0 migr doubleΒ Β & fert ↑ 2 in 2100 817477
1 1952 1 migr doubleΒ Β & fert ↑ 2 in 2100 838469
2 1952 2 migr doubleΒ Β & fert ↑ 2 in 2100 851858
3 1952 3 migr doubleΒ Β & fert ↑ 2 in 2100 907767
4 1952 4 migr doubleΒ Β & fert ↑ 2 in 2100 901277
... ... ... ... ...
135436 2100 96 migr zeroΒ Β Β Β & fert ↓ 0 in 2100 108864
135437 2100 97 migr zeroΒ Β Β Β & fert ↓ 0 in 2100 86151
135438 2100 98 migr zeroΒ Β Β Β & fert ↓ 0 in 2100 68065
135439 2100 99 migr zeroΒ Β Β Β & fert ↓ 0 in 2100 53432
135440 2100 100 migr zeroΒ Β Β Β & fert ↓ 0 in 2100 97619

135441 rows × 4 columns

InΒ [5]:
# 3x3 grid of panels, one per scenario, each showing the population of three
# age groups (young, adult, senior) over time
AGE1, AGE2 = 20, 70

# (first age, last age, legend label) of each group; both ends are inclusive
age_bands = [
    (0, AGE1, f"0-{AGE1}"),
    (AGE1+1, AGE2-1, f"{AGE1+1}-{AGE2-1}"),
    (AGE2, 100, f"{AGE2}+"),
]

fig, axes = plt.subplots(3, 3, figsize=(9.5, 5.5), sharex=False, sharey=False)
for idx, scenario in enumerate(scenarios):

    ax = axes[divmod(idx, 3)]  # (row, column) of the panel, filled row by row
    df_scenario = df_plot.loc[df_plot["scenario"] == scenario]

    # One line per age group, using the first three palette colors in order
    for band_color, (age_from, age_to, band_label) in zip(matplotlib_colors, age_bands):
        in_band = df_scenario["AGE"].between(age_from, age_to, inclusive='both')
        yearly_total = df_scenario.loc[in_band].groupby("year")["population"].sum()
        ax.plot(yearly_total.index, yearly_total.values/1e6, label=band_label, color=band_color)

    ax.set_title(scenario, color=matplotlib_colors[idx])
    ax.set_xlabel(None)
    ax.set_ylabel(None)
    ax.set_xlim(left=start_year, right=end_year)
    ax.set_ylim(bottom=0, top=50)
    if idx == 8:
        # Only the last panel carries the legend
        ax.legend()
    # Shade the years covered by observations
    ax.axvspan(xmin=start_year, xmax=last_year, color="green", alpha=0.1)
    ax.grid(axis='y')

# Single y-axis caption shared by the whole figure
fig.text(0.0, 0.5, 'Population by age group (millions)', va='center', rotation='vertical')
plt.tight_layout()
plt.savefig("../images_output/analysis_proj_groups.png", dpi=200)
plt.show()
No description has been provided for this image
InΒ [6]:
# Focus on teenagers of high-school age: for each of the 3x3 scenarios, plot their
# number relative to the average observed between AVG_START_YEAR and last_year
AGE1, AGE2 = 14, 18
AVG_START_YEAR = 2000

fig, axes = plt.subplots(3, 3, figsize=(9.5, 5.5), sharex=False, sharey=False)
for idx, scenario in enumerate(scenarios):

    ax = axes.flat[idx]  # panels are filled row by row
    df_scenario = df_plot.loc[df_plot["scenario"] == scenario]

    # Yearly head-count of the AGE1..AGE2 cohort (both ages included)
    is_teen = df_scenario["AGE"].between(AGE1, AGE2, inclusive='both')
    sum12 = df_scenario.loc[is_teen].groupby("year")["population"].sum()

    # Reference level: mean over the years AVG_START_YEAR..last_year
    in_reference_years = (sum12.index >= AVG_START_YEAR) & (sum12.index <= last_year)
    avg_past = sum12[in_reference_years].mean()

    # Relative deviation from the reference level (0 means equal to the average)
    ax.plot(sum12.index, sum12.values/avg_past - 1, label=f"{AGE1}-{AGE2}", color="red")

    ax.set_title(scenario, color=matplotlib_colors[idx])
    ax.set_xlabel(None)
    ax.set_ylabel(None)
    ax.set_xlim(left=start_year, right=end_year)
    ax.set_ylim(bottom=-1, top=+1)
    # Show the fractional change as a whole-number percentage
    ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: f"{x:.0%}"))
    # Shade the years covered by observations
    ax.axvspan(xmin=start_year, xmax=last_year, color="green", alpha=0.1)
    ax.grid(axis='y')

# Single y-axis caption shared by the whole figure
fig.text(0, 0.5, f"%Change of {AGE1}-{AGE2} years old w.r.t. {AVG_START_YEAR}-{last_year} average", va='center', rotation='vertical')
plt.tight_layout()
plt.savefig("../images_output/analysis_proj_teens.png", dpi=200)
plt.show()
No description has been provided for this image
InΒ [7]:
# Values from previous analysis on fertility (see Notebook #40)
# Read through a context manager so the file handle is closed as soon as the JSON is parsed
with open("../data/fertility_fitted.json") as fertility_file:
    fdata = json.load(fertility_file)
print("Fertility coefficients:")
display(fdata)

def get_newborns_next_year(dfp, year, fertility_rate, fdata):
    """From the population and the fertility rate of a given year,
    get the number of newborns for the NEXT year (first of January of year+1).

    Parameters
    ----------
    dfp : pandas.DataFrame
        Population table read as ``dfp[year].loc[18:50]``; assumes the index holds
        every integer age from 18 to 50 and the columns are years.
    year : int
        Column of ``dfp`` to use; also the point at which the ``sigmoid_x0``
        polynomial is evaluated.
    fertility_rate : float
        Multiplier applied to the age-weighted number of women.
    fdata : dict
        Needs the keys ``"assumed_women_ratio"``, ``"sigmoid_x0"`` (polynomial
        coefficients for ``np.polyval``) and ``"sigmoid_k"``.

    Returns
    -------
    int
        The estimate, truncated towards zero.
    """
    # Women of each age 18..50: population times the assumed share of women
    women_by_age = dfp[year].loc[18:50].to_numpy() * fdata["assumed_women_ratio"]

    # Midpoint of the sigmoid for this year, and its steepness
    midpoint = np.polyval(fdata["sigmoid_x0"], year)
    steepness = fdata["sigmoid_k"]

    # Sigmoid sampled at ages 17..50, then shifted and rescaled so that it starts at 0
    # and ends at 1: it plays the role of a cumulative distribution over age
    ages = np.arange(17, 50+1)
    cumulative = 1 / (1 + np.exp(-steepness * (ages - midpoint)))
    cumulative = cumulative - cumulative[0]
    cumulative = cumulative / cumulative[-1]

    # The 33 increments of the cumulative curve are the weights of ages 18..50
    share_by_age = np.diff(cumulative)

    expected_births = sum(women_by_age * share_by_age) * fertility_rate
    return int(expected_births)
Fertility coefficients:
{'last_observed_year': 2024,
 'last_observed_value': 1.1526289356194974,
 'sigmoid_x0': [0.09490182479041313, -160.47494571102223],
 'sigmoid_k': -0.296,
 'assumed_women_ratio': 0.5}
InΒ [8]:
# Focus on the amount of potential mothers, for each of the 3x3 scenarios.
# It is obtained by calling get_newborns_next_year with fertility=1


fig, axes = plt.subplots(3, 3, figsize=(9.5, 5.5), sharex=False, sharey=False)
for idx, scenario in enumerate(scenarios):

    ax = axes.ravel()[idx]  # panels are filled row by row
    # Wide table for this scenario: one row per age, one column per year
    dfp_scenario = dfpj_long_all.pivot(index="AGE", columns="year", values=scenario)

    # One value per year, expressed in thousands
    value = []
    for year in dfp_scenario.columns:
        value.append(get_newborns_next_year(dfp_scenario, year, 1, fdata)/1e3)
    ax.plot(dfp_scenario.columns, value, color="red")

    ax.set_title(scenario, color=matplotlib_colors[idx])
    ax.set_xlabel(None)
    ax.set_ylabel(None)
    ax.set_xlim(left=start_year, right=end_year)
    ax.set_ylim(bottom=0, top=500)
    # Shade the years covered by observations
    ax.axvspan(xmin=start_year, xmax=last_year, color="green", alpha=0.1)
    ax.grid(axis='y')

# Single y-axis caption shared by the whole figure
fig.text(0.0, 0.5, 'Potential mothers (thousands)', va='center', rotation='vertical')
plt.tight_layout()
plt.savefig("../images_output/analysis_proj_mothers.png", dpi=200)
plt.show()
No description has been provided for this image

Conclusions¶

  • Baseline and grim scenarios show a significant shift from historical trends.

Follow-up¶

  • Compare with ISTAT projections, since 165_889_DF_DCIS_PREVDEM1_1 includes the age split of the population
← Home