← Home

Fertility

AIM: match the population data with the fertility rate, to be able to extrapolate for next years

The fertility rate ($fr$) is the average number of children per women, and has to be above 2 to keep the population stable.

ISTAT computes the fertility rate in DCIS_FECONDITA1, but I have to do some data reconcilation to make sure that I have a function I can use to compute newborns from the population data.

In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import wbgapi as wb # Italian data are probably copied from ISTAT anyway
from copy import deepcopy
import sdmx
import warnings

pio.renderers.default = 'vscode+notebook'
pd.options.plotting.backend = "plotly"
warnings.filterwarnings("ignore", category=FutureWarning)

def get_colors(n, cmap_name="rainbow"):
    """Get colors for px colors_discrete argument, given the number of colors needed, n."""
    cmap = matplotlib.colormaps[cmap_name]
    colors = [cmap(i) for i in np.linspace(0, 1, n)]  # Generate colors
    colors_str = [f"rgba({int(color[0]*250)}, {int(color[1]*250)}, {int(color[2]*250)}, 1.0)" for color in colors]
    return colors_str
In [2]:
dfp=pd.read_csv("../data/pop_by_age_year.csv", index_col=0).rename(columns=int)  # From Notebook#14
In [ ]:
df6 = sdmx.to_pandas(sdmx.Client("ISTAT").data(
    resource_id="25_326_DF_DCIS_FECONDITA1_5", 
    key={
        "REF_AREA": "IT" # entire Italy
        } 
)).reset_index().astype({"TIME_PERIOD": int})
df6
Out[ ]:
FREQ REF_AREA DATA_TYPE CITIZENSHIP MOTHER_AGE TIME_PERIOD value
0 A IT TOTFERR FRG TOTAL 2002 2.82
1 A IT TOTFERR FRG TOTAL 2003 2.47
2 A IT TOTFERR FRG TOTAL 2004 2.84
3 A IT TOTFERR FRG TOTAL 2005 2.69
4 A IT TOTFERR FRG TOTAL 2006 2.79
... ... ... ... ... ... ... ...
67 A IT TOTFERR TOTAL TOTAL 2020 1.24
68 A IT TOTFERR TOTAL TOTAL 2021 1.25
69 A IT TOTFERR TOTAL TOTAL 2022 1.24
70 A IT TOTFERR TOTAL TOTAL 2023 1.20
71 A IT TOTFERR TOTAL TOTAL 2024 1.18

72 rows × 7 columns

In [4]:
px.line(
    df6,
    x="TIME_PERIOD", 
    y="value", 
    color="CITIZENSHIP", 
).update_layout(
    title="Italian fertility rate by citizenship",
    width=1000,
).show()
In [ ]:
df7 = sdmx.to_pandas(sdmx.Client("ISTAT").data(
    resource_id="25_326_DF_DCIS_FECONDITA1_7", 
    key={
        "REF_AREA": "IT" # entire Italy
        } 
)).reset_index()
df7
Out[ ]:
FREQ REF_AREA DATA_TYPE CITIZENSHIP MOTHER_AGE TIME_PERIOD value
0 A IT ASFR FRG Y_GE50 2002 0.90
1 A IT ASFR FRG Y_GE50 2003 0.89
2 A IT ASFR FRG Y_GE50 2004 0.84
3 A IT ASFR FRG Y_GE50 2005 1.22
4 A IT ASFR FRG Y_GE50 2006 1.42
... ... ... ... ... ... ... ...
2659 A IT ASFR TOTAL Y49 2020 0.43
2660 A IT ASFR TOTAL Y49 2021 0.40
2661 A IT ASFR TOTAL Y49 2022 0.63
2662 A IT ASFR TOTAL Y49 2023 0.59
2663 A IT ASFR TOTAL Y49 2024 0.68

2664 rows × 7 columns

In [6]:
# clean the dataset
fert_by_age = (
    df7
    .query("CITIZENSHIP == 'TOTAL'")
    .assign(year = pd.to_datetime(df7["TIME_PERIOD"]).dt.year)
    .assign(age = df7["MOTHER_AGE"].str.extract(r"(\d+)").astype(int))
    .pivot(index="age", columns="year", values="value")
)
fert_by_age
Out[6]:
year 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 ... 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
age
14 0.17 0.14 0.01 0.00 0.03 0.04 0.01 0.02 0.01 0.03 ... 0.04 0.04 0.03 0.03 0.02 0.01 0.03 0.03 0.02 0.01
15 0.20 0.16 0.12 0.14 0.18 0.17 0.15 0.13 0.14 0.20 ... 0.19 0.17 0.17 0.14 0.11 0.11 0.08 0.08 0.08 0.07
16 3.01 3.24 3.27 3.08 3.16 3.26 3.04 2.86 2.80 3.13 ... 2.14 1.86 1.59 1.37 1.36 1.04 1.00 0.95 0.82 0.85
17 6.10 6.24 5.81 6.21 5.95 6.42 6.33 5.68 5.56 5.96 ... 4.09 3.60 3.40 3.00 2.56 2.19 2.11 1.97 1.83 1.69
18 9.26 9.61 9.37 10.45 10.00 10.44 10.35 10.07 9.95 10.49 ... 7.31 6.90 6.05 5.56 5.20 4.36 3.87 3.73 3.77 3.41
19 15.22 15.39 15.02 15.27 15.16 16.42 16.03 16.60 16.46 16.98 ... 12.42 12.14 10.95 10.91 9.34 8.59 7.40 7.28 7.10 6.58
20 20.21 20.57 20.63 20.17 19.86 22.07 21.96 22.35 23.62 23.99 ... 18.01 16.78 15.75 15.13 13.96 12.73 11.55 11.07 10.33 9.92
21 25.13 25.97 26.20 25.84 25.24 27.76 27.59 27.66 28.91 30.55 ... 22.18 21.82 20.70 19.22 18.22 16.99 15.72 15.09 13.83 13.88
22 31.94 32.36 32.45 32.26 31.37 34.05 33.41 34.01 35.05 35.76 ... 28.04 26.99 26.09 25.12 23.43 22.08 20.33 19.67 18.47 17.23
23 39.77 40.06 39.60 40.01 38.48 40.67 39.98 40.97 40.55 43.02 ... 34.53 33.37 31.63 29.76 28.44 26.73 25.49 25.36 23.09 22.37
24 48.49 48.03 47.01 47.26 46.82 48.41 47.69 48.57 48.53 50.11 ... 41.24 39.84 38.44 36.88 34.44 33.39 31.28 30.96 28.30 27.51
25 58.05 58.27 56.51 56.32 54.69 57.70 56.63 57.03 57.18 57.78 ... 49.97 47.87 46.93 44.35 42.46 41.03 39.07 38.47 35.75 34.45
26 69.07 67.14 66.14 66.04 64.84 66.13 66.64 65.84 66.29 67.07 ... 58.83 57.71 55.55 53.79 50.99 49.27 47.23 46.18 44.96 42.24
27 78.49 77.42 74.83 75.05 74.13 75.50 73.62 76.05 75.30 76.80 ... 68.86 66.02 65.59 62.56 60.46 58.35 57.24 56.02 54.16 50.34
28 86.68 86.95 82.68 82.33 81.85 84.33 82.52 83.58 83.29 85.73 ... 77.72 76.71 74.06 72.17 68.87 67.41 65.46 65.89 62.32 60.23
29 91.04 91.20 89.90 89.84 88.10 90.15 88.71 89.17 90.42 92.64 ... 84.32 85.14 83.55 79.01 78.27 75.76 74.76 75.06 73.31 69.53
30 93.57 94.59 93.03 93.55 94.41 94.63 93.85 94.16 95.80 96.36 ... 91.89 90.54 90.01 88.49 83.95 83.89 84.29 83.90 79.71 77.88
31 91.20 92.11 90.94 93.27 94.55 97.15 93.87 96.77 97.33 99.21 ... 96.07 95.92 93.38 91.69 90.78 89.64 89.38 89.19 86.76 85.08
32 85.33 89.64 87.80 89.61 92.60 94.80 93.69 94.82 96.87 99.74 ... 95.44 95.73 94.52 92.29 89.83 91.09 90.77 92.08 88.87 86.89
33 76.98 80.11 80.73 82.47 86.01 89.26 88.51 91.82 92.69 95.23 ... 93.21 93.75 91.64 90.73 88.33 87.50 91.40 89.66 88.24 86.98
34 69.04 71.86 72.38 75.31 77.26 81.18 81.17 85.14 86.83 88.29 ... 88.39 89.23 87.73 88.30 85.47 84.56 87.36 87.78 85.07 84.28
35 59.33 61.57 64.00 65.79 69.18 71.14 73.44 76.05 78.07 81.24 ... 81.13 82.28 82.28 80.51 79.85 79.92 83.10 80.38 80.47 79.55
36 47.99 51.10 53.21 54.69 57.93 62.01 61.25 65.24 67.90 71.36 ... 72.53 72.78 73.14 72.44 71.36 70.63 75.15 72.20 71.47 72.43
37 37.79 39.94 42.08 43.74 46.95 49.14 50.60 52.59 54.67 58.54 ... 60.97 61.85 62.37 61.33 61.24 59.82 63.34 62.57 61.68 61.47
38 28.44 29.97 31.52 33.27 35.77 38.13 39.08 41.47 43.82 46.80 ... 50.87 50.59 51.22 50.26 49.54 49.69 52.78 52.45 51.43 50.46
39 21.73 22.44 23.91 24.82 26.62 29.13 30.07 31.75 33.39 35.29 ... 39.89 40.23 41.14 40.60 40.01 39.82 42.23 41.75 41.68 41.44
40 14.93 16.27 16.57 17.82 18.96 20.41 21.44 22.71 24.21 25.52 ... 30.43 30.91 31.57 31.01 30.49 30.57 31.89 31.41 30.62 31.71
41 10.19 10.27 11.27 11.32 12.35 13.11 14.08 14.91 15.99 17.27 ... 21.06 21.28 21.65 22.26 21.63 21.22 22.21 22.44 22.90 22.56
42 6.08 6.27 6.65 6.82 7.40 8.03 8.40 9.03 9.76 10.57 ... 13.79 14.27 14.27 14.43 14.32 14.20 14.27 15.05 14.40 14.88
43 3.55 3.60 3.74 3.84 4.18 4.36 4.81 5.22 5.56 5.72 ... 8.22 8.82 8.94 8.89 8.59 8.76 8.64 8.88 9.09 9.15
44 1.72 1.93 1.94 1.94 2.15 2.20 2.40 2.65 2.86 3.15 ... 4.68 4.77 5.17 5.46 5.36 5.10 5.20 5.56 5.53 5.66
45 0.98 0.83 1.05 1.01 1.02 1.06 1.11 1.26 1.39 1.63 ... 2.66 2.94 3.26 3.09 3.22 3.02 2.94 3.52 3.41 3.44
46 0.42 0.46 0.46 0.34 0.53 0.51 0.53 0.63 0.73 0.79 ... 1.53 1.74 1.73 2.04 1.92 1.92 1.77 2.16 2.28 2.32
47 0.23 0.21 0.26 0.15 0.22 0.24 0.25 0.26 0.37 0.38 ... 0.95 0.93 1.09 1.17 1.11 1.10 1.06 1.41 1.39 1.49
48 0.18 0.15 0.12 0.12 0.15 0.15 0.12 0.16 0.18 0.23 ... 0.41 0.59 0.66 0.62 0.75 0.76 0.61 0.88 0.98 0.85
49 0.10 0.09 0.11 0.07 0.12 0.08 0.11 0.11 0.18 0.15 ... 0.34 0.36 0.40 0.43 0.47 0.43 0.40 0.63 0.59 0.68
50 0.34 0.33 0.24 0.16 0.21 0.25 0.30 0.25 0.35 0.28 ... 0.64 0.71 0.81 0.83 0.93 0.89 0.84 1.01 1.06 1.14

37 rows × 26 columns

In [7]:
fig = px.line(
    data_frame=fert_by_age,
    x=fert_by_age.index,
    y=fert_by_age.columns,
    color_discrete_sequence=get_colors(n=fert_by_age.columns.size),
    labels={"x": "Year", "y": "Fertility rate"},
).update_layout(
    xaxis_title="Age",
    yaxis_title="Rel. fertility by age",
    yaxis_range=[0, 100],
    legend_title="Scenarios",
    margin=dict(l=10, r=10, t=20, b=10),
    width=780,
    height=320,
)
fig.write_html("../images_output/fertility_by_age.html", auto_play=False)
fig.show()
In [8]:
# Get a cumulative fertility rate by age
fert_by_age_cum = fert_by_age.div(fert_by_age.sum(axis=0), axis=1) # first normalize the fertility rate by age
fert_by_age_cum = fert_by_age_cum.cumsum() # then get the cumulative fertility rate by age
fert_by_age_cum.loc[17] = 0 # enforcing 0 at age 17 to set constraints on sigmoid
fert_by_age_cum.sort_index(inplace=True)
px.line(
    data_frame=fert_by_age_cum,
    x=fert_by_age_cum.index,
    y=fert_by_age_cum.columns,
    color_discrete_sequence=get_colors(n=fert_by_age_cum.columns.size),
    labels={"x": "Year", "y": "Fertility rate"},
).update_layout(
    width=1000
).show()
In [9]:
# fit the sigmoids and extrapolate for next years
from scipy.optimize import curve_fit

def constrained_sigmoid(x: float, x0: float, k: float) -> float:
    """
    Compute a constrained sigmoid function that is 0 at the start and 1 at the end.

    Parameters:
    x (float): The input value.
    x0 (float): The x-value of the sigmoid's midpoint.
    k (float): The steepness of the curve.

    Returns:
    float: The output of the constrained sigmoid function.
    """
    y = 1 / (1 + np.exp(-k * (x - x0)))
    y = y - y[0]
    y = y / y[-1]
    return y

def fit_sigmoid(x, y):
    p0 = [np.mean(x), 1]
    popt, pcov = curve_fit(constrained_sigmoid, x, y, p0, method='dogbox')
    return popt

fig, axs = plt.subplots(5,6, figsize=[15, 10])
coeff_by_year = {} # {year: [x0, k]}
for iplot, year in enumerate(fert_by_age_cum.columns):
    x = fert_by_age_cum.index
    y = fert_by_age_cum.loc[:, year]
    coeff_by_year[year] = fit_sigmoid(x, y)
    y_fit = constrained_sigmoid(x, *coeff_by_year[year])
    ax = axs.flatten()[iplot]
    ax.scatter(x, y, color="black", marker="o", s=2)
    ax.set_title(f"Year: {year}")
    ax.plot(x,y_fit, color="red", lw=0.5)
    ax.grid()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [10]:
df_coeff = pd.DataFrame(coeff_by_year).T
df_coeff.columns = ["x0", "k"]
fig = make_subplots(rows=1, cols=2).update_layout(
    width=1000,
    title="Sigmoid parameters over time: x0 (center) an k (steepness)"
)
fig.add_trace(go.Scatter(x=df_coeff.index, y=df_coeff["x0"], mode="markers", name="x0"),row=1, col=1)
fig.add_trace(go.Scatter(x=df_coeff.index, y=df_coeff["k"], mode="markers", name="k"), row=1, col=2)

# I will keep x0 linear and k constant to have just one parameter to control (x0, which is average age of fertility)
x0_coeff = np.polyfit(df_coeff.index, df_coeff["x0"], 1)
k = df_coeff["k"].mean()
fig.add_trace(go.Scatter(x=df_coeff.index, y=np.polyval(x0_coeff, df_coeff.index), mode="lines", name="x0 linear fit"), row=1, col=1)
fig.add_trace(go.Scatter(x=df_coeff.index, y=[k]*df_coeff.index.size, mode="lines", name="k mean"), row=1, col=2)
fig.show()
print("x0_coeff", x0_coeff)
print("k", k)
x0_coeff [ 9.49018248e-02 -1.60474946e+02]
k -0.2992077906780687

Visually, it looks ok to model the cumulative sum of fertility over mothers' age like this: a sigmoid, function of x0 and k, where:

  • x0 is a linear function of the year of observation (i.e., motherhood age is shifing ahead with years)
  • k is the steepness of the sigmoid, it is going up and down, but there is not a significant error if I keep it constant (otherwise I should guess whether to fit it with a parabola or a sinusoid!)

Now that I know the disribution of the fertility rate, I can distribute the number of new borns accordingly.

In [11]:
total_newborns_by_year = dfp.iloc[0,:].shift(-1).dropna().to_dict() # newborns @ year of observation (shifting by -1 year beacause if there is a <1 year old on January 1st, he was born in the previous year)

fertiliy_by_year = {}
for year, tot_nb in total_newborns_by_year.items():
    women = dfp.loc[17:50, year]/2 # assuming half of the population are women, which is reasonable globally but may not be the case for each age group 18-50
    x0 = np.polyval([ 0.09458, -159.9], year)
    k = -0.296
    sigmoid = constrained_sigmoid(women.index, x0, k)
    distrib = np.diff(sigmoid) # turning the sigmoid to a normaized distribution from 18 to 50
    mothers = sum(distrib * women.values[1:])
    fertiliy_by_year[year] = tot_nb / mothers

# REMEMBER: the fertility_by_year I computed here will be very similar to the official one by ISTAT, 
# but slightly different because there may be diffences in the calculations (e.g., exact age of the mothers, infancy mortality, etc.).
# However, it is consistent to generate the correct number of newborns.

Store and recover data

In [12]:
# Store the coefficients
fertility_json = {
    "last_observed_year": max(fertiliy_by_year.keys()),
    "last_observed_value": fertiliy_by_year[max(fertiliy_by_year.keys())],
    "sigmoid_x0": list(x0_coeff),
    "sigmoid_k": k,
    "assumed_women_ratio": 0.5
}
with open("../data/fertility_fitted.json", "w") as f:
    json.dump(fertility_json, f, indent=4)
In [13]:
# make a function that recovers the data and gives the nexborns from the population
fdata = json.load(open("../data/fertility_fitted.json"))
print("Fertility coefficients:")
display(fdata)

def get_newborns_next_year(dfp, year, fertility_rate, fdata):
    """Fom population and fertility rate for a given year,
    get the number of newborns for the NEXT year (fist January of year+1).
    """
    def constrained_sigmoid(x0, k):
        """Get sigmoid function, which is a cumsum of a distribution."""
        x = np.arange(17, 50+1)
        y = 1 / (1 + np.exp(-k * (x - x0)))
        y = y - y[0]
        y = y / y[-1]
        return y
    pop = dfp[year]
    mothers_potential = pop.loc[18:50].to_numpy() * fdata["assumed_women_ratio"]
    x0 = np.polyval(fdata["sigmoid_x0"], year)
    distrib = np.diff(constrained_sigmoid(x0, fdata["sigmoid_k"]))
    newborns = sum(mothers_potential * distrib) * fertility_rate
    return int(newborns)

get_newborns_next_year(dfp=dfp, year=2013, fertility_rate=1, fdata=fdata)
Fertility coefficients:
{'last_observed_year': 2024,
 'last_observed_value': 1.1526289356194974,
 'sigmoid_x0': [0.09490182479041313, -160.47494571102223],
 'sigmoid_k': -0.296,
 'assumed_women_ratio': 0.5}
Out[13]:
375995

Make plots compared to official data

In [14]:
# Get also the values from the World Bank that date back to 1960 (but should be the same as ISTAT)
ss_wb_fr = wb.data.DataFrame("SP.DYN.TFRT.IN", "ITA").T.dropna()["ITA"]
ss_wb_fr.index = [ int(year[-4:]) for year in ss_wb_fr.index ]
ss_wb_fr
Out[14]:
1960    2.40
1961    2.44
1962    2.46
1963    2.51
1964    2.66
        ... 
2019    1.27
2020    1.24
2021    1.25
2022    1.24
2023    1.20
Name: ITA, Length: 64, dtype: float64
In [15]:
# compare all the fertility rates

fig = go.Figure().add_trace(go.Scatter(
        x=df6.query("CITIZENSHIP == 'TOTAL'").TIME_PERIOD, 
        y=df6.query("CITIZENSHIP == 'TOTAL'").value, 
        mode="lines", 
        name="From ISTAT 'TOTAL'",
)).add_trace(go.Scatter(
        x=fert_by_age.columns.astype(int), 
        y=fert_by_age.sum(axis=0)/1000, 
        mode="lines", 
        name="From ISTAT age-by-age summed",
)).add_trace(go.Scatter(
        x=ss_wb_fr.index, 
        y=ss_wb_fr.values, 
        mode="lines", 
        name="From World Bank",
)).add_trace(go.Scatter(
        x=list(fertiliy_by_year.keys()), 
        y=list(fertiliy_by_year.values()), 
        mode="lines", 
        name="From calculations",
)).update_layout(
    width=1000,
    title="Comparison of Fertility Rates from different sources and calculations",
    xaxis_title="Year",
    yaxis_title="Fertility Rate"
)

fig.show()

Note that there is still some discrepancy between the fertility rate I compute and the ISTAT one, that may be due to:

  • approximating exactly 50% of the population as women
  • approximation in the sigmoid model shape with no mothers below 18 and above 50 years old
  • the indirect way I'm computing the mothers from a distribution fitted on fertility rate
In [16]:
# https://esploradati.istat.it/databrowser/#/it/dw/categories/IT1,POP,1.0/POP_DEMOPROJ/DCIS_PREVDEM1/IT1,165_889_DF_DCIS_PREVDEM1_3,1.0
df3 = sdmx.to_pandas(sdmx.Client("ISTAT").data(
    resource_id="165_889_DF_DCIS_PREVDEM1_3", 
    key={
        "REF_AREA": "IT", # entire Italy
        "DATA_TYPE": "TFR" # Total Fertility Rate
        }
)).reset_index().astype({"TIME_PERIOD": int})
df3
Out[16]:
FREQ REF_AREA DATA_TYPE FORECAST_INTERVAL AGE SEX TIME_PERIOD value
0 A IT TFR PROJLOW50 TOTAL 9 2024 1.17
1 A IT TFR PROJLOW50 TOTAL 9 2025 1.18
2 A IT TFR PROJLOW50 TOTAL 9 2026 1.19
3 A IT TFR PROJLOW50 TOTAL 9 2027 1.20
4 A IT TFR PROJLOW50 TOTAL 9 2028 1.21
... ... ... ... ... ... ... ... ...
394 A IT TFR PROJUPP90 TOTAL 9 2076 1.81
395 A IT TFR PROJUPP90 TOTAL 9 2077 1.82
396 A IT TFR PROJUPP90 TOTAL 9 2078 1.83
397 A IT TFR PROJUPP90 TOTAL 9 2079 1.84
398 A IT TFR PROJUPP90 TOTAL 9 2080 1.85

399 rows × 8 columns

In [17]:
# pivot data to have year, and OBS_VALUE of  INT_PREV="PROJMED" PROJLOW80 and PROJUPP80
df3p = df3.pivot(index="TIME_PERIOD", columns="FORECAST_INTERVAL", values="value")
df3p.head()
Out[17]:
FORECAST_INTERVAL PROJLOW50 PROJLOW80 PROJLOW90 PROJMED PROJUPP50 PROJUPP80 PROJUPP90
TIME_PERIOD
2024 1.17 1.16 1.15 1.18 1.19 1.20 1.21
2025 1.18 1.17 1.16 1.20 1.21 1.23 1.23
2026 1.19 1.17 1.16 1.21 1.23 1.25 1.26
2027 1.20 1.18 1.17 1.22 1.25 1.27 1.28
2028 1.21 1.19 1.17 1.24 1.26 1.29 1.30
In [18]:
# plot the data with INT_PREV="PROJMED" as line and INT_PREV="PROJLOW80" and "PROJUP80" as area of the confidence interval
go.Figure(
    ).add_trace(go.Scatter(
        x=ss_wb_fr.index, 
        y=ss_wb_fr.values, 
        mode="lines", 
        name="From World Bank",
    )
    ).add_trace(go.Scatter(
        x=df3p.index, 
        y=df3p["PROJMED"], 
        mode="lines", 
        name="Projection",
        line=dict(color="black")
    )
    ).add_trace(go.Scatter(
        x=df3p.index, 
        y=df3p["PROJLOW80"], 
        mode="lines", 
        name="Projection 80% confidence (lower bound)",
        fill=None,
        line=dict(color="gray"),  # Change color and make it more transparent
    )
    ).add_trace(go.Scatter(
        x=df3p.index, 
        y=df3p["PROJUPP80"], 
        mode="lines", 
        name="Projection 80% confidence (upper bound)",
        fill="tonexty",
        line=dict(color="gray"),  # Change color and make it more transparent
    )
    ).update_layout(
        width=1000,
        title="Projection of Fertility Rate from ISTAT",
        xaxis_title="Year",
        yaxis_title="Fertility Rate",
    ).show()

Assuming 3 scenarios for future fertility rate

In [19]:
last_year = max(fertiliy_by_year.keys())
end_year = 2100

fr_projections = {}
fr_projections['fertility: ↓ 0 in 2100'] = pd.Series(np.linspace(fertiliy_by_year[last_year], 0, end_year-last_year+1), index=range(last_year, end_year+1))
fr_projections['fertility: eq. to last'] = pd.Series([fertiliy_by_year[last_year]] * (end_year-last_year+1), index=range(last_year, end_year+1))
fr_projections['fertility: ↑ 2 in 2100'] = pd.Series(np.linspace(fertiliy_by_year[last_year], 2, end_year-last_year+1), index=range(last_year, end_year+1))

fig1 = deepcopy(fig)
for key, fr in fr_projections.items():
    fig1.add_trace(
        go.Scatter(
            x=fr.index, # Need to shift by one because they refer to "end of y" while I refer to "start of y+1" for the same quantity
            y=fr,
            name=f"SCENARIO - {key}",
            line=dict(dash="dot")
        )
    )
    
# add projection from ISTAT
fig1.add_trace(go.Scatter(
    x=df3p.index, 
    y=df3p["PROJMED"], 
    mode="lines", name="ISTAT Projection ±80 percentile", line=dict(color="black")))
fig1.add_trace(go.Scatter(
        x=df3p.index, 
        y=df3p["PROJLOW80"], 
        mode="lines", showlegend=False, fill=None,line=dict(color="gray")))
fig1.add_trace(go.Scatter(
        x=df3p.index, 
        y=df3p["PROJUPP80"], 
        mode="lines", showlegend=False, fill="tonexty",line=dict(color="gray")))

fig1.update_layout(
    title=None,
    xaxis_title="Year",
    legend_title="Data Source", 
    margin=dict(l=0, r=0, t=20, b=0),
    width=780,
    height=280,
)
fig1.write_html("../images_output/fertility_scenarios.html")
last_istat_value = df6.query("CITIZENSHIP == 'TOTAL'").iloc[-1]['value']
print(f"Fertilyty from last year ({last_year}) - my estimate: {fertiliy_by_year[last_year]:.2f}, ISTAT: {last_istat_value:.2f}")
print("Fertility rate future projection: 3 scenarios")
fig1.show()
Fertilyty from last year (2024) - my estimate: 1.15, ISTAT: 1.18
Fertility rate future projection: 3 scenarios

I can not compute the future newborn because I need to know the exact population in the 18-50 range to infer the number of mothers.

Conclusions

  • the fertility rate is decreasing
  • the age of motherhood is shifting ahead and it is approximable to a normal distribution over the 18-50 years range
  • considering past data and future scenario the discrepancy I get between my estimate of fertility rate and the ISTAT one is negligible: much of the gap I had in provious calculation was due to neglecting the distribution of mothers' age
  • I can not undertand why ISTAT projection are applying some sort of "mean reversion" instead of "trend following": is that an opinionated choice?

Follow-up

  • Use the sigmoid model of fertility to estimate the number of new borns in the future from the number of 18-50y women
  • Investigate the motivations behind ISTAT projection, and how they propagate to "too optimistic" model of the future population that make the INPS projection looking better!
← Home