Investigating death probability as computed from ISTAT¶

AIM: what are the mortality trends in Italy?

Remember:

I already explored the data in a previous notebook
Mortality is computed in DCIS_MORTALITA1 as FUNZ_BIO=PROBDEATH
Data is available for each age (up to 119 years old, which is extrapolated because there are no people who lived that long)
Data is available for each year of observation since 1974
The methodology for the calculation is presented in https://www.istat.it/it/files/2018/08/volume-tavole-mortalita-1998.pdf

import numpy as np
import pandas as pd
import requests
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import sdmx
import warnings 

# Session-wide setup used by the cells below.
# SDMX client created for the "ISTAT" source; it is used later to download the data.
client = sdmx.Client("ISTAT")
# Default plotly renderer used when figures are displayed with fig.show().
pio.renderers.default = 'vscode+notebook'
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
requests.urllib3.disable_warnings() # avoid "InsecureRequestWarning: Unverified HTTPS request is being made to host 'sdmx.istat.it'. Adding certificate verification is strongly advised"

def get_colors(n, cmap_name="rainbow"):
    """Return ``n`` evenly spaced colormap colors as ``rgba(...)`` strings.

    The strings are meant for plotly color arguments (e.g. px ``colors_discrete``
    or a trace ``line_color``).

    Args:
        n: Number of colors needed.
        cmap_name: Name of a registered matplotlib colormap.

    Returns:
        A list of ``n`` strings of the form ``"rgba(R, G, B, 1.0)"`` where R, G
        and B are integers in the 0-255 range. Alpha is always written as 1.0.
    """
    cmap = matplotlib.colormaps[cmap_name]
    colors_str = []
    # Sample the colormap at n equally spaced positions over its full [0, 1] range.
    for position in np.linspace(0, 1, n):
        red, green, blue = cmap(position)[:3]
        # The colormap returns floats in [0, 1]; CSS rgba channels span 0-255,
        # so scale by 255 (scaling by 250 never reaches full intensity).
        channels = [int(round(float(channel) * 255)) for channel in (red, green, blue)]
        colors_str.append(f"rgba({channels[0]}, {channels[1]}, {channels[2]}, 1.0)")
    return colors_str

# Download the PROBDEATH series of the DCIS_MORTALITA1 dataflow and flatten
# the result into a DataFrame with one row per (age, year) observation.
probdeath_key = {
    "FREQ": "A",
    "REF_AREA": "IT",
    "DATA_TYPE": "PROBDEATH",
    "AGE": [],  # left empty on purpose: I want all the ages
    "SEX": "9",
}
probdeath_msg = client.data(
    resource_id="26_295_DF_DCIS_MORTALITA1_2",
    key=probdeath_key,
)
# reset_index() turns the dimension index into ordinary columns.
df5 = sdmx.to_pandas(probdeath_msg).reset_index()
df5

# List the distinct age codes returned by the query (sorted as strings).
print(sorted(df5["AGE"].unique()))

['Y0', 'Y1', 'Y10', 'Y100', 'Y101', 'Y102', 'Y103', 'Y104', 'Y105', 'Y106', 'Y107', 'Y108', 'Y109', 'Y11', 'Y110', 'Y111', 'Y112', 'Y113', 'Y114', 'Y115', 'Y116', 'Y117', 'Y118', 'Y119', 'Y12', 'Y13', 'Y14', 'Y15', 'Y16', 'Y17', 'Y18', 'Y19', 'Y2', 'Y20', 'Y21', 'Y22', 'Y23', 'Y24', 'Y25', 'Y26', 'Y27', 'Y28', 'Y29', 'Y3', 'Y30', 'Y31', 'Y32', 'Y33', 'Y34', 'Y35', 'Y36', 'Y37', 'Y38', 'Y39', 'Y4', 'Y40', 'Y41', 'Y42', 'Y43', 'Y44', 'Y45', 'Y46', 'Y47', 'Y48', 'Y49', 'Y5', 'Y50', 'Y51', 'Y52', 'Y53', 'Y54', 'Y55', 'Y56', 'Y57', 'Y58', 'Y59', 'Y6', 'Y60', 'Y61', 'Y62', 'Y63', 'Y64', 'Y65', 'Y66', 'Y67', 'Y68', 'Y69', 'Y7', 'Y70', 'Y71', 'Y72', 'Y73', 'Y74', 'Y75', 'Y76', 'Y77', 'Y78', 'Y79', 'Y8', 'Y80', 'Y81', 'Y82', 'Y83', 'Y84', 'Y85', 'Y86', 'Y87', 'Y88', 'Y89', 'Y9', 'Y90', 'Y91', 'Y92', 'Y93', 'Y94', 'Y95', 'Y96', 'Y97', 'Y98', 'Y99']

# Build the age x year table of death probabilities and save it to disk.
dfdp = (
    df5
    # discard the aggregate row, if present
    .loc[lambda frame: frame["AGE"].ne("TOTAL")]
    # age codes look like "Y42": keep the part after the "Y" as an integer
    .assign(AGE=lambda frame: [int(code.split("Y")[-1]) for code in frame["AGE"]])
    .groupby(["AGE", "TIME_PERIOD"], as_index=False)["value"].sum()
    .assign(YEAR=lambda frame: pd.to_datetime(frame["TIME_PERIOD"]).dt.year)
    # rows = age, columns = year
    .pivot(index="AGE", columns="YEAR", values="value")
    .sort_index()
    # PROBDEATH is per 1000 people: convert to a probability
    .truediv(1000)
)
dfdp.to_csv("../data/deathprob_by_age_year.csv")
dfdp

# Small multiples: one panel per age, showing the death probability (in %) over the years.
# The grid is derived from the number of ages instead of being hard-coded,
# so the cell keeps working if the dataset gains or loses ages
# (120 ages still give the 12 x 10 grid and a 24 x 18 figure).
n_cols = 10
n_ages = len(dfdp.index)
n_rows = max(1, -(-n_ages // n_cols))  # ceiling division; at least one row
fig, axs = plt.subplots(
    n_rows, n_cols, figsize=[24, 1.5 * n_rows], sharex=True, squeeze=False
)
panels = axs.flatten()
for ax, age in zip(panels, dfdp.index):
    ax.plot(dfdp.columns, dfdp.loc[age] * 100, color="red")
    ax.set_title(f"Age: {age}")
    ax.axvline(2003, color="black", linestyle="-", lw=0.5, alpha=0.5) # heatwave - investigate in Notebook 53
    ax.axvline(2020, color="black", linestyle="-", lw=0.5, alpha=0.5) # COVID-19, also change in methodology (see Notebook 25)
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x:.2f}%'))
# Hide the panels left over when the number of ages does not fill the grid.
for ax in panels[n_ages:]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()

# use plotly to plot all ages in a single plot
# Each age's series is divided by its own value in the first available year,
# so every line starts at 1 and shows the change relative to that year.
fig = go.Figure()
first_year = dfdp.columns.min()
colors = get_colors(len(dfdp.index))
for age, color in zip(dfdp.index, colors):
    fig.add_trace(go.Scatter(
        x=dfdp.columns,
        y=dfdp.loc[age] / dfdp.loc[age, first_year],
        mode='lines',
        name=str(age),  # trace names are strings; shown in the legend
        line_color=color,
    ))
fig.update_layout(
    title="Mortality rate by age group",
    xaxis_title="Year",
    # The baseline is the first year in the data, not a fixed 2003.
    yaxis_title=f"Mortality rate (relative to {first_year})",
    legend_title="Age group",
    width=1000,
)
fig.show()

Conclusions¶

There was a sharp decline in mortality from 1974 to 2000, except for a bump among 1-40 year olds, especially evident in people around 30 years old, with its peak around 1990-1995: it should be due to drug abuse, but why does it affect people so young? Unhealthy parents?
From 2000 to now, results are mixed: for some ages it is still going down as it did before 2000 (e.g., 78-85 years old), for some it has plateaued with noise (90+ years old), and for many it has started to reach a plateau only in the last few years (45-77).
Covid-19 increased mortality, which for many ages has not yet come back down to the pre-Covid trend: is this due to the Italian health care system worsening?
In relative terms (current mortality over mortality in 1974), younger people saw the largest improvement: while 50-80 year olds had a 60% decrease in mortality (relative to 1974), this value goes up to 80% for those under 18.

Follow-up¶

Reproduce the PROBDEATH values from raw data of deaths. Only data grouped by 5-year age groups are available: can I find single-age raw data? Or is it possible to do it from 5-year age-group data?

YEAR	1974	1975	1976	1977	1978	1979	1980	1981	1982	1983	...	2016	2017	2018	2019	2020	2021	2022	2023	2024	2025
AGE
0	0.024431	0.023538	0.021786	0.020453	0.018872	0.017614	0.016245	0.015062	0.013304	0.012488	...	0.002962	0.002915	0.002918	0.002784	0.002632	0.002407	0.002346	0.002460	0.002523	0.002565
1	0.001161	0.001034	0.000992	0.000951	0.000931	0.000891	0.000880	0.000842	0.000818	0.000769	...	0.000190	0.000203	0.000204	0.000208	0.000191	0.000182	0.000175	0.000199	0.000203	0.000195
2	0.000803	0.000729	0.000687	0.000662	0.000643	0.000619	0.000617	0.000583	0.000570	0.000541	...	0.000148	0.000145	0.000145	0.000152	0.000143	0.000135	0.000125	0.000138	0.000141	0.000141
3	0.000580	0.000540	0.000499	0.000480	0.000460	0.000443	0.000448	0.000422	0.000415	0.000394	...	0.000115	0.000107	0.000109	0.000114	0.000110	0.000101	0.000094	0.000102	0.000106	0.000109
4	0.000450	0.000430	0.000392	0.000377	0.000353	0.000338	0.000347	0.000329	0.000326	0.000305	...	0.000092	0.000084	0.000089	0.000089	0.000088	0.000079	0.000076	0.000082	0.000087	0.000091
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
115	0.830542	0.842068	0.840793	0.832899	0.820353	0.821158	0.830536	0.812013	0.827645	0.844510	...	0.783121	0.790757	0.785699	0.789470	0.825086	0.801922	0.827020	0.812427	0.794315	0.786954
116	0.840824	0.851811	0.851071	0.844183	0.832275	0.833640	0.842781	0.824728	0.840211	0.855815	...	0.805091	0.811518	0.807117	0.810286	0.843005	0.821794	0.844665	0.832323	0.815078	0.808432
117	0.849532	0.860006	0.859822	0.854056	0.842874	0.844825	0.853700	0.836179	0.851242	0.865447	...	0.825683	0.830815	0.825405	0.829654	0.859216	0.840126	0.860587	0.850653	0.834376	0.828475
118	0.856800	0.866782	0.867174	0.862612	0.852221	0.854774	0.863353	0.846417	0.860838	0.873540	...	0.844921	0.848679	0.843982	0.847608	0.873775	0.856952	0.874836	0.867463	0.851072	0.846985
119	0.862763	0.872263	0.873250	0.869934	0.860379	0.863539	0.871795	0.855488	0.869114	0.880248	...	0.862830	0.865155	0.861231	0.864192	0.886750	0.872318	0.887476	0.882806	0.867411	0.864399

	FREQ	REF_AREA	DATA_TYPE	SEX	AGE	TIME_PERIOD	value
0	A	IT	PROBDEATH	9	Y0	1974	24.43089
1	A	IT	PROBDEATH	9	Y0	1975	23.53843
2	A	IT	PROBDEATH	9	Y0	1976	21.78605
3	A	IT	PROBDEATH	9	Y0	1977	20.45305
4	A	IT	PROBDEATH	9	Y0	1978	18.87216
...	...	...	...	...	...	...	...
6235	A	IT	PROBDEATH	9	Y99	2021	337.84437
6236	A	IT	PROBDEATH	9	Y99	2022	366.28239
6237	A	IT	PROBDEATH	9	Y99	2023	336.68549
6238	A	IT	PROBDEATH	9	Y99	2024	326.34894
6239	A	IT	PROBDEATH	9	Y99	2025	309.65789