← Home

Investigating death probability as computed from ISTAT¶

AIM: what are the mortality trends in Italy?

Remember:

  • I already explored the data in a previous notebook
  • Mortality is computed in DCIS_MORTALITA1 as FUNZ_BIO=PROBDEATH
  • Data is available for each age (up to 119 years old, which is extrapolated because there are no people who lived that long)
  • Data is available for each year of observation since 1974
  • The methodology for the calculation is presented in https://www.istat.it/it/files/2018/08/volume-tavole-mortalita-1998.pdf
In [1]:
import numpy as np
import pandas as pd
import requests
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import sdmx
import warnings 

# SDMX client for the "ISTAT" source; the download cell below calls client.data().
client = sdmx.Client("ISTAT")
# Default renderer used by plotly when a figure is shown.
pio.renderers.default = 'vscode+notebook'
# Blanket-suppress every Python warning from here on (applies to all libraries).
warnings.filterwarnings('ignore')
requests.urllib3.disable_warnings() # avoid "InsecureRequestWarning: Unverified HTTPS request is being made to host 'sdmx.istat.it'. Adding certificate verification is strongly advised"

def get_colors(n, cmap_name="rainbow"):
    """Return ``n`` ``rgba(...)`` color strings sampled evenly from a matplotlib colormap.

    The strings are meant to be passed to plotly color arguments.

    Args:
        n: Number of colors needed.
        cmap_name: Key of the colormap in ``matplotlib.colormaps``.

    Returns:
        A list of ``n`` strings of the form ``"rgba(R, G, B, 1.0)"`` where
        R, G and B are integers in the 0-255 range.
    """
    cmap = matplotlib.colormaps[cmap_name]
    # The colormap returns RGBA floats in [0, 1]; 8-bit channels span 0-255,
    # so scale by 255 (scaling by 250 would cap full intensity at 250).
    return [
        f"rgba({int(round(red * 255))}, {int(round(green * 255))}, {int(round(blue * 255))}, 1.0)"
        for red, green, blue, _alpha in (cmap(pos) for pos in np.linspace(0, 1, n))
    ]
In [2]:
# Query the ISTAT dataflow for the PROBDEATH series, leaving AGE unrestricted.
mortality_key = {
    "FREQ": "A",
    "REF_AREA": "IT",
    "DATA_TYPE": "PROBDEATH",
    "AGE": [],  # I want them all
    "SEX": "9",
}
response = client.data(resource_id="26_295_DF_DCIS_MORTALITA1_2", key=mortality_key)
# Flatten the SDMX message into a plain table (dimensions become columns).
df5 = sdmx.to_pandas(response).reset_index()
df5
Out[2]:
FREQ REF_AREA DATA_TYPE SEX AGE TIME_PERIOD value
0 A IT PROBDEATH 9 Y0 1974 24.43089
1 A IT PROBDEATH 9 Y0 1975 23.53843
2 A IT PROBDEATH 9 Y0 1976 21.78605
3 A IT PROBDEATH 9 Y0 1977 20.45305
4 A IT PROBDEATH 9 Y0 1978 18.87216
... ... ... ... ... ... ... ...
6235 A IT PROBDEATH 9 Y99 2021 337.84437
6236 A IT PROBDEATH 9 Y99 2022 366.28239
6237 A IT PROBDEATH 9 Y99 2023 336.68549
6238 A IT PROBDEATH 9 Y99 2024 326.34894
6239 A IT PROBDEATH 9 Y99 2025 309.65789

6240 rows × 7 columns

In [5]:
# List the distinct AGE codes; they are strings, so the order is lexicographic ('Y10' before 'Y2').
print(sorted(df5["AGE"].unique()))
['Y0', 'Y1', 'Y10', 'Y100', 'Y101', 'Y102', 'Y103', 'Y104', 'Y105', 'Y106', 'Y107', 'Y108', 'Y109', 'Y11', 'Y110', 'Y111', 'Y112', 'Y113', 'Y114', 'Y115', 'Y116', 'Y117', 'Y118', 'Y119', 'Y12', 'Y13', 'Y14', 'Y15', 'Y16', 'Y17', 'Y18', 'Y19', 'Y2', 'Y20', 'Y21', 'Y22', 'Y23', 'Y24', 'Y25', 'Y26', 'Y27', 'Y28', 'Y29', 'Y3', 'Y30', 'Y31', 'Y32', 'Y33', 'Y34', 'Y35', 'Y36', 'Y37', 'Y38', 'Y39', 'Y4', 'Y40', 'Y41', 'Y42', 'Y43', 'Y44', 'Y45', 'Y46', 'Y47', 'Y48', 'Y49', 'Y5', 'Y50', 'Y51', 'Y52', 'Y53', 'Y54', 'Y55', 'Y56', 'Y57', 'Y58', 'Y59', 'Y6', 'Y60', 'Y61', 'Y62', 'Y63', 'Y64', 'Y65', 'Y66', 'Y67', 'Y68', 'Y69', 'Y7', 'Y70', 'Y71', 'Y72', 'Y73', 'Y74', 'Y75', 'Y76', 'Y77', 'Y78', 'Y79', 'Y8', 'Y80', 'Y81', 'Y82', 'Y83', 'Y84', 'Y85', 'Y86', 'Y87', 'Y88', 'Y89', 'Y9', 'Y90', 'Y91', 'Y92', 'Y93', 'Y94', 'Y95', 'Y96', 'Y97', 'Y98', 'Y99']
In [13]:
# Build the AGE x YEAR table of death probabilities and save it to CSV.
single_ages = df5.query("AGE!='TOTAL'")
# Turn codes such as "Y42" into the integer 42.
single_ages = single_ages.assign(
    AGE=[int(code.split("Y")[-1]) for code in single_ages["AGE"]]
)
per_age_period = single_ages.groupby(["AGE", "TIME_PERIOD"], as_index=False)["value"].sum()
per_age_period = per_age_period.assign(
    YEAR=pd.to_datetime(per_age_period["TIME_PERIOD"]).dt.year
)
wide = per_age_period.pivot(index="AGE", columns="YEAR", values="value").sort_index()
dfdp = wide.div(1000)  # PROBDEATH is per 1000 people
dfdp.to_csv("../data/deathprob_by_age_year.csv")
dfdp
Out[13]:
YEAR 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 ... 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
AGE
0 0.024431 0.023538 0.021786 0.020453 0.018872 0.017614 0.016245 0.015062 0.013304 0.012488 ... 0.002962 0.002915 0.002918 0.002784 0.002632 0.002407 0.002346 0.002460 0.002523 0.002565
1 0.001161 0.001034 0.000992 0.000951 0.000931 0.000891 0.000880 0.000842 0.000818 0.000769 ... 0.000190 0.000203 0.000204 0.000208 0.000191 0.000182 0.000175 0.000199 0.000203 0.000195
2 0.000803 0.000729 0.000687 0.000662 0.000643 0.000619 0.000617 0.000583 0.000570 0.000541 ... 0.000148 0.000145 0.000145 0.000152 0.000143 0.000135 0.000125 0.000138 0.000141 0.000141
3 0.000580 0.000540 0.000499 0.000480 0.000460 0.000443 0.000448 0.000422 0.000415 0.000394 ... 0.000115 0.000107 0.000109 0.000114 0.000110 0.000101 0.000094 0.000102 0.000106 0.000109
4 0.000450 0.000430 0.000392 0.000377 0.000353 0.000338 0.000347 0.000329 0.000326 0.000305 ... 0.000092 0.000084 0.000089 0.000089 0.000088 0.000079 0.000076 0.000082 0.000087 0.000091
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
115 0.830542 0.842068 0.840793 0.832899 0.820353 0.821158 0.830536 0.812013 0.827645 0.844510 ... 0.783121 0.790757 0.785699 0.789470 0.825086 0.801922 0.827020 0.812427 0.794315 0.786954
116 0.840824 0.851811 0.851071 0.844183 0.832275 0.833640 0.842781 0.824728 0.840211 0.855815 ... 0.805091 0.811518 0.807117 0.810286 0.843005 0.821794 0.844665 0.832323 0.815078 0.808432
117 0.849532 0.860006 0.859822 0.854056 0.842874 0.844825 0.853700 0.836179 0.851242 0.865447 ... 0.825683 0.830815 0.825405 0.829654 0.859216 0.840126 0.860587 0.850653 0.834376 0.828475
118 0.856800 0.866782 0.867174 0.862612 0.852221 0.854774 0.863353 0.846417 0.860838 0.873540 ... 0.844921 0.848679 0.843982 0.847608 0.873775 0.856952 0.874836 0.867463 0.851072 0.846985
119 0.862763 0.872263 0.873250 0.869934 0.860379 0.863539 0.871795 0.855488 0.869114 0.880248 ... 0.862830 0.865155 0.861231 0.864192 0.886750 0.872318 0.887476 0.882806 0.867411 0.864399

120 rows × 52 columns

In [14]:
# Small multiples: one panel per age, death probability in percent over the years.
fig, axs = plt.subplots(12, 10, figsize=[24, 18], sharex=True)
panels = axs.flatten()
years = dfdp.columns
marked_years = (
    2003,  # heatwave - investigate in Notebook 53
    2020,  # COVID-19, also change in methodology (see Notebook 25)
)


def _as_percent(value, _pos):
    """Format a tick value with two decimals followed by a percent sign."""
    return f'{value:.2f}%'


for panel_idx, age in enumerate(dfdp.index):
    ax = panels[panel_idx]
    ax.plot(years, dfdp.loc[age] * 100, color="red")
    ax.set_title(f"Age: {age}")
    for year in marked_years:
        ax.axvline(year, color="black", linestyle="-", lw=0.5, alpha=0.5)
    # Each axis gets its own formatter instance.
    ax.yaxis.set_major_formatter(plt.FuncFormatter(_as_percent))
plt.tight_layout()
plt.show()
No description has been provided for this image
In [15]:
# Use plotly to plot all ages in a single figure; each age's series is divided
# by its own value in the first available year, so every line starts at 1.
fig = go.Figure()
first_year = dfdp.columns.min()
colors = get_colors(len(dfdp.index))
for i, age in enumerate(dfdp.index):
    fig.add_trace(go.Scatter(
        x=dfdp.columns,
        y=dfdp.loc[age] / dfdp.loc[age, first_year],
        mode='lines',
        name=str(age),  # trace names are strings; make the conversion explicit
        line_color=colors[i],
    ))
fig.update_layout(
    title="Mortality rate by age group",
    xaxis_title="Year",
    # The baseline is first_year (not a fixed 2003), so derive the label from it.
    yaxis_title=f"Mortality rate (relative to {first_year})",
    legend_title="Age group",
    width=1000,
)
fig.show()

Conclusions¶

  • There was a sharp decline in mortality from 1974 to 2000, except for the hump at 1-40 years old, especially evident in people around 30 years old, with its peak around 1990-1995: it should be due to drug abuse, but why does it affect people so young? Unhealthy parents?
  • From 2000 to now, results are mixed: for some ages it is still going down like pre-2000 (e.g., 78-85 years old), for some it plateaued with noise (90+ years old), and for many it has been reaching a plateau only in the last few years (45-77).
  • Covid-19 increased mortality, which for many ages has not yet come back down to the pre-Covid trend: is this due to the Italian health care system worsening?
  • In relative terms (current mortality over mortality in 1974), younger people had the best improvement: while 50-80 year olds had a 60% decrease in mortality (relative to 1974), this value goes up to 80% for those under 18.

Follow-up¶

  • Reproduce the PROBDEATH values from raw data of deaths. Only data grouped by 5-year age groups are available: can I find single-age raw data? Or is it possible to do it from 5-year age-group data?
← Home