chore(growth): fetch extended data for indicators
All checks were successful
continuous-integration/drone/pr Build is passing

Also, improve module doc.
This commit is contained in:
João Paulo Dubas 2024-06-06 22:49:52 +00:00
parent a8466f0c1d
commit 902a22c947
Signed by: joao.dubas
SSH Key Fingerprint: SHA256:V1mixgOGRc/YMhGx/DNkOSmJxgA2vHNrDZEk3wt/kOA
9 changed files with 30908 additions and 2248 deletions

View File

@ -1,6 +1,13 @@
defmodule Growth.Indicators.Download do
@moduledoc """
Download, extract, and load indicators to construct z-score for:
To calculate z-scores for the different growth measurements, the system must:
1. Fetch indicators from World Health Organization
2. Extract data from excel sheets
3. Convert the data into the proper format, especially handling decimal values
4. Add metadata to make searching for the parameters possible
The following indicators to construct z-scores are fetched:
* [height for age 0 to 5 years](https://www.who.int/tools/child-growth-standards/standards/length-height-for-age)
* [height for age 5 to 19 years](https://www.who.int/tools/growth-reference-data-for-5to19-years/indicators/height-for-age)
@ -201,11 +208,16 @@ defmodule Growth.Indicators.Download do
def process_measure(
{measure,
%{
female: %{age_tables: female_urls, expanded_tables: _},
male: %{age_tables: male_urls, expanded_tables: _}
female: %{age_tables: female_urls, expanded_tables: e_female_urls},
male: %{age_tables: male_urls, expanded_tables: e_male_urls}
}}
) do
[{:female, female_urls}, {:male, male_urls}]
[
{:female, :age, female_urls},
{:male, :age, male_urls},
{:female, :expanded, e_female_urls},
{:male, :expanded, e_male_urls}
]
|> Enum.map(&Task.async(__MODULE__, :process_gender, [&1]))
|> Task.await_many()
|> merge()
@ -213,18 +225,18 @@ defmodule Growth.Indicators.Download do
|> save(measure)
end
def process_gender({gender, urls}) do
def process_gender({gender, category, urls}) do
urls
|> Enum.map(&Task.async(__MODULE__, :process, [gender, &1]))
|> Enum.map(&Task.async(__MODULE__, :process, [gender, category, &1]))
|> Task.await_many()
|> merge()
end
def process(gender, url) do
def process(gender, category, url) do
url
|> fetch!()
|> extract!(url)
|> convert(gender, url)
|> convert(gender, category, url)
end
def fetch!(url) do
@ -247,15 +259,15 @@ defmodule Growth.Indicators.Download do
end
end
@common_header ~w(source gender age_unit age l m s sd3neg sd2neg sd1neg sd0 sd1 sd2 sd3)
@common_header ~w(source category gender age_unit age l m s sd3neg sd2neg sd1neg sd0 sd1 sd2 sd3)
# FIX: (jpd) weight for length/height does not have an age in the header row
def convert([header | rows], gender, url) do
def convert([header | rows], gender, category, url) do
age_unit = header |> hd() |> String.downcase()
fixed_header = header |> tl() |> Enum.map(&String.downcase/1) |> Enum.map(&String.trim/1)
parsed_header = ["source" | ["gender" | ["age_unit" | ["age" | fixed_header]]]]
parsed_header = ["source" | ["category" | ["gender" | ["age_unit" | ["age" | fixed_header]]]]]
# NOTE: (jpd): parsing the rows consists of:
# 1. convert row values to decimal
@ -267,7 +279,7 @@ defmodule Growth.Indicators.Download do
parsed_rows =
rows
|> Stream.map(fn row -> Enum.map(row, &Decimal.new/1) end)
|> Stream.map(&[url | [gender | [age_unit | &1]]])
|> Stream.map(&[url | [category | [gender | [age_unit | &1]]]])
|> Stream.map(&Enum.zip(parsed_header, &1))
|> Stream.map(&Map.new/1)
|> Stream.map(&Map.take(&1, @common_header))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff