wip(growth): improve data parsing

1. Fetch common data for all indicators
2. Remove headers during merge

Also, re-generate files.
This commit is contained in:
2024-06-04 23:55:03 +00:00
parent 2fedb4ecf0
commit 9c8e1d20ee
9 changed files with 1125 additions and 1131 deletions

View File

@@ -247,25 +247,47 @@ defmodule Growth.Indicators.Download do
end
end
# Canonical column order shared by every indicator table. Each converted row is
# projected onto exactly these keys (in this order) so that tables fetched from
# different sources can be merged positionally later.
@common_header ~w(source gender age_unit age l m s sd3neg sd2neg sd1neg sd0 sd1 sd2 sd3)

@doc """
Converts a raw CSV-like table (`[header | rows]`) into rows normalized to
`@common_header` order.

The first header cell names the age unit (e.g. `"Month"`); the remaining
header cells label the numeric columns. Every data row is prefixed with the
`url` source, `gender`, and the downcased age unit, then re-projected onto
`@common_header`.

Returns `[@common_header | normalized_rows]`.
"""
def convert([header | rows], gender, url) do
  # First header cell is the age-unit label; downcase it for use as a value.
  age_unit = header |> hd() |> String.downcase()
  fixed_header = header |> tl() |> Enum.map(&String.downcase/1) |> Enum.map(&String.trim/1)
  parsed_header = ["source" | ["gender" | ["age_unit" | ["age" | fixed_header]]]]

  # NOTE: (jpd): parsing the rows consists of:
  # 1. convert row values to decimal
  # 2. prepend the values url source, gender, and age unit
  # 3. convert row to keyword list using the parsed header
  # 4. convert from keyword list to map
  # 5. fetch common values based on common headers
  # 6. sort row values based on common headers
  parsed_rows =
    rows
    |> Stream.map(fn row -> Enum.map(row, &Decimal.new/1) end)
    |> Stream.map(&[url | [gender | [age_unit | &1]]])
    |> Stream.map(&Enum.zip(parsed_header, &1))
    |> Stream.map(&Map.new/1)
    |> Stream.map(&Map.take(&1, @common_header))
    |> Enum.map(fn row ->
      Enum.map(@common_header, fn key -> Map.get(row, key) end)
    end)

  [@common_header | parsed_rows]
end
@doc """
Merges several converted tables into one.

The first table is kept whole (including its header row); every subsequent
table has its leading header row dropped before concatenation, so the result
contains exactly one header followed by all data rows.
"""
def merge(datum) do
  datum
  |> Stream.with_index()
  |> Stream.map(fn
    # Keep the first table as-is: it contributes the single shared header.
    {data, 0} ->
      data

    # Drop the redundant header row from every other table.
    {[_ | data], _} ->
      data
  end)
  |> Enum.reduce([], fn data, accum ->
    Enum.concat(accum, data)
  end)
end