wip(growth): improve data parsing
1. Fetch common data for all indicators
2. Remove headers during merge

Also, re-generate files.
This commit is contained in:
@@ -247,25 +247,47 @@ defmodule Growth.Indicators.Download do
|
||||
end
|
||||
end
|
||||
|
||||
@common_header ~w(source gender age_unit age l m s sd3neg sd2neg sd1neg sd0 sd1 sd2 sd3)
|
||||
|
||||
def convert([header | rows], gender, url) do
|
||||
age = header |> hd() |> String.downcase()
|
||||
age_unit = header |> hd() |> String.downcase()
|
||||
|
||||
fixed_header = header |> tl() |> Enum.map(&String.downcase/1) |> Enum.map(&String.trim/1)
|
||||
|
||||
parsed_header = ["source" | ["gender" | ["age_unit" | ["age" | fixed_header]]]]
|
||||
|
||||
# NOTE: (jpd): parsing the rows consist in:
|
||||
# 1. convert row values to decimal
|
||||
# 2. prepend the values url source, gender, and age unit
|
||||
# 3. convert row to keyword list using the parsed header
|
||||
# 4. convert from keyword list to map
|
||||
# 5. fetch common values based on common headers
|
||||
# 6. sort row values based on common headers
|
||||
parsed_rows =
|
||||
Enum.map(rows, fn row ->
|
||||
parsed_row = Enum.map(row, &Decimal.new/1)
|
||||
|
||||
[url | [gender | [age | parsed_row]]]
|
||||
rows
|
||||
|> Stream.map(fn row -> Enum.map(row, &Decimal.new/1) end)
|
||||
|> Stream.map(&[url | [gender | [age_unit | &1]]])
|
||||
|> Stream.map(&Enum.zip(parsed_header, &1))
|
||||
|> Stream.map(&Map.new/1)
|
||||
|> Stream.map(&Map.take(&1, @common_header))
|
||||
|> Enum.map(fn row ->
|
||||
Enum.map(@common_header, fn key -> Map.get(row, key) end)
|
||||
end)
|
||||
|
||||
[parsed_header | parsed_rows]
|
||||
[@common_header | parsed_rows]
|
||||
end
|
||||
|
||||
def merge(datum) do
|
||||
Enum.reduce(datum, [], fn data, accum ->
|
||||
datum
|
||||
|> Stream.with_index()
|
||||
|> Stream.map(fn
|
||||
{data, 0} ->
|
||||
data
|
||||
|
||||
{[_ | data], _} ->
|
||||
data
|
||||
end)
|
||||
|> Enum.reduce([], fn data, accum ->
|
||||
Enum.concat(accum, data)
|
||||
end)
|
||||
end
|
||||
|
Reference in New Issue
Block a user