[WIP] Implement server to parse address #1

Draft
joao.dubas wants to merge 24 commits from jpd-add-parser-server into main
18 changed files with 1099 additions and 368 deletions

22
.editorconfig Normal file
View File

@ -0,0 +1,22 @@
root = true
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.{css,html,js,json}]
indent_style = space
indent_size = 2
[*.{ex,exs}]
indent_style = space
indent_size = 2
[*.go]
indent_style = tab
indent_size = 4
[*.py]
indent_style = space
indent_size = 4

3
.gitignore vendored
View File

@ -96,3 +96,6 @@ erl_crash.dump
*.beam
config/*.secret.exs
elixir_ls/
# ---> Project
config/data/addresses.csv

View File

@ -1,5 +1,5 @@
erlang 26.2.1
elixir 1.16.0-otp-26
poetry 1.7.1
python 3.12.1
go 1.21.6
erlang 27.3.4
elixir 1.18.4
poetry 2.1.3
python 3.13.3
golang 1.24.3

View File

@ -1,4 +1,4 @@
FROM debian:bookworm-20240110-slim AS builder
FROM debian:bookworm-20250428-slim AS builder
RUN apt-get update \
&& apt-get install -y \
autoconf \
@ -10,14 +10,18 @@ RUN apt-get update \
pkg-config \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /opt/src
RUN git clone https://github.com/openvenues/libpostal.git \
RUN git config --global http.version HTTP/1.1 \
&& git config --global http.postBuffer 524288000 \
&& git config --global http.lowSpeedLimit 0 \
&& git config --global http.lowSpeedTime 999999 \
&& git clone https://github.com/openvenues/libpostal.git \
&& cd libpostal \
&& ./bootstrap.sh \
&& ./configure --datadir=/usr/local/share \
&& make -j4 \
&& make install
FROM hexpm/elixir:1.16.0-erlang-26.2.1-debian-bookworm-20231009-slim AS elixir
FROM hexpm/elixir:1.18.4-erlang-27.3.4-debian-bookworm-20250520-slim AS elixir
RUN apt-get update \
&& apt-get install -y \
build-essential \
@ -31,13 +35,14 @@ COPY --from=builder /usr/local/lib/pkgconfig/libpostal.pc /usr/local/lib/pkgconf
COPY --from=builder /usr/local/share/libpostal /usr/local/share/libpostal
COPY --from=builder /usr/local/bin/libpostal_data /usr/local/bin/libpostal_data
RUN ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so.1 \
&& ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so
&& ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so \
&& ldconfig
WORKDIR /opt/src/app
COPY ./ex .
# Fetch and compile dependencies BEFORE declaring the volumes: with the legacy
# builder, anything a build step writes under a path after its VOLUME
# declaration is discarded, which would drop the fetched deps and the compiled
# _build output from the image.
RUN mix do deps.get, deps.compile
VOLUME ["/opt/src/app/_build", "/opt/src/app/deps"]
FROM python:3.12.1-slim-bookworm AS python
FROM python:3.13.3-slim-bookworm AS python
# Prepend /root/.local/bin to PATH (presumably where the poetry installer puts
# its launcher - confirm). Uses the key=value form; the space-separated
# `ENV key value` form is deprecated.
ENV PATH=/root/.local/bin:${PATH}
RUN apt-get update \
&& apt-get install -y \
@ -53,14 +58,16 @@ COPY --from=builder /usr/local/lib/pkgconfig/libpostal.pc /usr/local/lib/pkgconf
COPY --from=builder /usr/local/share/libpostal /usr/local/share/libpostal
COPY --from=builder /usr/local/bin/libpostal_data /usr/local/bin/libpostal_data
RUN ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so.1 \
&& ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so
&& ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so \
&& ldconfig
WORKDIR /opt/src/app
COPY ./py/pyproject.toml .
COPY ./py/poetry.lock .
COPY ./py/README.md .
RUN poetry install
COPY ./py .
FROM golang:1.21.6-bookworm AS go
FROM golang:1.24.3-bookworm AS go
COPY --from=builder /usr/local/include/libpostal /usr/local/include/libpostal
COPY --from=builder /usr/local/lib/libpostal.a /usr/local/lib/
COPY --from=builder /usr/local/lib/libpostal.la /usr/local/lib/
@ -69,7 +76,11 @@ COPY --from=builder /usr/local/lib/pkgconfig/libpostal.pc /usr/local/lib/pkgconf
COPY --from=builder /usr/local/share/libpostal /usr/local/share/libpostal
COPY --from=builder /usr/local/bin/libpostal_data /usr/local/bin/libpostal_data
RUN ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so.1 \
&& ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so
&& ln -s /usr/local/lib/libpostal.so.1.0.1 /usr/local/lib/libpostal.so \
&& ldconfig
WORKDIR /opt/src/config/data
COPY ./config/data/addresses.csv ./
WORKDIR /opt/src/app
COPY ./go .
RUN go get ...
RUN go mod download \
&& go build

Binary file not shown.

View File

@ -0,0 +1,30 @@
{
"displayedAttributes": [
"country",
"state",
"city",
"neighborhood",
"road",
"house_number",
"house",
"unit",
"postal_code"
],
"filterableAttributes": [
"country",
"state",
"city",
"neighborhood",
"road",
"house_number",
"house",
"unit",
"postal_code"
],
"searchableAttributes": [
"house_number",
"house",
"unit",
"postal_code"
]
}

View File

@ -4,25 +4,76 @@ services:
build:
target: builder
image: 'joaodubas/addressex:builder'
pull_policy: never
profiles:
- build
hostname: libpostal
init: true
restart: unless-stopped
entrypoint: sleep
command: infinity
search:
image: 'getmeili/meilisearch:v1.14.0'
hostname: meili
restart: unless-stopped
environment:
MEILI_ENV: development
MEILI_MASTER_KEY: &meili_master_key ${ADDRESSEX_MEILI_KEY:-59EDmQofBp8vGT8kMvJJADPHRWHEAsWzZjCCqBFpVeuBmC2kWgCiBEgG7vfZ3ArY}
volumes:
- 'meili_data:/meili_data'
ports:
- '${ADDRESSEX_MEILI_PORT:-7700:7700}'
search_ui:
image: 'riccoxie/meilisearch-ui:v0.12.1'
hostname: search-ui
init: true
restart: unless-stopped
ports:
- '${ADDRESSEX_MEILI_UI_PORT:-24173:4173}'
typesense:
image: 'typesense/typesense:28.0'
hostname: typesense
restart: unless-stopped
ex:
build:
target: elixir
image: 'joaodubas/addressex:elixir'
pull_policy: never
hostname: ex
init: true
restart: unless-stopped
entrypoint: sleep
command: infinity
py:
build:
target: python
image: 'joaodubas/addressex:python'
pull_policy: never
hostname: py
init: true
restart: unless-stopped
entrypoint: sleep
command: infinity
go:
build:
target: go
image: 'joaodubas/addressex:go'
pull_policy: never
hostname: go
init: true
restart: unless-stopped
environment:
MEILI_MASTER_KEY: *meili_master_key
ports:
- '${ADDRESSEX_GO_PORT:-9000:9000}'
entrypoint: sleep
command: infinity
volumes:
meili_data: {}

83
go/addresses.go Normal file
View File

@ -0,0 +1,83 @@
package main
import (
"crypto/sha1"
"encoding/hex"
"fmt"
parser "github.com/openvenues/gopostal/parser"
)
// Address is a parsed postal address, serialised to JSON with the attribute
// names used by the search index settings (see updateIndex).
//
// The component fields are filled from the parser's labelled components in
// newFromComponents; Raw keeps the unparsed input string; ID is the hex SHA-1
// of the String() rendering and serves as the document primary key.
type Address struct {
ID string `json:"id"`
Country string `json:"country"`
State string `json:"state"`
City string `json:"city"`
Neighborhood string `json:"neighborhood"`
Road string `json:"road"`
HouseNumber string `json:"house_number"`
House string `json:"house"`
Unit string `json:"unit"`
PostalCode string `json:"postal_code"`
Raw string `json:"raw_address"`
}
// String renders the address on a single line in the order
// road, house number, unit house. neighborhood, city, state, postal code.
//
// Country and Raw are not part of the rendering. Because id() hashes this
// string, they do not influence the document ID either.
func (a Address) String() string {
	const layout = "%s, %s, %s %s. %s, %s, %s, %s."
	return fmt.Sprintf(
		layout,
		a.Road, a.HouseNumber,
		a.Unit, a.House,
		a.Neighborhood, a.City, a.State, a.PostalCode,
	)
}
// id returns the lowercase hex encoding of the SHA-1 digest of a.String().
// Two addresses with the same String() rendering therefore share an ID.
func (a Address) id() string {
	digest := sha1.Sum([]byte(a.String()))
	return hex.EncodeToString(digest[:])
}
// parseAddress runs the free-form address through the gopostal parser and
// converts the labelled components into an Address, keeping the original
// string in Address.Raw.
func parseAddress(address string) Address {
	return newFromComponents(address, parser.ParseAddress(address))
}
// newFromComponents builds an Address from the parser output.
//
// raw is stored verbatim in Address.Raw. Each component is copied into the
// field matching its label; unknown labels are ignored. When a label occurs
// more than once, or several labels map to the same field, the last component
// in the slice wins. The ID is computed once all fields are set.
func newFromComponents(raw string, components []parser.ParsedComponent) Address {
	parsed := Address{Raw: raw}

	for i := range components {
		label, value := components[i].Label, components[i].Value

		switch label {
		case "country":
			parsed.Country = value
		case "state":
			parsed.State = value
		case "city":
			parsed.City = value
		// NOTE(review): three different labels feed Neighborhood, including
		// "state_district" - confirm this mapping is intended.
		case "suburb", "city_district", "state_district":
			parsed.Neighborhood = value
		case "road":
			parsed.Road = value
		case "house_number":
			parsed.HouseNumber = value
		case "house":
			parsed.House = value
		case "unit":
			parsed.Unit = value
		case "postcode":
			parsed.PostalCode = value
		}
	}

	parsed.ID = parsed.id()
	return parsed
}

View File

@ -1,5 +1,18 @@
module joaodubas.dev/addressex
go 1.21.6
go 1.23.0
require github.com/openvenues/gopostal v0.0.0-20171226154602-e0184512a45d // indirect
toolchain go1.24.3
require (
github.com/meilisearch/meilisearch-go v0.32.0
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
)
require (
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/stretchr/testify v1.8.4 // indirect
)

View File

@ -1,2 +1,32 @@
github.com/openvenues/gopostal v0.0.0-20171226154602-e0184512a45d h1:KJ+N55d9zLN8fTg3NchLdmmAmPieXC5E6UNJ8zFFttU=
github.com/openvenues/gopostal v0.0.0-20171226154602-e0184512a45d/go.mod h1:Ycrd7XnwQdumHzpB/6WEa85B4WNdbLC6Wz4FAQNkaV0=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI=
github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/meilisearch/meilisearch-go v0.32.0 h1:cWcycpONSH3VLTZ5npUl1O5aXPkNM0vUx6bywnYqGbE=
github.com/meilisearch/meilisearch-go v0.32.0/go.mod h1:aNtyuwurDg/ggxQIcKqWH6G9g2ptc8GyY7PLY4zMn/g=
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519 h1:xZ0ZhxCnrs2zaBBvGIHQqzoeXjzctJP61r+aX3QjXhQ=
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519/go.mod h1:Ycrd7XnwQdumHzpB/6WEa85B4WNdbLC6Wz4FAQNkaV0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -1,12 +1,25 @@
package main
import (
"fmt"
parser "github.com/openvenues/gopostal/parser"
"html/template"
"log"
"net/http"
)
// main prints a sample parse, registers the HTTP handlers and serves on :9000.
//
// Routes:
//   - /static/  files from the local "static" directory
//   - /         the index page template
//   - /parse    parses the "address" query parameter and renders the result
//     fragment
//
// Templates are parsed once at startup, so a missing or broken template stops
// the process immediately instead of panicking inside a request handler, and
// requests no longer re-read the files from disk. Rendering errors are logged
// rather than silently dropped.
func main() {
	// NOTE(review): sample parse printed at startup; looks like a leftover
	// smoke test - confirm whether it is still wanted.
	addressComponents := parser.ParseAddress("rua pedroso xavier 277, apto 51 torre 2. vila albertina, 02732-020, são paulo, sp, brasil")
	fmt.Println(addressComponents)

	indexTmpl := template.Must(template.ParseFiles("./templates/index.html"))
	resultsTmpl := template.Must(template.ParseFiles("./templates/fragments/results.html"))

	http.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("static"))))

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		if err := indexTmpl.Execute(w, nil); err != nil {
			// Part of the response may already be written, so only log.
			log.Printf("rendering index template: %v", err)
		}
	})

	http.HandleFunc("/parse", func(w http.ResponseWriter, r *http.Request) {
		data := map[string]Address{"Result": parseAddress(r.URL.Query().Get("address"))}
		if err := resultsTmpl.Execute(w, data); err != nil {
			log.Printf("rendering results template: %v", err)
		}
	})

	log.Println("App running on 9000...")
	log.Fatal(http.ListenAndServe(":9000", nil))
}

125
go/meilisearch.go Normal file
View File

@ -0,0 +1,125 @@
package main
import (
"encoding/csv"
"io"
"log"
"os"
"github.com/meilisearch/meilisearch-go"
)
// search is a placeholder and currently does nothing. The commented-out body
// sketches a call against the "addressex" index that has not been implemented.
func search() {
// client := newClient()
// _, err := client.Index("addressex").UpdateIndex()
// if err != nil {
// panic(err)
// }
}
// init runs at package initialisation, before main. It creates the search
// index, applies its settings and loads the address CSV into it. Each of those
// steps panics on error, so a failure here aborts program start-up.
func init() {
// TODO: (jpd) move this logic to its own endpoints
createIndex()
updateIndex()
loadAddresses()
}
// createIndex asks the search server to create the "addressex" index with
// "id" as its primary key. It panics if the request returns an error.
func createIndex() {
	cfg := meilisearch.IndexConfig{
		Uid:        "addressex",
		PrimaryKey: "id",
	}
	if _, err := newClient().CreateIndex(&cfg); err != nil {
		panic(err)
	}
}
func updateIndex() {
index := newClientIndex()
_, err := index.UpdateSettings(&meilisearch.Settings{
DisplayedAttributes: []string{
"id",
"country",
"state",
"city",
"neighborhood",
"road",
"house_number",
"house",
"unit",
"postal_code",
"raw_address",
},
FilterableAttributes: []string{
"country",
"state",
"city",
"neighborhood",
"road",
"house_number",
"house",
"unit",
"postal_code",
},
SearchableAttributes: []string{
"house_number",
"house",
"unit",
"postal_code",
},
Synonyms: map[string][]string{
"apartamento": []string{"apto", "apt", "ap"},
"bloco": []string{"bl", "b"},
"torre": []string{"tr", "t"},
"conjunto": []string{"conj", "cj"},
},
})
if err != nil {
panic(err)
}
}
// loadAddresses reads /opt/src/config/data/addresses.csv, parses the first
// column of every record as a raw address and uploads the results to the
// "addressex" index in batches of batchSize documents.
//
// It panics if the file cannot be opened, a record cannot be read, or an
// upload request returns an error. A batch is only sent when it holds at least
// one document, so an empty file (or a row count that is an exact multiple of
// batchSize) no longer triggers a request with an empty payload.
func loadAddresses() {
	const batchSize = 1000

	// TODO: (jpd) use env var to set address file
	f, err := os.Open("/opt/src/config/data/addresses.csv")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	index := newClientIndex()
	rows := csv.NewReader(f)
	batch := make([]Address, 0, batchSize)

	// flush uploads the pending batch, if any, and starts a fresh one.
	flush := func() {
		if len(batch) == 0 {
			return
		}
		if _, err := index.AddDocuments(batch, "id"); err != nil {
			panic(err)
		}
		// Logged after the request succeeds so the message is accurate.
		log.Printf("Recorded %d addresses", len(batch))
		batch = make([]Address, 0, batchSize)
	}

	for {
		row, err := rows.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}

		batch = append(batch, parseAddress(row[0]))
		if len(batch) == batchSize {
			flush()
		}
	}

	// Send whatever is left over from the last partial batch.
	flush()
}
// newClientIndex returns a handle on the "addressex" index, backed by a
// freshly built client.
func newClientIndex() meilisearch.IndexManager {
	return newClient().Index("addressex")
}
// newClient builds a client for the search server at http://meili:7700,
// authenticating with the key read from the MEILI_MASTER_KEY environment
// variable.
func newClient() meilisearch.ServiceManager {
	const host = "http://meili:7700"
	apiKey := os.Getenv("MEILI_MASTER_KEY")
	return meilisearch.New(host, meilisearch.WithAPIKey(apiKey))
}

100
go/static/css/index.css Normal file
View File

@ -0,0 +1,100 @@
:root {
--bg-color: #131415;
--primary-color: #9575CD;
--margin: 40px;
--margin-sm: 20px;
--radius: 6px;
--text-color: #FFF;
--header-height: 100px;
}
body {
margin: 0;
background: var(--bg-color);
color: var(--text-color);
font-family: Arial, sans-serif;
}
ul {
margin: 0;
padding: 0;
list-style-type: none;
}
header {
background: #24292d;
height: var(--header-height);
}
header ul {
display: flex;
}
header ul li {
height: var(--header-height);
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 0 var(--margin-sm);
}
header ul li h5 {
font-size: 24px;
font-weight: 400;
margin: 0;
}
header ul li h6 {
font-size: 14px;
font-weight: 100;
margin: 5px 0 0;
}
header ul li h5 strong {
color: #81c784;
}
section {
height: calc(100vh - var(--header-height));
display: flex;
flex-direction: column;
align-items: center;
}
section input {
height: 60px;
line-height: 60px;
background: transparent;
border: 2px solid var(--primary-color);
border-radius: var(--radius);
color: var(--text-color);
font-size: 20px;
outline: none !important;
padding: 0 20px;
width: 300px;
margin-top: var(--margin);
}
#search-results ul li {
color: #FFF;
display: flex;
}
#search-results ul li button {
align-self: center;
height: 34px;
padding: 0 15px;
border: 0;
border-radius: var( --radius);
margin-left: var(--margin-sm);
background: var(--primary-color);
color: #FFF;
font-weight: bold;
text-transform: uppercase;
cursor: pointer;
}
#spinner {
display: none;
}

View File

@ -0,0 +1,18 @@
<dl>
<dt>Logradouro</dt>
<dd>{{ .Result.Road }}</dd>
<dt>N&uacute;mero</dt>
<dd>{{ .Result.HouseNumber }}</dd>
<dt>Unidade</dt>
<dd>{{ .Result.Unit }}</dd>
<dt>Torre</dt>
<dd>{{ .Result.House }}</dd>
<dt>Bairro</dt>
<dd>{{ .Result.Neighborhood }}</dd>
<dt>Cidade</dt>
<dd>{{ .Result.City }}</dd>
<dt>Estado</dt>
<dd>{{ .Result.State }}</dd>
<dt><abbr title="C&oacute;digo de Endere&ccedil;amento Postal">CEP</abbr></dt>
<dd>{{ .Result.PostalCode }}</dd>
</dl>

27
go/templates/index.html Normal file
View File

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html lang="pt-BR">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Parser de Endere&ccedil;o</title>
<script src="https://unpkg.com/htmx.org@1.9.10"></script>
<link rel="stylesheet" href="/static/css/index.css"/>
</head>
<body>
<main>
<section>
<div>
<input
type="search"
name="address"
placeholder="Endereço: Rua, Número, Complemento. Bairro - Cidade/Estado. CEP"
hx-get="/parse"
hx-trigger="keyup changed delay:500ms"
hx-target="#address" />
</div>
<div id="address"></div>
</section>
</main>
</body>
</html>

0
py/README.md Normal file
View File

874
py/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,19 +1,22 @@
[tool.poetry]
[project]
name = "addressex"
version = "0.1.0"
description = "Address parser API"
authors = ["EmCasa Developer <dev@emcasa.com>"]
authors = [{ name = "EmCasa Developer", email = "dev@emcasa.com" }]
readme = "README.md"
requires-python = ">=3.13,<4.0"
dependencies = [
"postal>=1.1.10",
"pynest-api>=0.3.0,<1.0.0"
]
[tool.poetry.dependencies]
python = "^3.12"
postal = "^1.1.10"
pynest-api = "^0.1.2"
[tool.poetry]
package-mode = false
[tool.poetry.group.dev.dependencies]
ipython = "^8.20.0"
pytest = "^7.4.4"
black = "^23.11.0"
ipython = "^9.2.0"
pytest = "^8.3.5"
black = "^25.1.0"
[build-system]
requires = ["poetry-core"]