Skip to content

Commit

Permalink
Libpostal rest server implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
gosom committed Mar 9, 2023
1 parent 72530c3 commit 4ae1f9b
Show file tree
Hide file tree
Showing 12 changed files with 1,156 additions and 1 deletion.
22 changes: 22 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/

# Go workspace file
go.work
.env
41 changes: 41 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Base image: the Go toolchain on Alpine 3.17.
FROM golang:alpine3.17

# Build and install libpostal from source in a single layer:
# install the build toolchain as the virtual package .build-deps, clone
# libpostal, configure it with --datadir=/data and MODEL=senzing, compile and
# install it, then remove .build-deps and the /src checkout again.
RUN set -ex \
&& apk add --no-cache --virtual .build-deps \
curl \
gcc \
g++ \
make \
libtool \
autoconf \
automake \
git \
&& mkdir -p /src \
&& mkdir -p /data \
&& cd /src \
&& git clone https://github.com/openvenues/libpostal.git \
&& cd libpostal \
&& ./bootstrap.sh \
&& ./configure --datadir=/data MODEL=senzing \
&& make -j "$(nproc)" \
&& make install \
&& apk del .build-deps \
&& rm -rf /src

# Install a C compiler, musl headers and pkgconfig.
# NOTE(review): presumably required because the Go build below runs with
# CGO_ENABLED=1 and links against libpostal - confirm.
RUN apk add --no-cache gcc musl-dev pkgconfig

WORKDIR /app

# Go build settings: module mode on, cgo enabled, target linux/amd64.
ENV GO111MODULE=on
ENV CGO_ENABLED=1
ENV GOOS=linux
ENV GOARCH=amd64

# Copy only the module files and download dependencies before copying the
# rest of the sources.
# NOTE(review): presumably done so the download layer is reused when only
# source files change - confirm.
COPY go.mod go.sum ./
RUN go mod download

COPY . .

# Compile the service binary from main.go and use it as the container entrypoint.
RUN go build -o /usr/bin/address-parser main.go

ENTRYPOINT ["/usr/bin/address-parser"]
78 changes: 77 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,77 @@
# address-parser-go-rest
# Address Parser Go REST

Address Parser Go REST is a REST API that provides address parsing functionality using the libpostal library.
The purpose of this API is to allow users to easily parse addresses into their individual components
without the need for the libpostal library to be included as a dependency in their projects.

## Quickstart

```
docker run
curl -X 'POST' \
'http://localhost:8080/parse' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"address": "48 Leicester Square, London WC2H 7LU, United Kingdom",
"title_case": true
}'
```

Response:

```
curl -X 'POST' \
'http://localhost:8080/parse' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"address": "48 Leicester Square, London WC2H 7LU, United Kingdom",
"title_case": true
}'
```

[swagger documentation](http://localhost:8080/docs/)


## Run without docker

To install and run Address Parser Go REST, you can use the following steps:

1. Make sure you have a recent version of Golang
2. [Install](https://github.com/openvenues/libpostal#installation-maclinux) libpostal on your machine.
3. `go mod tidy`
4. `go run main.go`


Notes:
You can change the address the service listens on, or the path under which the swagger documentation is served, by setting the following environment variables:
```
PARSER_HTTP_ADDR=:8080
DOCS_PATH=/docs
```
You can also put these in a `.env` file in the root of the project.

If you want to rebuild the swagger documentation make sure that you have
installed [swag](https://github.com/swaggo/swag)

to regenerate:
```
go generate
```

## Contributing

If you would like to contribute to Address Parser Go REST, please create a pull request with your changes.
You can also report any issues or bugs you encounter by creating a new issue on the GitHub repository.

## License

Address Parser Go REST is licensed under the MIT License. See `LICENSE` for more information.

## Acknowledgments

We would like to acknowledge the contributors of the libpostal library and the Go bindings used in this project.


66 changes: 66 additions & 0 deletions addressparser/addressparser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package addressparser

import "errors"

// ErrAddressUnparsable is the sentinel error reported when an address cannot
// be parsed into any components.
var ErrAddressUnparsable = errors.New("address is unparsable")

// Address holds the components of a parsed address. Every field is optional
// and is omitted from the JSON encoding when empty.
type Address struct {
// House is a venue name, e.g. "Brooklyn Academy of Music", or a building name, e.g. "Empire State Building".
House string `json:"house,omitempty"`
// Category is used for category queries like "restaurants", etc.
Category string `json:"category,omitempty"`
// Near holds phrases like "in", "near", etc. used after a category phrase to help with parsing queries like "restaurants in Brooklyn".
Near string `json:"near,omitempty"`
// HouseNumber usually refers to the external (street-facing) building number. In some countries this may be a compound, hyphenated number which also includes an apartment number, or a block number (a la Japan), but libpostal will just call it the house_number for simplicity.
HouseNumber string `json:"house_number,omitempty"`
// Road is the street name(s).
Road string `json:"road,omitempty"`
// Unit is an apartment, unit, office, lot, or other secondary unit designator.
Unit string `json:"unit,omitempty"`
// Level holds expressions indicating a floor number, e.g. "3rd Floor", "Ground Floor", etc.
Level string `json:"level,omitempty"`
// Staircase is a numbered/lettered staircase.
Staircase string `json:"staircase,omitempty"`
// Entrance is a numbered/lettered entrance.
Entrance string `json:"entrance,omitempty"`
// PoBox is a post office box, typically found in non-physical (mail-only) addresses.
PoBox string `json:"po_box,omitempty"`
// Postcode is the postal code used for mail sorting.
Postcode string `json:"postcode,omitempty"`
// Suburb is usually an unofficial neighborhood name like "Harlem", "South Bronx", or "Crown Heights".
Suburb string `json:"suburb,omitempty"`
// CityDistrict is usually a borough or district within a city that serves some official purpose, e.g. "Brooklyn" or "Hackney" or "Bratislava IV".
CityDistrict string `json:"city_district,omitempty"`
// City is any human settlement including cities, towns, villages, hamlets, localities, etc.
City string `json:"city,omitempty"`
// Island is a named island, e.g. "Maui".
Island string `json:"island,omitempty"`
// StateDistrict is usually a second-level administrative division or county.
StateDistrict string `json:"state_district,omitempty"`
// State is a first-level administrative division. Scotland, Northern Ireland, Wales, and England in the UK are mapped to "state" as well (convention used in OSM, GeoPlanet, etc.).
State string `json:"state,omitempty"`
// CountryRegion is an informal subdivision of a country without any political status.
CountryRegion string `json:"country_region,omitempty"`
// Country covers sovereign nations and their dependent territories, anything with an ISO-3166 code.
Country string `json:"country,omitempty"`
// WorldRegion is currently only used for appending "West Indies" after the country name, a pattern frequently used in the English-speaking Caribbean, e.g. "Jamaica, West Indies".
WorldRegion string `json:"world_region,omitempty"`
}

// AddressParserInput is the request payload handed to an AddressParser.
type AddressParserInput struct {
// Address is the free-text address to parse. It carries a "required" validation tag.
Address string `json:"address" validate:"required"`
// Language is the language of the address. Leave empty if you don't know it.
Language string `json:"language,omitempty"`
// Country is the country of the address. Leave empty if you don't know it.
Country string `json:"country,omitempty"`
// TitleCase, when true, requests title-cased component values in the response. The default behavior of libpostal is not to do that.
TitleCase bool `json:"title_case,omitempty"`
}

// AddressParser is implemented by anything that can turn an
// AddressParserInput into a structured Address.
type AddressParser interface {
// Parse parses input into an Address, or returns an error when it cannot.
Parse(input AddressParserInput) (Address, error)
}
87 changes: 87 additions & 0 deletions addressparser/libpostal/libpostal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package libpostal

import (
"github.com/gosom/kit/logging"
postal "github.com/openvenues/gopostal/parser"
"golang.org/x/text/cases"
"golang.org/x/text/language"

"github.com/gosom/address-parser-go-rest/addressparser"
)

// Compile-time check that *libPostalParser satisfies addressparser.AddressParser.
var _ addressparser.AddressParser = (*libPostalParser)(nil)

// libPostalParser is the addressparser.AddressParser implementation backed by
// the gopostal parser package.
type libPostalParser struct {
// log receives warnings about component labels that have no matching Address field.
log logging.Logger
}

// Parse splits input.Address into components with libpostal and copies every
// labelled component into the matching field of addressparser.Address.
//
// input.Language and input.Country are forwarded to libpostal as parser
// options. When input.TitleCase is set, every component value is title-cased
// using the language tag parsed from input.Language; language.Und is used when
// the language is empty or cannot be parsed.
//
// It returns addressparser.ErrAddressUnparsable when libpostal yields no
// components. Components whose label has no corresponding field are logged as
// a warning and otherwise ignored.
func (o *libPostalParser) Parse(input addressparser.AddressParserInput) (addressparser.Address, error) {
	components := postal.ParseAddressOptions(input.Address, postal.ParserOptions{
		Language: input.Language,
		Country:  input.Country,
	})
	if len(components) == 0 {
		return addressparser.Address{}, addressparser.ErrAddressUnparsable
	}

	// Build the title caser once per call instead of once per component.
	// It is only initialised (and only used) when title casing was requested.
	var caser cases.Caser
	if input.TitleCase {
		tag := language.Und
		if input.Language != "" {
			// Use the caller-supplied language rather than a fixed one so that
			// language-specific casing rules match the address being parsed.
			if parsed, err := language.Parse(input.Language); err == nil {
				tag = parsed
			}
		}
		caser = cases.Title(tag, cases.NoLower)
	}

	address := addressparser.Address{}
	for i := range components {
		if input.TitleCase {
			components[i].Value = caser.String(components[i].Value)
		}
		value := components[i].Value
		switch components[i].Label {
		case "house":
			address.House = value
		case "category":
			address.Category = value
		case "near":
			address.Near = value
		case "house_number":
			address.HouseNumber = value
		case "road":
			address.Road = value
		case "unit":
			address.Unit = value
		case "level":
			address.Level = value
		case "staircase":
			address.Staircase = value
		case "entrance":
			address.Entrance = value
		case "po_box":
			address.PoBox = value
		case "postcode":
			address.Postcode = value
		case "suburb":
			address.Suburb = value
		case "city_district":
			address.CityDistrict = value
		case "city":
			address.City = value
		case "island":
			address.Island = value
		case "state_district":
			address.StateDistrict = value
		case "state":
			address.State = value
		case "country_region":
			address.CountryRegion = value
		case "country":
			address.Country = value
		case "world_region":
			address.WorldRegion = value
		default:
			o.log.Warn("Unknown component", "component", components[i].Label)
		}
	}
	return address, nil
}

// NewLibPostalParser returns a libpostal-backed parser that reports unknown
// component labels through log.
func NewLibPostalParser(log logging.Logger) *libPostalParser {
	p := new(libPostalParser)
	p.log = log
	return p
}
59 changes: 59 additions & 0 deletions addressparser/ports/http.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ports

import (
"fmt"
"net/http"

"github.com/gosom/kit/lib"
"github.com/gosom/kit/logging"
"github.com/gosom/kit/web"

"github.com/gosom/address-parser-go-rest/addressparser"
)

// AddressParserHandler is the HTTP handler that exposes an
// addressparser.AddressParser over HTTP.
type AddressParserHandler struct {
// log is the logger supplied at construction time.
log logging.Logger
// parser performs the actual address parsing for incoming requests.
parser addressparser.AddressParser
}

// NewAddressParserHandler builds an AddressParserHandler that uses log for
// logging and delegates address parsing to parser.
func NewAddressParserHandler(log logging.Logger, parser addressparser.AddressParser) AddressParserHandler {
	var h AddressParserHandler
	h.log = log
	h.parser = parser
	return h
}

// RegisterRouters registers the handler's routes on r: POST /parse is served
// by Parse.
func (o *AddressParserHandler) RegisterRouters(r web.Router) {
r.Post("/parse", o.Parse)
}

// Parse is a handler for parsing addresses.
//
// It decodes the request body into an addressparser.AddressParserInput, runs
// it through the configured parser and writes the resulting address as JSON
// with status 200. A body that cannot be decoded is reported as
// lib.ErrBadRequest, and a parser failure as lib.ErrUnprocessable; in both
// cases the underlying error text is appended to the message.
//
// @Summary Parse an address into its components
// @Description Parses an address into its components
// @Tags AddressParser
// @Accept json
// @Produce json
// @Param input body addressparser.AddressParserInput true "AddressParserInput"
// @Success 200 {object} addressparser.Address
// @Failure 400 {object} web.ErrResponse
// @Failure 422 {object} web.ErrResponse
// @Failure 500 {object} web.ErrResponse
// @Router /parse [post]
func (o *AddressParserHandler) Parse(w http.ResponseWriter, r *http.Request) {
	var payload addressparser.AddressParserInput
	if err := web.DecodeBody(r, &payload, true); err != nil {
		// Supply the operand for %s; without it the response contained
		// "%!s(MISSING)" and the actual decode error was lost.
		web.JSONError(w, r, fmt.Errorf("%w %s", lib.ErrBadRequest, err.Error()))
		return
	}
	result, err := o.parser.Parse(payload)
	if err != nil {
		web.JSONError(w, r, fmt.Errorf("%w %s", lib.ErrUnprocessable, err.Error()))
		return
	}
	web.JSON(w, r, http.StatusOK, result)
}
Loading

0 comments on commit 4ae1f9b

Please sign in to comment.