-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Libpostal rest server implementation
- Loading branch information
Showing
12 changed files
with
1,156 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# If you prefer the allow list template instead of the deny list, see community template: | ||
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore | ||
# | ||
# Binaries for programs and plugins | ||
*.exe | ||
*.exe~ | ||
*.dll | ||
*.so | ||
*.dylib | ||
|
||
# Test binary, built with `go test -c` | ||
*.test | ||
|
||
# Output of the go coverage tool, specifically when used with LiteIDE | ||
*.out | ||
|
||
# Dependency directories (remove the comment below to include it) | ||
# vendor/ | ||
|
||
# Go workspace file | ||
go.work | ||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
FROM golang:alpine3.17 | ||
|
||
RUN set -ex \ | ||
&& apk add --no-cache --virtual .build-deps \ | ||
curl \ | ||
gcc \ | ||
g++ \ | ||
make \ | ||
libtool \ | ||
autoconf \ | ||
automake \ | ||
git \ | ||
&& mkdir -p /src \ | ||
&& mkdir -p /data \ | ||
&& cd /src \ | ||
&& git clone https://github.com/openvenues/libpostal.git \ | ||
&& cd libpostal \ | ||
&& ./bootstrap.sh \ | ||
&& ./configure --datadir=/data MODEL=senzing \ | ||
&& make -j "$(nproc)" \ | ||
&& make install \ | ||
&& apk del .build-deps \ | ||
&& rm -rf /src | ||
|
||
RUN apk add --no-cache gcc musl-dev pkgconfig | ||
|
||
WORKDIR /app | ||
|
||
ENV GO111MODULE=on | ||
ENV CGO_ENABLED=1 | ||
ENV GOOS=linux | ||
ENV GOARCH=amd64 | ||
|
||
COPY go.mod go.sum ./ | ||
RUN go mod download | ||
|
||
COPY . . | ||
|
||
RUN go build -o /usr/bin/address-parser main.go | ||
|
||
ENTRYPOINT ["/usr/bin/address-parser"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,77 @@ | ||
# address-parser-go-rest | ||
# Address Parser Go REST | ||
|
||
Address Parser Go REST is a REST API that provides address parsing functionality using the libpostal library. | ||
The purpose of this API is to allow users to easily parse addresses into their individual components | ||
without the need for the libpostal library to be included as a dependency in their projects. | ||
|
||
## Quickstart | ||
|
||
``` | ||
docker run | ||
curl -X 'POST' \ | ||
'http://localhost:8080/parse' \ | ||
-H 'accept: application/json' \ | ||
-H 'Content-Type: application/json' \ | ||
-d '{ | ||
"address": "48 Leicester Square, London WC2H 7LU, United Kingdom", | ||
"title_case": true | ||
}' | ||
``` | ||
|
||
Response: | ||
|
||
``` | ||
curl -X 'POST' \ | ||
'http://localhost:8080/parse' \ | ||
-H 'accept: application/json' \ | ||
-H 'Content-Type: application/json' \ | ||
-d '{ | ||
"address": "48 Leicester Square, London WC2H 7LU, United Kingdom", | ||
"title_case": true | ||
}' | ||
``` | ||
|
||
[swagger documentation](http://localhost:8080/docs/) | ||
|
||
|
||
## Run without docker | ||
|
||
To install and run Address Parser Go REST, you can use the following steps: | ||
|
||
1. Make sure you have a recent version of Golang | ||
2. [Install](https://github.com/openvenues/libpostal#installation-maclinux) libpostal on your machine. | ||
3. `go mod tidy` | ||
4. `go run main.go` | ||
|
||
|
||
Notes: | ||
You can change the address the service listens on, or the path under which the swagger documentation is served, by setting the following environment variables: | ||
``` | ||
PARSER_HTTP_ADDR=:8080 | ||
DOCS_PATH=/docs | ||
``` | ||
You can also put these in a `.env` file in the root of the project. | ||
|
||
If you want to rebuild the swagger documentation make sure that you have | ||
installed [swag](https://github.com/swaggo/swag) | ||
|
||
to regenerate: | ||
``` | ||
go generate | ||
``` | ||
|
||
## Contributing | ||
|
||
If you would like to contribute to Address Parser Go REST, please create a pull request with your changes. | ||
You can also report any issues or bugs you encounter by creating a new issue on the GitHub repository. | ||
|
||
## License | ||
|
||
Address Parser Go REST is licensed under the MIT License. See `LICENSE` for more information. | ||
|
||
## Acknowledgments | ||
|
||
We would like to acknowledge the contributors of the libpostal library and the Go bindings used in this project. | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package addressparser | ||
|
||
import "errors" | ||
|
||
// ErrAddressUnparsable is the sentinel error reported when an address could
// not be broken down into any components. Compare against it with errors.Is.
var ErrAddressUnparsable = errors.New("address is unparsable")
|
||
// Address holds the labelled components of a parsed address. Each field maps
// to one libpostal label; fields that are empty are left out of the JSON
// encoding.
type Address struct {
	// House is a venue or building name, e.g. "Brooklyn Academy of Music" or
	// "Empire State Building".
	House string `json:"house,omitempty"`
	// Category is used for category queries such as "restaurants".
	Category string `json:"category,omitempty"`
	// Near captures phrases like "in" or "near" that follow a category phrase,
	// which helps with queries like "restaurants in Brooklyn".
	Near string `json:"near,omitempty"`
	// HouseNumber is usually the external (street-facing) building number. In
	// some countries it may be a compound, hyphenated number that also includes
	// an apartment number, or a block number (as in Japan); libpostal still
	// labels it house_number for simplicity.
	HouseNumber string `json:"house_number,omitempty"`
	// Road is the street name(s).
	Road string `json:"road,omitempty"`
	// Unit is an apartment, unit, office, lot, or other secondary unit
	// designator.
	Unit string `json:"unit,omitempty"`
	// Level is an expression indicating a floor, e.g. "3rd Floor" or
	// "Ground Floor".
	Level string `json:"level,omitempty"`
	// Staircase is a numbered or lettered staircase.
	Staircase string `json:"staircase,omitempty"`
	// Entrance is a numbered or lettered entrance.
	Entrance string `json:"entrance,omitempty"`
	// PoBox is a post office box, typically found in non-physical (mail-only)
	// addresses.
	PoBox string `json:"po_box,omitempty"`
	// Postcode is the postal code used for mail sorting.
	Postcode string `json:"postcode,omitempty"`
	// Suburb is usually an unofficial neighborhood name like "Harlem",
	// "South Bronx", or "Crown Heights".
	Suburb string `json:"suburb,omitempty"`
	// CityDistrict is usually a borough or district within a city that serves
	// some official purpose, e.g. "Brooklyn", "Hackney" or "Bratislava IV".
	CityDistrict string `json:"city_district,omitempty"`
	// City is any human settlement: cities, towns, villages, hamlets,
	// localities, etc.
	City string `json:"city,omitempty"`
	// Island is a named island, e.g. "Maui".
	Island string `json:"island,omitempty"`
	// StateDistrict is usually a second-level administrative division or
	// county.
	StateDistrict string `json:"state_district,omitempty"`
	// State is a first-level administrative division. Scotland, Northern
	// Ireland, Wales, and England in the UK are mapped to "state" as well
	// (the convention used in OSM, GeoPlanet, etc.).
	State string `json:"state,omitempty"`
	// CountryRegion is an informal subdivision of a country without any
	// political status.
	CountryRegion string `json:"country_region,omitempty"`
	// Country is a sovereign nation or dependent territory: anything with an
	// ISO-3166 code.
	Country string `json:"country,omitempty"`
	// WorldRegion is currently only used for "West Indies" appended after the
	// country name, a pattern common in the English-speaking Caribbean, e.g.
	// "Jamaica, West Indies".
	WorldRegion string `json:"world_region,omitempty"`
}
|
||
// AddressParserInput describes a single parse request handed to an
// AddressParser.
type AddressParserInput struct {
	// Address is the free-form address text to parse. It is required.
	Address string `json:"address" validate:"required"`
	// Language is the language of the address. Leave it empty if unknown.
	Language string `json:"language,omitempty"`
	// Country is the country of the address. Leave it empty if unknown.
	Country string `json:"country,omitempty"`
	// TitleCase asks for title-cased component values in the response. By
	// default libpostal does not do that.
	TitleCase bool `json:"title_case,omitempty"`
}
|
||
// AddressParser is an interface for the address parser | ||
type AddressParser interface { | ||
Parse(input AddressParserInput) (Address, error) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package libpostal | ||
|
||
import ( | ||
"github.com/gosom/kit/logging" | ||
postal "github.com/openvenues/gopostal/parser" | ||
"golang.org/x/text/cases" | ||
"golang.org/x/text/language" | ||
|
||
"github.com/gosom/address-parser-go-rest/addressparser" | ||
) | ||
|
||
var _ addressparser.AddressParser = (*libPostalParser)(nil) | ||
|
||
type libPostalParser struct { | ||
log logging.Logger | ||
} | ||
|
||
func (o *libPostalParser) Parse(input addressparser.AddressParserInput) (addressparser.Address, error) { | ||
components := postal.ParseAddressOptions(input.Address, postal.ParserOptions{ | ||
Language: input.Language, | ||
Country: input.Country, | ||
}) | ||
if len(components) == 0 { | ||
return addressparser.Address{}, addressparser.ErrAddressUnparsable | ||
} | ||
address := addressparser.Address{} | ||
tag := language.Und | ||
if input.Language != "" { | ||
if r, err := language.Parse("de"); err == nil { | ||
tag = r | ||
} | ||
} | ||
for i := range components { | ||
if input.TitleCase { | ||
components[i].Value = cases.Title(tag, cases.NoLower).String(components[i].Value) | ||
} | ||
switch components[i].Label { | ||
case "house": | ||
address.House = components[i].Value | ||
case "category": | ||
address.Category = components[i].Value | ||
case "near": | ||
address.Near = components[i].Value | ||
case "house_number": | ||
address.HouseNumber = components[i].Value | ||
case "road": | ||
address.Road = components[i].Value | ||
case "unit": | ||
address.Unit = components[i].Value | ||
case "level": | ||
address.Level = components[i].Value | ||
case "staircase": | ||
address.Staircase = components[i].Value | ||
case "entrance": | ||
address.Entrance = components[i].Value | ||
case "po_box": | ||
address.PoBox = components[i].Value | ||
case "postcode": | ||
address.Postcode = components[i].Value | ||
case "suburb": | ||
address.Suburb = components[i].Value | ||
case "city_district": | ||
address.CityDistrict = components[i].Value | ||
case "city": | ||
address.City = components[i].Value | ||
case "island": | ||
address.Island = components[i].Value | ||
case "state_district": | ||
address.StateDistrict = components[i].Value | ||
case "state": | ||
address.State = components[i].Value | ||
case "country_region": | ||
address.CountryRegion = components[i].Value | ||
case "country": | ||
address.Country = components[i].Value | ||
case "world_region": | ||
address.WorldRegion = components[i].Value | ||
default: | ||
o.log.Warn("Unknown component", "component", components[i].Label) | ||
} | ||
} | ||
return address, nil | ||
} | ||
|
||
func NewLibPostalParser(log logging.Logger) *libPostalParser { | ||
return &libPostalParser{log: log} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package ports | ||
|
||
import ( | ||
"fmt" | ||
"net/http" | ||
|
||
"github.com/gosom/kit/lib" | ||
"github.com/gosom/kit/logging" | ||
"github.com/gosom/kit/web" | ||
|
||
"github.com/gosom/address-parser-go-rest/addressparser" | ||
) | ||
|
||
// AddressParserHandler is a handler for parsing addresses | ||
type AddressParserHandler struct { | ||
log logging.Logger | ||
parser addressparser.AddressParser | ||
} | ||
|
||
// NewAddressParserHandler creates a new AddressParserHandler | ||
func NewAddressParserHandler(log logging.Logger, parser addressparser.AddressParser) AddressParserHandler { | ||
return AddressParserHandler{ | ||
log: log, | ||
parser: parser, | ||
} | ||
} | ||
|
||
// RegisterRoutes registers the routes for the AddressParserHandler | ||
func (o *AddressParserHandler) RegisterRouters(r web.Router) { | ||
r.Post("/parse", o.Parse) | ||
} | ||
|
||
// Parse is a handler for parsing addresses | ||
// | ||
// @Summary Parse an address into its components | ||
// @Description Parses an address into its components | ||
// @Tags AddressParser | ||
// @Accept json | ||
// @Produce json | ||
// @Param input body addressparser.AddressParserInput true "AddressParserInput" | ||
// @Success 200 {object} addressparser.Address | ||
// @Failure 400 {object} web.ErrResponse | ||
// @Failure 422 {object} web.ErrResponse | ||
// @Failure 500 {object} web.ErrResponse | ||
// @Router /parse [post] | ||
func (o *AddressParserHandler) Parse(w http.ResponseWriter, r *http.Request) { | ||
var payload addressparser.AddressParserInput | ||
if err := web.DecodeBody(r, &payload, true); err != nil { | ||
web.JSONError(w, r, fmt.Errorf("%w %s", lib.ErrBadRequest)) | ||
return | ||
} | ||
result, err := o.parser.Parse(payload) | ||
if err != nil { | ||
ae := fmt.Errorf("%w %s", lib.ErrUnprocessable, err.Error()) | ||
web.JSONError(w, r, ae) | ||
return | ||
} | ||
web.JSON(w, r, http.StatusOK, result) | ||
} |
Oops, something went wrong.