Skip to content

Commit a9ca1e2

Browse files
author
Hämäläinen, Mika K
committed
first commit
0 parents  commit a9ca1e2

File tree

6 files changed

+160
-0
lines changed

6 files changed

+160
-0
lines changed

DESCRIPTION.rst

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
=====================
2+
Korp API for Python
3+
=====================
4+
5+
This library provides an easy way to query `Korp <https://spraakbanken.gu.se/swe/forskning/infrastruktur/korp/>`_ systems for language corpora. This library is brought to you by `Mika Hämäläinen <https://mikakalevi.com>`_.
6+
7+
*****
8+
Usage
9+
*****
10+
You can initialise Korp with either a service_name (`språkbanken <https://spraakbanken.gu.se/korp/#?lang=sv>`_, `kielipankki <https://korp.csc.fi/>`_ or `GT <http://gtweb.uit.no/korp/>`_) or the URL of your Korp's API interface, such as https://korp.csc.fi/cgi-bin/korp.cgi .
11+
12+
An example for getting all concordances for North Sami corpora in Giellatekno Korp for query *[pos="A"] "go" [pos="N"]*.
13+
14+
``from korp import Korp``
15+
16+
``korppi = Korp(service_name="GT") #uses Giellatekno``
17+
18+
``corpora = korppi.list_corpora("SME") #lists the corpora whose names start with the North Sami language code``
19+
20+
``number_of_results, concordances = korppi.all_concordances('[pos="A"] "go" [pos="N"]', corpora)``
21+
22+
****************
23+
More information
24+
****************
25+
26+
For more information, see `the GitHub page <https://github.com/mikahama/python-korp>`_ and `Wiki for tutorials <https://github.com/mikahama/python-korp/wiki>`_.

setup.cfg

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[bdist_wheel]
2+
universal=1

setup.py

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# -*- coding: utf-8 -*-
"""A setuptools based setup module.
See:
https://packaging.python.org/en/latest/distributing.html
https://github.com/pypa/sampleproject
"""

# Always prefer setuptools over distutils
from setuptools import setup, find_packages
# To use a consistent encoding
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

# Get the long description from the relevant file
with open(path.join(here, 'DESCRIPTION.rst'), encoding='utf-8') as f:
    long_description = f.read()

setup(
    # NOTE(review): name, version, description and url below still carry the
    # metadata of the Korp project, while the only package shipped from this
    # source tree is ``uralicNLP`` -- confirm the intended values before
    # publishing a release.
    name='korp',

    # Versions should comply with PEP440.  For a discussion on single-sourcing
    # the version across setup.py and the project code, see
    # https://packaging.python.org/en/latest/single_source_version.html
    version='1.0.2',

    description='Korp API library for Python',
    long_description=long_description,

    # The project's main homepage.
    url='https://mikakalevi.com/nlp/python-korp/',

    # Author details
    author='Mika Hämäläinen, Dept. of Modern Languages, University of Helsinki',
    author_email='[email protected]',

    # Choose your license
    license='Apache License, Version 2.0',

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        # How mature is this project? Common values are
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 5 - Production/Stable',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',
        "Topic :: Text Processing :: Linguistic",

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',

    ],

    # What does your project relate to?
    keywords='Korp API',

    # You can just specify the packages manually here if your project is
    # simple. Or you can use find_packages().
    # The package directory in this source tree is ``uralicNLP``; pointing
    # setuptools at a non-existent ``korp`` directory makes the build fail.
    packages=["uralicNLP"],
    package_dir={'uralicNLP': 'uralicNLP'},

    # List run-time dependencies here.  These will be installed by pip when
    # your project is installed. For an analysis of "install_requires" vs pip's
    # requirements files see:
    # https://packaging.python.org/en/latest/requirements.html
    install_requires=["requests"],

    # List additional groups of dependencies here (e.g. development
    # dependencies). You can install these using the following syntax,
    # for example:
    # $ pip install -e .[dev,test]
    extras_require={},

    # If there are data files included in your packages that need to be
    # installed, specify them here.  If using Python 2.6 or less, then these
    # have to be included in MANIFEST.in as well.
    package_data={
        'uralicNLP': ['*.json'],
    },

    # Although 'package_data' is the preferred approach, in some case you may
    # need to place data files outside of your packages. See:
    # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
    # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
    data_files=[],

    # To provide executable scripts, use entry points in preference to the
    # "scripts" keyword. Entry points provide cross-platform support and allow
    # pip to create the appropriate form of executable for the target platform.
    entry_points={},
)

test_uralicnlp.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#encoding: utf-8
# Manual smoke test: calls each public uralicApi function once and prints the
# decoded response so it can be inspected by eye.
#
# print is written in call form with a single argument so the script parses on
# Python 3 and prints the same thing on Python 2 (where the parentheses are
# just a parenthesised expression after the print statement).
from uralicNLP import uralicApi

print(uralicApi.supported_languages())

print(uralicApi.analyze("voita", "fin"))

print(uralicApi.generate("käsi+N+Sg+Par", "fin"))

print(uralicApi.dictionary_search("car", "sms"))

print(uralicApi.lemmatize("voita", "fin"))

uralicNLP/__init__.py

Whitespace-only changes.

uralicNLP/uralicApi.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import requests
2+
3+
api_url = "http://sanat.csc.fi:8000/smsxml/"
4+
5+
def supported_languages():
    """Query the ``listLanguages/`` endpoint and return its decoded JSON reply."""
    params = {"user": "uralicApi"}
    return __send_request("listLanguages/", params)
7+
8+
def analyze(word, language):
    """Query the ``analyze/`` endpoint for *word* in *language*.

    Both arguments are forwarded unchanged as the ``word`` and ``language``
    query parameters; the decoded JSON reply is returned.
    """
    params = {"word": word, "language": language}
    return __send_request("analyze/", params)
10+
11+
def generate(query, language):
    """Query the ``generate/`` endpoint with *query* for *language*.

    Both arguments are forwarded unchanged as the ``query`` and ``language``
    query parameters; the decoded JSON reply is returned.
    """
    params = {"query": query, "language": language}
    return __send_request("generate/", params)
13+
14+
def dictionary_search(word, language):
    """Query the ``search/`` endpoint for *word* in *language*.

    Both arguments are forwarded unchanged as the ``word`` and ``language``
    query parameters; the decoded JSON reply is returned.
    """
    params = {"word": word, "language": language}
    return __send_request("search/", params)
16+
17+
def lemmatize(word, language):
    """Query the ``lemmatize/`` endpoint for *word* in *language*.

    Both arguments are forwarded unchanged as the ``word`` and ``language``
    query parameters; the decoded JSON reply is returned.
    """
    params = {"word": word, "language": language}
    return __send_request("lemmatize/", params)
19+
20+
def __send_request(url, data, timeout=30):
    """Send a GET request to the API and return the decoded JSON body.

    Args:
        url: Endpoint path appended to the module-level ``api_url``.
        data: Dict sent as the query-string parameters of the request.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; ``None`` waits indefinitely, as
            the function did before this parameter existed.

    Returns:
        Whatever ``Response.json()`` decodes from the reply.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # requests applies no timeout unless one is given, so an unresponsive
    # server would otherwise block the caller forever.
    r = requests.get(api_url + url, params=data, timeout=timeout)
    return r.json()

0 commit comments

Comments
 (0)