Skip to content

Commit

Permalink
Merge pull request #25 from fau-klue/ms-measure
Browse files Browse the repository at this point in the history
v0.2.1
  • Loading branch information
ausgerechnet authored Jun 3, 2022
2 parents 8062593 + d20485a commit e512589
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 15 deletions.
17 changes: 9 additions & 8 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]
coveralls = "==1.7.0"
cython = "==0.29.24"
pylint = "==2.3.1"
pytest = "==4.4.0"
pytest-cov = "==2.6.1"
twine = "==3.4.2"
pytest = "==7.0.1"
pylint = "==2.13.9"
pytest-cov = "==3.0.0"
cython = "==0.29.30"
twine = "==3.7.1"
setuptools = "==59.6.0"

[packages]
pandas = "*"
scipy = "*"
wheel = ">=0.37.1"
pandas = ">=1.1.5"
scipy = ">=1.5.4"
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,24 +126,26 @@ You can thus `join` the results directly to the input.

## Association Measures

The following association measures are currently implemented (v0.2.0):
The following association measures are currently implemented (v0.2.1):

- asymptotic hypothesis tests:
- **z-score** (`z_score`)
- **t-score** (`t_score`)
- parameter: `disc`
- **Dunning's log-likelihood** (`log_likelihood`)
- **Dunning's log-likelihood ratio** (`log_likelihood`)
- parameter: `signed`
- **simple-ll** (`simple_ll`)
- parameter: `signed`
- point estimates of association strength:
- [**log ratio**](http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/) (`log_ratio`)
- **Liddell** (`liddell`)
- **minimum sensitivity** (`min_sensitivity`)
- [**log-ratio**](http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/) (`log_ratio`)
- parameter: `disc`
- **Dice coefficient** (`dice`)
- information theory:
- **mutual information** (`mutual_information`)
- parameter: `disc`
- **local-MI** (`local_mutual_information`)
- **local mutual information** (`local_mutual_information`)
- conservative estimates:
- **conservative log-ratio** (`conservative_log_ratio`)
- parameters: `disc`, `alpha`, `correct`, `one_sided`
Expand Down
39 changes: 38 additions & 1 deletion association_measures/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
"""


import numpy as np
# from statistics import NormalDist # requires python version >= 3.8
from scipy.stats import norm # requires scipy
import numpy as np
from pandas import concat

from .binomial import choose
from .frequencies import expected_frequencies, observed_frequencies
Expand All @@ -31,6 +32,8 @@ def list_measures():
'log_likelihood': log_likelihood,
'simple_ll': simple_ll,
# point estimates of association strength
'liddell': liddell,
'min_sensitivity': min_sensitivity,
'dice': dice,
'log_ratio': log_ratio,
# likelihood measures
Expand Down Expand Up @@ -253,6 +256,40 @@ def simple_ll(df, signed=True, **kwargs):
# POINT ESTIMATES OF ASSOCIATION STRENGTH #
###########################################

def min_sensitivity(df, **kwargs):
    """Calculate Minimum Sensitivity.

    Takes the smaller of the two conditional probabilities
    P(w2|w1) = O11/R1 and P(w1|w2) = O11/C1.

    :param DataFrame df: pd.DataFrame with columns O11, O12, O21
    :return: minimum sensitivity
    :rtype: pd.Series
    """

    # marginals of the target cell: row total R1 and column total C1
    row_total = df['O11'] + df['O12']
    col_total = df['O11'] + df['O21']

    # element-wise minimum of the two conditional probabilities
    am = concat(
        [df['O11'] / row_total, df['O11'] / col_total], axis=1
    ).min(axis=1)

    return am


def liddell(df, **kwargs):
    """Calculate Liddell's difference of proportions.

    Defined as (O11 * O22 - O12 * O21) / (C1 * C2): the determinant of the
    contingency table normalised by the product of the column margins.

    :param DataFrame df: pd.DataFrame with columns O11, O12, O21, O22
    :return: liddell
    :rtype: pd.Series
    """

    # column margins of the contingency table
    C1 = df['O11'] + df['O21']
    # BUG FIX: second column margin is O12 + O22 (was O21 + O22)
    C2 = df['O12'] + df['O22']

    # BUG FIX: cross product uses O12 * O21 (was O21 * O21)
    am = (df['O11'] * df['O22'] - df['O12'] * df['O21']) / C1 / C2

    return am


def dice(df, **kwargs):
"""
Calculate Dice coefficient
Expand Down
2 changes: 1 addition & 1 deletion association_measures/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
Association measures are mathematical formulae that interpret cooccurrence frequency data.
"""

VERSION = (0, 2, 0)
VERSION = (0, 2, 1)
__version__ = '.'.join(map(str, VERSION))
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
VERSION = None

REQUIRED = [
'wheel',
'pandas',
'scipy'
]
Expand Down
34 changes: 33 additions & 1 deletion tests/test_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ def test_t_score_invalid(invalid_dataframe):
def test_t_score_zero(zero_dataframe):
    """t-score with frequency output and disc=.5 on the zero-frequency fixture."""
    df = zero_dataframe
    df_ams = am.calculate_measures(df, ['t_score'], freq=True, disc=.5)
    # BUG FIX: the bare comparison was a no-op — the result must be asserted
    assert df_ams['t_score'][0] == 15.532438056926377


Expand Down Expand Up @@ -368,6 +367,38 @@ def test_conservative_log_ratio_one_sided(fixed_dataframe):
assert((abs(df_ams['conservative_log_ratio']) <= abs(df_ams['clr_one_sided'])).all())


###################
# MIN_SENSITIVITY #
###################

@pytest.mark.min_sensitivity
def test_min_sensitivity(fixed_dataframe):
    """Perfect association in the fixture yields a minimum sensitivity of 1."""

    scores = am.calculate_measures(fixed_dataframe, ['min_sensitivity'])
    assert scores['min_sensitivity'][0] == 1


###########
# LIDDELL #
###########

@pytest.mark.liddell
def test_liddell(fixed_dataframe):
    """Perfect association in the fixture yields a Liddell score of 1."""

    scores = am.calculate_measures(fixed_dataframe, ['liddell'])
    assert scores['liddell'][0] == 1


@pytest.mark.liddell
def test_liddell_zero(zero_dataframe):
    """Liddell on the zero-frequency fixture: first value is still 1."""

    scores = am.calculate_measures(zero_dataframe, ['liddell'])
    assert scores['liddell'][0] == 1


########
# GOLD #
########
Expand All @@ -379,6 +410,7 @@ def test_measures_ucs(ucs_dataframe):
df = df.join(am.calculate_measures(df))

for ucs, assoc in [('am.Dice', 'dice'),
('am.MS', 'min_sensitivity'),
('am.t.score', 't_score'),
('am.z.score', 'z_score'),
('am.MI', 'mutual_information'),
Expand Down

0 comments on commit e512589

Please sign in to comment.