Skip to content

Commit

Permalink
Merge pull request #25 from fau-klue/ms-measure
Browse files Browse the repository at this point in the history
v0.2.1
  • Loading branch information
ausgerechnet authored Jun 3, 2022
2 parents 8062593 + d20485a commit e512589
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 15 deletions.
17 changes: 9 additions & 8 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]
coveralls = "==1.7.0"
cython = "==0.29.24"
pylint = "==2.3.1"
pytest = "==4.4.0"
pytest-cov = "==2.6.1"
twine = "==3.4.2"
pytest = "==7.0.1"
pylint = "==2.13.9"
pytest-cov = "==3.0.0"
cython = "==0.29.30"
twine = "==3.7.1"
setuptools = "==59.6.0"

[packages]
pandas = "*"
scipy = "*"
wheel = ">=0.37.1"
pandas = ">=1.1.5"
scipy = ">=1.5.4"
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,24 +126,26 @@ You can thus `join` the results directly to the input.

## Association Measures

The following association measures are currently implemented (v0.2.0):
The following association measures are currently implemented (v0.2.1):

- asymptotic hypothesis tests:
- **z-score** (`z_score`)
- **t-score** (`t_score`)
- parameter: `disc`
- **Dunning's log-likelihood** (`log_likelihood`)
- **Dunning's log-likelihood ratio** (`log_likelihood`)
- parameter: `signed`
- **simple-ll** (`simple_ll`)
- parameter: `signed`
- point estimates of association strength:
- [**log ratio**](http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/) (`log_ratio`)
- **Liddell** (`liddell`)
- **minimum sensitivity** (`min_sensitivity`)
- [**log-ratio**](http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/) (`log_ratio`)
- parameter: `disc`
- **Dice coefficient** (`dice`)
- information theory:
- **mutual information** (`mutual_information`)
- parameter: `disc`
- **local-MI** (`local_mutual_information`)
- **local mutual information** (`local_mutual_information`)
- conservative estimates:
- **conservative log-ratio** (`conservative_log_ratio`)
- parameters: `disc`, `alpha`, `correct`, `one_sided`
Expand Down
39 changes: 38 additions & 1 deletion association_measures/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
"""


import numpy as np
# from statistics import NormalDist # requires python version >= 3.8
from scipy.stats import norm # requires scipy
import numpy as np
from pandas import concat

from .binomial import choose
from .frequencies import expected_frequencies, observed_frequencies
Expand All @@ -31,6 +32,8 @@ def list_measures():
'log_likelihood': log_likelihood,
'simple_ll': simple_ll,
# point estimates of association strength
'liddell': liddell,
'min_sensitivity': min_sensitivity,
'dice': dice,
'log_ratio': log_ratio,
# likelihood measures
Expand Down Expand Up @@ -253,6 +256,40 @@ def simple_ll(df, signed=True, **kwargs):
# POINT ESTIMATES OF ASSOCIATION STRENGTH #
###########################################

def min_sensitivity(df, **kwargs):
    """Calculate Minimum Sensitivity.

    Takes the smaller of the two conditional probabilities
    P(w2|w1) = O11/R1 and P(w1|w2) = O11/C1.

    :param DataFrame df: pd.DataFrame with columns O11, O12, O21
    :return: minimum sensitivity
    :rtype: pd.Series
    """

    # marginals of the target cell: row total R1 and column total C1
    row_total = df['O11'] + df['O12']
    col_total = df['O11'] + df['O21']

    # element-wise minimum of the two conditional probabilities
    am = concat(
        [df['O11'] / row_total, df['O11'] / col_total], axis=1
    ).min(axis=1)

    return am


def liddell(df, **kwargs):
    """Calculate Liddell's difference of proportions.

    Defined as (O11 * O22 - O12 * O21) / (C1 * C2): the determinant of the
    contingency table normalised by the product of the column margins.

    :param DataFrame df: pd.DataFrame with columns O11, O12, O21, O22
    :return: liddell
    :rtype: pd.Series
    """

    # column margins of the contingency table
    C1 = df['O11'] + df['O21']
    # BUG FIX: second column margin is O12 + O22 (was O21 + O22)
    C2 = df['O12'] + df['O22']

    # BUG FIX: cross product uses O12 * O21 (was O21 * O21)
    am = (df['O11'] * df['O22'] - df['O12'] * df['O21']) / C1 / C2

    return am


def dice(df, **kwargs):
"""
Calculate Dice coefficient
Expand Down
2 changes: 1 addition & 1 deletion association_measures/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
Association measures are mathematical formulae that interpret cooccurrence frequency data.
"""

VERSION = (0, 2, 0)
VERSION = (0, 2, 1)
__version__ = '.'.join(map(str, VERSION))
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
VERSION = None

REQUIRED = [
'wheel',
'pandas',
'scipy'
]
Expand Down
34 changes: 33 additions & 1 deletion tests/test_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ def test_t_score_invalid(invalid_dataframe):
def test_t_score_zero(zero_dataframe):
    """t-score with frequency output and disc=.5 on the zero-frequency fixture."""
    df = zero_dataframe
    df_ams = am.calculate_measures(df, ['t_score'], freq=True, disc=.5)
    # BUG FIX: the bare comparison was a no-op — the result must be asserted
    assert df_ams['t_score'][0] == 15.532438056926377


Expand Down Expand Up @@ -368,6 +367,38 @@ def test_conservative_log_ratio_one_sided(fixed_dataframe):
assert((abs(df_ams['conservative_log_ratio']) <= abs(df_ams['clr_one_sided'])).all())


###################
# MIN_SENSITIVITY #
###################

@pytest.mark.min_sensitivity
def test_min_sensitivity(fixed_dataframe):
    """Perfect association in the fixture yields a minimum sensitivity of 1."""

    scores = am.calculate_measures(fixed_dataframe, ['min_sensitivity'])
    assert scores['min_sensitivity'][0] == 1


###########
# LIDDELL #
###########

@pytest.mark.liddell
def test_liddell(fixed_dataframe):
    """Perfect association in the fixture yields a Liddell score of 1."""

    scores = am.calculate_measures(fixed_dataframe, ['liddell'])
    assert scores['liddell'][0] == 1


@pytest.mark.liddell
def test_liddell_zero(zero_dataframe):
    """Liddell on the zero-frequency fixture: first value is still 1."""

    scores = am.calculate_measures(zero_dataframe, ['liddell'])
    assert scores['liddell'][0] == 1


########
# GOLD #
########
Expand All @@ -379,6 +410,7 @@ def test_measures_ucs(ucs_dataframe):
df = df.join(am.calculate_measures(df))

for ucs, assoc in [('am.Dice', 'dice'),
('am.MS', 'min_sensitivity'),
('am.t.score', 't_score'),
('am.z.score', 'z_score'),
('am.MI', 'mutual_information'),
Expand Down

0 comments on commit e512589

Please sign in to comment.