Refactor law of motion. (#373)

OpenSourceEconomics · Aug 19, 2020 · dc44f6a · dc44f6a
1 parent ddde5de
commit dc44f6a
Show file tree

Hide file tree

Showing 7 changed files with 155 additions and 168 deletions.
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
@@ -87,8 +87,11 @@ releases are available on `Anaconda.org
 - :gh:`361` adds standard deviations of parameters for example models
   (:ghuser:`timmens`).
 - :gh:`363` enables msm function to return simulated moments or comparison plot data for
-  use with `estimagic <https://github.com/OpenSourceEconomics/estimagic>`_ (:ghuser:`amageh`).
+  use with `estimagic <https://github.com/OpenSourceEconomics/estimagic>`_
+  (:ghuser:`amageh`).
 - :gh:`369` adds second set of parameters for kw_97 models (:ghuser:`amageh`).
+- :gh:`373` refactors the law of motion and simplifies the collection of child indices
+  (:ghuser:`tobiasraabe`).
 - :gh:`371` changes the names of the criterion functions for maximum likelihood and msm
   estimation. Makes replacement functions optional for estimation with
   msm and sets identity matrix as default weighting matrix (:ghuser:`amageh`).

diff --git a/respy/_numba.py b/respy/_numba.py
@@ -1,6 +1,8 @@
 """Special functions for using numba."""
 import warnings
 
+import numba as nb
+import numpy as np
 from numba import NumbaDeprecationWarning
 from numba import types
 from numba.extending import intrinsic
@@ -74,3 +76,19 @@ def array_indexer(a, i):
         return tup
 
     return function_signature, codegen
+
+
+@nb.njit
+def sum_over_numba_boolean_unituple(tuple_):
+    """Compute the sum over a boolean :class:`numba.types.UniTuple`.
+
+    Parameters
+    ----------
+    tuple_ : numba.types.UniTuple[bool]
+
+    Returns
+    -------
+    sum_ : Union[float, int]
+
+    """
+    return np.sum(np.array([1 for i in tuple_ if i]))
diff --git a/respy/shared.py b/respy/shared.py
@@ -590,7 +590,7 @@ def map_observations_to_states(states, state_space, optim_paras):
     core_columns = ["period"] + create_core_state_space_columns(optim_paras)
     core = states.reset_index(level="period")[core_columns].to_numpy(dtype="int64")
 
-    core_key, core_index = _map_observations_to_core_states_numba(
+    core_key, core_index = map_states_to_core_key_and_core_index(
         core, state_space.indexer
     )
 
@@ -611,14 +611,30 @@ def map_observations_to_states(states, state_space, optim_paras):
 
 
 @nb.njit
-def _map_observations_to_core_states_numba(core, indexer):
-    """Map observations to states in Numba."""
-    n_observations = core.shape[0]
-    core_key = np.zeros(n_observations, dtype=np.int64)
-    core_index = np.zeros(n_observations, dtype=np.int64)
+def map_states_to_core_key_and_core_index(states, indexer):
+    """Map states to the core key and core index.
 
-    for i in range(n_observations):
-        core_key_, core_index_ = indexer[array_to_tuple(indexer, core[i])]
+    Parameters
+    ----------
+    states : numpy.ndarray
+        Multidimensional array containing only core dimensions of states.
+    indexer : numba.typed.Dict
+        A dictionary with core states as keys and the core key and core index as values.
+
+    Returns
+    -------
+    core_key : numpy.ndarray
+        An array containing the core key. See :ref:`core_key`.
+    core_index : numpy.ndarray
+        An array containing the core index. See :ref:`core_indices`.
+
+    """
+    n_states = states.shape[0]
+    core_key = np.zeros(n_states, dtype=np.int64)
+    core_index = np.zeros(n_states, dtype=np.int64)
+
+    for i in range(n_states):
+        core_key_, core_index_ = indexer[array_to_tuple(indexer, states[i])]
         core_key[i] = core_key_
         core_index[i] = core_index_
 
@@ -700,3 +716,55 @@ def select_valid_choices(choices, choice_set):
 
     """
     return [x for i, x in enumerate(choices) if choice_set[i]]
+
+
+def apply_law_of_motion_for_core(df, optim_paras):
+    """Apply the law of motion for the core dimensions.
+
+    This function only applies the law of motion for core dimensions which are the
+    period, experiences, and previous choices. Depending on the integer-encoded choice
+    in ``df["choice"]``, the new state is computed.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        The DataFrame contains states with information on the period, experiences,
+        previous choices. The current choice is encoded as an integer in a column named
+        ``"choice"``.
+    optim_paras : dict
+        Contains model parameters.
+
+    Returns
+    -------
+    df : pandas.DataFrame
+        The DataFrame contains the states in the next period.
+
+    """
+    n_lagged_choices = optim_paras["n_lagged_choices"]
+
+    # Update work experiences.
+    for i, choice in enumerate(optim_paras["choices_w_exp"]):
+        df[f"exp_{choice}"] += df["choice"] == i
+
+    # Update lagged choices by deleting oldest lagged, renaming other lags and inserting
+    # choice in the first position.
+    if n_lagged_choices:
+        # Save position of first lagged choice.
+        position = df.columns.tolist().index("lagged_choice_1")
+
+        # Drop oldest lag.
+        df = df.drop(columns=f"lagged_choice_{n_lagged_choices}")
+
+        # Rename newer lags
+        rename_lagged_choices = {
+            f"lagged_choice_{i}": f"lagged_choice_{i + 1}"
+            for i in range(1, n_lagged_choices)
+        }
+        df = df.rename(columns=rename_lagged_choices)
+
+        # Add current choice as new lag.
+        df.insert(position, "lagged_choice_1", df["choice"])
+
+    df["period"] = df["period"] + 1
+
+    return df
diff --git a/respy/simulate.py b/respy/simulate.py
@@ -10,6 +10,7 @@
 from respy.parallelization import parallelize_across_dense_dimensions
 from respy.parallelization import split_and_combine_df
 from respy.pre_processing.model_processing import process_params_and_options
+from respy.shared import apply_law_of_motion_for_core
 from respy.shared import calculate_value_functions_and_flow_utilities
 from respy.shared import compute_covariates
 from respy.shared import create_base_draws
@@ -218,10 +219,13 @@ def simulate(
             optim_paras=optim_paras,
         )
 
-        data.append(current_df_extended)
+        data.append(current_df_extended.copy(deep=True))
 
         if is_n_step_ahead and period != n_simulation_periods - 1:
-            df = _apply_law_of_motion(current_df_extended, optim_paras)
+            current_df_extended = current_df_extended.reset_index()
+            df = apply_law_of_motion_for_core(current_df_extended, optim_paras)
+            state_space_columns = create_state_space_columns(optim_paras)
+            df = df.set_index(["identifier", "period"])[state_space_columns]
 
     simulated_data = _process_simulation_output(data, optim_paras)
 
@@ -554,65 +558,6 @@ def _random_choice(choices, probabilities=None, decimals=5):
     return out
 
 
-def _apply_law_of_motion(df, optim_paras):
-    """Apply the law of motion to get the states in the next period.
-
-    For n-step-ahead simulations, the states of the next period are generated from the
-    current states and the current decision. This function changes experiences and
-    previous choices according to the choice in the current period, to get the states of
-    the next period.
-
-    We implicitly assume that observed variables are constant.
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-        The DataFrame contains the simulated information of individuals in one period.
-    optim_paras : dict
-
-    Returns
-    -------
-    df : pandas.DataFrame
-        The DataFrame contains the states of individuals in the next period.
-
-    """
-    df = df.copy()
-    n_lagged_choices = optim_paras["n_lagged_choices"]
-
-    # Update work experiences.
-    for i, choice in enumerate(optim_paras["choices_w_exp"]):
-        df[f"exp_{choice}"] += df["choice"] == i
-
-    # Update lagged choices by deleting oldest lagged, renaming other lags and inserting
-    # choice in the first position.
-    if n_lagged_choices:
-        # Save position of first lagged choice.
-        position = df.columns.tolist().index("lagged_choice_1")
-
-        # Drop oldest lag.
-        df = df.drop(columns=f"lagged_choice_{n_lagged_choices}")
-
-        # Rename newer lags
-        rename_lagged_choices = {
-            f"lagged_choice_{i}": f"lagged_choice_{i + 1}"
-            for i in range(1, n_lagged_choices)
-        }
-        df = df.rename(columns=rename_lagged_choices)
-
-        # Add current choice as new lag.
-        df.insert(position, "lagged_choice_1", df["choice"])
-
-    # Increment period in MultiIndex by one.
-    df.index = df.index.set_levels(
-        df.index.get_level_values("period") + 1, level="period", verify_integrity=False
-    )
-
-    state_space_columns = create_state_space_columns(optim_paras)
-    df = df[state_space_columns]
-
-    return df
-
-
 def _harmonize_simulation_arguments(method, df, n_simulation_periods, options):
     """Harmonize the arguments of the simulation.