Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor law of motion. #373

Merged
merged 8 commits into from
Aug 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,11 @@ releases are available on `Anaconda.org
- :gh:`361` adds standard deviations of parameters for example models
(:ghuser:`timmens`).
- :gh:`363` enables msm function to return simulated moments or comparison plot data for
use with `estimagic <https://github.com/OpenSourceEconomics/estimagic>`_ (:ghuser:`amageh`).
use with `estimagic <https://github.com/OpenSourceEconomics/estimagic>`_
(:ghuser:`amageh`).
- :gh:`369` adds second set of parameters for kw_97 models (:ghuser:`amageh`).
- :gh:`373` refactors the law of motion and simplifies the collection of child indices
(:ghuser:`tobiasraabe`).
- :gh:`371` changes the names of the criterion functions for maximum likelihood and msm
estimation. Makes replacement functions optional for estimation with
msm and sets identity matrix as default weighting matrix (:ghuser:`amageh`).
Expand Down
18 changes: 18 additions & 0 deletions respy/_numba.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Special functions for using numba."""
import warnings

import numba as nb
import numpy as np
from numba import NumbaDeprecationWarning
from numba import types
from numba.extending import intrinsic
Expand Down Expand Up @@ -74,3 +76,19 @@ def array_indexer(a, i):
return tup

return function_signature, codegen


@nb.njit
def sum_over_numba_boolean_unituple(tuple_):
    """Compute the sum over a boolean :class:`numba.types.UniTuple`.

    A plain counting loop avoids materializing an intermediate list and array
    just to count the ``True`` entries, and it is the construct most robustly
    supported inside :func:`numba.njit`.

    Parameters
    ----------
    tuple_ : numba.types.UniTuple[bool]
        A uni-tuple of booleans.

    Returns
    -------
    sum_ : int
        The number of ``True`` entries in ``tuple_``.

    """
    sum_ = 0
    for element in tuple_:
        if element:
            sum_ += 1
    return sum_
84 changes: 76 additions & 8 deletions respy/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ def map_observations_to_states(states, state_space, optim_paras):
core_columns = ["period"] + create_core_state_space_columns(optim_paras)
core = states.reset_index(level="period")[core_columns].to_numpy(dtype="int64")

core_key, core_index = _map_observations_to_core_states_numba(
core_key, core_index = map_states_to_core_key_and_core_index(
core, state_space.indexer
)

Expand All @@ -611,14 +611,30 @@ def map_observations_to_states(states, state_space, optim_paras):


@nb.njit
def _map_observations_to_core_states_numba(core, indexer):
"""Map observations to states in Numba."""
n_observations = core.shape[0]
core_key = np.zeros(n_observations, dtype=np.int64)
core_index = np.zeros(n_observations, dtype=np.int64)
def map_states_to_core_key_and_core_index(states, indexer):
"""Map states to the core key and core index.

for i in range(n_observations):
core_key_, core_index_ = indexer[array_to_tuple(indexer, core[i])]
Parameters
----------
states : numpy.ndarray
Multidimensional array containing only core dimensions of states.
indexer : numba.typed.Dict
A dictionary with core states as keys and the core key and core index as values.

Returns
-------
core_key : numpy.ndarray
An array containing the core key. See :ref:`core_key`.
core_index : numpy.ndarray
An array containing the core index. See :ref:`core_indices`.

"""
n_states = states.shape[0]
core_key = np.zeros(n_states, dtype=np.int64)
core_index = np.zeros(n_states, dtype=np.int64)

for i in range(n_states):
core_key_, core_index_ = indexer[array_to_tuple(indexer, states[i])]
core_key[i] = core_key_
core_index[i] = core_index_

Expand Down Expand Up @@ -700,3 +716,55 @@ def select_valid_choices(choices, choice_set):

"""
return [x for i, x in enumerate(choices) if choice_set[i]]


def apply_law_of_motion_for_core(df, optim_paras):
    """Apply the law of motion for the core dimensions.

    This function only applies the law of motion for core dimensions which are the
    period, experiences, and previous choices. Depending on the integer-encoded choice
    in ``df["choice"]``, the new state is computed.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame contains states with information on the period, experiences,
        previous choices. The current choice is encoded as an integer in a column named
        ``"choice"``.
    optim_paras : dict
        Contains model parameters.

    Returns
    -------
    df : pandas.DataFrame
        The DataFrame contains the states in the next period.

    """
    n_lags = optim_paras["n_lagged_choices"]

    # Increment the experience of the alternative chosen in this period. The
    # boolean mask adds one exactly where the encoded choice matches.
    for code, choice in enumerate(optim_paras["choices_w_exp"]):
        df[f"exp_{choice}"] += df["choice"] == code

    # Shift the window of lagged choices: the oldest lag falls out, every other
    # lag moves back by one, and the current choice becomes the newest lag.
    if n_lags:
        # Remember where the first lag lives so the new lag lands in the same
        # column position.
        first_lag_position = df.columns.tolist().index("lagged_choice_1")

        # The oldest lag drops out of the window.
        df = df.drop(columns=f"lagged_choice_{n_lags}")

        # Every remaining lag i becomes lag i + 1.
        df = df.rename(
            columns={
                f"lagged_choice_{lag}": f"lagged_choice_{lag + 1}"
                for lag in range(1, n_lags)
            }
        )

        # Insert the current choice as the newest lag.
        df.insert(first_lag_position, "lagged_choice_1", df["choice"])

    df["period"] = df["period"] + 1

    return df
67 changes: 6 additions & 61 deletions respy/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from respy.parallelization import parallelize_across_dense_dimensions
from respy.parallelization import split_and_combine_df
from respy.pre_processing.model_processing import process_params_and_options
from respy.shared import apply_law_of_motion_for_core
from respy.shared import calculate_value_functions_and_flow_utilities
from respy.shared import compute_covariates
from respy.shared import create_base_draws
Expand Down Expand Up @@ -218,10 +219,13 @@ def simulate(
optim_paras=optim_paras,
)

data.append(current_df_extended)
data.append(current_df_extended.copy(deep=True))

if is_n_step_ahead and period != n_simulation_periods - 1:
df = _apply_law_of_motion(current_df_extended, optim_paras)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be an option to not apply the law of motion here but take choice and dense key to get the next indices directly from the state space?
Then we would also save the indexer step and save the calculation.

Copy link
Member Author

@tobiasraabe tobiasraabe Aug 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have created the issue #376 to discuss your point. Even if my point on cpu bound and io bound is not true, it makes sense to separate the changes. Is it ok with you?

current_df_extended = current_df_extended.reset_index()
df = apply_law_of_motion_for_core(current_df_extended, optim_paras)
state_space_columns = create_state_space_columns(optim_paras)
df = df.set_index(["identifier", "period"])[state_space_columns]

simulated_data = _process_simulation_output(data, optim_paras)

Expand Down Expand Up @@ -554,65 +558,6 @@ def _random_choice(choices, probabilities=None, decimals=5):
return out


def _apply_law_of_motion(df, optim_paras):
    """Apply the law of motion to get the states in the next period.

    For n-step-ahead simulations, the states of the next period are generated from the
    current states and the current decision. This function changes experiences and
    previous choices according to the choice in the current period, to get the states of
    the next period.

    We implicitly assume that observed variables are constant.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame contains the simulated information of individuals in one period.
    optim_paras : dict
        Contains model parameters, in particular ``"n_lagged_choices"`` and
        ``"choices_w_exp"``.

    Returns
    -------
    df : pandas.DataFrame
        The DataFrame contains the states of individuals in the next period.

    """
    # Work on a copy so the caller's DataFrame is not mutated in place.
    df = df.copy()
    n_lagged_choices = optim_paras["n_lagged_choices"]

    # Update work experiences: the boolean mask adds one to the experience of the
    # alternative chosen in this period.
    for i, choice in enumerate(optim_paras["choices_w_exp"]):
        df[f"exp_{choice}"] += df["choice"] == i

    # Update lagged choices by deleting oldest lagged, renaming other lags and inserting
    # choice in the first position.
    if n_lagged_choices:
        # Save position of first lagged choice so the new lag is inserted in the same
        # column position.
        position = df.columns.tolist().index("lagged_choice_1")

        # Drop oldest lag.
        df = df.drop(columns=f"lagged_choice_{n_lagged_choices}")

        # Rename newer lags: lag i becomes lag i + 1.
        rename_lagged_choices = {
            f"lagged_choice_{i}": f"lagged_choice_{i + 1}"
            for i in range(1, n_lagged_choices)
        }
        df = df.rename(columns=rename_lagged_choices)

        # Add current choice as new lag.
        df.insert(position, "lagged_choice_1", df["choice"])

    # Increment period in MultiIndex by one.
    # NOTE(review): this passes the full per-row level values (not the unique level
    # values) to ``set_levels`` and relies on ``verify_integrity=False`` to skip the
    # uniqueness check — confirm this keeps level codes aligned on the pandas version
    # in use.
    df.index = df.index.set_levels(
        df.index.get_level_values("period") + 1, level="period", verify_integrity=False
    )

    # Keep only the state space columns in their canonical order.
    state_space_columns = create_state_space_columns(optim_paras)
    df = df[state_space_columns]

    return df


def _harmonize_simulation_arguments(method, df, n_simulation_periods, options):
"""Harmonize the arguments of the simulation.

Expand Down
Loading