# Source code for src.analysis.main_simulation.calc_simulation_convergence
"""
This module simulates the convergence of bagging towards a stable value as seen
in Subsection 5.4 of the final paper.
For this, the *simulate_convergence()* function uses the
``MonteCarloSimulation`` class described in :ref:`model_code` and returns the
results as a tuple. When the module is run as a script, these results are
collected in a dictionary and saved as a pickle file.
The intuition behind the simulation setup from :ref:`model_code` and
:ref:`design_choice` also carries over to this module.
"""
import sys
import json
import pickle
import numpy as np
from src.model_code.montecarlosimulation import MonteCarloSimulation
from bld.project_paths import project_paths_join as ppj
def simulate_convergence(general_settings, convergence_settings, model):
    """
    Simulate how the Bagging Algorithm behaves as bootstrap iterations grow.

    One ``MonteCarloSimulation`` instance is created and then evaluated once
    per entry of a grid of bootstrap iteration counts, plus once more for a
    single, larger iteration count that serves as the converged reference.

    Parameters
    ----------
    general_settings: Dictionary as described in :ref:`model_specs`
        Shared across various simulations; defines the overall simulation
        set-up. The keys read here are 'n_repeat', 'noise', 'n_test_train',
        'random_seeds', 'bagging_ratio' and 'min_split_tree'.

    convergence_settings: Dictionary as described in :ref:`model_specs`
        Set-up that is specific to the convergence simulation. The keys read
        here are 'min_bootstrap', 'max_bootstrap', 'steps_bootstrap' and
        'converged_bootstrap'.

    model: String that defines the data generating process to be considered.
        The options are 'friedman', 'linear' and 'indicator'; it is usually
        passed as the first system argument.

    Returns
    -------
    tuple
        - tuple[0]: Numpy array of shape = [number of grid points, 4] holding
          the MSPE decomposition for every bootstrap iteration count on the
          grid ``np.arange(min_bootstrap, max_bootstrap, steps_bootstrap)``.
        - tuple[1]: The MSPE decomposition (4 values) obtained with
          'converged_bootstrap' bootstrap iterations.

    """
    # One result row consists of MSE, Variance, Bias and Error.
    n_components = 4

    # Grid of bootstrap iteration counts. Note that ``np.arange`` excludes the
    # stop value, so 'max_bootstrap' itself is not part of the grid.
    bootstrap_grid = np.arange(
        convergence_settings['min_bootstrap'],
        convergence_settings['max_bootstrap'],
        convergence_settings['steps_bootstrap'],
    )

    # NaN-filled table that receives one decomposition per grid point.
    decomposition_table = np.full(
        (bootstrap_grid.shape[0], n_components), np.nan
    )

    # A single simulation instance is reused for every evaluation below.
    simulation = MonteCarloSimulation(
        n_repeat=general_settings['n_repeat'],
        noise=general_settings['noise'],
        data_process=model,
        n_test_train=general_settings['n_test_train'],
        random_seeds=general_settings['random_seeds'],
    )

    def decomposition_for(b_iterations):
        """Return the MSPE decomposition for *b_iterations* bootstrap draws."""
        return simulation.calc_mse(
            ratio=general_settings['bagging_ratio'],
            bootstrap=True,
            min_split_tree=general_settings["min_split_tree"],
            b_iterations=b_iterations,
        )

    # Walk over the grid and store each decomposition in its row.
    for row, b_iterations in enumerate(bootstrap_grid):
        decomposition_table[row, :] = decomposition_for(b_iterations)

    # Reference value for a high number of bootstrap iterations, used to
    # visualize the convergence.
    decomposition_large_b = decomposition_for(
        convergence_settings['converged_bootstrap']
    )

    return decomposition_table, decomposition_large_b
if __name__ == '__main__':
    # The data generating process is taken from the first command line
    # argument.
    DGP_MODEL = sys.argv[1]

    # Load the shared and the convergence-specific simulation settings.
    with open(ppj("IN_MODEL_SPECS", "general_settings.json")) as settings_file:
        GENERAL_SETTINGS = json.load(settings_file)

    with open(
        ppj("IN_MODEL_SPECS", "convergence_settings.json")
    ) as settings_file:
        CONVERGENCE_SETTINGS = json.load(settings_file)

    # Run the simulation; the result is a pair (grid results, large-B result).
    BAGGING_RANGE, BAGGING_LARGE = simulate_convergence(
        GENERAL_SETTINGS, CONVERGENCE_SETTINGS, DGP_MODEL
    )

    # Bundle both results under the keys stored in the output pickle file.
    SIMULATION_CONVERGENCE = {
        'bagging_range': BAGGING_RANGE,
        'bagging_large': BAGGING_LARGE,
    }

    # Write the results to a model-specific pickle file.
    OUTPUT_PATH = ppj(
        "OUT_ANALYSIS_MAIN",
        "output_convergence_{}.pickle".format(DGP_MODEL),
    )
    with open(OUTPUT_PATH, "wb") as out_file:
        pickle.dump(SIMULATION_CONVERGENCE, out_file)

    print(
        'Done with the {} model for the convergence simulation'
        .format(DGP_MODEL)
    )