Source code for src.final.real_data_simulation.plot_boston
"""
The module which created Figure 8 of the final paper can be found under
*src.final.real_data_simulation.plot_boston*. The calculations for this
have been performed in the module *calc_boston*, which can be
found under *src.analysis.real_data_simulation* and has been described
in :ref:`analysis`.
The *.pickle* files, which were created by the module described above and which are
used here, were saved under *bld.out.analysis.real_data_simulation*.
"""
import pickle
import json
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from bld.project_paths import project_paths_join as ppj
def plot_boston(settings_plotting, subagging_settings, output_boston):
    """
    Create figure 8 of the final paper and save it as a PDF.

    The figure shows the MSPE of subagging over the range of subsampling
    ratios together with two horizontal reference lines: the MSPE of bagging
    and the MSPE of subagging at the last ratio of the range.

    Parameters
    ----------
    settings_plotting: Dictionary as described in :ref:`model_specs`
        The dictionary contains all plotting specifications that are shared
        across various modules. The keys read here are ``'style'``,
        ``'figsize'`` (entry ``'single_model'``) and ``'colors'`` (entries
        ``'subagging'``, ``'bagging'`` and ``'trees'``).

    subagging_settings: Dictionary as described in :ref:`model_specs`
        The dictionary defines the simulation set-up that is specific to the
        subagging simulation. The keys read here are ``'min_ratio'``,
        ``'max_ratio'`` and ``'n_ratios'``.

    output_boston: Dictionary as defined by *calc_boston* in
        *src.analysis.real_data_simulation*
        The dictionary that contains the simulation results for bagging
        (``'mse_bagging'``) and subagging (``'mse_subagging'``, one value per
        ratio of the ratio range) for the boston housing data.

    Returns
    -------
    None
        The figure is written to *plot_boston.pdf* in the directory that
        ``ppj`` resolves for ``"OUT_FIGURES_REAL_DATA"`` and is closed
        afterwards.

    """
    plt.style.use([settings_plotting['style']])
    fig = plt.figure(figsize=settings_plotting['figsize']['single_model'])

    # pyplot keeps every figure alive in its global registry until it is
    # closed explicitly, so release it even if plotting or saving fails.
    try:
        n_ratios = subagging_settings["n_ratios"]
        colors = settings_plotting['colors']
        mse_subagging = output_boston['mse_subagging']

        # Create the range of subsampling ratios defined by the settings.
        ratio_range = np.linspace(
            subagging_settings['min_ratio'],
            subagging_settings['max_ratio'],
            n_ratios
        )

        # MSE for Bagging is constant.
        bagging_mse_plot = np.full(
            n_ratios, output_boston['mse_bagging'], dtype=float
        )
        # For ratio=1 subagging is the same as fitting a single tree. The last
        # entry is used, which assumes that 'max_ratio' equals 1.
        tree_mse_plot = np.full(n_ratios, mse_subagging[-1], dtype=float)

        # (values, color, legend label) for each line, in drawing order.
        series = (
            (mse_subagging, colors['subagging'], r'$MSPE \: Subagging$'),
            (bagging_mse_plot, colors['bagging'], r'$MSPE \: Bagging$'),
            (tree_mse_plot, colors['trees'], r'$MSPE \: Tree$'),
        )
        for values, color, label in series:
            plt.plot(ratio_range, values, color=color, label=label)

        plt.xlabel('$a$')
        plt.ylim(ymin=0)
        plt.title(r'$MSPE \: for \: Boston \: Housing \:Data$')

        # The legend is anchored below the axes.
        plt.legend(
            ncol=3, loc='lower left',
            bbox_to_anchor=(-0.1, -0.27),
            frameon=True, fontsize=12
        )
        plt.tight_layout(pad=0.4, w_pad=1, h_pad=2.5)

        fig.savefig(
            ppj("OUT_FIGURES_REAL_DATA", "plot_boston.pdf"),
            bbox_inches='tight'
        )
    finally:
        plt.close(fig)
if __name__ == '__main__':
    # Plotting specifications that are shared across the figure modules.
    with open(ppj("IN_MODEL_SPECS", "settings_plotting.json")) as plotting_file:
        SETTINGS_PLOTTING_IMPORTED = json.load(plotting_file)

    # The subagging settings are not listed as a waf dependency of this
    # module: the pickle file loaded below already depends on them, so the
    # dependency is implied and stating it again would be redundant.
    with open(ppj("IN_MODEL_SPECS", "subagging_settings.json")) as subagging_file:
        SUBAGGING_SETTINGS_IMPORTED = json.load(subagging_file)

    # Simulation results for the boston housing data.
    with open(
        ppj("OUT_ANALYSIS_REAL_DATA", "output_boston.pickle"), "rb"
    ) as results_file:
        OUTPUT_BOSTON_IMPORTED = pickle.load(results_file)

    plot_boston(
        settings_plotting=SETTINGS_PLOTTING_IMPORTED,
        subagging_settings=SUBAGGING_SETTINGS_IMPORTED,
        output_boston=OUTPUT_BOSTON_IMPORTED
    )