Source code for src.final.real_data_simulation.plot_boston

"""
The module which created Figure 8 of the final paper can be found under
*src.final.real_data_simulation.plot_boston*. The calculations for this
have been performed in the module *calc_boston*, which can be
found under *src.analysis.real_data_simulation* and has been described
in :ref:`analysis`.
The *.pickle* files, which were created by the module described above and which are
used here, were saved under *bld.out.analysis.real_data_simulation*.

"""
import pickle
import json
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from bld.project_paths import project_paths_join as ppj


def plot_boston(settings_plotting, subagging_settings, output_boston):
    """
    Create Figure 8 of the final paper and save it as *plot_boston.pdf*.

    The MSPE of subagging is drawn against the subsampling ratio. The MSPE of
    bagging and of a single tree are added as constant reference lines over
    the same ratio range.

    Parameters
    ----------
    settings_plotting: Dictionary as described in :ref:`model_specs`
        The dictionary contains all plotting specifications that are shared
        across various modules. The keys read here are ``'style'``,
        ``'figsize'['single_model']`` and ``'colors'`` (with the entries
        ``'subagging'``, ``'bagging'`` and ``'trees'``).

    subagging_settings: Dictionary as described in :ref:`model_specs`
        The dictionary defines the simulation set-up that is specific to the
        subagging simulation. The keys read here are ``'min_ratio'``,
        ``'max_ratio'`` and ``'n_ratios'``.

    output_boston: Dictionary as defined by *calc_boston* in
    *src.analysis.real_data_simulation*
        The dictionary that contains the simulation results for bagging
        (``'mse_bagging'``) and subagging (``'mse_subagging'``, for the
        ratio range) for the boston housing data.

    Returns
    -------
    None
        The figure is written to the path
        ``ppj("OUT_FIGURES_REAL_DATA", "plot_boston.pdf")``.

    """
    plt.style.use([settings_plotting['style']])
    fig, ax = plt.subplots(
        figsize=settings_plotting['figsize']['single_model']
    )

    # pyplot keeps every figure it creates in a global registry until it is
    # closed explicitly, so the figure is released even if plotting or
    # saving fails.
    try:
        n_ratios = subagging_settings["n_ratios"]

        # Create a range of subsampling ratios as it was used in all
        # simulations.
        ratio_range = np.linspace(
            subagging_settings['min_ratio'],
            subagging_settings['max_ratio'],
            n_ratios
        )

        # MSE for Bagging is constant.
        bagging_mse_plot = np.full(
            n_ratios, output_boston['mse_bagging'], dtype=float
        )
        # For ratio=1 subagging is the same as fitting a single tree.
        # NOTE(review): this takes the last subagging value, i.e. it assumes
        # that 'max_ratio' equals 1 - confirm against the model specs.
        tree_mse_plot = np.full(
            n_ratios, output_boston['mse_subagging'][-1], dtype=float
        )

        ax.plot(
            ratio_range,
            output_boston['mse_subagging'],
            color=settings_plotting['colors']['subagging'],
            label=r'$MSPE \: Subagging$'
        )
        ax.plot(
            ratio_range,
            bagging_mse_plot,
            color=settings_plotting['colors']['bagging'],
            label=r'$MSPE \: Bagging$'
        )
        ax.plot(
            ratio_range,
            tree_mse_plot,
            color=settings_plotting['colors']['trees'],
            label=r'$MSPE \: Tree$'
        )
        ax.set_xlabel('$a$')
        # Anchor the y-axis at zero; the upper limit stays automatic.
        ax.set_ylim(bottom=0)
        ax.set_title(r'$MSPE \: for \: Boston \: Housing \:Data$')
        # Place the legend below the axes (anchor is in axes coordinates).
        ax.legend(
            ncol=3, loc='lower left',
            bbox_to_anchor=(-0.1, -0.27),
            frameon=True, fontsize=12
        )
        fig.tight_layout(pad=0.4, w_pad=1, h_pad=2.5)
        fig.savefig(
            ppj("OUT_FIGURES_REAL_DATA", "plot_boston.pdf"),
            bbox_inches='tight'
        )
    finally:
        plt.close(fig)


if __name__ == '__main__':

    # Plotting specifications that are shared across modules.
    with open(ppj("IN_MODEL_SPECS", "settings_plotting.json")) as specs_file:
        PLOTTING_SPECS = json.load(specs_file)

    # The subagging settings are not declared as a waf dependency of this
    # script: the pickle file loaded below already depends on them, so
    # listing them again would be redundant.
    with open(ppj("IN_MODEL_SPECS", "subagging_settings.json")) as specs_file:
        SUBAGGING_SPECS = json.load(specs_file)

    # Simulation results written by the analysis step.
    with open(
        ppj("OUT_ANALYSIS_REAL_DATA", "output_boston.pickle"), "rb"
    ) as results_file:
        BOSTON_RESULTS = pickle.load(results_file)

    plot_boston(
        settings_plotting=PLOTTING_SPECS,
        subagging_settings=SUBAGGING_SPECS,
        output_boston=BOSTON_RESULTS,
    )