# Source code for mlens.parallel.single_run

"""ML-ENSEMBLE

:author: Sebastian Flennerhag
:copyright: 2017
:licence: MIT

Estimation engine for parallel preprocessing of estimators in a single run,
such as when fitting a final layer (meta estimator) that does not require
propagating predictions.
"""

from .estimation import BaseEstimator
from ..externals.sklearn.base import clone


###############################################################################
class SingleRun(BaseEstimator):

    """Single run fit sub-process class.

    Estimation engine that fits the estimators of a layer directly,
    without any sub-fits.
    """

    def __init__(self, job, layer):
        super(SingleRun, self).__init__(layer=layer)
        self._default_initialization(job)

    def run(self, parallel):
        """Execute the single-run estimation via the base class ``run``."""
        super(SingleRun, self).run(parallel)

    def _format_instance_list(self):
        """Expand the layer's instance lists into estimation tuples.

        The expanded estimators are stored on ``self.e`` and the expanded
        preprocessing pipelines on ``self.t``.
        """
        layer = self.layer
        self.e = _expand_instance_list(layer.estimators)
        self.t = _expand_instance_list(layer.preprocessing)

    def _get_col_id(self):
        """Assign a unique col_id to every estimator and store it on ``self.c``."""
        layer = self.layer
        # Fall back to a single column per estimator when the layer does not
        # expose ``classes_``.
        n_labels = getattr(layer, 'classes_', 1)
        n_propagated = layer.n_feature_prop
        self.c = _get_col_idx(
            layer.preprocessing, layer.estimators, n_labels, n_propagated)
############################################################################### def _expand_instance_list(instance_list): """Build a list of estimation tuples with train and test indices.""" # We modify the instance list slightly by adding None for the # training and test set indices if isinstance(instance_list, dict): return [(case, None, None, [(n, clone(e)) for n, e in instance_list[case]]) for case in sorted(instance_list)] else: return [(None, None, None, [(n, clone(e)) for n, e in instance_list])] def _get_col_idx(preprocessing, estimators, labels, n_feature_prop): """Utility for assigning each ``est`` in each ``prep`` a unique ``col_id``. Parameters ---------- preprocessing : dict or list mapping of preprocessing cases, if any. estimators : dict or list mapping of estimators per preprocessing case, or list of estimators. labels : int number of labels to expand col_id with n_feature_prop : int number of features being propagated. Predictions are concatenated from the right. """ inc = 1 if labels is None else labels # Set up main columns mapping if isinstance(preprocessing, list) or preprocessing is None: idx = {(None, inst_name): int(n_feature_prop + inc * i) for i, (inst_name, _) in enumerate(estimators)} else: # Nested for loop required case_list, idx, col = sorted(preprocessing), dict(), n_feature_prop for case in case_list: for inst_name, _ in estimators[case]: idx[case, inst_name] = col col += inc return idx