"""ML-ENSEMBLE
:author: Sebastian Flennerhag
:copyright: 2017
:licence: MIT
Base class for estimation.
"""
import numpy as np
from abc import ABCMeta, abstractmethod
from ._base_functions import fit, predict, transform, construct_args
from ..utils import check_is_fitted, print_time, safe_print
from ..utils.exceptions import NotFittedError, ParallelProcessingWarning
# Prefer the high-resolution performance counter for timing; fall back to
# ``time.time`` on interpreters where ``perf_counter`` is unavailable
# (it was added in Python 3.3).
try:
    from time import perf_counter as time_
except ImportError:
    from time import time as time_
import warnings
# Dispatch table mapping a job name to the function that executes it.
# Note that 'predict_proba' is routed to the same function as 'predict'.
FUNCS = dict(
    fit=fit,
    predict=predict,
    predict_proba=predict,
    transform=transform,
)
class BaseEstimator(object):

    """Base class for estimating a layer in parallel.

    Estimation class to be used as a base for a layer estimation engine that
    is callable by the :class:`ParallelProcess` job manager.

    A subclass must implement a constructor that accepts the following args:

        - ``job`` : the :class:`Job` instance containing relevant data
        - ``layer``: the :class:`Layer` instance to estimate
        - ``n``: the position in the :class:`LayerContainer` stack of the layer

    as well as a ``run`` method that accepts a :class:`Parallel` instance.

    Parameters
    ----------
    layer : :class:`Layer`
        layer to be estimated.
    """

    # NOTE: this is the Python 2 metaclass declaration. On Python 3 it is an
    # ordinary class attribute, so ``@abstractmethod`` is not enforced there.
    # It is kept as-is on purpose: switching to a real metaclass would change
    # which classes can be instantiated.
    __metaclass__ = ABCMeta

    @abstractmethod
    def __init__(self, layer):
        self.layer = layer

    def __call__(self, parallel):
        """Defines the job to complete.

        Runs the estimation through :meth:`run`. When the layer's ``verbose``
        attribute is at least 2, a start message and the elapsed time are
        printed; output goes to ``stderr`` while ``verbose`` is below 50 and
        to ``stdout`` otherwise.

        Parameters
        ----------
        parallel : object
            :class:`Parallel` instance.
        """
        # Read verbosity once so the messages before and after ``run`` are
        # consistent, and ``printout``/``t0`` are always bound when used, even
        # if ``layer.verbose`` is modified while ``run`` executes.
        verbose = self.layer.verbose
        report = verbose >= 2

        if report:
            printout = "stderr" if verbose < 50 else "stdout"
            safe_print('Processing %s' % self.layer.name, file=printout)
            t0 = time_()

        self.run(parallel)

        if report:
            print_time(t0, '%s Done' % self.layer.name, file=printout)

    @abstractmethod
    def run(self, parallel):
        """Method for executing estimation.

        Default method relies on the default constructor. Both can be replaced
        if desired.

        Parameters
        ----------
        parallel : object
            :class:`Parallel` instance.
        """
        # ``execute`` is stored on the instance as a plain function (see
        # ``_default_initialization``), so it is not bound and the estimator
        # must be passed explicitly as the first argument.
        self.execute(self, parallel=parallel, **self.args)

    def _default_initialization(self, job):
        """Utility method for default initialization scheme.

        Stores the job directory, looks up the function to execute for the
        job type in ``FUNCS`` and builds that function's arguments.

        Parameters
        ----------
        job : object
            :class:`Job` instance; its ``dir`` and ``job`` attributes are read.

        Raises
        ------
        KeyError
            If ``job.job`` is not a key of ``FUNCS``.
        """
        self.dir = job.dir
        self.execute = FUNCS[job.job]
        self.args = construct_args(self.execute, job)

    def _build_scores(self, s):
        """Build a cv-score mapping.

        Parameters
        ----------
        s : list
            List of ``(key, score)`` pairs. The first ``layer.n_pred`` entries
            name the main estimators (their score is ignored); the remaining
            entries carry the scores to aggregate. Keys have the form
            ``'<case>___<estimator>'``. For the remaining entries, the last
            ``'__'``-separated part of the estimator name and everything after
            the first ``'__'`` in the case name are dropped (presumably fold
            indices - confirm against the caller).

        Returns
        -------
        dict
            Maps each estimator name to a ``(mean, std)`` tuple. An entry is
            left as its raw list of scores if the list is empty, contains
            ``None``, or aggregation fails (in which case a
            :class:`ParallelProcessingWarning` is issued).

        Raises
        ------
        KeyError
            If a score entry resolves to a name absent from the first
            ``layer.n_pred`` entries.
        """
        n_pred = self.layer.n_pred
        scores = dict()

        # Build shell dictionary with main estimators as keys
        for key, _ in s[:n_pred]:
            case_name, est_name = key.split('___')
            if case_name == '':
                name = est_name
            else:
                name = '%s__%s' % (case_name, est_name)
            scores[name] = []

        # Populate with list of scores from folds
        for key, score in s[n_pred:]:
            case_name, est_name = key.split('___')
            est_name = '__'.join(est_name.split('__')[:-1])
            if '__' not in case_name:
                name = est_name
            else:
                case_name = case_name.split('__')[0]
                name = '%s__%s' % (case_name, est_name)
            scores[name].append(score)

        # Aggregate to get cross-validated mean scores. Only values of
        # existing keys are replaced, so iterating over the dict is safe.
        for name, fold_scores in scores.items():
            if not fold_scores or None in fold_scores:
                continue
            try:
                scores[name] = (np.mean(fold_scores), np.std(fold_scores))
            except Exception as exc:
                # Best effort: keep the raw list and tell the user why.
                warnings.warn("Aggregating scores failed. Scores:\n%r\n"
                              "Details: %r" % (fold_scores, exc),
                              ParallelProcessingWarning)
        return scores

    def _check_fitted(self):
        """Utility function for checking that fitted estimators exist.

        Raises
        ------
        AssertionError
            If ``layer.estimators_`` is not a list.
        NotFittedError
            If ``layer.estimators_`` is an empty list.
        """
        check_is_fitted(self.layer, "estimators_")

        estimators = self.layer.estimators_
        # Explicit raise instead of ``assert`` so the check is not stripped
        # when Python runs with ``-O``; the exception type is unchanged.
        if not isinstance(estimators, list):
            raise AssertionError(
                "layer.estimators_ must be a list, got %s."
                % type(estimators).__name__)

        if not estimators:
            raise NotFittedError("No estimators successfully fitted.")

    def _retrieve(self, s):
        """Get transformers and estimators fitted on folds or on full data.

        Parameters
        ----------
        s : str
            ``'full'`` to get the first ``n_pred`` estimators and first
            ``n_prep`` preprocessing pipelines; ``'fold'`` to get the
            estimators and preprocessing pipelines after those positions.

        Returns
        -------
        prep : dict or None
            The selected slice of ``layer.preprocessing_`` converted to a
            dict, or ``None`` if ``layer.preprocessing_`` is ``None``.
        ests : list
            The selected slice of ``layer.estimators_``.

        Raises
        ------
        ValueError
            If ``s`` is neither ``'full'`` nor ``'fold'``.
        """
        n_pred = self.layer.n_pred
        # We take max on n_prep to avoid getting an empty preprocessing_ slice
        # when n_prep = 0, i.e. when there is no preprocessing.
        n_prep = max(self.layer.n_prep, 1)

        if s == 'full':
            # The leading entries are fitted on the full training data.
            est_slice, prep_slice = slice(None, n_pred), slice(None, n_prep)
        elif s == 'fold':
            # The trailing entries are fitted on folds of the training data.
            est_slice, prep_slice = slice(n_pred, None), slice(n_prep, None)
        else:
            raise ValueError("Argument not understood. Only 'full' and 'fold' "
                             "are acceptable argument values.")

        ests = self.layer.estimators_[est_slice]

        preprocessing = self.layer.preprocessing_
        if preprocessing is None:
            prep = None
        else:
            prep = dict(preprocessing[prep_slice])

        return prep, ests