# Source code for mlens.preprocessing.ensemble_transformer
"""ML-ENSEMBLE
:author: Sebastian Flennerhag
:copyright: 2017
:licence: MIT
Ensemble transformer class. Fully integrable with Scikit-learn.
"""
from __future__ import division
from .. import config
from ..base import INDEXERS, IdTrain
from ..utils import check_ensemble_build, check_inputs
from ..ensemble.base import BaseEnsemble
from ..externals.sklearn.validation import check_random_state


class EnsembleTransformer(BaseEnsemble):
r"""Ensemble Transformer class.
The Ensemble class allows users to build layers of an ensemble through a
transformer API. The transformer is closely related to
:class:`SequentialEnsemble`, in that any accepted type of layer can be
added. The transformer differs fundamentally in one significant aspect:
when fitted, it will store a random sample of the training set together
with the training dimensions, and if in a call to ``transform``, the
data to be transformed correspodns to the training set, the transformer
will recreate the prediction matrix from the ``fit`` call. In contrast,
a fitted ensemble will only use the base learners fitted on the full
dataset, and as such predicting the training set will not reproduce the
predictions from the ``fit`` call.
The :class:`EnsembleTransformer` is a powerful tool to use as a
preprocessing pipeline in an :class:`Evaluator` instance, as it would
faithfully recreate the prediction matrix a potential meta learner would
face. Hence, a user can 'preprocess' the training data with the
:class:`EnsembleTransformer` to generate k-fold base learner predictions,
and then fit different meta learners (or higher-order layers) in a call
to ``evaluate``.
See Also
--------
:class:`SequentialEnsemble`, :class:`Evaluator`
Parameters
----------
shuffle : bool (default = True)
whether to shuffle data before generating folds.
random_state : int (default = None)
random seed if shuffling inputs.
scorer : object (default = None)
scoring function. If a function is provided, base estimators will be
scored on the training set assembled for fitting the meta estimator.
Since those predictions are out-of-sample, the scores represent valid
test scores. The scorer should be a function that accepts an array of
true values and an array of predictions: ``score = f(y_true, y_pred)``.
raise_on_exception : bool (default = True)
whether to issue warnings on soft exceptions or raise error.
Examples include lack of layers, bad inputs, and failed fit of an
estimator in a layer. If set to ``False``, warnings are issued instead
but estimation continues unless exception is fatal. Note that this
can result in unexpected behavior unless the exception is anticipated.
sample_dim : int (default = 10)
dimensionality of training set to sample. During a call to `fit`, a
random sample of size [sample_dim, sample_dim] will be sampled from the
training data, along with the dimensions of the training data. If in a
call to ``transform``, sampling the same indices on the array to
transform gives the same sample matrix, the transformer will reproduce
the predictions from the call to ``fit``, as opposed to using the
base learners fitted on the full training data.
array_check : int (default = 2)
level of strictness in checking input arrays.
- ``array_check = 0`` will not check ``X`` or ``y``
- ``array_check = 1`` will check ``X`` and ``y`` for
inconsistencies and warn when format looks suspicious,
but retain original format.
- ``array_check = 2`` will impose Scikit-learn array checks,
which converts ``X`` and ``y`` to numpy arrays and raises
an error if conversion fails.
verbose : int or bool (default = False)
level of verbosity.
* ``verbose = 0`` silent (same as ``verbose = False``)
* ``verbose = 1`` messages at start and finish (same as
``verbose = True``)
* ``verbose = 2`` messages for each layer
If ``verbose >= 50`` prints to ``sys.stdout``, else ``sys.stderr``.
For verbosity in the layers themselves, use ``fit_params``.
n_jobs : int (default = 1)
number of CPU cores to use for fitting and prediction.
Attributes
----------
scores\_ : dict
if ``scorer`` was passed to instance, ``scores_`` contains dictionary
with cross-validated scores assembled during ``fit`` call. The fold
structure used for scoring is determined by ``folds``.
Examples
--------
>>> from mlens.preprocessing import EnsembleTransformer
>>> from mlens.model_selection import Evaluator
>>> from mlens.metrics.metrics import rmse
>>> from sklearn.datasets import load_boston
>>> from sklearn.linear_model import Lasso
>>> from sklearn.svm import SVR
>>> from scipy.stats import uniform
>>> from pandas import DataFrame
>>>
>>> X, y = load_boston(True)
>>>
>>> ensemble = EnsembleTransformer()
>>>
>>> ensemble.add('stack', [SVR(), Lasso()])
>>>
>>> evl = Evaluator(scorer=rmse, random_state=10)
>>>
>>> evl.preprocess(X, y, [('scale', ensemble)])
>>>
>>> draws = {(None, 'svr'): {'C': uniform(10, 100)},
... (None, 'lasso'): {'alpha': uniform(0.01, 0.1)}}
>>>
>>> evl.evaluate(X, y, [SVR(), Lasso()], draws, n_iter=10)
>>>
>>> DataFrame(evl.summary)
fit_time_mean fit_time_std test_score_mean test_score_std \
lasso 0.000818 0.000362 7.514181 0.827578
svr 0.009790 0.000596 10.949149 0.577554
train_score_mean train_score_std params
lasso 6.228287 0.949872 {'alpha': 0.0871320643267}
svr 5.794856 1.348409 {'C': 12.0751949359}
"""

    def __init__(self,
                 shuffle=False,
                 random_state=None,
                 scorer=None,
                 raise_on_exception=True,
                 array_check=2,
                 verbose=False,
                 n_jobs=1,
                 layers=None,
                 backend=None,
                 sample_dim=10):

        super(EnsembleTransformer, self).__init__(
            shuffle=shuffle, random_state=random_state,
            scorer=scorer, raise_on_exception=raise_on_exception,
            verbose=verbose, n_jobs=n_jobs, layers=layers,
            backend=backend, array_check=array_check)

        self.sample_dim = sample_dim
        self.id_train = IdTrain(size=sample_dim)
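
    # ``IdTrain`` keeps a random sample (of size ``sample_dim``) of the array
    # seen in ``fit``. In ``transform``, ``id_train.is_train(X)`` checks
    # whether sampling the same indices of ``X`` reproduces that sample; if
    # so, the fold predictions from ``fit`` are replayed rather than
    # predicting with the base learners fitted on the full training set.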

    def add(self, cls, estimators, preprocessing=None, **kwargs):
        """Add layer to ensemble transformer.

        Parameters
        ----------
        cls : str
            layer class. Accepted types are:

            * 'blend' : blend ensemble
            * 'subset' : subsemble
            * 'stack' : super learner

        estimators : dict of lists or list or instance
            estimators constituting the layer. If ``preprocessing`` is
            ``None`` and the layer is meant to be the meta estimator, it is
            permissible to pass a single instantiated estimator. If
            ``preprocessing`` is ``None`` or ``list``, ``estimators`` should
            be a ``list``. The list can either contain estimator instances,
            named tuples of estimator instances, or a combination of both. ::

                option_1 = [estimator_1, estimator_2]
                option_2 = [("est-1", estimator_1), ("est-2", estimator_2)]
                option_3 = [estimator_1, ("est-2", estimator_2)]

            If different preprocessing pipelines are desired, a dictionary
            that maps estimators to preprocessing pipelines must be passed.
            The names of the estimator dictionary must correspond to the
            names of the preprocessing dictionary. ::

                preprocessing_cases = {"case-1": [trans_1, trans_2],
                                       "case-2": [alt_trans_1, alt_trans_2]}

                estimators = {"case-1": [est_a, est_b],
                              "case-2": [est_c, est_d]}

            The lists for each dictionary entry can be any of ``option_1``,
            ``option_2`` and ``option_3``.

        preprocessing : dict of lists or list, optional (default = None)
            preprocessing pipelines for given layer. If the same preprocessing
            applies to all estimators, ``preprocessing`` should be a list of
            transformer instances. The list can contain the instances
            directly, named tuples of transformers, or a combination of
            both. ::

                option_1 = [transformer_1, transformer_2]
                option_2 = [("trans-1", transformer_1),
                            ("trans-2", transformer_2)]
                option_3 = [transformer_1, ("trans-2", transformer_2)]

            If different preprocessing pipelines are desired, a dictionary
            that maps preprocessing pipelines to estimator cases must be
            passed. The names of the preprocessing dictionary must correspond
            to the names of the estimator dictionary. ::

                preprocessing_cases = {"case-1": [trans_1, trans_2],
                                       "case-2": [alt_trans_1, alt_trans_2]}

                estimators = {"case-1": [est_a, est_b],
                              "case-2": [est_c, est_d]}

            The lists for each dictionary entry can be any of ``option_1``,
            ``option_2`` and ``option_3``.

        **kwargs : optional
            optional keyword arguments to instantiate layer with. See
            respective ensemble for further details.

        Returns
        -------
        self : instance
            ensemble instance with layer instantiated.
        """
        if cls not in INDEXERS:
            raise NotImplementedError("Layer class not implemented. Select "
                                      "one of %r." % sorted(INDEXERS))

        # If no kwargs, instantiate with defaults
        if kwargs is None:
            return self._add(estimators, cls, INDEXERS[cls](), preprocessing)

        # Else, pop arguments belonging to the indexer
        indexer, kwargs_idx = INDEXERS[cls], dict()

        args = indexer.__init__.__code__.co_varnames
        for arg in args:
            if arg in kwargs:
                kwargs_idx[arg] = kwargs.pop(arg)

        if 'raise_on_exception' in args and \
                'raise_on_exception' not in kwargs_idx:
            kwargs_idx['raise_on_exception'] = self.raise_on_exception
        else:
            kwargs['raise_on_exception'] = kwargs_idx['raise_on_exception']

        indexer = indexer(**kwargs_idx)

        return self._add(estimators=estimators,
                         cls=cls,
                         indexer=indexer,
                         preprocessing=preprocessing,
                         verbose=self.verbose,
                         **kwargs)
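
    # Keyword arguments passed to ``add`` are routed by introspection: any
    # name found in the chosen indexer's ``__init__`` signature is popped
    # from ``kwargs`` and used to instantiate the indexer, while the
    # remainder is forwarded to the layer. For illustration (the ``folds``
    # argument is an assumption about the 'stack' indexer's signature, not
    # something documented here)::
    #
    #     transformer.add('stack', [SVR(), Lasso()], folds=3)
    #
    # would build the fold indexer with ``folds=3`` and add the layer with
    # its remaining defaults.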

    def fit(self, X, y=None):
        """Fit the transformer.

        Same as the fit method on an ensemble, except that a sample of X is
        stored for future comparison.
        """
        X, y = check_inputs(X, y, self.array_check)

        self.id_train.fit(X)

        return super(EnsembleTransformer, self).fit(X, y)

    def predict(self, X):
        """Generate predictions for X. Same as ``transform``."""
        return self.transform(X)

    def transform(self, X, y=None):
        """Transform input :math:`X` into a prediction matrix :math:`Z`.

        If :math:`X` is the training set, the transformer will
        reproduce the :math:`Z` from the call to ``fit``. If :math:`X` is
        another data set, :math:`Z` will be produced using base learners
        fitted on the full training data (equivalent to calling ``predict``
        on an ensemble).
        """
        if not self.id_train.is_train(X):
            return super(EnsembleTransformer, self).predict(X)
        else:
            return self._transform(X)

    def _transform(self, X):
        """Reproduce predictions from 'fit' call."""
        if not check_ensemble_build(self):
            # No layers instantiated, but raise_on_exception is False
            return

        X, _ = check_inputs(X, check_level=self.array_check)

        if self.shuffle:
            r = check_random_state(self.random_state)
            idx = r.permutation(X.shape[0])
            X = X[idx]

        y = self.layers.transform(X)

        if y.shape[1] == 1:
            # The meta estimator is treated as a layer and thus a prediction
            # matrix with shape [n_samples, 1] is created. Ravel before return
            y = y.ravel()

        return y
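

# Minimal usage sketch of the behaviour described in the class docstring:
# transforming the training set replays the cross-validated prediction
# matrix from ``fit``, while transforming other data uses the base learners
# fitted on the full training set. The dataset and estimators below mirror
# the docstring example and assume scikit-learn is available.
if __name__ == '__main__':
    from sklearn.datasets import load_boston
    from sklearn.linear_model import Lasso
    from sklearn.svm import SVR

    X, y = load_boston(True)

    transformer = EnsembleTransformer()
    transformer.add('stack', [SVR(), Lasso()])
    transformer.fit(X, y)

    # Same data as in ``fit``: the stored sample matches, so the fold-wise
    # predictions from the ``fit`` call are reproduced.
    Z_train = transformer.transform(X)

    # New data: the sample check fails, so predictions come from the base
    # learners fitted on the full training set (same as ``predict``).
    Z_new = transformer.transform(X[:5])

    print(Z_train.shape, Z_new.shape)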