
"""ML-ENSEMBLE

:author: Sebastian Flennerhag
:copyright: 2017
:licence: MIT

Ensemble transformer class. Fully integrable with Scikit-learn.
"""

from __future__ import division

from .. import config
from ..base import INDEXERS, IdTrain
from ..utils import check_ensemble_build, check_inputs
from ..ensemble.base import BaseEnsemble
from ..externals.sklearn.validation import check_random_state


class EnsembleTransformer(BaseEnsemble):

    r"""Ensemble Transformer class.

    The :class:`EnsembleTransformer` allows users to build layers of an
    ensemble through a transformer API. The transformer is closely related
    to :class:`SequentialEnsemble`, in that any accepted type of layer can
    be added. The transformer differs fundamentally in one significant
    aspect: when fitted, it stores a random sample of the training set
    together with the training dimensions, and if, in a call to
    ``transform``, the data to be transformed corresponds to the training
    set, the transformer recreates the prediction matrix from the ``fit``
    call. In contrast, a fitted ensemble only uses the base learners fitted
    on the full dataset, so predicting the training set will not reproduce
    the predictions from the ``fit`` call.

    The :class:`EnsembleTransformer` is a powerful tool to use as a
    preprocessing pipeline in an :class:`Evaluator` instance, as it
    faithfully recreates the prediction matrix a potential meta learner
    would face. Hence, a user can 'preprocess' the training data with the
    :class:`EnsembleTransformer` to generate k-fold base learner
    predictions, and then fit different meta learners (or higher-order
    layers) in a call to ``evaluate``.

    See Also
    --------
    :class:`SequentialEnsemble`, :class:`Evaluator`

    Parameters
    ----------
    shuffle : bool (default = False)
        whether to shuffle data before generating folds.

    random_state : int (default = None)
        random seed if shuffling inputs.

    scorer : object (default = None)
        scoring function. If a function is provided, base estimators will be
        scored on the training set assembled for fitting the meta estimator.
        Since those predictions are out-of-sample, the scores represent
        valid test scores. The scorer should be a function that accepts an
        array of true values and an array of predictions:
        ``score = f(y_true, y_pred)``.

    raise_on_exception : bool (default = True)
        whether to issue warnings on soft exceptions or raise errors.
        Examples include lack of layers, bad inputs, and failed fit of an
        estimator in a layer. If set to ``False``, warnings are issued
        instead and estimation continues unless the exception is fatal.
        Note that this can result in unexpected behavior unless the
        exception is anticipated.

    sample_dim : int (default = 10)
        dimensionality of the training set sample. During a call to ``fit``,
        a random sample of size [sample_dim, sample_dim] is drawn from the
        training data, along with the dimensions of the training data. If,
        in a call to ``transform``, sampling the same indices on the array
        to transform gives the same sample matrix, the transformer
        reproduces the predictions from the call to ``fit``, as opposed to
        using the base learners fitted on the full training data.

    array_check : int (default = 2)
        level of strictness in checking input arrays.

            - ``array_check = 0`` will not check ``X`` or ``y``

            - ``array_check = 1`` will check ``X`` and ``y`` for
              inconsistencies and warn when format looks suspicious,
              but retain original format.

            - ``array_check = 2`` will impose Scikit-learn array checks,
              which convert ``X`` and ``y`` to numpy arrays and raise
              an error if conversion fails.

    verbose : int or bool (default = False)
        level of verbosity.

            * ``verbose = 0`` silent (same as ``verbose = False``)

            * ``verbose = 1`` messages at start and finish
              (same as ``verbose = True``)

            * ``verbose = 2`` messages for each layer

        If ``verbose >= 50`` prints to ``sys.stdout``, else ``sys.stderr``.
        For verbosity in the layers themselves, use ``fit_params``.

    n_jobs : int (default = 1)
        number of CPU cores to use for fitting and prediction.

    Attributes
    ----------
    scores\_ : dict
        if ``scorer`` was passed to instance, ``scores_`` contains
        dictionary with cross-validated scores assembled during ``fit``
        call. The fold structure used for scoring is determined by
        ``folds``.

    Examples
    --------
    >>> from mlens.preprocessing import EnsembleTransformer
    >>> from mlens.model_selection import Evaluator
    >>> from mlens.metrics.metrics import rmse
    >>> from sklearn.datasets import load_boston
    >>> from sklearn.linear_model import Lasso
    >>> from sklearn.svm import SVR
    >>> from scipy.stats import uniform
    >>> from pandas import DataFrame
    >>>
    >>> X, y = load_boston(True)
    >>>
    >>> ensemble = EnsembleTransformer()
    >>>
    >>> ensemble.add('stack', [SVR(), Lasso()])
    >>>
    >>> evl = Evaluator(scorer=rmse, random_state=10)
    >>>
    >>> evl.preprocess(X, y, [('scale', ensemble)])
    >>>
    >>> draws = {(None, 'svr'): {'C': uniform(10, 100)},
    ...          (None, 'lasso'): {'alpha': uniform(0.01, 0.1)}}
    >>>
    >>> evl.evaluate(X, y, [SVR(), Lasso()], draws, n_iter=10)
    >>>
    >>> DataFrame(evl.summary)
           fit_time_mean  fit_time_std  test_score_mean  test_score_std  \
    lasso       0.000818      0.000362         7.514181        0.827578
    svr         0.009790      0.000596        10.949149        0.577554
           train_score_mean  train_score_std                      params
    lasso          6.228287         0.949872  {'alpha': 0.0871320643267}
    svr            5.794856         1.348409        {'C': 12.0751949359}
    """

    def __init__(self,
                 shuffle=False,
                 random_state=None,
                 scorer=None,
                 raise_on_exception=True,
                 array_check=2,
                 verbose=False,
                 n_jobs=1,
                 layers=None,
                 backend=None,
                 sample_dim=10):
        super(EnsembleTransformer, self).__init__(
            shuffle=shuffle, random_state=random_state, scorer=scorer,
            raise_on_exception=raise_on_exception, verbose=verbose,
            n_jobs=n_jobs, layers=layers, backend=backend,
            array_check=array_check)

        self.sample_dim = sample_dim
        self.id_train = IdTrain(size=sample_dim)

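    # How train-set detection works, shown as a minimal conceptual sketch
    # (the real logic lives in ``mlens.base.IdTrain``; its exact index
    # selection may differ). ``fit`` stores a random sub-sample of ``X``
    # along with the training dimensions; ``is_train`` re-samples the same
    # indices on an incoming array and compares against the stored sample:
    #
    #   import numpy as np
    #
    #   X = np.random.rand(100, 20)             # stand-in training data
    #   rs = np.random.RandomState(0)
    #   rows = rs.permutation(X.shape[0])[:10]  # sample_dim row indices
    #   cols = rs.permutation(X.shape[1])[:10]  # sample_dim column indices
    #   sample = X[np.ix_(rows, cols)]          # stored during ``fit``
    #
    #   def is_train(Z):
    #       """Return True if Z appears to be the training array."""
    #       if Z.shape != X.shape:              # dimensions must match
    #           return False
    #       return np.array_equal(Z[np.ix_(rows, cols)], sample)
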
    def add(self, cls, estimators, preprocessing=None, **kwargs):
        """Add layer to ensemble transformer.

        Parameters
        ----------
        cls : str
            layer class. Accepted types are:

                * 'blend' : blend ensemble

                * 'subset' : subsemble

                * 'stack' : super learner

        estimators : dict of lists or list or instance
            estimators constituting the layer. If preprocessing is none and
            the layer is meant to be the meta estimator, it is permissible
            to pass a single instantiated estimator. If ``preprocessing``
            is ``None`` or ``list``, ``estimators`` should be a ``list``.
            The list can either contain estimator instances, named tuples
            of estimator instances, or a combination of both. ::

                option_1 = [estimator_1, estimator_2]
                option_2 = [("est-1", estimator_1), ("est-2", estimator_2)]
                option_3 = [estimator_1, ("est-2", estimator_2)]

            If different preprocessing pipelines are desired, a dictionary
            that maps estimators to preprocessing pipelines must be passed.
            The names of the estimator dictionary must correspond to the
            names of the preprocessing dictionary. ::

                preprocessing_cases = {"case-1": [trans_1, trans_2],
                                       "case-2": [alt_trans_1, alt_trans_2]}

                estimators = {"case-1": [est_a, est_b],
                              "case-2": [est_c, est_d]}

            The lists for each dictionary entry can be any of ``option_1``,
            ``option_2`` and ``option_3``.

        preprocessing : dict of lists or list, optional (default = None)
            preprocessing pipelines for the given layer. If the same
            preprocessing applies to all estimators, ``preprocessing``
            should be a list of transformer instances. The list can contain
            the instances directly, named tuples of transformers, or a
            combination of both. ::

                option_1 = [transformer_1, transformer_2]
                option_2 = [("trans-1", transformer_1),
                            ("trans-2", transformer_2)]
                option_3 = [transformer_1, ("trans-2", transformer_2)]

            If different preprocessing pipelines are desired, a dictionary
            that maps preprocessing pipelines to estimator cases must be
            passed. The names of the preprocessing dictionary must
            correspond to the names of the estimator dictionary. ::

                preprocessing_cases = {"case-1": [trans_1, trans_2],
                                       "case-2": [alt_trans_1, alt_trans_2]}

                estimators = {"case-1": [est_a, est_b],
                              "case-2": [est_c, est_d]}

            The lists for each dictionary entry can be any of ``option_1``,
            ``option_2`` and ``option_3``.

        **kwargs : optional
            optional keyword arguments to instantiate the layer with. See
            the respective ensemble for further details.

        Returns
        -------
        self : instance
            ensemble instance with layer instantiated.
        """
        if cls not in INDEXERS:
            raise NotImplementedError("Layer class not implemented. Select "
                                      "one of %r." % sorted(INDEXERS))

        # If no kwargs, instantiate with defaults.
        # Note: ``**kwargs`` is always a dict, never None, so test for
        # emptiness rather than identity with None.
        if not kwargs:
            return self._add(estimators, cls, INDEXERS[cls](), preprocessing)

        # Else, pop arguments belonging to the indexer
        indexer, kwargs_idx = INDEXERS[cls], dict()

        args = indexer.__init__.__code__.co_varnames
        for arg in args:
            if arg in kwargs:
                kwargs_idx[arg] = kwargs.pop(arg)

        if 'raise_on_exception' in args and \
                'raise_on_exception' not in kwargs_idx:
            # Fall back on the ensemble-wide setting
            kwargs_idx['raise_on_exception'] = self.raise_on_exception
        elif 'raise_on_exception' in kwargs_idx:
            # The user passed the setting explicitly: propagate it to the
            # layer as well. Guarding with ``elif`` avoids a KeyError when
            # the indexer does not accept ``raise_on_exception``.
            kwargs['raise_on_exception'] = kwargs_idx['raise_on_exception']

        indexer = indexer(**kwargs_idx)

        return self._add(estimators=estimators,
                         cls=cls,
                         indexer=indexer,
                         preprocessing=preprocessing,
                         verbose=self.verbose,
                         **kwargs)

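    # Usage sketch for ``add`` (illustrative: the scikit-learn estimators
    # and transformers are arbitrary stand-ins, and an indexer keyword such
    # as ``test_size`` is only accepted if the underlying indexer class
    # defines it):
    #
    #   from sklearn.preprocessing import StandardScaler
    #   from sklearn.linear_model import Lasso
    #   from sklearn.svm import SVR
    #
    #   transformer = EnsembleTransformer()
    #
    #   # Same preprocessing pipeline for every estimator in the layer.
    #   transformer.add('stack', [SVR(), Lasso()],
    #                   preprocessing=[StandardScaler()])
    #
    #   # Separate preprocessing cases mapped to estimator cases; keyword
    #   # arguments recognized by the indexer are popped from **kwargs and
    #   # forwarded to it.
    #   transformer.add('blend',
    #                   estimators={'sc': [SVR()], 'raw': [Lasso()]},
    #                   preprocessing={'sc': [StandardScaler()], 'raw': []},
    #                   test_size=0.3)
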
    def fit(self, X, y=None):
        """Fit the transformer.

        Same as the fit method on an ensemble, except that a sample of X
        is stored for future comparison.
        """
        X, y = check_inputs(X, y, self.array_check)
        self.id_train.fit(X)
        return super(EnsembleTransformer, self).fit(X, y)

    def predict(self, X):
        """Generate predictions for X. Same as ``transform``."""
        return self.transform(X)

    def transform(self, X, y=None):
        """Transform input :math:`X` into a prediction matrix :math:`Z`.

        If :math:`X` is the training set, the transformer will reproduce
        the :math:`Z` from the call to ``fit``. If :math:`X` is another
        dataset, :math:`Z` will be produced using base learners fitted on
        the full training data (equivalent to calling ``predict`` on an
        ensemble).
        """
        if not self.id_train.is_train(X):
            return super(EnsembleTransformer, self).predict(X)
        return self._transform(X)

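    # Behavior sketch for ``transform`` (illustrative): on the training
    # array, the fold-wise prediction matrix from ``fit`` is reproduced,
    # while any other array is predicted with base learners fitted on the
    # full training data.
    #
    #   transformer.fit(X, y)
    #   Z_train = transformer.transform(X)       # reproduces ``fit`` output
    #   Z_test = transformer.transform(X_test)   # same as ``predict``
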
    def _transform(self, X):
        """Reproduce predictions from the 'fit' call."""
        if not check_ensemble_build(self):
            # No layers instantiated, but raise_on_exception is False
            return

        X, _ = check_inputs(X, check_level=self.array_check)

        if self.shuffle:
            r = check_random_state(self.random_state)
            idx = r.permutation(X.shape[0])
            X = X[idx]

        y = self.layers.transform(X)

        if y.shape[1] == 1:
            # The meta estimator is treated as a layer and thus a prediction
            # matrix with shape [n_samples, 1] is created. Ravel before
            # returning.
            y = y.ravel()

        return y

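# ---------------------------------------------------------------------------
# Minimal end-to-end sketch (illustrative only, not part of the library).
# Assumes numpy and scikit-learn are installed; the data and estimators are
# arbitrary stand-ins.

if __name__ == '__main__':
    import numpy as np
    from sklearn.linear_model import Lasso
    from sklearn.svm import SVR

    X = np.random.rand(100, 5)
    y = np.random.rand(100)

    transformer = EnsembleTransformer()
    transformer.add('stack', [SVR(), Lasso()])

    transformer.fit(X, y)
    Z_fit = transformer.transform(X)      # training set: fold predictions
    Z_new = transformer.transform(X[:50])  # treated as unseen data

    # Expected shapes with two base regressors: (100, 2) and (50, 2).
    print(Z_fit.shape, Z_new.shape)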