
DGBFMultiOutputModel

deepgboost.dgbf.dgbf_multioutput.DGBFMultiOutputModel

Multi-output Distributed Gradient Boosting Forest model.

Operates entirely in 2-D space (n_samples, K). Each layer fits K groups of T single-output trees (one group per class) on the per-class pseudo-residual slice. NNLS weights are solved per-class.
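A minimal usage sketch (the synthetic data and manual one-hot encoding below are illustrative; only the constructor, `fit`, and `predict_raw` are taken from this page):

```python
import numpy as np
from deepgboost.dgbf.dgbf_multioutput import DGBFMultiOutputModel

# Toy 3-class problem; fit() requires one-hot targets of shape (n_samples, K).
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
labels = rng.integers(0, 3, size=200)
y = np.eye(3)[labels]

model = DGBFMultiOutputModel(n_trees=5, n_layers=3, random_state=0)
model.fit(X, y)

F = model.predict_raw(X)   # raw pre-softmax scores, shape (200, 3)
pred = F.argmax(axis=1)    # hard class predictions
```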

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `n_trees` | `int` | Number of trees per boosting layer per class. | `10` |
| `n_layers` | `int` | Number of boosting layers. | `10` |
| `max_depth` | `int or None` | Maximum depth of each decision tree. | `None` |
| `max_features` | `int, float, str or None` | Features considered at each split. | `None` |
| `min_weight_fraction_leaf` | `float` | Minimum fraction of the total (weighted) number of samples required to be at a leaf node. Prevents leaves whose accumulated Hessian mass is too small, analogous to XGBoost's `min_child_weight`. The default `0.0` preserves the original behaviour exactly. | `0.0` |
| `learning_rate` | `float` | Shrinkage factor applied to pseudo-residuals. | `0.1` |
| `subsample_min_frac` | `float` | Minimum subsample fraction at the first layer. Grows to 1.0 at the last layer (dynamic sampling, paper sec. 3.1.3). | `0.3` |
| `weight_solver` | `str` | `"nnls"` or `"uniform"`. | `'nnls'` |
| `hessian_reg` | `float` | L2 regularisation added to the Hessian denominator (mirrors XGBoost's λ). | `0.0` |
| `random_state` | `int or None` | Master seed. | `None` |
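The dynamic sampling schedule itself lives in `bootstrap_sampler`, which is not shown on this page; the sketch below illustrates the documented behaviour under the assumption of a simple linear ramp from `subsample_min_frac` at the first layer to 1.0 at the last (the real `bootstrap_sampler` may interpolate differently):

```python
def dynamic_subsample_fraction(
    layer_idx: int, n_layers: int, subsample_min_frac: float
) -> float:
    # Hypothetical linear ramp: subsample_min_frac at layer 0,
    # 1.0 at the last layer (layer_idx == n_layers - 1).
    if n_layers <= 1:
        return 1.0
    t = layer_idx / (n_layers - 1)
    return subsample_min_frac + t * (1.0 - subsample_min_frac)

# With n_layers=10 and the default subsample_min_frac=0.3:
# layer 0 -> 0.30, layer 5 -> ~0.69, layer 9 -> 1.00
```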
Source code in src/deepgboost/dgbf/dgbf_multioutput.py
```python
class DGBFMultiOutputModel:
    """
    Multi-output Distributed Gradient Boosting Forest model.

    Operates entirely in 2-D space (n_samples, K).  Each layer fits K groups
    of T single-output trees (one group per class) on the per-class
    pseudo-residual slice.  NNLS weights are solved per-class.

    Parameters
    ----------
    n_trees : int
        Number of trees per boosting layer per class.
    n_layers : int
        Number of boosting layers.
    max_depth : int or None
        Maximum depth of each decision tree.
    max_features : int, float, str or None
        Features considered at each split.
    min_weight_fraction_leaf : float, default=0.0
        Minimum fraction of the total (weighted) number of samples required
        to be at a leaf node.  Prevents leaves whose accumulated Hessian
        mass is too small, analogous to XGBoost's ``min_child_weight``.
        The default ``0.0`` preserves the original behaviour exactly.
    learning_rate : float
        Shrinkage factor applied to pseudo-residuals.
    subsample_min_frac : float
        Minimum subsample fraction at the first layer.  Grows to 1.0 at
        the last layer (dynamic sampling, paper sec. 3.1.3).
    weight_solver : str
        ``"nnls"`` or ``"uniform"``.
    hessian_reg : float
        L2 regularisation added to the Hessian denominator (mirrors XGBoost λ).
    random_state : int or None
        Master seed.
    """

    def __init__(
        self,
        n_trees: int = 10,
        n_layers: int = 10,
        max_depth: int | None = None,
        max_features: int | float | str | None = None,
        min_weight_fraction_leaf: float = 0.0,
        learning_rate: float = 0.1,
        subsample_min_frac: float = 0.3,
        weight_solver: str = "nnls",
        hessian_reg: float = 0.0,
        random_state: int | None = None,
    ):
        self.n_trees = n_trees
        self.n_layers = n_layers
        self.max_depth = max_depth
        self.max_features = max_features
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.learning_rate = learning_rate
        self.subsample_min_frac = subsample_min_frac
        self.weight_solver = weight_solver
        self.hessian_reg = hessian_reg
        self.random_state = random_state

        # Fitted state
        self.graph_: list[list[list[TreeUpdater]]] = []
        # weights_[l] has shape (K, n_trees): per-class combination weights
        self.weights_: list[np.ndarray] = []
        self.prior_: np.ndarray = np.array([])
        self.feature_importances_: np.ndarray | None = None
        self.n_features_in_: int = 0
        self.evals_result_: dict = {}

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------

    def fit(
        self,
        X: np.ndarray,
        y: np.ndarray,
        callbacks: Sequence["TrainingCallback"] | None = None,
        evals: list[tuple[np.ndarray, np.ndarray, str]] | None = None,
    ) -> "DGBFMultiOutputModel":
        """
        Fit the multi-output DGBF model.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            Training feature matrix.
        y : np.ndarray of shape (n_samples, K)
            One-hot encoded targets.
        callbacks : list of TrainingCallback, optional
        evals : list of (X_val, y_val_onehot, name) tuples, optional

        Returns
        -------
        self
        """
        if y.ndim != 2:
            raise ValueError(
                "DGBFMultiOutputModel requires a 2-D one-hot target y of shape "
                "(n_samples, K).",
            )

        obj = SoftmaxObjective()
        rng = np.random.default_rng(self.random_state)
        n_samples, n_features = X.shape
        K = y.shape[1]

        # Initialise state
        self.graph_ = []
        self.weights_ = []
        self.prior_ = obj.prior(y)  # (K,)
        self.n_features_in_ = n_features
        self.evals_result_ = {}
        self._layer_cond_numbers_: list[float] = []
        feature_importance_accum = np.zeros(n_features)

        if evals:
            for _, _, name in evals:
                self.evals_result_[name] = {"logloss": []}

        callbacks = callbacks or []
        for cb in callbacks:
            cb.before_training(self)

        for layer_idx in range(self.n_layers):
            F_prev = self.predict_raw(X)  # (n_samples, K)

            g = obj.gradient(y, F_prev)  # (n_samples, K)
            h = obj.hessian(y, F_prev)  # (n_samples, K)

            pseudo_y = (
                g / np.maximum(h + self.hessian_reg, 1e-7)
            ) * self.learning_rate  # (n_samples, K)

            stop = False
            evals_log: dict = {}
            for cb in callbacks:
                if cb.before_iteration(self, layer_idx, evals_log):
                    stop = True
            if stop:
                break

            new_layer, new_weights, layer_cond = self._fit_layer(
                X,
                pseudo_y,
                layer_idx,
                rng,
                h,
                n_samples,
                K,
            )
            self.graph_.append(new_layer)
            self.weights_.append(new_weights)
            self._layer_cond_numbers_.append(layer_cond)

            # new_layer is list[list[TreeUpdater]] of shape (K, n_trees)
            for class_trees in new_layer:
                for tree in class_trees:
                    feature_importance_accum += tree.feature_importances_

            if evals:
                for X_val, y_val, name in evals:
                    F_val = self.predict_raw(X_val)  # (n_val, K)
                    p_val = obj.transform(F_val)  # softmax → (n_val, K)
                    p_val = np.clip(p_val, 1e-7, 1.0 - 1e-7)
                    logloss = float(
                        -np.mean(np.sum(y_val * np.log(p_val), axis=1)),
                    )
                    self.evals_result_.setdefault(name, {}).setdefault(
                        "logloss",
                        [],
                    ).append(logloss)
                    evals_log[name] = {"logloss": logloss}

            stop = False
            for cb in callbacks:
                if cb.after_iteration(self, layer_idx, evals_log):
                    stop = True
            if stop:
                break

        total = feature_importance_accum.sum()
        self.feature_importances_ = (
            feature_importance_accum / total if total > 0 else feature_importance_accum
        )

        for cb in callbacks:
            cb.after_training(self)

        return self

    def _fit_layer(
        self,
        X: np.ndarray,
        pseudo_y: np.ndarray,
        layer_idx: int,
        rng: np.random.Generator,
        hessian: np.ndarray,
        n_samples: int,
        K: int,
    ) -> tuple[list[list[TreeUpdater]], np.ndarray, float]:
        """
        Fit K independent groups of n_trees single-output trees for one layer.

        Each class k trains n_trees trees on target ``pseudo_y[:, k]`` with
        ``sample_weight=hessian[:, k]``.  NNLS weights are solved per-class
        from the single-output predictions.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            Training feature matrix.
        pseudo_y : np.ndarray of shape (n_samples, K)
            Shrunk per-class pseudo-residuals for this layer.
        layer_idx : int
        rng : np.random.Generator
        hessian : np.ndarray of shape (n_samples, K)
            Per-sample per-class Hessian.
        n_samples : int
        K : int

        Returns
        -------
        new_layer : list[list[TreeUpdater]] of shape (K, n_trees)
        layer_weights : (K, n_trees)
        cond : float
            Mean condition number used as a diagnostic.
        """
        new_layer: list[list[TreeUpdater]] = []
        layer_weights = np.zeros((K, self.n_trees))
        cond_values: list[float] = []

        for k in range(K):
            class_trees: list[TreeUpdater] = []
            class_preds: list[np.ndarray] = []  # each (n_samples,)

            for t in range(self.n_trees):
                sample_idx = bootstrap_sampler(
                    n_samples=n_samples,
                    n_layers=self.n_layers,
                    layer_idx=layer_idx,
                    subsample_min_frac=self.subsample_min_frac,
                    rng=rng,
                )

                tree_seed = int(rng.integers(0, 2**31))
                tree = TreeUpdater(
                    max_depth=self.max_depth,
                    max_features=self.max_features,
                    min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                    random_state=tree_seed,
                )

                # Exact per-class Hessian as sample_weight
                sw = hessian[sample_idx, k]  # (n_sub,)
                tree.fit(
                    X[sample_idx],
                    pseudo_y[sample_idx, k],  # (n_sub,) — single-output
                    sample_weight=sw,
                )

                # tree.predict returns (n_samples, 1) for single-output tree; flatten
                class_preds.append(tree.predict(X)[:, 0])
                class_trees.append(tree)

            # (n_samples, n_trees) predictor matrix for class k
            preds_k = np.column_stack(class_preds)
            cond_values.append(float(np.linalg.cond(preds_k)))

            layer_weights[k] = weight_solver(
                preds_k,  # (n_samples, n_trees)
                pseudo_y[:, k],  # (n_samples,)
                method=self.weight_solver,
                sample_weight=hessian[:, k],
            )
            new_layer.append(class_trees)

        cond = float(np.mean(cond_values))
        return new_layer, layer_weights, cond

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------

    def predict_raw(self, X: np.ndarray) -> np.ndarray:
        """
        Raw ensemble output before softmax.

        Parameters
        ----------
        X : (n_samples, n_features)

        Returns
        -------
        np.ndarray of shape (n_samples, K)
        """
        n_samples = X.shape[0]
        K = len(self.prior_)
        accum = np.tile(self.prior_, (n_samples, 1)).copy()  # (n_samples, K)

        for layer_idx, layer in enumerate(self.graph_):
            # layer is list[list[TreeUpdater]] of shape (K, n_trees)
            for k, class_trees in enumerate(layer):
                for t, tree in enumerate(class_trees):
                    # tree.predict returns (n_samples, 1) for single-output; flatten
                    accum[:, k] += (
                        self.weights_[layer_idx][k, t] * tree.predict(X)[:, 0]
                    )

        return accum  # (n_samples, K)

    # ------------------------------------------------------------------
    # Utilities
    # ------------------------------------------------------------------

    def _check_is_fitted(self) -> None:
        if not self.graph_:
            raise RuntimeError(
                "This DGBFMultiOutputModel instance is not fitted yet. "
                "Call 'fit' first.",
            )

    def get_params(self) -> dict:
        """Return a dictionary of all constructor parameters and their values."""
        return {
            "n_trees": self.n_trees,
            "n_layers": self.n_layers,
            "max_depth": self.max_depth,
            "max_features": self.max_features,
            "min_weight_fraction_leaf": self.min_weight_fraction_leaf,
            "learning_rate": self.learning_rate,
            "subsample_min_frac": self.subsample_min_frac,
            "weight_solver": self.weight_solver,
            "hessian_reg": self.hessian_reg,
            "random_state": self.random_state,
        }

    def __repr__(self) -> str:
        p = self.get_params()
        parts = ", ".join(f"{k}={v!r}" for k, v in p.items())
        return f"DGBFMultiOutputModel({parts})"
```
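In formula form, the per-class Newton step computed at the top of each boosting layer in `fit` above is

$$
\tilde{y}_{ik} \;=\; \eta \,\frac{g_{ik}}{\max\!\left(h_{ik} + \lambda,\; 10^{-7}\right)},
$$

where $\eta$ is `learning_rate`, $\lambda$ is `hessian_reg`, and $g_{ik}$, $h_{ik}$ are the softmax gradient and Hessian diagonal for sample $i$ and class $k$.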

fit(X, y, callbacks=None, evals=None)

Fit the multi-output DGBF model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `X` | `np.ndarray` of shape `(n_samples, n_features)` | Training feature matrix. | required |
| `y` | `np.ndarray` of shape `(n_samples, K)` | One-hot encoded targets. | required |
| `callbacks` | list of `TrainingCallback`, optional | Callbacks invoked before/after training and around each boosting layer. | `None` |
| `evals` | list of `(X_val, y_val_onehot, name)` tuples, optional | Validation sets; per-layer log-loss is recorded in `evals_result_`. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `DGBFMultiOutputModel` | The fitted model (`self`). |
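A short sketch of tracking validation log-loss during fitting (the data and split are illustrative; `evals_result_` and its `"logloss"` key come from the training loop in the class source above):

```python
import numpy as np
from deepgboost.dgbf.dgbf_multioutput import DGBFMultiOutputModel

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 4))
y = np.eye(2)[rng.integers(0, 2, size=300)]  # one-hot targets

X_tr, X_val = X[:240], X[240:]
y_tr, y_val = y[:240], y[240:]

model = DGBFMultiOutputModel(n_layers=5, random_state=0)
model.fit(X_tr, y_tr, evals=[(X_val, y_val, "val")])

# One softmax log-loss entry is appended per completed boosting layer.
print(model.evals_result_["val"]["logloss"])
```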

get_params()

Return a dictionary of all constructor parameters and their values.
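Because the returned dictionary mirrors the constructor signature, it can be splatted back into the constructor to build an unfitted clone (a small sketch):

```python
from deepgboost.dgbf.dgbf_multioutput import DGBFMultiOutputModel

model = DGBFMultiOutputModel(n_trees=5, hessian_reg=1.0)
clone = DGBFMultiOutputModel(**model.get_params())
assert repr(clone) == repr(model)  # repr is built from get_params()
```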


predict_raw(X)

Raw ensemble output before softmax.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `X` | `np.ndarray` of shape `(n_samples, n_features)` | Feature matrix to score. | required |

Returns:

| Type | Description |
| --- | --- |
| `np.ndarray` of shape `(n_samples, K)` | Raw additive scores, before the softmax transform. |
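The scores are pre-softmax; to turn them into class probabilities, apply a softmax yourself. A minimal sketch, reusing `model` and `X` from the earlier example (the max-subtraction is the standard numerical-stability trick, not a helper exported by the library):

```python
import numpy as np

def softmax(F: np.ndarray) -> np.ndarray:
    # Subtract the row-wise max so np.exp cannot overflow.
    Z = F - F.max(axis=1, keepdims=True)
    expZ = np.exp(Z)
    return expZ / expZ.sum(axis=1, keepdims=True)

F = model.predict_raw(X)   # (n_samples, K) raw scores
proba = softmax(F)         # rows sum to 1
labels = proba.argmax(axis=1)
```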