Skip to content

Commit

Permalink
[R] rename proxy dmatrix -> data batch (#10016)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes authored Jan 31, 2024
1 parent 1e72dc1 commit 0955213
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 42 deletions.
2 changes: 1 addition & 1 deletion R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ export(setinfo)
export(xgb.DMatrix)
export(xgb.DMatrix.hasinfo)
export(xgb.DMatrix.save)
export(xgb.DataBatch)
export(xgb.DataIter)
export(xgb.ExternalDMatrix)
export(xgb.ProxyDMatrix)
export(xgb.QuantileDMatrix)
export(xgb.QuantileDMatrix.from_iterator)
export(xgb.attr)
Expand Down
43 changes: 22 additions & 21 deletions R-package/R/xgb.DMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ xgb.QuantileDMatrix <- function(
.Call(XGDMatrixFree_R, proxy_handle)
})
iterator_next <- function() {
return(xgb.ProxyDMatrix.internal(proxy_handle, data_iterator))
return(xgb.ProxyDMatrix(proxy_handle, data_iterator))
}
iterator_reset <- function() {
return(data_iterator$f_reset(iterator_env))
Expand Down Expand Up @@ -391,12 +391,12 @@ xgb.QuantileDMatrix <- function(
#' to know which part of the data to pass next.
#' @param f_next `function(env)` which is responsible for:\itemize{
#' \item Accessing or retrieving the next batch of data in the iterator.
#' \item Supplying this data by calling function \link{xgb.ProxyDMatrix} on it and returning the result.
#' \item Supplying this data by calling function \link{xgb.DataBatch} on it and returning the result.
#' \item Keeping track of where in the iterator batch it is or will go next, which can for example
#' be done by modifying variables in the `env` variable that is passed here.
#' \item Signaling whether there are more batches to be consumed or not, by returning `NULL`
#' when the stream of data ends (all batches in the iterator have been consumed), or the result from
#' calling \link{xgb.ProxyDMatrix} when there are more batches in the line to be consumed.
#' calling \link{xgb.DataBatch} when there are more batches in the line to be consumed.
#' }
#' @param f_reset `function(env)` which is responsible for resetting the data iterator
#' (i.e. taking it back to the first batch, called before and after the sequence of batches
Expand All @@ -406,7 +406,7 @@ xgb.QuantileDMatrix <- function(
#' (and in the same order) must be passed in subsequent iterations.
#' @return An `xgb.DataIter` object, containing the same inputs supplied here, which can then
#' be passed to \link{xgb.ExternalDMatrix}.
#' @seealso \link{xgb.ExternalDMatrix}, \link{xgb.ProxyDMatrix}.
#' @seealso \link{xgb.ExternalDMatrix}, \link{xgb.DataBatch}.
#' @export
xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
if (!is.function(f_next)) {
Expand Down Expand Up @@ -434,7 +434,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
env[["iter"]] <- curr_iter + 1L
})
return(
xgb.ProxyDMatrix(
xgb.DataBatch(
data = env[["data"]],
label = env[["label"]],
weight = env[["weight"]],
Expand Down Expand Up @@ -464,13 +464,13 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
.make.proxy.handle <- function() {
out <- .Call(XGProxyDMatrixCreate_R)
attributes(out) <- list(
class = c("xgb.DMatrix", "xgb.ProxyDMatrixHandle"),
class = c("xgb.DMatrix", "xgb.ProxyDMatrix"),
fields = new.env()
)
return(out)
}

#' @title Proxy DMatrix Updater
#' @title Structure for Data Batches
#' @description Helper function to supply data in batches of a data iterator when
#' constructing a DMatrix from external memory through \link{xgb.ExternalDMatrix}
#' or through \link{xgb.QuantileDMatrix.from_iterator}.
Expand All @@ -480,8 +480,8 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
#' when constructing a DMatrix through external memory - otherwise, one should call
#' \link{xgb.DMatrix} or \link{xgb.QuantileDMatrix}.
#'
#' The object that results from calling this function directly is \bold{not} like the other
#' `xgb.DMatrix` variants - i.e. cannot be used to train a model, nor to get predictions - only
#' The object that results from calling this function directly is \bold{not} like
#' an `xgb.DMatrix` - i.e. cannot be used to train a model, nor to get predictions - only
#' possible usage is to supply data to an iterator, from which a DMatrix is then constructed.
#'
#' For more information and for example usage, see the documentation for \link{xgb.ExternalDMatrix}.
Expand All @@ -499,11 +499,11 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
#' \link{xgb.DMatrix} for details on it.
#' \item CSR matrices, as class `dgRMatrix` from package `Matrix`.
#' }
#' @return An object of class `xgb.ProxyDMatrix`, which is just a list containing the
#' @return An object of class `xgb.DataBatch`, which is just a list containing the
#' data and parameters passed here. It does \bold{not} inherit from `xgb.DMatrix`.
#' @seealso \link{xgb.DataIter}, \link{xgb.ExternalDMatrix}.
#' @export
xgb.ProxyDMatrix <- function(
xgb.DataBatch <- function(
data,
label = NULL,
weight = NULL,
Expand All @@ -530,17 +530,18 @@ xgb.ProxyDMatrix <- function(
label_upper_bound = label_upper_bound,
feature_weights = feature_weights
)
class(out) <- "xgb.ProxyDMatrix"
class(out) <- "xgb.DataBatch"
return(out)
}

xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
# This is only for internal usage, class is not exposed to the user.
xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
lst <- data_iterator$f_next(data_iterator$env)
if (is.null(lst)) {
return(0L)
}
if (!inherits(lst, "xgb.ProxyDMatrix")) {
stop("DataIter 'f_next' must return either NULL or the result from calling 'xgb.ProxyDMatrix'.")
if (!inherits(lst, "xgb.DataBatch")) {
stop("DataIter 'f_next' must return either NULL or the result from calling 'xgb.DataBatch'.")
}

if (!is.null(lst$group) && !is.null(lst$qid)) {
Expand Down Expand Up @@ -606,7 +607,7 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
#' This should not pose any problem for `numeric` types, since they do have an inherent NaN value.
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not
#' held internally but accessed through the iterator when needed.
#' @seealso \link{xgb.DataIter}, \link{xgb.ProxyDMatrix}, \link{xgb.QuantileDMatrix.from_iterator}
#' @seealso \link{xgb.DataIter}, \link{xgb.DataBatch}, \link{xgb.QuantileDMatrix.from_iterator}
#' @examples
#' library(xgboost)
#' data(mtcars)
Expand Down Expand Up @@ -646,10 +647,10 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
#' iterator_env[["iter"]] <- curr_iter + 1
#' })
#'
#' # Function 'xgb.ProxyDMatrix' must be called manually
#' # Function 'xgb.DataBatch' must be called manually
#' # at each batch with all the appropriate attributes,
#' # such as feature names and feature types.
#' return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
#' return(xgb.DataBatch(data = x_batch, label = y_batch))
#' }
#'
#' # This moves the iterator back to its beginning
Expand Down Expand Up @@ -693,7 +694,7 @@ xgb.ExternalDMatrix <- function(
.Call(XGDMatrixFree_R, proxy_handle)
})
iterator_next <- function() {
return(xgb.ProxyDMatrix.internal(proxy_handle, data_iterator))
return(xgb.ProxyDMatrix(proxy_handle, data_iterator))
}
iterator_reset <- function() {
return(data_iterator$f_reset(data_iterator$env))
Expand Down Expand Up @@ -736,7 +737,7 @@ xgb.ExternalDMatrix <- function(
#' @inheritParams xgb.ExternalDMatrix
#' @inheritParams xgb.QuantileDMatrix
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.
#' @seealso \link{xgb.DataIter}, \link{xgb.ProxyDMatrix}, \link{xgb.ExternalDMatrix},
#' @seealso \link{xgb.DataIter}, \link{xgb.DataBatch}, \link{xgb.ExternalDMatrix},
#' \link{xgb.QuantileDMatrix}
#' @export
xgb.QuantileDMatrix.from_iterator <- function( # nolint
Expand All @@ -758,7 +759,7 @@ xgb.QuantileDMatrix.from_iterator <- function( # nolint
.Call(XGDMatrixFree_R, proxy_handle)
})
iterator_next <- function() {
return(xgb.ProxyDMatrix.internal(proxy_handle, data_iterator))
return(xgb.ProxyDMatrix(proxy_handle, data_iterator))
}
iterator_reset <- function() {
return(data_iterator$f_reset(data_iterator$env))
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions R-package/man/xgb.DataIter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions R-package/man/xgb.ExternalDMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion R-package/man/xgb.QuantileDMatrix.from_iterator.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions R-package/tests/testthat/test_dmatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa
y = mtcars[, 1]
)
)
iterator_next <- function(iterator_env, proxy_handle) {
iterator_next <- function(iterator_env) {
curr_iter <- iterator_env[["iter"]]
if (curr_iter >= 2) {
return(NULL)
Expand All @@ -487,7 +487,7 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa
on.exit({
iterator_env[["iter"]] <- curr_iter + 1
})
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
return(xgb.DataBatch(data = x_batch, label = y_batch))
}
iterator_reset <- function(iterator_env) {
iterator_env[["iter"]] <- 0
Expand Down Expand Up @@ -546,7 +546,7 @@ test_that("xgb.DMatrix: External QDM produces same results as regular QDM", {
y = mtcars[, 1]
)
)
iterator_next <- function(iterator_env, proxy_handle) {
iterator_next <- function(iterator_env) {
curr_iter <- iterator_env[["iter"]]
if (curr_iter >= 2) {
return(NULL)
Expand All @@ -561,7 +561,7 @@ test_that("xgb.DMatrix: External QDM produces same results as regular QDM", {
on.exit({
iterator_env[["iter"]] <- curr_iter + 1
})
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
return(xgb.DataBatch(data = x_batch, label = y_batch))
}
iterator_reset <- function(iterator_env) {
iterator_env[["iter"]] <- 0
Expand Down Expand Up @@ -604,7 +604,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u
y = mtcars[, 1]
)
)
iterator_next <- function(iterator_env, proxy_handle) {
iterator_next <- function(iterator_env) {
curr_iter <- iterator_env[["iter"]]
if (curr_iter >= 2) {
return(0)
Expand All @@ -618,7 +618,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u
on.exit({
iterator_env[["iter"]] <- curr_iter + 1
})
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
return(xgb.DataBatch(data = x_batch, label = y_batch))
}
iterator_reset <- function(iterator_env) {
iterator_env[["iter"]] <- 0
Expand Down

0 comments on commit 0955213

Please sign in to comment.