From 32cbab1cc00e5640fd79fd8557c098128d7efbec Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 2 Jan 2024 08:20:51 +0100 Subject: [PATCH 1/4] [R] put 'verbose' in correct argument (#9942) --- R-package/R/xgb.train.R | 16 ++++++++-------- R-package/man/xgb.train.Rd | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index d93a0643d1b3..e20c1af3e9fd 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -251,9 +251,9 @@ #' watchlist <- list(train = dtrain, eval = dtest) #' #' ## A simple xgb.train example: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread, #' objective = "binary:logistic", eval_metric = "auc") -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist) +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) #' #' ## An xgb.train example where custom objective and evaluation metric are #' ## used: @@ -272,13 +272,13 @@ #' #' # These functions could be used by passing them either: #' # as 'objective' and 'eval_metric' parameters in the params list: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread, #' objective = logregobj, eval_metric = evalerror) -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist) +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) #' #' # or through the ... 
arguments: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread) -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread) +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, #' objective = logregobj, eval_metric = evalerror) #' #' # or as dedicated 'obj' and 'feval' parameters of xgb.train: @@ -287,10 +287,10 @@ #' #' #' ## An xgb.train example of using variable learning rates at each iteration: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread, #' objective = "binary:logistic", eval_metric = "auc") #' my_etas <- list(eta = c(0.5, 0.1)) -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, #' callbacks = list(cb.reset.parameters(my_etas))) #' #' ## Early stopping: diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 0ef2e2216d66..b2eaff27c4c1 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -303,9 +303,9 @@ dtest <- with( watchlist <- list(train = dtrain, eval = dtest) ## A simple xgb.train example: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = "binary:logistic", eval_metric = "auc") -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) ## An xgb.train example where custom objective and evaluation metric are ## used: @@ -324,13 +324,13 @@ evalerror <- function(preds, dtrain) { # These functions could be used by passing them either: # as 'objective' and 'eval_metric' parameters in the params list: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = logregobj, eval_metric = evalerror) -bst <- 
xgb.train(param, dtrain, nrounds = 2, watchlist) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) # or through the ... arguments: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread) -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +param <- list(max_depth = 2, eta = 1, nthread = nthread) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, objective = logregobj, eval_metric = evalerror) # or as dedicated 'obj' and 'feval' parameters of xgb.train: @@ -339,10 +339,10 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, ## An xgb.train example of using variable learning rates at each iteration: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = "binary:logistic", eval_metric = "auc") my_etas <- list(eta = c(0.5, 0.1)) -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_etas))) ## Early stopping: From 9e33a102021aa2fa2283d5a1e6447f24c3ce9633 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 2 Jan 2024 14:20:01 +0100 Subject: [PATCH 2/4] [R] Replace `xgboost()` with `xgb.train()` in most tests and examples (#9941) --- R-package/R/utils.R | 5 +- R-package/R/xgb.Booster.R | 15 ++-- R-package/R/xgb.load.R | 6 +- R-package/R/xgb.save.R | 6 +- R-package/R/xgb.save.raw.R | 4 +- R-package/R/xgb.serialize.R | 4 +- R-package/demo/create_sparse_matrix.R | 4 +- R-package/demo/interaction_constraints.R | 18 ++-- R-package/demo/poisson_regression.R | 4 +- .../a-compatibility-note-for-saveRDS-save.Rd | 5 +- R-package/man/predict.xgb.Booster.Rd | 15 ++-- R-package/man/xgb.load.Rd | 6 +- R-package/man/xgb.save.Rd | 6 +- R-package/man/xgb.save.raw.Rd | 4 +- R-package/man/xgb.serialize.Rd | 4 +- R-package/tests/testthat/test_basic.R | 84 ++++++++++--------- R-package/tests/testthat/test_callbacks.R | 8 +- 
R-package/tests/testthat/test_gc_safety.R | 4 +- R-package/tests/testthat/test_helpers.R | 49 ++++++----- .../testthat/test_interaction_constraints.R | 6 +- R-package/tests/testthat/test_interactions.R | 10 +-- R-package/tests/testthat/test_io.R | 4 +- R-package/tests/testthat/test_monotone.R | 6 +- .../tests/testthat/test_parameter_exposure.R | 14 ++-- .../tests/testthat/test_poisson_regression.R | 4 +- R-package/tests/testthat/test_unicode.R | 6 +- R-package/vignettes/xgboostfromJSON.Rmd | 5 +- 27 files changed, 156 insertions(+), 150 deletions(-) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index bf08c481d118..1798e4ad1aff 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -383,8 +383,9 @@ NULL #' #' @examples #' data(agaricus.train, package='xgboost') -#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +#' bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), +#' max_depth = 2, eta = 1, nthread = 2, nrounds = 2, +#' objective = "binary:logistic") #' #' # Save as a stand-alone file; load it with xgb.load() #' xgb.save(bst, 'xgb.model') diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 4e980641a17d..371f3d129142 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -272,9 +272,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' train <- agaricus.train #' test <- agaricus.test #' -#' bst <- xgboost( -#' data = train$data, -#' label = train$label, +#' bst <- xgb.train( +#' data = xgb.DMatrix(train$data, label = train$label), #' max_depth = 2, #' eta = 0.5, #' nthread = nthread, @@ -316,9 +315,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' #' set.seed(11) #' -#' bst <- xgboost( -#' data = as.matrix(iris[, -5]), -#' label = lb, +#' bst <- xgb.train( +#' data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), #' max_depth = 4, #' 
eta = 0.5, #' nthread = 2, @@ -341,9 +339,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' # compare with predictions from softmax: #' set.seed(11) #' -#' bst <- xgboost( -#' data = as.matrix(iris[, -5]), -#' label = lb, +#' bst <- xgb.train( +#' data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), #' max_depth = 4, #' eta = 0.5, #' nthread = 2, diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index cbdbdacc35f2..e8f9e0023892 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -29,8 +29,10 @@ #' #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost( -#' data = train$data, label = train$label, max_depth = 2, eta = 1, +#' bst <- xgb.train( +#' data = xgb.DMatrix(train$data, label = train$label), +#' max_depth = 2, +#' eta = 1, #' nthread = nthread, #' nrounds = 2, #' objective = "binary:logistic" diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index ab55bc4a9699..32b7d96180d2 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -32,8 +32,10 @@ #' #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost( -#' data = train$data, label = train$label, max_depth = 2, eta = 1, +#' bst <- xgb.train( +#' data = xgb.DMatrix(train$data, label = train$label), +#' max_depth = 2, +#' eta = 1, #' nthread = nthread, #' nrounds = 2, #' objective = "binary:logistic" diff --git a/R-package/R/xgb.save.raw.R b/R-package/R/xgb.save.raw.R index cad0fb0e01c2..63c06e0715d5 100644 --- a/R-package/R/xgb.save.raw.R +++ b/R-package/R/xgb.save.raw.R @@ -23,8 +23,8 @@ #' #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") +#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, +#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") #' #' raw <- xgb.save.raw(bst) #' bst <- xgb.load.raw(raw) diff 
--git a/R-package/R/xgb.serialize.R b/R-package/R/xgb.serialize.R index 00bbb429320c..c20d2b51c312 100644 --- a/R-package/R/xgb.serialize.R +++ b/R-package/R/xgb.serialize.R @@ -9,8 +9,8 @@ #' data(agaricus.test, package='xgboost') #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") +#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, +#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") #' raw <- xgb.serialize(bst) #' bst <- xgb.unserialize(raw) #' diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index f8afb14ba04f..08a40608cdf8 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -81,8 +81,8 @@ output_vector <- df[, Y := 0][Improved == "Marked", Y := 1][, Y] # Following is the same process as other demo cat("Learning...\n") -bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 9, - eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") +bst <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = output_vector), max_depth = 9, + eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst) print(importance) diff --git a/R-package/demo/interaction_constraints.R b/R-package/demo/interaction_constraints.R index 9e694e3eb3db..72287513eeeb 100644 --- a/R-package/demo/interaction_constraints.R +++ b/R-package/demo/interaction_constraints.R @@ -74,26 +74,26 @@ cols2ids <- function(object, col_names) { interaction_list_fid <- cols2ids(interaction_list, colnames(train)) # Fit model with interaction constraints -bst <- xgboost(data = train, label = y, max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000, - interaction_constraints = interaction_list_fid) +bst <- xgb.train(data = 
xgb.DMatrix(train, label = y), max_depth = 4, + eta = 0.1, nthread = 2, nrounds = 1000, + interaction_constraints = interaction_list_fid) bst_tree <- xgb.model.dt.tree(colnames(train), bst) bst_interactions <- treeInteractions(bst_tree, 4) # interactions constrained to combinations of V1*V2 and V3*V4*V5 # Fit model without interaction constraints -bst2 <- xgboost(data = train, label = y, max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000) +bst2 <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, + eta = 0.1, nthread = 2, nrounds = 1000) bst2_tree <- xgb.model.dt.tree(colnames(train), bst2) bst2_interactions <- treeInteractions(bst2_tree, 4) # much more interactions # Fit model with both interaction and monotonicity constraints -bst3 <- xgboost(data = train, label = y, max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000, - interaction_constraints = interaction_list_fid, - monotone_constraints = c(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0)) +bst3 <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, + eta = 0.1, nthread = 2, nrounds = 1000, + interaction_constraints = interaction_list_fid, + monotone_constraints = c(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0)) bst3_tree <- xgb.model.dt.tree(colnames(train), bst3) bst3_interactions <- treeInteractions(bst3_tree, 4) diff --git a/R-package/demo/poisson_regression.R b/R-package/demo/poisson_regression.R index 121ac17f2173..685314b30e96 100644 --- a/R-package/demo/poisson_regression.R +++ b/R-package/demo/poisson_regression.R @@ -1,6 +1,6 @@ data(mtcars) head(mtcars) -bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11], - objective = 'count:poisson', nrounds = 5) +bst <- xgb.train(data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]), + objective = 'count:poisson', nrounds = 5) pred <- predict(bst, as.matrix(mtcars[, -11])) sqrt(mean((pred - mtcars[, 11]) ^ 2)) diff --git a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd 
b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd index 85b52243c1b9..023cff9fdc90 100644 --- a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd +++ b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd @@ -33,8 +33,9 @@ For more details and explanation about model persistence and archival, consult t } \examples{ data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), + max_depth = 2, eta = 1, nthread = 2, nrounds = 2, + objective = "binary:logistic") # Save as a stand-alone file; load it with xgb.load() xgb.save(bst, 'xgb.model') diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 135177dda99b..f47cab321021 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -136,9 +136,8 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost( - data = train$data, - label = train$label, +bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 0.5, nthread = nthread, @@ -180,9 +179,8 @@ num_class <- 3 set.seed(11) -bst <- xgboost( - data = as.matrix(iris[, -5]), - label = lb, +bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 4, eta = 0.5, nthread = 2, @@ -205,9 +203,8 @@ sum(pred_labels != lb) / length(lb) # compare with predictions from softmax: set.seed(11) -bst <- xgboost( - data = as.matrix(iris[, -5]), - label = lb, +bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 4, eta = 0.5, nthread = 2, diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 1a406cc21d0e..63f551d7a914 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -34,8 +34,10 @@ 
data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, eta = 1, +bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), + max_depth = 2, + eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index a7e160a12a9b..22c6c8fa3652 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -38,8 +38,10 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, eta = 1, +bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), + max_depth = 2, + eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index 0835519336a0..498272148022 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -32,8 +32,8 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") +bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, + eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") raw <- xgb.save.raw(bst) bst <- xgb.load.raw(raw) diff --git a/R-package/man/xgb.serialize.Rd b/R-package/man/xgb.serialize.Rd index 952441d98bea..5bf4205f82b5 100644 --- a/R-package/man/xgb.serialize.Rd +++ b/R-package/man/xgb.serialize.Rd @@ -21,8 +21,8 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") +bst <- xgb.train(data = 
xgb.DMatrix(train$data, label = train$label), max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") raw <- xgb.serialize(bst) bst <- xgb.unserialize(raw) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 8ecf86e87178..d4b3a6be36af 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -16,10 +16,11 @@ n_threads <- 1 test_that("train and predict binary classification", { nrounds <- 2 expect_output( - bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = nrounds, - objective = "binary:logistic", eval_metric = "error" + objective = "binary:logistic", eval_metric = "error", + watchlist = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error" ) @@ -104,9 +105,8 @@ test_that("dart prediction works", { rnorm(100) set.seed(1994) - booster_by_xgboost <- xgboost( - data = d, - label = y, + booster_by_xgboost <- xgb.train( + data = xgb.DMatrix(d, label = y), max_depth = 2, booster = "dart", rate_drop = 0.5, @@ -151,10 +151,11 @@ test_that("train and predict softprob", { lb <- as.numeric(iris$Species) - 1 set.seed(11) expect_output( - bst <- xgboost( - data = as.matrix(iris[, -5]), label = lb, + bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5, - objective = "multi:softprob", num_class = 3, eval_metric = "merror" + objective = "multi:softprob", num_class = 3, eval_metric = "merror", + watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ), "train-merror" ) @@ -201,10 +202,11 @@ test_that("train and predict softmax", { lb <- as.numeric(iris$Species) - 1 set.seed(11) expect_output( - bst <- xgboost( - data = as.matrix(iris[, -5]), label = lb, + bst <- xgb.train( + data = 
xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5, - objective = "multi:softmax", num_class = 3, eval_metric = "merror" + objective = "multi:softmax", num_class = 3, eval_metric = "merror", + watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ), "train-merror" ) @@ -222,11 +224,12 @@ test_that("train and predict RF", { set.seed(11) lb <- train$label # single iteration - bst <- xgboost( - data = train$data, label = lb, max_depth = 5, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = lb), max_depth = 5, nthread = n_threads, nrounds = 1, objective = "binary:logistic", eval_metric = "error", - num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1 + num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1, + watchlist = list(train = xgb.DMatrix(train$data, label = lb)) ) expect_equal(bst$niter, 1) expect_equal(xgb.ntree(bst), 20) @@ -248,12 +251,13 @@ test_that("train and predict RF with softprob", { lb <- as.numeric(iris$Species) - 1 nrounds <- 15 set.seed(11) - bst <- xgboost( - data = as.matrix(iris[, -5]), label = lb, + bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds, objective = "multi:softprob", eval_metric = "merror", num_class = 3, verbose = 0, - num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5 + num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5, + watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ) expect_equal(bst$niter, 15) expect_equal(xgb.ntree(bst), 15 * 3 * 4) @@ -271,10 +275,11 @@ test_that("train and predict RF with softprob", { test_that("use of multiple eval metrics works", { expect_output( - bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = 
"binary:logistic", - eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" + eval_metric = "error", eval_metric = "auc", eval_metric = "logloss", + watchlist = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error.*train-auc.*train-logloss" ) @@ -282,10 +287,11 @@ test_that("use of multiple eval metrics works", { expect_equal(dim(bst$evaluation_log), c(2, 4)) expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss")) expect_output( - bst2 <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst2 <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", - eval_metric = list("error", "auc", "logloss") + eval_metric = list("error", "auc", "logloss"), + watchlist = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error.*train-auc.*train-logloss" ) @@ -361,7 +367,7 @@ test_that("xgb.cv works", { expect_is(cv, "xgb.cv.synchronous") expect_false(is.null(cv$evaluation_log)) expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03) - expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008) + expect_lt(cv$evaluation_log[, min(test_error_std)], 0.0085) expect_equal(cv$niter, 2) expect_false(is.null(cv$folds) && is.list(cv$folds)) expect_length(cv$folds, 5) @@ -391,8 +397,8 @@ test_that("xgb.cv works with stratified folds", { test_that("train and predict with non-strict classes", { # standard dense matrix input train_dense <- as.matrix(train$data) - bst <- xgboost( - data = train_dense, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", verbose = 0 ) @@ -402,8 +408,8 @@ test_that("train and predict with non-strict classes", { class(train_dense) <- "shmatrix" expect_true(is.matrix(train_dense)) expect_error( - bst <- 
xgboost( - data = train_dense, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", verbose = 0 ), @@ -416,8 +422,8 @@ test_that("train and predict with non-strict classes", { class(train_dense) <- c("pphmatrix", "shmatrix") expect_true(is.matrix(train_dense)) expect_error( - bst <- xgboost( - data = train_dense, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", verbose = 0 ), @@ -480,8 +486,8 @@ test_that("colsample_bytree works", { }) test_that("Configuration works", { - bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" ) @@ -521,8 +527,8 @@ test_that("strict_shape works", { y <- as.numeric(iris$Species) - 1 X <- as.matrix(iris[, -5]) - bst <- xgboost( - data = X, label = y, + bst <- xgb.train( + data = xgb.DMatrix(X, label = y), max_depth = 2, nrounds = n_rounds, nthread = n_threads, objective = "multi:softprob", num_class = 3, eval_metric = "merror" ) @@ -536,8 +542,8 @@ test_that("strict_shape works", { X <- agaricus.train$data y <- agaricus.train$label - bst <- xgboost( - data = X, label = y, max_depth = 2, nthread = n_threads, + bst <- xgb.train( + data = xgb.DMatrix(X, label = y), max_depth = 2, nthread = n_threads, nrounds = n_rounds, objective = "binary:logistic", eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" ) @@ -555,8 +561,8 @@ test_that("'predict' accepts CSR data", { x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix") x_csr <- as(x_csc, "RsparseMatrix") x_spv <- as(x_csc, 
"sparseVector") - bst <- xgboost( - data = X, label = y, objective = "binary:logistic", + bst <- xgb.train( + data = xgb.DMatrix(X, label = y), objective = "binary:logistic", nrounds = 5L, verbose = FALSE, nthread = n_threads, ) p_csc <- predict(bst, x_csc) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index b5d3c5310f00..63a4c3f252eb 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -265,14 +265,14 @@ test_that("early stopping works with titanic", { dtx <- model.matrix(~ 0 + ., data = titanic[, c("Pclass", "Sex")]) dty <- titanic$Survived - xgboost::xgboost( - data = dtx, - label = dty, + xgboost::xgb.train( + data = xgb.DMatrix(dtx, label = dty), objective = "binary:logistic", eval_metric = "auc", nrounds = 100, early_stopping_rounds = 3, - nthread = n_threads + nthread = n_threads, + watchlist = list(train = xgb.DMatrix(dtx, label = dty)) ) expect_true(TRUE) # should not crash diff --git a/R-package/tests/testthat/test_gc_safety.R b/R-package/tests/testthat/test_gc_safety.R index f77af1eabd0e..44d8f81a4eda 100644 --- a/R-package/tests/testthat/test_gc_safety.R +++ b/R-package/tests/testthat/test_gc_safety.R @@ -6,8 +6,8 @@ test_that("train and prediction when gctorture is on", { train <- agaricus.train test <- agaricus.test gctorture(TRUE) - bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") + bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max.depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") pred <- predict(bst, test$data) gctorture(FALSE) expect_length(pred, length(test$label)) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 7fae052b4f47..fd1fffbac640 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -25,15 +25,15 
@@ if (isTRUE(VCD_AVAILABLE)) { label <- df[, ifelse(Improved == "Marked", 1, 0)] # binary - bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9, - eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gbtree", - base_score = 0.5) + bst.Tree <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 9, + eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, + objective = "binary:logistic", booster = "gbtree", + base_score = 0.5) - bst.GLM <- xgboost(data = sparse_matrix, label = label, - eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gblinear", - base_score = 0.5) + bst.GLM <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), + eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, + objective = "binary:logistic", booster = "gblinear", + base_score = 0.5) feature.names <- colnames(sparse_matrix) } @@ -41,13 +41,13 @@ if (isTRUE(VCD_AVAILABLE)) { # multiclass mlabel <- as.numeric(iris$Species) - 1 nclass <- 3 -mbst.Tree <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0, - max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds, - objective = "multi:softprob", num_class = nclass, base_score = 0) +mbst.Tree <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0, + max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds, + objective = "multi:softprob", num_class = nclass, base_score = 0) -mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0, - booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds, - objective = "multi:softprob", num_class = nclass, base_score = 0) +mbst.GLM <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0, + booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds, + objective = "multi:softprob", num_class = nclass, base_score = 0) test_that("xgb.dump works", { @@ -71,8 +71,9 @@ 
test_that("xgb.dump works for gblinear", { expect_length(xgb.dump(bst.GLM), 14) # also make sure that it works properly for a sparse model where some coefficients # are 0 from setting large L1 regularization: - bst.GLM.sp <- xgboost(data = sparse_matrix, label = label, eta = 1, nthread = 2, nrounds = 1, - alpha = 2, objective = "binary:logistic", booster = "gblinear") + bst.GLM.sp <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), eta = 1, + nthread = 2, nrounds = 1, + alpha = 2, objective = "binary:logistic", booster = "gblinear") d.sp <- xgb.dump(bst.GLM.sp) expect_length(d.sp, 14) expect_gt(sum(d.sp == "0"), 0) @@ -168,7 +169,7 @@ test_that("SHAPs sum to predictions, with or without DART", { nrounds <- 30 for (booster in list("gbtree", "dart")) { - fit <- xgboost( + fit <- xgb.train( params = c( list( nthread = 2, @@ -177,8 +178,7 @@ test_that("SHAPs sum to predictions, with or without DART", { eval_metric = "rmse"), if (booster == "dart") list(rate_drop = .01, one_drop = TRUE)), - data = d, - label = y, + data = xgb.DMatrix(d, label = y), nrounds = nrounds) pr <- function(...) 
{ @@ -360,9 +360,8 @@ test_that("xgb.importance works with and without feature names", { expect_equal(importance_from_dump(), importance, tolerance = 1e-6) ## decision stump - m <- xgboost::xgboost( - data = as.matrix(data.frame(x = c(0, 1))), - label = c(1, 2), + m <- xgboost::xgb.train( + data = xgb.DMatrix(as.matrix(data.frame(x = c(0, 1))), label = c(1, 2)), nrounds = 1, base_score = 0.5, nthread = 2 @@ -393,9 +392,9 @@ test_that("xgb.importance works with GLM model", { test_that("xgb.model.dt.tree and xgb.importance work with a single split model", { .skip_if_vcd_not_available() - bst1 <- xgboost(data = sparse_matrix, label = label, max_depth = 1, - eta = 1, nthread = 2, nrounds = 1, verbose = 0, - objective = "binary:logistic") + bst1 <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 1, + eta = 1, nthread = 2, nrounds = 1, verbose = 0, + objective = "binary:logistic") expect_error(dt <- xgb.model.dt.tree(model = bst1), regexp = NA) # no error expect_equal(nrow(dt), 3) expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error diff --git a/R-package/tests/testthat/test_interaction_constraints.R b/R-package/tests/testthat/test_interaction_constraints.R index ee4c453b3de5..cfffb029ce84 100644 --- a/R-package/tests/testthat/test_interaction_constraints.R +++ b/R-package/tests/testthat/test_interaction_constraints.R @@ -13,9 +13,9 @@ train <- matrix(c(x1, x2, x3), ncol = 3) test_that("interaction constraints for regression", { # Fit a model that only allows interaction between x1 and x2 - bst <- xgboost(data = train, label = y, max_depth = 3, - eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, - interaction_constraints = list(c(0, 1))) + bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 3, + eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, + interaction_constraints = list(c(0, 1))) # Set all observations to have the same x3 values then increment # by the same amount diff --git 
a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R index 398531e0ec60..645efc12a14c 100644 --- a/R-package/tests/testthat/test_interactions.R +++ b/R-package/tests/testthat/test_interactions.R @@ -98,15 +98,14 @@ test_that("SHAP contribution values are not NAN", { ivs <- c("x1", "x2") - fit <- xgboost( + fit <- xgb.train( verbose = 0, params = list( objective = "reg:squarederror", eval_metric = "rmse", nthread = n_threads ), - data = as.matrix(subset(d, fold == 2)[, ivs]), - label = subset(d, fold == 2)$y, + data = xgb.DMatrix(as.matrix(subset(d, fold == 2)[, ivs]), label = subset(d, fold == 2)$y), nrounds = 3 ) @@ -169,9 +168,8 @@ test_that("multiclass feature interactions work", { test_that("SHAP single sample works", { train <- agaricus.train test <- agaricus.test - booster <- xgboost( - data = train$data, - label = train$label, + booster <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, nrounds = 4, objective = "binary:logistic", diff --git a/R-package/tests/testthat/test_io.R b/R-package/tests/testthat/test_io.R index 8cf5a9ae97c2..3c64ddc720bf 100644 --- a/R-package/tests/testthat/test_io.R +++ b/R-package/tests/testthat/test_io.R @@ -7,8 +7,8 @@ test <- agaricus.test test_that("load/save raw works", { nrounds <- 8 - booster <- xgboost( - data = train$data, label = train$label, + booster <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), nrounds = nrounds, objective = "binary:logistic", nthread = 2 ) diff --git a/R-package/tests/testthat/test_monotone.R b/R-package/tests/testthat/test_monotone.R index cb5827698878..671c02bd0658 100644 --- a/R-package/tests/testthat/test_monotone.R +++ b/R-package/tests/testthat/test_monotone.R @@ -7,9 +7,9 @@ train <- matrix(x, ncol = 1) test_that("monotone constraints for regression", { - bst <- xgboost(data = train, label = y, max_depth = 2, - eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, - monotone_constraints = -1) + bst 
<- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 2, + eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, + monotone_constraints = -1) pred <- predict(bst, train) diff --git a/R-package/tests/testthat/test_parameter_exposure.R b/R-package/tests/testthat/test_parameter_exposure.R index ea71ca7b7e39..5b12fde01a37 100644 --- a/R-package/tests/testthat/test_parameter_exposure.R +++ b/R-package/tests/testthat/test_parameter_exposure.R @@ -10,13 +10,13 @@ dtest <- xgb.DMatrix( agaricus.test$data, label = agaricus.test$label, nthread = 2 ) -bst <- xgboost(data = dtrain, - max_depth = 2, - eta = 1, - nrounds = 10, - nthread = 1, - verbose = 0, - objective = "binary:logistic") +bst <- xgb.train(data = dtrain, + max_depth = 2, + eta = 1, + nrounds = 10, + nthread = 1, + verbose = 0, + objective = "binary:logistic") test_that("call is exposed to R", { expect_false(is.null(bst$call)) diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R index 55918b57ad17..e251a13ad854 100644 --- a/R-package/tests/testthat/test_poisson_regression.R +++ b/R-package/tests/testthat/test_poisson_regression.R @@ -4,8 +4,8 @@ set.seed(1994) test_that("Poisson regression works", { data(mtcars) - bst <- xgboost( - data = as.matrix(mtcars[, -11]), label = mtcars[, 11], + bst <- xgb.train( + data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]), objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2 ) expect_equal(class(bst), "xgb.Booster") diff --git a/R-package/tests/testthat/test_unicode.R b/R-package/tests/testthat/test_unicode.R index c8a225716f81..718d58109163 100644 --- a/R-package/tests/testthat/test_unicode.R +++ b/R-package/tests/testthat/test_unicode.R @@ -8,9 +8,9 @@ set.seed(1994) test_that("Can save and load models with Unicode paths", { nrounds <- 2 - bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = nrounds, objective = 
"binary:logistic", - eval_metric = "error") + bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, + eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic", + eval_metric = "error") tmpdir <- tempdir() lapply(c("모델.json", "がうる・ぐら.json", "类继承.ubj"), function(x) { path <- file.path(tmpdir, x) diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd index e7ccdf3a9d06..f5bc3ad9b7f0 100644 --- a/R-package/vignettes/xgboostfromJSON.Rmd +++ b/R-package/vignettes/xgboostfromJSON.Rmd @@ -52,9 +52,8 @@ labels <- c(1, 1, 1, data <- data.frame(dates = dates, labels = labels) -bst <- xgboost( - data = as.matrix(data$dates), - label = labels, +bst <- xgb.train( + data = xgb.DMatrix(as.matrix(data$dates), label = labels), nthread = 2, nrounds = 1, objective = "binary:logistic", From 49247458f9ede5e4073f5a38b4d6deafc20238c8 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Wed, 3 Jan 2024 08:26:55 +0100 Subject: [PATCH 3/4] [R] Minor improvements for evaluation printing (#9940) --- R-package/R/callbacks.R | 3 ++- R-package/R/xgb.cv.R | 5 +++-- R-package/tests/testthat/test_callbacks.R | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 54f821a795cb..f8f3b5a30ceb 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -770,7 +770,8 @@ xgb.gblinear.history <- function(model, class_index = NULL) { if (!is.null(eval_err)) { if (length(eval_res) != length(eval_err)) stop('eval_res & eval_err lengths mismatch') - res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse = '\t') + # Note: UTF-8 code for plus/minus sign is U+00B1 + res <- paste0(sprintf("%s:%f\U00B1%f", enames, eval_res, eval_err), collapse = '\t') } else { res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t') } diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 1c17d86f042f..b0d8c4ebeec7 100644 --- 
a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -244,8 +244,9 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing ) }) msg <- simplify2array(msg) - bst_evaluation <- rowMeans(msg) - bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2) # nolint + # Note: these variables might look unused here, but they are used in the callbacks + bst_evaluation <- rowMeans(msg) # nolint + bst_evaluation_err <- apply(msg, 1, sd) # nolint for (f in cb$post_iter) f() diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 63a4c3f252eb..de515038074c 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -57,7 +57,7 @@ test_that("cb.print.evaluation works as expected", { expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000") bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2) - expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\\+0.100000\ttest-auc:0.800000\\+0.200000") + expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000±0.100000\ttest-auc:0.800000±0.200000") }) test_that("cb.evaluation.log works as expected", { From 3c004a4145c667df84cf7785a672defbde30c2b6 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Wed, 3 Jan 2024 10:29:21 +0100 Subject: [PATCH 4/4] [R] Add missing DMatrix functions (#9929) * `XGDMatrixGetQuantileCut` * `XGDMatrixNumNonMissing` * `XGDMatrixGetDataAsCSR` --------- Co-authored-by: Jiaming Yuan --- R-package/NAMESPACE | 5 + R-package/R/xgb.DMatrix.R | 105 ++++++++++++++ R-package/R/xgboost.R | 3 +- R-package/man/xgb.get.DMatrix.data.Rd | 19 +++ .../man/xgb.get.DMatrix.num.non.missing.Rd | 17 +++ R-package/man/xgb.get.DMatrix.qcut.Rd | 58 ++++++++ R-package/src/Makevars.in | 1 + R-package/src/Makevars.win | 1 + R-package/src/init.c | 6 + R-package/src/xgboost_R.cc | 130 +++++++++++++++++- R-package/src/xgboost_R.h | 25 ++++ R-package/tests/testthat/test_dmatrix.R | 59 ++++++++ 
src/data/array_interface.cc | 13 ++ src/data/array_interface.h | 5 - 14 files changed, 438 insertions(+), 9 deletions(-) create mode 100644 R-package/man/xgb.get.DMatrix.data.Rd create mode 100644 R-package/man/xgb.get.DMatrix.num.non.missing.Rd create mode 100644 R-package/man/xgb.get.DMatrix.qcut.Rd create mode 100644 src/data/array_interface.cc diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 40ede23a537a..e6f7a82b8e20 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -37,6 +37,9 @@ export(xgb.create.features) export(xgb.cv) export(xgb.dump) export(xgb.gblinear.history) +export(xgb.get.DMatrix.data) +export(xgb.get.DMatrix.num.non.missing) +export(xgb.get.DMatrix.qcut) export(xgb.get.config) export(xgb.ggplot.deepness) export(xgb.ggplot.importance) @@ -60,6 +63,7 @@ export(xgb.unserialize) export(xgboost) import(methods) importClassesFrom(Matrix,dgCMatrix) +importClassesFrom(Matrix,dgRMatrix) importClassesFrom(Matrix,dgeMatrix) importFrom(Matrix,colSums) importFrom(Matrix,sparse.model.matrix) @@ -83,6 +87,7 @@ importFrom(graphics,points) importFrom(graphics,title) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) +importFrom(methods,new) importFrom(stats,median) importFrom(stats,predict) importFrom(utils,head) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 6acd1e6b2646..cc16e18da8ee 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -526,6 +526,111 @@ setinfo.xgb.DMatrix <- function(object, name, info) { stop("setinfo: unknown info name ", name) } +#' @title Get Quantile Cuts from DMatrix +#' @description Get the quantile cuts (a.k.a. borders) from an `xgb.DMatrix` +#' that has been quantized for the histogram method (`tree_method="hist"`). +#' +#' These cuts are used in order to assign observations to bins - i.e. these are ordered +#' boundaries which are used to determine assignment condition `border_low < x < border_high`. 
+#' As such, the first and last bin will be outside of the range of the data, so as to include +#' all of the observations there. +#' +#' If a given column has 'n' bins, then there will be 'n+1' cuts / borders for that column, +#' which will be output in sorted order from lowest to highest. +#' +#' Different columns can have different numbers of bins according to their range. +#' @param dmat An `xgb.DMatrix` object, as returned by \link{xgb.DMatrix}. +#' @param output Output format for the quantile cuts. Possible options are:\itemize{ +#' \item `"list"` will return the output as a list with one entry per column, where +#' each column will have a numeric vector with the cuts. The list will be named if +#' `dmat` has column names assigned to it. +#' \item `"arrays"` will return a list with entries `indptr` (base-0 indexing) and +#' `data`. Here, the cuts for column 'i' are obtained by slicing 'data' from entries +#' `indptr[i]+1` to `indptr[i+1]`. +#' } +#' @return The quantile cuts, in the format specified by parameter `output`. 
+#' @examples +#' library(xgboost) +#' data(mtcars) +#' y <- mtcars$mpg +#' x <- as.matrix(mtcars[, -1]) +#' dm <- xgb.DMatrix(x, label = y, nthread = 1) +#' +#' # DMatrix is not quantized right away, but will be once a hist model is generated +#' model <- xgb.train( +#' data = dm, +#' params = list( +#' tree_method = "hist", +#' max_bin = 8, +#' nthread = 1 +#' ), +#' nrounds = 3 +#' ) +#' +#' # Now can get the quantile cuts +#' xgb.get.DMatrix.qcut(dm) +#' @export +xgb.get.DMatrix.qcut <- function(dmat, output = c("list", "arrays")) { # nolint + stopifnot(inherits(dmat, "xgb.DMatrix")) + output <- head(output, 1L) + stopifnot(output %in% c("list", "arrays")) + res <- .Call(XGDMatrixGetQuantileCut_R, dmat) + if (output == "arrays") { + return(res) + } else { + feature_names <- getinfo(dmat, "feature_name") + ncols <- length(res$indptr) - 1 + out <- lapply( + seq(1, ncols), + function(col) { + st <- res$indptr[col] + end <- res$indptr[col + 1] + if (end <= st) { + return(numeric()) + } + return(res$data[seq(1 + st, end)]) + } + ) + if (NROW(feature_names)) { + names(out) <- feature_names + } + return(out) + } +} + +#' @title Get Number of Non-Missing Entries in DMatrix +#' @param dmat An `xgb.DMatrix` object, as returned by \link{xgb.DMatrix}. +#' @return The number of non-missing entries in the DMatrix +#' @export +xgb.get.DMatrix.num.non.missing <- function(dmat) { # nolint + stopifnot(inherits(dmat, "xgb.DMatrix")) + return(.Call(XGDMatrixNumNonMissing_R, dmat)) +} + +#' @title Get DMatrix Data +#' @param dmat An `xgb.DMatrix` object, as returned by \link{xgb.DMatrix}. +#' @return The data held in the DMatrix, as a sparse CSR matrix (class `dgRMatrix` +#' from package `Matrix`). If it had feature names, these will be added as column names +#' in the output. 
+#' @export +xgb.get.DMatrix.data <- function(dmat) { + stopifnot(inherits(dmat, "xgb.DMatrix")) + res <- .Call(XGDMatrixGetDataAsCSR_R, dmat) + out <- methods::new("dgRMatrix") + nrows <- as.integer(length(res$indptr) - 1) + out@p <- res$indptr + out@j <- res$indices + out@x <- res$data + out@Dim <- as.integer(c(nrows, res$ncols)) + + feature_names <- getinfo(dmat, "feature_name") + dim_names <- list(NULL, NULL) + if (NROW(feature_names)) { + dim_names[[2L]] <- feature_names + } + out@Dimnames <- dim_names + return(out) +} #' Get a new DMatrix containing the specified rows of #' original xgb.DMatrix object diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index f61c535e228f..af6253a72792 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -82,7 +82,7 @@ NULL NULL # Various imports -#' @importClassesFrom Matrix dgCMatrix dgeMatrix +#' @importClassesFrom Matrix dgCMatrix dgeMatrix dgRMatrix #' @importFrom Matrix colSums #' @importFrom Matrix sparse.model.matrix #' @importFrom Matrix sparseVector @@ -98,6 +98,7 @@ NULL #' @importFrom data.table setnames #' @importFrom jsonlite fromJSON #' @importFrom jsonlite toJSON +#' @importFrom methods new #' @importFrom utils object.size str tail #' @importFrom stats predict #' @importFrom stats median diff --git a/R-package/man/xgb.get.DMatrix.data.Rd b/R-package/man/xgb.get.DMatrix.data.Rd new file mode 100644 index 000000000000..36783f5835ff --- /dev/null +++ b/R-package/man/xgb.get.DMatrix.data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{xgb.get.DMatrix.data} +\alias{xgb.get.DMatrix.data} +\title{Get DMatrix Data} +\usage{ +xgb.get.DMatrix.data(dmat) +} +\arguments{ +\item{dmat}{An \code{xgb.DMatrix} object, as returned by \link{xgb.DMatrix}.} +} +\value{ +The data held in the DMatrix, as a sparse CSR matrix (class \code{dgRMatrix} +from package \code{Matrix}). 
If it had feature names, these will be added as column names +in the output. +} +\description{ +Get DMatrix Data +} diff --git a/R-package/man/xgb.get.DMatrix.num.non.missing.Rd b/R-package/man/xgb.get.DMatrix.num.non.missing.Rd new file mode 100644 index 000000000000..4eb2697f8a00 --- /dev/null +++ b/R-package/man/xgb.get.DMatrix.num.non.missing.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{xgb.get.DMatrix.num.non.missing} +\alias{xgb.get.DMatrix.num.non.missing} +\title{Get Number of Non-Missing Entries in DMatrix} +\usage{ +xgb.get.DMatrix.num.non.missing(dmat) +} +\arguments{ +\item{dmat}{An \code{xgb.DMatrix} object, as returned by \link{xgb.DMatrix}.} +} +\value{ +The number of non-missing entries in the DMatrix +} +\description{ +Get Number of Non-Missing Entries in DMatrix +} diff --git a/R-package/man/xgb.get.DMatrix.qcut.Rd b/R-package/man/xgb.get.DMatrix.qcut.Rd new file mode 100644 index 000000000000..8f7c3da75878 --- /dev/null +++ b/R-package/man/xgb.get.DMatrix.qcut.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{xgb.get.DMatrix.qcut} +\alias{xgb.get.DMatrix.qcut} +\title{Get Quantile Cuts from DMatrix} +\usage{ +xgb.get.DMatrix.qcut(dmat, output = c("list", "arrays")) +} +\arguments{ +\item{dmat}{An \code{xgb.DMatrix} object, as returned by \link{xgb.DMatrix}.} + +\item{output}{Output format for the quantile cuts. Possible options are:\itemize{ +\item \code{"list"} will return the output as a list with one entry per column, where +each column will have a numeric vector with the cuts. The list will be named if +\code{dmat} has column names assigned to it. +\item \code{"arrays"} will return a list with entries \code{indptr} (base-0 indexing) and +\code{data}. Here, the cuts for column 'i' are obtained by slicing 'data' from entries +\code{indptr[i]+1} to \code{indptr[i+1]}. 
+}} +} +\value{ +The quantile cuts, in the format specified by parameter \code{output}. +} +\description{ +Get the quantile cuts (a.k.a. borders) from an \code{xgb.DMatrix} +that has been quantized for the histogram method (\code{tree_method="hist"}). + +These cuts are used in order to assign observations to bins - i.e. these are ordered +boundaries which are used to determine assignment condition \verb{border_low < x < border_high}. +As such, the first and last bin will be outside of the range of the data, so as to include +all of the observations there. + +If a given column has 'n' bins, then there will be 'n+1' cuts / borders for that column, +which will be output in sorted order from lowest to highest. + +Different columns can have different numbers of bins according to their range. +} +\examples{ +library(xgboost) +data(mtcars) +y <- mtcars$mpg +x <- as.matrix(mtcars[, -1]) +dm <- xgb.DMatrix(x, label = y, nthread = 1) + +# DMatrix is not quantized right away, but will be once a hist model is generated +model <- xgb.train( + data = dm, + params = list( + tree_method = "hist", + max_bin = 8, + nthread = 1 + ), + nrounds = 3 +) + +# Now can get the quantile cuts +xgb.get.DMatrix.qcut(dm) +} diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 8af5dbbf647a..dd13983f5b59 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -63,6 +63,7 @@ OBJECTS= \ $(PKGROOT)/src/gbm/gblinear.o \ $(PKGROOT)/src/gbm/gblinear_model.o \ $(PKGROOT)/src/data/adapter.o \ + $(PKGROOT)/src/data/array_interface.o \ $(PKGROOT)/src/data/simple_dmatrix.o \ $(PKGROOT)/src/data/data.o \ $(PKGROOT)/src/data/sparse_page_raw_format.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 60f754fef47e..46a862711dc6 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -63,6 +63,7 @@ OBJECTS= \ $(PKGROOT)/src/gbm/gblinear.o \ $(PKGROOT)/src/gbm/gblinear_model.o \ $(PKGROOT)/src/data/adapter.o \ + 
$(PKGROOT)/src/data/array_interface.o \ $(PKGROOT)/src/data/simple_dmatrix.o \ $(PKGROOT)/src/data/data.o \ $(PKGROOT)/src/data/sparse_page_raw_format.o \ diff --git a/R-package/src/init.c b/R-package/src/init.c index f957229af236..5eee8ebe6ab2 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -45,6 +45,9 @@ extern SEXP XGDMatrixCreateFromDF_R(SEXP, SEXP, SEXP); extern SEXP XGDMatrixGetStrFeatureInfo_R(SEXP, SEXP); extern SEXP XGDMatrixNumCol_R(SEXP); extern SEXP XGDMatrixNumRow_R(SEXP); +extern SEXP XGDMatrixGetQuantileCut_R(SEXP); +extern SEXP XGDMatrixNumNonMissing_R(SEXP); +extern SEXP XGDMatrixGetDataAsCSR_R(SEXP); extern SEXP XGDMatrixSaveBinary_R(SEXP, SEXP, SEXP); extern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP); extern SEXP XGDMatrixSetStrFeatureInfo_R(SEXP, SEXP, SEXP); @@ -84,6 +87,9 @@ static const R_CallMethodDef CallEntries[] = { {"XGDMatrixGetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixGetStrFeatureInfo_R, 2}, {"XGDMatrixNumCol_R", (DL_FUNC) &XGDMatrixNumCol_R, 1}, {"XGDMatrixNumRow_R", (DL_FUNC) &XGDMatrixNumRow_R, 1}, + {"XGDMatrixGetQuantileCut_R", (DL_FUNC) &XGDMatrixGetQuantileCut_R, 1}, + {"XGDMatrixNumNonMissing_R", (DL_FUNC) &XGDMatrixNumNonMissing_R, 1}, + {"XGDMatrixGetDataAsCSR_R", (DL_FUNC) &XGDMatrixGetDataAsCSR_R, 1}, {"XGDMatrixSaveBinary_R", (DL_FUNC) &XGDMatrixSaveBinary_R, 3}, {"XGDMatrixSetInfo_R", (DL_FUNC) &XGDMatrixSetInfo_R, 3}, {"XGDMatrixSetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixSetStrFeatureInfo_R, 3}, diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index fb05c33b46ec..60a3fe68b973 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -1,5 +1,5 @@ /** - * Copyright 2014-2023 by XGBoost Contributors + * Copyright 2014-2024, XGBoost Contributors */ #include #include @@ -9,9 +9,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -20,14 +22,14 @@ #include "../../src/c_api/c_api_error.h" #include "../../src/c_api/c_api_utils.h" // 
MakeSparseFromPtr #include "../../src/common/threading_utils.h" +#include "../../src/data/array_interface.h" // for ArrayInterface #include "./xgboost_R.h" // Must follow other includes. namespace { - struct ErrorWithUnwind : public std::exception {}; -void ThrowExceptionFromRError(void *unused, Rboolean jump) { +void ThrowExceptionFromRError(void *, Rboolean jump) { if (jump) { throw ErrorWithUnwind(); } @@ -49,6 +51,30 @@ SEXP SafeMkChar(const char *c_str, SEXP continuation_token) { continuation_token); } +SEXP WrappedAllocReal(void *void_ptr) { + size_t *size = static_cast(void_ptr); + return Rf_allocVector(REALSXP, *size); +} + +SEXP SafeAllocReal(size_t size, SEXP continuation_token) { + return R_UnwindProtect( + WrappedAllocReal, static_cast(&size), + ThrowExceptionFromRError, nullptr, + continuation_token); +} + +SEXP WrappedAllocInteger(void *void_ptr) { + size_t *size = static_cast(void_ptr); + return Rf_allocVector(INTSXP, *size); +} + +SEXP SafeAllocInteger(size_t size, SEXP continuation_token) { + return R_UnwindProtect( + WrappedAllocInteger, static_cast(&size), + ThrowExceptionFromRError, nullptr, + continuation_token); +} + [[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) { SEXP mat_dims = Rf_getAttrib(R_mat, R_DimSymbol); if (Rf_xlength(mat_dims) > 2) { @@ -136,6 +162,37 @@ SEXP SafeMkChar(const char *c_str, SEXP continuation_token) { jconfig["nthread"] = Rf_asInteger(n_threads); return Json::Dump(jconfig); } + +// Allocate a R vector and copy an array interface encoded object to it. +[[nodiscard]] SEXP CopyArrayToR(const char *array_str, SEXP ctoken) { + xgboost::ArrayInterface<1> array{xgboost::StringView{array_str}}; + // R supports only int and double. 
+  bool is_int =
+      xgboost::DispatchDType(array.type, [](auto t) { return std::is_integral_v<decltype(t)>; });
+  bool is_float = xgboost::DispatchDType(
+      array.type, [](auto v) { return std::is_floating_point_v<decltype(v)>; });
+  CHECK(is_int || is_float) << "Internal error: Invalid DType.";
+  CHECK(array.is_contiguous) << "Internal error: Return by XGBoost should be contiguous";
+
+  // Allocate memory in R
+  SEXP out =
+      Rf_protect(is_int ? SafeAllocInteger(array.n, ctoken) : SafeAllocReal(array.n, ctoken));
+
+  xgboost::DispatchDType(array.type, [&](auto t) {
+    using T = decltype(t);
+    auto in_ptr = static_cast<T const *>(array.data);
+    if (is_int) {
+      auto out_ptr = INTEGER(out);
+      std::copy_n(in_ptr, array.n, out_ptr);
+    } else {
+      auto out_ptr = REAL(out);
+      std::copy_n(in_ptr, array.n, out_ptr);
+    }
+  });
+
+  Rf_unprotect(1);
+  return out;
+}
 }  // namespace
 
 struct RRNGStateController {
@@ -540,6 +597,73 @@ XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle) {
   return ScalarInteger(static_cast<int>(ncol));
 }
 
+XGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle) {
+  const char *out_names[] = {"indptr", "data", ""};
+  SEXP continuation_token = Rf_protect(R_MakeUnwindCont());
+  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
+  R_API_BEGIN();
+  const char *out_indptr;
+  const char *out_data;
+  CHECK_CALL(XGDMatrixGetQuantileCut(R_ExternalPtrAddr(handle), "{}", &out_indptr, &out_data));
+  try {
+    SET_VECTOR_ELT(out, 0, CopyArrayToR(out_indptr, continuation_token));
+    SET_VECTOR_ELT(out, 1, CopyArrayToR(out_data, continuation_token));
+  } catch (ErrorWithUnwind &e) {
+    R_ContinueUnwind(continuation_token);
+  }
+  R_API_END();
+  Rf_unprotect(2);
+  return out;
+}
+
+XGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle) {
+  SEXP out = Rf_protect(Rf_allocVector(REALSXP, 1));
+  R_API_BEGIN();
+  bst_ulong out_;
+  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &out_));
+  REAL(out)[0] = static_cast<double>(out_);
+  R_API_END();
+  Rf_unprotect(1);
+  return out;
+}
+
+XGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle)
{
+  const char *out_names[] = {"indptr", "indices", "data", "ncols", ""};
+  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
+  R_API_BEGIN();
+
+  bst_ulong nrows, ncols, nnz;
+  CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrows));
+  CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncols));
+  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &nnz));
+  if (std::max(nrows, ncols) > std::numeric_limits<int>::max()) {
+    Rf_error("%s", "Error: resulting DMatrix data does not fit into R 'dgRMatrix'.");
+  }
+
+  SET_VECTOR_ELT(out, 0, Rf_allocVector(INTSXP, nrows + 1));
+  SET_VECTOR_ELT(out, 1, Rf_allocVector(INTSXP, nnz));
+  SET_VECTOR_ELT(out, 2, Rf_allocVector(REALSXP, nnz));
+  SET_VECTOR_ELT(out, 3, Rf_ScalarInteger(ncols));
+
+  std::unique_ptr<bst_ulong[]> indptr(new bst_ulong[nrows + 1]);
+  std::unique_ptr<unsigned[]> indices(new unsigned[nnz]);
+  std::unique_ptr<float[]> data(new float[nnz]);
+
+  CHECK_CALL(XGDMatrixGetDataAsCSR(R_ExternalPtrAddr(handle),
+                                   "{}",
+                                   indptr.get(),
+                                   indices.get(),
+                                   data.get()));
+
+  std::copy(indptr.get(), indptr.get() + nrows + 1, INTEGER(VECTOR_ELT(out, 0)));
+  std::copy(indices.get(), indices.get() + nnz, INTEGER(VECTOR_ELT(out, 1)));
+  std::copy(data.get(), data.get() + nnz, REAL(VECTOR_ELT(out, 2)));
+
+  R_API_END();
+  Rf_unprotect(1);
+  return out;
+}
+
 // functions related to booster
 void _BoosterFinalizer(SEXP ext) {
   if (R_ExternalPtrAddr(ext) == NULL) return;
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index 2e874e3a6a2a..4e3458957932 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -143,6 +143,31 @@ XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle);
  */
 XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle);
 
+/*!
+ * \brief return the quantile cuts used for the histogram method
+ * \param handle an instance of data matrix
+ * \return A list with entries 'indptr' and 'data'
+ */
+XGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle);
+
+/*!
+ * \brief get the number of non-missing entries in a dmatrix
+ * \param handle an instance of data matrix
+ * \return the number of non-missing entries
+ */
+XGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle);
+
+/*!
+ * \brief get the data in a dmatrix in CSR format
+ * \param handle an instance of data matrix
+ * \return R list with the following entries in this order:
+ * - 'indptr'
+ * - 'indices'
+ * - 'data'
+ * - 'ncols'
+ */
+XGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle);
+
+/*!
+ * \brief create xgboost learner
+ * \param dmats a list of dmatrix handles that will be cached
diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R
index 55a6996874fb..81ac884d0a1e 100644
--- a/R-package/tests/testthat/test_dmatrix.R
+++ b/R-package/tests/testthat/test_dmatrix.R
@@ -375,3 +375,62 @@ test_that("xgb.DMatrix: can take multi-dimensional 'base_margin'", {
   )
   expect_equal(pred_only_x, pred_w_base - b, tolerance = 1e-5)
 })
+
+test_that("xgb.DMatrix: number of non-missing matches data", {
+  x <- matrix(1:10, nrow = 5)
+  dm1 <- xgb.DMatrix(x)
+  expect_equal(xgb.get.DMatrix.num.non.missing(dm1), 10)
+
+  x[2, 2] <- NA
+  x[4, 1] <- NA
+  dm2 <- xgb.DMatrix(x)
+  expect_equal(xgb.get.DMatrix.num.non.missing(dm2), 8)
+})
+
+test_that("xgb.DMatrix: retrieving data as CSR", {
+  data(mtcars)
+  dm <- xgb.DMatrix(as.matrix(mtcars))
+  csr <- xgb.get.DMatrix.data(dm)
+  expect_equal(dim(csr), dim(mtcars))
+  expect_equal(colnames(csr), colnames(mtcars))
+  expect_equal(unname(as.matrix(csr)), unname(as.matrix(mtcars)), tolerance = 1e-6)
+})
+
+test_that("xgb.DMatrix: quantile cuts look correct", {
+  data(mtcars)
+  y <- mtcars$mpg
+  x <- as.matrix(mtcars[, -1])
+  dm <- xgb.DMatrix(x, label = y)
+  model <- xgb.train(
+    data = dm,
+    params = list(
+      tree_method = "hist",
+      max_bin = 8,
+      nthread = 1
+    ),
+    nrounds = 3
+  )
+  qcut_list <- xgb.get.DMatrix.qcut(dm, "list")
+  qcut_arrays <- xgb.get.DMatrix.qcut(dm, "arrays")
+
+  
expect_equal(length(qcut_arrays), 2) + expect_equal(names(qcut_arrays), c("indptr", "data")) + expect_equal(length(qcut_arrays$indptr), ncol(x) + 1) + expect_true(min(diff(qcut_arrays$indptr)) > 0) + + col_min <- apply(x, 2, min) + col_max <- apply(x, 2, max) + + expect_equal(length(qcut_list), ncol(x)) + expect_equal(names(qcut_list), colnames(x)) + lapply( + seq(1, ncol(x)), + function(col) { + cuts <- qcut_list[[col]] + expect_true(min(diff(cuts)) > 0) + expect_true(col_min[col] > cuts[1]) + expect_true(col_max[col] < cuts[length(cuts)]) + expect_true(length(cuts) <= 9) + } + ) +}) diff --git a/src/data/array_interface.cc b/src/data/array_interface.cc new file mode 100644 index 000000000000..06b9ed00c870 --- /dev/null +++ b/src/data/array_interface.cc @@ -0,0 +1,13 @@ +/** + * Copyright 2019-2024, XGBoost Contributors + */ +#include "array_interface.h" + +#include "../common/common.h" // for AssertGPUSupport + +namespace xgboost { +#if !defined(XGBOOST_USE_CUDA) +void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } +bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } +#endif // !defined(XGBOOST_USE_CUDA) +} // namespace xgboost diff --git a/src/data/array_interface.h b/src/data/array_interface.h index 0170e6a847d3..6f2438f37196 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -375,11 +375,6 @@ struct ToDType { static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8; }; -#if !defined(XGBOOST_USE_CUDA) -inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } -inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } -#endif // !defined(XGBOOST_USE_CUDA) - /** * \brief A type erased view over __array_interface__ protocol defined by numpy *