From 32cbab1cc00e5640fd79fd8557c098128d7efbec Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 2 Jan 2024 08:20:51 +0100 Subject: [PATCH 1/4] [R] put 'verbose' in correct argument (#9942) --- R-package/R/xgb.train.R | 16 ++++++++-------- R-package/man/xgb.train.Rd | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index d93a0643d1b3..e20c1af3e9fd 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -251,9 +251,9 @@ #' watchlist <- list(train = dtrain, eval = dtest) #' #' ## A simple xgb.train example: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread, #' objective = "binary:logistic", eval_metric = "auc") -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist) +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) #' #' ## An xgb.train example where custom objective and evaluation metric are #' ## used: @@ -272,13 +272,13 @@ #' #' # These functions could be used by passing them either: #' # as 'objective' and 'eval_metric' parameters in the params list: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread, #' objective = logregobj, eval_metric = evalerror) -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist) +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) #' #' # or through the ... 
arguments: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread) -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread) +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, #' objective = logregobj, eval_metric = evalerror) #' #' # or as dedicated 'obj' and 'feval' parameters of xgb.train: @@ -287,10 +287,10 @@ #' #' #' ## An xgb.train example of using variable learning rates at each iteration: -#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +#' param <- list(max_depth = 2, eta = 1, nthread = nthread, #' objective = "binary:logistic", eval_metric = "auc") #' my_etas <- list(eta = c(0.5, 0.1)) -#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, #' callbacks = list(cb.reset.parameters(my_etas))) #' #' ## Early stopping: diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 0ef2e2216d66..b2eaff27c4c1 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -303,9 +303,9 @@ dtest <- with( watchlist <- list(train = dtrain, eval = dtest) ## A simple xgb.train example: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = "binary:logistic", eval_metric = "auc") -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) ## An xgb.train example where custom objective and evaluation metric are ## used: @@ -324,13 +324,13 @@ evalerror <- function(preds, dtrain) { # These functions could be used by passing them either: # as 'objective' and 'eval_metric' parameters in the params list: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = logregobj, eval_metric = evalerror) -bst <- 
xgb.train(param, dtrain, nrounds = 2, watchlist) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0) # or through the ... arguments: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread) -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +param <- list(max_depth = 2, eta = 1, nthread = nthread) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, objective = logregobj, eval_metric = evalerror) # or as dedicated 'obj' and 'feval' parameters of xgb.train: @@ -339,10 +339,10 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, ## An xgb.train example of using variable learning rates at each iteration: -param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread, +param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = "binary:logistic", eval_metric = "auc") my_etas <- list(eta = c(0.5, 0.1)) -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_etas))) ## Early stopping: From 9e33a102021aa2fa2283d5a1e6447f24c3ce9633 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 2 Jan 2024 14:20:01 +0100 Subject: [PATCH 2/4] [R] Replace `xgboost()` with `xgb.train()` in most tests and examples (#9941) --- R-package/R/utils.R | 5 +- R-package/R/xgb.Booster.R | 15 ++-- R-package/R/xgb.load.R | 6 +- R-package/R/xgb.save.R | 6 +- R-package/R/xgb.save.raw.R | 4 +- R-package/R/xgb.serialize.R | 4 +- R-package/demo/create_sparse_matrix.R | 4 +- R-package/demo/interaction_constraints.R | 18 ++-- R-package/demo/poisson_regression.R | 4 +- .../a-compatibility-note-for-saveRDS-save.Rd | 5 +- R-package/man/predict.xgb.Booster.Rd | 15 ++-- R-package/man/xgb.load.Rd | 6 +- R-package/man/xgb.save.Rd | 6 +- R-package/man/xgb.save.raw.Rd | 4 +- R-package/man/xgb.serialize.Rd | 4 +- R-package/tests/testthat/test_basic.R | 84 ++++++++++--------- R-package/tests/testthat/test_callbacks.R | 8 +- 
R-package/tests/testthat/test_gc_safety.R | 4 +- R-package/tests/testthat/test_helpers.R | 49 ++++++----- .../testthat/test_interaction_constraints.R | 6 +- R-package/tests/testthat/test_interactions.R | 10 +-- R-package/tests/testthat/test_io.R | 4 +- R-package/tests/testthat/test_monotone.R | 6 +- .../tests/testthat/test_parameter_exposure.R | 14 ++-- .../tests/testthat/test_poisson_regression.R | 4 +- R-package/tests/testthat/test_unicode.R | 6 +- R-package/vignettes/xgboostfromJSON.Rmd | 5 +- 27 files changed, 156 insertions(+), 150 deletions(-) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index bf08c481d118..1798e4ad1aff 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -383,8 +383,9 @@ NULL #' #' @examples #' data(agaricus.train, package='xgboost') -#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +#' bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), +#' max_depth = 2, eta = 1, nthread = 2, nrounds = 2, +#' objective = "binary:logistic") #' #' # Save as a stand-alone file; load it with xgb.load() #' xgb.save(bst, 'xgb.model') diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 4e980641a17d..371f3d129142 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -272,9 +272,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' train <- agaricus.train #' test <- agaricus.test #' -#' bst <- xgboost( -#' data = train$data, -#' label = train$label, +#' bst <- xgb.train( +#' data = xgb.DMatrix(train$data, label = train$label), #' max_depth = 2, #' eta = 0.5, #' nthread = nthread, @@ -316,9 +315,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' #' set.seed(11) #' -#' bst <- xgboost( -#' data = as.matrix(iris[, -5]), -#' label = lb, +#' bst <- xgb.train( +#' data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), #' max_depth = 4, #' 
eta = 0.5, #' nthread = 2, @@ -341,9 +339,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' # compare with predictions from softmax: #' set.seed(11) #' -#' bst <- xgboost( -#' data = as.matrix(iris[, -5]), -#' label = lb, +#' bst <- xgb.train( +#' data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), #' max_depth = 4, #' eta = 0.5, #' nthread = 2, diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index cbdbdacc35f2..e8f9e0023892 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -29,8 +29,10 @@ #' #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost( -#' data = train$data, label = train$label, max_depth = 2, eta = 1, +#' bst <- xgb.train( +#' data = xgb.DMatrix(train$data, label = train$label), +#' max_depth = 2, +#' eta = 1, #' nthread = nthread, #' nrounds = 2, #' objective = "binary:logistic" diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index ab55bc4a9699..32b7d96180d2 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -32,8 +32,10 @@ #' #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost( -#' data = train$data, label = train$label, max_depth = 2, eta = 1, +#' bst <- xgb.train( +#' data = xgb.DMatrix(train$data, label = train$label), +#' max_depth = 2, +#' eta = 1, #' nthread = nthread, #' nrounds = 2, #' objective = "binary:logistic" diff --git a/R-package/R/xgb.save.raw.R b/R-package/R/xgb.save.raw.R index cad0fb0e01c2..63c06e0715d5 100644 --- a/R-package/R/xgb.save.raw.R +++ b/R-package/R/xgb.save.raw.R @@ -23,8 +23,8 @@ #' #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") +#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, +#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") #' #' raw <- xgb.save.raw(bst) #' bst <- xgb.load.raw(raw) diff 
--git a/R-package/R/xgb.serialize.R b/R-package/R/xgb.serialize.R index 00bbb429320c..c20d2b51c312 100644 --- a/R-package/R/xgb.serialize.R +++ b/R-package/R/xgb.serialize.R @@ -9,8 +9,8 @@ #' data(agaricus.test, package='xgboost') #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") +#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, +#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") #' raw <- xgb.serialize(bst) #' bst <- xgb.unserialize(raw) #' diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index f8afb14ba04f..08a40608cdf8 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -81,8 +81,8 @@ output_vector <- df[, Y := 0][Improved == "Marked", Y := 1][, Y] # Following is the same process as other demo cat("Learning...\n") -bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 9, - eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") +bst <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = output_vector), max_depth = 9, + eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst) print(importance) diff --git a/R-package/demo/interaction_constraints.R b/R-package/demo/interaction_constraints.R index 9e694e3eb3db..72287513eeeb 100644 --- a/R-package/demo/interaction_constraints.R +++ b/R-package/demo/interaction_constraints.R @@ -74,26 +74,26 @@ cols2ids <- function(object, col_names) { interaction_list_fid <- cols2ids(interaction_list, colnames(train)) # Fit model with interaction constraints -bst <- xgboost(data = train, label = y, max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000, - interaction_constraints = interaction_list_fid) +bst <- xgb.train(data = 
xgb.DMatrix(train, label = y), max_depth = 4, + eta = 0.1, nthread = 2, nrounds = 1000, + interaction_constraints = interaction_list_fid) bst_tree <- xgb.model.dt.tree(colnames(train), bst) bst_interactions <- treeInteractions(bst_tree, 4) # interactions constrained to combinations of V1*V2 and V3*V4*V5 # Fit model without interaction constraints -bst2 <- xgboost(data = train, label = y, max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000) +bst2 <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, + eta = 0.1, nthread = 2, nrounds = 1000) bst2_tree <- xgb.model.dt.tree(colnames(train), bst2) bst2_interactions <- treeInteractions(bst2_tree, 4) # much more interactions # Fit model with both interaction and monotonicity constraints -bst3 <- xgboost(data = train, label = y, max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000, - interaction_constraints = interaction_list_fid, - monotone_constraints = c(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0)) +bst3 <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, + eta = 0.1, nthread = 2, nrounds = 1000, + interaction_constraints = interaction_list_fid, + monotone_constraints = c(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0)) bst3_tree <- xgb.model.dt.tree(colnames(train), bst3) bst3_interactions <- treeInteractions(bst3_tree, 4) diff --git a/R-package/demo/poisson_regression.R b/R-package/demo/poisson_regression.R index 121ac17f2173..685314b30e96 100644 --- a/R-package/demo/poisson_regression.R +++ b/R-package/demo/poisson_regression.R @@ -1,6 +1,6 @@ data(mtcars) head(mtcars) -bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11], - objective = 'count:poisson', nrounds = 5) +bst <- xgb.train(data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]), + objective = 'count:poisson', nrounds = 5) pred <- predict(bst, as.matrix(mtcars[, -11])) sqrt(mean((pred - mtcars[, 11]) ^ 2)) diff --git a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd 
b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd index 85b52243c1b9..023cff9fdc90 100644 --- a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd +++ b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd @@ -33,8 +33,9 @@ For more details and explanation about model persistence and archival, consult t } \examples{ data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), + max_depth = 2, eta = 1, nthread = 2, nrounds = 2, + objective = "binary:logistic") # Save as a stand-alone file; load it with xgb.load() xgb.save(bst, 'xgb.model') diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 135177dda99b..f47cab321021 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -136,9 +136,8 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost( - data = train$data, - label = train$label, +bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 0.5, nthread = nthread, @@ -180,9 +179,8 @@ num_class <- 3 set.seed(11) -bst <- xgboost( - data = as.matrix(iris[, -5]), - label = lb, +bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 4, eta = 0.5, nthread = 2, @@ -205,9 +203,8 @@ sum(pred_labels != lb) / length(lb) # compare with predictions from softmax: set.seed(11) -bst <- xgboost( - data = as.matrix(iris[, -5]), - label = lb, +bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 4, eta = 0.5, nthread = 2, diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 1a406cc21d0e..63f551d7a914 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -34,8 +34,10 @@ 
data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, eta = 1, +bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), + max_depth = 2, + eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index a7e160a12a9b..22c6c8fa3652 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -38,8 +38,10 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, eta = 1, +bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), + max_depth = 2, + eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index 0835519336a0..498272148022 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -32,8 +32,8 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") +bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, + eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic") raw <- xgb.save.raw(bst) bst <- xgb.load.raw(raw) diff --git a/R-package/man/xgb.serialize.Rd b/R-package/man/xgb.serialize.Rd index 952441d98bea..5bf4205f82b5 100644 --- a/R-package/man/xgb.serialize.Rd +++ b/R-package/man/xgb.serialize.Rd @@ -21,8 +21,8 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") +bst <- xgb.train(data = 
xgb.DMatrix(train$data, label = train$label), max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") raw <- xgb.serialize(bst) bst <- xgb.unserialize(raw) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 8ecf86e87178..d4b3a6be36af 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -16,10 +16,11 @@ n_threads <- 1 test_that("train and predict binary classification", { nrounds <- 2 expect_output( - bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = nrounds, - objective = "binary:logistic", eval_metric = "error" + objective = "binary:logistic", eval_metric = "error", + watchlist = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error" ) @@ -104,9 +105,8 @@ test_that("dart prediction works", { rnorm(100) set.seed(1994) - booster_by_xgboost <- xgboost( - data = d, - label = y, + booster_by_xgboost <- xgb.train( + data = xgb.DMatrix(d, label = y), max_depth = 2, booster = "dart", rate_drop = 0.5, @@ -151,10 +151,11 @@ test_that("train and predict softprob", { lb <- as.numeric(iris$Species) - 1 set.seed(11) expect_output( - bst <- xgboost( - data = as.matrix(iris[, -5]), label = lb, + bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5, - objective = "multi:softprob", num_class = 3, eval_metric = "merror" + objective = "multi:softprob", num_class = 3, eval_metric = "merror", + watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ), "train-merror" ) @@ -201,10 +202,11 @@ test_that("train and predict softmax", { lb <- as.numeric(iris$Species) - 1 set.seed(11) expect_output( - bst <- xgboost( - data = as.matrix(iris[, -5]), label = lb, + bst <- xgb.train( + data = 
xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5, - objective = "multi:softmax", num_class = 3, eval_metric = "merror" + objective = "multi:softmax", num_class = 3, eval_metric = "merror", + watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ), "train-merror" ) @@ -222,11 +224,12 @@ test_that("train and predict RF", { set.seed(11) lb <- train$label # single iteration - bst <- xgboost( - data = train$data, label = lb, max_depth = 5, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = lb), max_depth = 5, nthread = n_threads, nrounds = 1, objective = "binary:logistic", eval_metric = "error", - num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1 + num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1, + watchlist = list(train = xgb.DMatrix(train$data, label = lb)) ) expect_equal(bst$niter, 1) expect_equal(xgb.ntree(bst), 20) @@ -248,12 +251,13 @@ test_that("train and predict RF with softprob", { lb <- as.numeric(iris$Species) - 1 nrounds <- 15 set.seed(11) - bst <- xgboost( - data = as.matrix(iris[, -5]), label = lb, + bst <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds, objective = "multi:softprob", eval_metric = "merror", num_class = 3, verbose = 0, - num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5 + num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5, + watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ) expect_equal(bst$niter, 15) expect_equal(xgb.ntree(bst), 15 * 3 * 4) @@ -271,10 +275,11 @@ test_that("train and predict RF with softprob", { test_that("use of multiple eval metrics works", { expect_output( - bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = 
"binary:logistic", - eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" + eval_metric = "error", eval_metric = "auc", eval_metric = "logloss", + watchlist = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error.*train-auc.*train-logloss" ) @@ -282,10 +287,11 @@ test_that("use of multiple eval metrics works", { expect_equal(dim(bst$evaluation_log), c(2, 4)) expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss")) expect_output( - bst2 <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst2 <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", - eval_metric = list("error", "auc", "logloss") + eval_metric = list("error", "auc", "logloss"), + watchlist = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error.*train-auc.*train-logloss" ) @@ -361,7 +367,7 @@ test_that("xgb.cv works", { expect_is(cv, "xgb.cv.synchronous") expect_false(is.null(cv$evaluation_log)) expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03) - expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008) + expect_lt(cv$evaluation_log[, min(test_error_std)], 0.0085) expect_equal(cv$niter, 2) expect_false(is.null(cv$folds) && is.list(cv$folds)) expect_length(cv$folds, 5) @@ -391,8 +397,8 @@ test_that("xgb.cv works with stratified folds", { test_that("train and predict with non-strict classes", { # standard dense matrix input train_dense <- as.matrix(train$data) - bst <- xgboost( - data = train_dense, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", verbose = 0 ) @@ -402,8 +408,8 @@ test_that("train and predict with non-strict classes", { class(train_dense) <- "shmatrix" expect_true(is.matrix(train_dense)) expect_error( - bst <- 
xgboost( - data = train_dense, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", verbose = 0 ), @@ -416,8 +422,8 @@ test_that("train and predict with non-strict classes", { class(train_dense) <- c("pphmatrix", "shmatrix") expect_true(is.matrix(train_dense)) expect_error( - bst <- xgboost( - data = train_dense, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", verbose = 0 ), @@ -480,8 +486,8 @@ test_that("colsample_bytree works", { }) test_that("Configuration works", { - bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" ) @@ -521,8 +527,8 @@ test_that("strict_shape works", { y <- as.numeric(iris$Species) - 1 X <- as.matrix(iris[, -5]) - bst <- xgboost( - data = X, label = y, + bst <- xgb.train( + data = xgb.DMatrix(X, label = y), max_depth = 2, nrounds = n_rounds, nthread = n_threads, objective = "multi:softprob", num_class = 3, eval_metric = "merror" ) @@ -536,8 +542,8 @@ test_that("strict_shape works", { X <- agaricus.train$data y <- agaricus.train$label - bst <- xgboost( - data = X, label = y, max_depth = 2, nthread = n_threads, + bst <- xgb.train( + data = xgb.DMatrix(X, label = y), max_depth = 2, nthread = n_threads, nrounds = n_rounds, objective = "binary:logistic", eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" ) @@ -555,8 +561,8 @@ test_that("'predict' accepts CSR data", { x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix") x_csr <- as(x_csc, "RsparseMatrix") x_spv <- as(x_csc, 
"sparseVector") - bst <- xgboost( - data = X, label = y, objective = "binary:logistic", + bst <- xgb.train( + data = xgb.DMatrix(X, label = y), objective = "binary:logistic", nrounds = 5L, verbose = FALSE, nthread = n_threads, ) p_csc <- predict(bst, x_csc) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index b5d3c5310f00..63a4c3f252eb 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -265,14 +265,14 @@ test_that("early stopping works with titanic", { dtx <- model.matrix(~ 0 + ., data = titanic[, c("Pclass", "Sex")]) dty <- titanic$Survived - xgboost::xgboost( - data = dtx, - label = dty, + xgboost::xgb.train( + data = xgb.DMatrix(dtx, label = dty), objective = "binary:logistic", eval_metric = "auc", nrounds = 100, early_stopping_rounds = 3, - nthread = n_threads + nthread = n_threads, + watchlist = list(train = xgb.DMatrix(dtx, label = dty)) ) expect_true(TRUE) # should not crash diff --git a/R-package/tests/testthat/test_gc_safety.R b/R-package/tests/testthat/test_gc_safety.R index f77af1eabd0e..44d8f81a4eda 100644 --- a/R-package/tests/testthat/test_gc_safety.R +++ b/R-package/tests/testthat/test_gc_safety.R @@ -6,8 +6,8 @@ test_that("train and prediction when gctorture is on", { train <- agaricus.train test <- agaricus.test gctorture(TRUE) - bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") + bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max.depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") pred <- predict(bst, test$data) gctorture(FALSE) expect_length(pred, length(test$label)) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 7fae052b4f47..fd1fffbac640 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -25,15 +25,15 
@@ if (isTRUE(VCD_AVAILABLE)) { label <- df[, ifelse(Improved == "Marked", 1, 0)] # binary - bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9, - eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gbtree", - base_score = 0.5) + bst.Tree <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 9, + eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, + objective = "binary:logistic", booster = "gbtree", + base_score = 0.5) - bst.GLM <- xgboost(data = sparse_matrix, label = label, - eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gblinear", - base_score = 0.5) + bst.GLM <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), + eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, + objective = "binary:logistic", booster = "gblinear", + base_score = 0.5) feature.names <- colnames(sparse_matrix) } @@ -41,13 +41,13 @@ if (isTRUE(VCD_AVAILABLE)) { # multiclass mlabel <- as.numeric(iris$Species) - 1 nclass <- 3 -mbst.Tree <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0, - max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds, - objective = "multi:softprob", num_class = nclass, base_score = 0) +mbst.Tree <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0, + max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds, + objective = "multi:softprob", num_class = nclass, base_score = 0) -mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0, - booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds, - objective = "multi:softprob", num_class = nclass, base_score = 0) +mbst.GLM <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0, + booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds, + objective = "multi:softprob", num_class = nclass, base_score = 0) test_that("xgb.dump works", { @@ -71,8 +71,9 @@ 
test_that("xgb.dump works for gblinear", { expect_length(xgb.dump(bst.GLM), 14) # also make sure that it works properly for a sparse model where some coefficients # are 0 from setting large L1 regularization: - bst.GLM.sp <- xgboost(data = sparse_matrix, label = label, eta = 1, nthread = 2, nrounds = 1, - alpha = 2, objective = "binary:logistic", booster = "gblinear") + bst.GLM.sp <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), eta = 1, + nthread = 2, nrounds = 1, + alpha = 2, objective = "binary:logistic", booster = "gblinear") d.sp <- xgb.dump(bst.GLM.sp) expect_length(d.sp, 14) expect_gt(sum(d.sp == "0"), 0) @@ -168,7 +169,7 @@ test_that("SHAPs sum to predictions, with or without DART", { nrounds <- 30 for (booster in list("gbtree", "dart")) { - fit <- xgboost( + fit <- xgb.train( params = c( list( nthread = 2, @@ -177,8 +178,7 @@ test_that("SHAPs sum to predictions, with or without DART", { eval_metric = "rmse"), if (booster == "dart") list(rate_drop = .01, one_drop = TRUE)), - data = d, - label = y, + data = xgb.DMatrix(d, label = y), nrounds = nrounds) pr <- function(...) 
{ @@ -360,9 +360,8 @@ test_that("xgb.importance works with and without feature names", { expect_equal(importance_from_dump(), importance, tolerance = 1e-6) ## decision stump - m <- xgboost::xgboost( - data = as.matrix(data.frame(x = c(0, 1))), - label = c(1, 2), + m <- xgboost::xgb.train( + data = xgb.DMatrix(as.matrix(data.frame(x = c(0, 1))), label = c(1, 2)), nrounds = 1, base_score = 0.5, nthread = 2 @@ -393,9 +392,9 @@ test_that("xgb.importance works with GLM model", { test_that("xgb.model.dt.tree and xgb.importance work with a single split model", { .skip_if_vcd_not_available() - bst1 <- xgboost(data = sparse_matrix, label = label, max_depth = 1, - eta = 1, nthread = 2, nrounds = 1, verbose = 0, - objective = "binary:logistic") + bst1 <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 1, + eta = 1, nthread = 2, nrounds = 1, verbose = 0, + objective = "binary:logistic") expect_error(dt <- xgb.model.dt.tree(model = bst1), regexp = NA) # no error expect_equal(nrow(dt), 3) expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error diff --git a/R-package/tests/testthat/test_interaction_constraints.R b/R-package/tests/testthat/test_interaction_constraints.R index ee4c453b3de5..cfffb029ce84 100644 --- a/R-package/tests/testthat/test_interaction_constraints.R +++ b/R-package/tests/testthat/test_interaction_constraints.R @@ -13,9 +13,9 @@ train <- matrix(c(x1, x2, x3), ncol = 3) test_that("interaction constraints for regression", { # Fit a model that only allows interaction between x1 and x2 - bst <- xgboost(data = train, label = y, max_depth = 3, - eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, - interaction_constraints = list(c(0, 1))) + bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 3, + eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, + interaction_constraints = list(c(0, 1))) # Set all observations to have the same x3 values then increment # by the same amount diff --git 
a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R index 398531e0ec60..645efc12a14c 100644 --- a/R-package/tests/testthat/test_interactions.R +++ b/R-package/tests/testthat/test_interactions.R @@ -98,15 +98,14 @@ test_that("SHAP contribution values are not NAN", { ivs <- c("x1", "x2") - fit <- xgboost( + fit <- xgb.train( verbose = 0, params = list( objective = "reg:squarederror", eval_metric = "rmse", nthread = n_threads ), - data = as.matrix(subset(d, fold == 2)[, ivs]), - label = subset(d, fold == 2)$y, + data = xgb.DMatrix(as.matrix(subset(d, fold == 2)[, ivs]), label = subset(d, fold == 2)$y), nrounds = 3 ) @@ -169,9 +168,8 @@ test_that("multiclass feature interactions work", { test_that("SHAP single sample works", { train <- agaricus.train test <- agaricus.test - booster <- xgboost( - data = train$data, - label = train$label, + booster <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, nrounds = 4, objective = "binary:logistic", diff --git a/R-package/tests/testthat/test_io.R b/R-package/tests/testthat/test_io.R index 8cf5a9ae97c2..3c64ddc720bf 100644 --- a/R-package/tests/testthat/test_io.R +++ b/R-package/tests/testthat/test_io.R @@ -7,8 +7,8 @@ test <- agaricus.test test_that("load/save raw works", { nrounds <- 8 - booster <- xgboost( - data = train$data, label = train$label, + booster <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), nrounds = nrounds, objective = "binary:logistic", nthread = 2 ) diff --git a/R-package/tests/testthat/test_monotone.R b/R-package/tests/testthat/test_monotone.R index cb5827698878..671c02bd0658 100644 --- a/R-package/tests/testthat/test_monotone.R +++ b/R-package/tests/testthat/test_monotone.R @@ -7,9 +7,9 @@ train <- matrix(x, ncol = 1) test_that("monotone constraints for regression", { - bst <- xgboost(data = train, label = y, max_depth = 2, - eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, - monotone_constraints = -1) + bst 
<- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 2, + eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, + monotone_constraints = -1) pred <- predict(bst, train) diff --git a/R-package/tests/testthat/test_parameter_exposure.R b/R-package/tests/testthat/test_parameter_exposure.R index ea71ca7b7e39..5b12fde01a37 100644 --- a/R-package/tests/testthat/test_parameter_exposure.R +++ b/R-package/tests/testthat/test_parameter_exposure.R @@ -10,13 +10,13 @@ dtest <- xgb.DMatrix( agaricus.test$data, label = agaricus.test$label, nthread = 2 ) -bst <- xgboost(data = dtrain, - max_depth = 2, - eta = 1, - nrounds = 10, - nthread = 1, - verbose = 0, - objective = "binary:logistic") +bst <- xgb.train(data = dtrain, + max_depth = 2, + eta = 1, + nrounds = 10, + nthread = 1, + verbose = 0, + objective = "binary:logistic") test_that("call is exposed to R", { expect_false(is.null(bst$call)) diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R index 55918b57ad17..e251a13ad854 100644 --- a/R-package/tests/testthat/test_poisson_regression.R +++ b/R-package/tests/testthat/test_poisson_regression.R @@ -4,8 +4,8 @@ set.seed(1994) test_that("Poisson regression works", { data(mtcars) - bst <- xgboost( - data = as.matrix(mtcars[, -11]), label = mtcars[, 11], + bst <- xgb.train( + data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]), objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2 ) expect_equal(class(bst), "xgb.Booster") diff --git a/R-package/tests/testthat/test_unicode.R b/R-package/tests/testthat/test_unicode.R index c8a225716f81..718d58109163 100644 --- a/R-package/tests/testthat/test_unicode.R +++ b/R-package/tests/testthat/test_unicode.R @@ -8,9 +8,9 @@ set.seed(1994) test_that("Can save and load models with Unicode paths", { nrounds <- 2 - bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = nrounds, objective = 
"binary:logistic", - eval_metric = "error") + bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, + eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic", + eval_metric = "error") tmpdir <- tempdir() lapply(c("모델.json", "がうる・ぐら.json", "类继承.ubj"), function(x) { path <- file.path(tmpdir, x) diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd index e7ccdf3a9d06..f5bc3ad9b7f0 100644 --- a/R-package/vignettes/xgboostfromJSON.Rmd +++ b/R-package/vignettes/xgboostfromJSON.Rmd @@ -52,9 +52,8 @@ labels <- c(1, 1, 1, data <- data.frame(dates = dates, labels = labels) -bst <- xgboost( - data = as.matrix(data$dates), - label = labels, +bst <- xgb.train( + data = xgb.DMatrix(as.matrix(data$dates), label = labels), nthread = 2, nrounds = 1, objective = "binary:logistic", From 49247458f9ede5e4073f5a38b4d6deafc20238c8 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Wed, 3 Jan 2024 08:26:55 +0100 Subject: [PATCH 3/4] [R] Minor improvements for evaluation printing (#9940) --- R-package/R/callbacks.R | 3 ++- R-package/R/xgb.cv.R | 5 +++-- R-package/tests/testthat/test_callbacks.R | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 54f821a795cb..f8f3b5a30ceb 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -770,7 +770,8 @@ xgb.gblinear.history <- function(model, class_index = NULL) { if (!is.null(eval_err)) { if (length(eval_res) != length(eval_err)) stop('eval_res & eval_err lengths mismatch') - res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse = '\t') + # Note: UTF-8 code for plus/minus sign is U+00B1 + res <- paste0(sprintf("%s:%f\U00B1%f", enames, eval_res, eval_err), collapse = '\t') } else { res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t') } diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 1c17d86f042f..b0d8c4ebeec7 100644 --- 
a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -244,8 +244,9 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing ) }) msg <- simplify2array(msg) - bst_evaluation <- rowMeans(msg) - bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2) # nolint + # Note: these variables might look unused here, but they are used in the callbacks + bst_evaluation <- rowMeans(msg) # nolint + bst_evaluation_err <- apply(msg, 1, sd) # nolint for (f in cb$post_iter) f() diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 63a4c3f252eb..de515038074c 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -57,7 +57,7 @@ test_that("cb.print.evaluation works as expected", { expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000") bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2) - expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\\+0.100000\ttest-auc:0.800000\\+0.200000") + expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000±0.100000\ttest-auc:0.800000±0.200000") }) test_that("cb.evaluation.log works as expected", { From 3c004a4145c667df84cf7785a672defbde30c2b6 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Wed, 3 Jan 2024 10:29:21 +0100 Subject: [PATCH 4/4] [R] Add missing DMatrix functions (#9929) * `XGDMatrixGetQuantileCut` * `XGDMatrixNumNonMissing` * `XGDMatrixGetDataAsCSR` --------- Co-authored-by: Jiaming Yuan --- R-package/NAMESPACE | 5 + R-package/R/xgb.DMatrix.R | 105 ++++++++++++++ R-package/R/xgboost.R | 3 +- R-package/man/xgb.get.DMatrix.data.Rd | 19 +++ .../man/xgb.get.DMatrix.num.non.missing.Rd | 17 +++ R-package/man/xgb.get.DMatrix.qcut.Rd | 58 ++++++++ R-package/src/Makevars.in | 1 + R-package/src/Makevars.win | 1 + R-package/src/init.c | 6 + R-package/src/xgboost_R.cc | 130 +++++++++++++++++- R-package/src/xgboost_R.h | 25 ++++ R-package/tests/testthat/test_dmatrix.R | 59 ++++++++ 
src/data/array_interface.cc | 13 ++ src/data/array_interface.h | 5 - 14 files changed, 438 insertions(+), 9 deletions(-) create mode 100644 R-package/man/xgb.get.DMatrix.data.Rd create mode 100644 R-package/man/xgb.get.DMatrix.num.non.missing.Rd create mode 100644 R-package/man/xgb.get.DMatrix.qcut.Rd create mode 100644 src/data/array_interface.cc diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 40ede23a537a..e6f7a82b8e20 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -37,6 +37,9 @@ export(xgb.create.features) export(xgb.cv) export(xgb.dump) export(xgb.gblinear.history) +export(xgb.get.DMatrix.data) +export(xgb.get.DMatrix.num.non.missing) +export(xgb.get.DMatrix.qcut) export(xgb.get.config) export(xgb.ggplot.deepness) export(xgb.ggplot.importance) @@ -60,6 +63,7 @@ export(xgb.unserialize) export(xgboost) import(methods) importClassesFrom(Matrix,dgCMatrix) +importClassesFrom(Matrix,dgRMatrix) importClassesFrom(Matrix,dgeMatrix) importFrom(Matrix,colSums) importFrom(Matrix,sparse.model.matrix) @@ -83,6 +87,7 @@ importFrom(graphics,points) importFrom(graphics,title) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) +importFrom(methods,new) importFrom(stats,median) importFrom(stats,predict) importFrom(utils,head) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 6acd1e6b2646..cc16e18da8ee 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -526,6 +526,111 @@ setinfo.xgb.DMatrix <- function(object, name, info) { stop("setinfo: unknown info name ", name) } +#' @title Get Quantile Cuts from DMatrix +#' @description Get the quantile cuts (a.k.a. borders) from an `xgb.DMatrix` +#' that has been quantized for the histogram method (`tree_method="hist"`). +#' +#' These cuts are used in order to assign observations to bins - i.e. these are ordered +#' boundaries which are used to determine assignment condition `border_low < x < border_high`. 
+#' As such, the first and last bin will be outside of the range of the data, so as to include +#' all of the observations there. +#' +#' If a given column has 'n' bins, then there will be 'n+1' cuts / borders for that column, +#' which will be output in sorted order from lowest to highest. +#' +#' Different columns can have different numbers of bins according to their range. +#' @param dmat An `xgb.DMatrix` object, as returned by \link{xgb.DMatrix}. +#' @param output Output format for the quantile cuts. Possible options are:\itemize{ +#' \item `"list"` will return the output as a list with one entry per column, where +#' each column will have a numeric vector with the cuts. The list will be named if +#' `dmat` has column names assigned to it. +#' \item `"arrays"` will return a list with entries `indptr` (base-0 indexing) and +#' `data`. Here, the cuts for column 'i' are obtained by slicing 'data' from entries +#' `indptr[i]+1` to `indptr[i+1]`. +#' } +#' @return The quantile cuts, in the format specified by parameter `output`. 
+#' @examples +#' library(xgboost) +#' data(mtcars) +#' y <- mtcars$mpg +#' x <- as.matrix(mtcars[, -1]) +#' dm <- xgb.DMatrix(x, label = y, nthread = 1) +#' +#' # DMatrix is not quantized right away, but will be once a hist model is generated +#' model <- xgb.train( +#' data = dm, +#' params = list( +#' tree_method = "hist", +#' max_bin = 8, +#' nthread = 1 +#' ), +#' nrounds = 3 +#' ) +#' +#' # Now can get the quantile cuts +#' xgb.get.DMatrix.qcut(dm) +#' @export +xgb.get.DMatrix.qcut <- function(dmat, output = c("list", "arrays")) { # nolint + stopifnot(inherits(dmat, "xgb.DMatrix")) + output <- head(output, 1L) + stopifnot(output %in% c("list", "arrays")) + res <- .Call(XGDMatrixGetQuantileCut_R, dmat) + if (output == "arrays") { + return(res) + } else { + feature_names <- getinfo(dmat, "feature_name") + ncols <- length(res$indptr) - 1 + out <- lapply( + seq(1, ncols), + function(col) { + st <- res$indptr[col] + end <- res$indptr[col + 1] + if (end <= st) { + return(numeric()) + } + return(res$data[seq(1 + st, end)]) + } + ) + if (NROW(feature_names)) { + names(out) <- feature_names + } + return(out) + } +} + +#' @title Get Number of Non-Missing Entries in DMatrix +#' @param dmat An `xgb.DMatrix` object, as returned by \link{xgb.DMatrix}. +#' @return The number of non-missing entries in the DMatrix +#' @export +xgb.get.DMatrix.num.non.missing <- function(dmat) { # nolint + stopifnot(inherits(dmat, "xgb.DMatrix")) + return(.Call(XGDMatrixNumNonMissing_R, dmat)) +} + +#' @title Get DMatrix Data +#' @param dmat An `xgb.DMatrix` object, as returned by \link{xgb.DMatrix}. +#' @return The data held in the DMatrix, as a sparse CSR matrix (class `dgRMatrix` +#' from package `Matrix`). If it had feature names, these will be added as column names +#' in the output. 
+#' @export +xgb.get.DMatrix.data <- function(dmat) { + stopifnot(inherits(dmat, "xgb.DMatrix")) + res <- .Call(XGDMatrixGetDataAsCSR_R, dmat) + out <- methods::new("dgRMatrix") + nrows <- as.integer(length(res$indptr) - 1) + out@p <- res$indptr + out@j <- res$indices + out@x <- res$data + out@Dim <- as.integer(c(nrows, res$ncols)) + + feature_names <- getinfo(dmat, "feature_name") + dim_names <- list(NULL, NULL) + if (NROW(feature_names)) { + dim_names[[2L]] <- feature_names + } + out@Dimnames <- dim_names + return(out) +} #' Get a new DMatrix containing the specified rows of #' original xgb.DMatrix object diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index f61c535e228f..af6253a72792 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -82,7 +82,7 @@ NULL NULL # Various imports -#' @importClassesFrom Matrix dgCMatrix dgeMatrix +#' @importClassesFrom Matrix dgCMatrix dgeMatrix dgRMatrix #' @importFrom Matrix colSums #' @importFrom Matrix sparse.model.matrix #' @importFrom Matrix sparseVector @@ -98,6 +98,7 @@ NULL #' @importFrom data.table setnames #' @importFrom jsonlite fromJSON #' @importFrom jsonlite toJSON +#' @importFrom methods new #' @importFrom utils object.size str tail #' @importFrom stats predict #' @importFrom stats median diff --git a/R-package/man/xgb.get.DMatrix.data.Rd b/R-package/man/xgb.get.DMatrix.data.Rd new file mode 100644 index 000000000000..36783f5835ff --- /dev/null +++ b/R-package/man/xgb.get.DMatrix.data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{xgb.get.DMatrix.data} +\alias{xgb.get.DMatrix.data} +\title{Get DMatrix Data} +\usage{ +xgb.get.DMatrix.data(dmat) +} +\arguments{ +\item{dmat}{An \code{xgb.DMatrix} object, as returned by \link{xgb.DMatrix}.} +} +\value{ +The data held in the DMatrix, as a sparse CSR matrix (class \code{dgRMatrix} +from package \code{Matrix}). 
If it had feature names, these will be added as column names +in the output. +} +\description{ +Get DMatrix Data +} diff --git a/R-package/man/xgb.get.DMatrix.num.non.missing.Rd b/R-package/man/xgb.get.DMatrix.num.non.missing.Rd new file mode 100644 index 000000000000..4eb2697f8a00 --- /dev/null +++ b/R-package/man/xgb.get.DMatrix.num.non.missing.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{xgb.get.DMatrix.num.non.missing} +\alias{xgb.get.DMatrix.num.non.missing} +\title{Get Number of Non-Missing Entries in DMatrix} +\usage{ +xgb.get.DMatrix.num.non.missing(dmat) +} +\arguments{ +\item{dmat}{An \code{xgb.DMatrix} object, as returned by \link{xgb.DMatrix}.} +} +\value{ +The number of non-missing entries in the DMatrix +} +\description{ +Get Number of Non-Missing Entries in DMatrix +} diff --git a/R-package/man/xgb.get.DMatrix.qcut.Rd b/R-package/man/xgb.get.DMatrix.qcut.Rd new file mode 100644 index 000000000000..8f7c3da75878 --- /dev/null +++ b/R-package/man/xgb.get.DMatrix.qcut.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{xgb.get.DMatrix.qcut} +\alias{xgb.get.DMatrix.qcut} +\title{Get Quantile Cuts from DMatrix} +\usage{ +xgb.get.DMatrix.qcut(dmat, output = c("list", "arrays")) +} +\arguments{ +\item{dmat}{An \code{xgb.DMatrix} object, as returned by \link{xgb.DMatrix}.} + +\item{output}{Output format for the quantile cuts. Possible options are:\itemize{ +\item \code{"list"} will return the output as a list with one entry per column, where +each column will have a numeric vector with the cuts. The list will be named if +\code{dmat} has column names assigned to it. +\item \code{"arrays"} will return a list with entries \code{indptr} (base-0 indexing) and +\code{data}. Here, the cuts for column 'i' are obtained by slicing 'data' from entries +\code{indptr[i]+1} to \code{indptr[i+1]}. 
+}} +} +\value{ +The quantile cuts, in the format specified by parameter \code{output}. +} +\description{ +Get the quantile cuts (a.k.a. borders) from an \code{xgb.DMatrix} +that has been quantized for the histogram method (\code{tree_method="hist"}). + +These cuts are used in order to assign observations to bins - i.e. these are ordered +boundaries which are used to determine assignment condition \verb{border_low < x < border_high}. +As such, the first and last bin will be outside of the range of the data, so as to include +all of the observations there. + +If a given column has 'n' bins, then there will be 'n+1' cuts / borders for that column, +which will be output in sorted order from lowest to highest. + +Different columns can have different numbers of bins according to their range. +} +\examples{ +library(xgboost) +data(mtcars) +y <- mtcars$mpg +x <- as.matrix(mtcars[, -1]) +dm <- xgb.DMatrix(x, label = y, nthread = 1) + +# DMatrix is not quantized right away, but will be once a hist model is generated +model <- xgb.train( + data = dm, + params = list( + tree_method = "hist", + max_bin = 8, + nthread = 1 + ), + nrounds = 3 +) + +# Now can get the quantile cuts +xgb.get.DMatrix.qcut(dm) +} diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 8af5dbbf647a..dd13983f5b59 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -63,6 +63,7 @@ OBJECTS= \ $(PKGROOT)/src/gbm/gblinear.o \ $(PKGROOT)/src/gbm/gblinear_model.o \ $(PKGROOT)/src/data/adapter.o \ + $(PKGROOT)/src/data/array_interface.o \ $(PKGROOT)/src/data/simple_dmatrix.o \ $(PKGROOT)/src/data/data.o \ $(PKGROOT)/src/data/sparse_page_raw_format.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 60f754fef47e..46a862711dc6 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -63,6 +63,7 @@ OBJECTS= \ $(PKGROOT)/src/gbm/gblinear.o \ $(PKGROOT)/src/gbm/gblinear_model.o \ $(PKGROOT)/src/data/adapter.o \ + 
$(PKGROOT)/src/data/array_interface.o \ $(PKGROOT)/src/data/simple_dmatrix.o \ $(PKGROOT)/src/data/data.o \ $(PKGROOT)/src/data/sparse_page_raw_format.o \ diff --git a/R-package/src/init.c b/R-package/src/init.c index f957229af236..5eee8ebe6ab2 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -45,6 +45,9 @@ extern SEXP XGDMatrixCreateFromDF_R(SEXP, SEXP, SEXP); extern SEXP XGDMatrixGetStrFeatureInfo_R(SEXP, SEXP); extern SEXP XGDMatrixNumCol_R(SEXP); extern SEXP XGDMatrixNumRow_R(SEXP); +extern SEXP XGDMatrixGetQuantileCut_R(SEXP); +extern SEXP XGDMatrixNumNonMissing_R(SEXP); +extern SEXP XGDMatrixGetDataAsCSR_R(SEXP); extern SEXP XGDMatrixSaveBinary_R(SEXP, SEXP, SEXP); extern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP); extern SEXP XGDMatrixSetStrFeatureInfo_R(SEXP, SEXP, SEXP); @@ -84,6 +87,9 @@ static const R_CallMethodDef CallEntries[] = { {"XGDMatrixGetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixGetStrFeatureInfo_R, 2}, {"XGDMatrixNumCol_R", (DL_FUNC) &XGDMatrixNumCol_R, 1}, {"XGDMatrixNumRow_R", (DL_FUNC) &XGDMatrixNumRow_R, 1}, + {"XGDMatrixGetQuantileCut_R", (DL_FUNC) &XGDMatrixGetQuantileCut_R, 1}, + {"XGDMatrixNumNonMissing_R", (DL_FUNC) &XGDMatrixNumNonMissing_R, 1}, + {"XGDMatrixGetDataAsCSR_R", (DL_FUNC) &XGDMatrixGetDataAsCSR_R, 1}, {"XGDMatrixSaveBinary_R", (DL_FUNC) &XGDMatrixSaveBinary_R, 3}, {"XGDMatrixSetInfo_R", (DL_FUNC) &XGDMatrixSetInfo_R, 3}, {"XGDMatrixSetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixSetStrFeatureInfo_R, 3}, diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index fb05c33b46ec..60a3fe68b973 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -1,5 +1,5 @@ /** - * Copyright 2014-2023 by XGBoost Contributors + * Copyright 2014-2024, XGBoost Contributors */ #include #include @@ -9,9 +9,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -20,14 +22,14 @@ #include "../../src/c_api/c_api_error.h" #include "../../src/c_api/c_api_utils.h" // 
MakeSparseFromPtr #include "../../src/common/threading_utils.h" +#include "../../src/data/array_interface.h" // for ArrayInterface #include "./xgboost_R.h" // Must follow other includes. namespace { - struct ErrorWithUnwind : public std::exception {}; -void ThrowExceptionFromRError(void *unused, Rboolean jump) { +void ThrowExceptionFromRError(void *, Rboolean jump) { if (jump) { throw ErrorWithUnwind(); } @@ -49,6 +51,30 @@ SEXP SafeMkChar(const char *c_str, SEXP continuation_token) { continuation_token); } +SEXP WrappedAllocReal(void *void_ptr) { + size_t *size = static_cast(void_ptr); + return Rf_allocVector(REALSXP, *size); +} + +SEXP SafeAllocReal(size_t size, SEXP continuation_token) { + return R_UnwindProtect( + WrappedAllocReal, static_cast(&size), + ThrowExceptionFromRError, nullptr, + continuation_token); +} + +SEXP WrappedAllocInteger(void *void_ptr) { + size_t *size = static_cast(void_ptr); + return Rf_allocVector(INTSXP, *size); +} + +SEXP SafeAllocInteger(size_t size, SEXP continuation_token) { + return R_UnwindProtect( + WrappedAllocInteger, static_cast(&size), + ThrowExceptionFromRError, nullptr, + continuation_token); +} + [[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) { SEXP mat_dims = Rf_getAttrib(R_mat, R_DimSymbol); if (Rf_xlength(mat_dims) > 2) { @@ -136,6 +162,37 @@ SEXP SafeMkChar(const char *c_str, SEXP continuation_token) { jconfig["nthread"] = Rf_asInteger(n_threads); return Json::Dump(jconfig); } + +// Allocate a R vector and copy an array interface encoded object to it. +[[nodiscard]] SEXP CopyArrayToR(const char *array_str, SEXP ctoken) { + xgboost::ArrayInterface<1> array{xgboost::StringView{array_str}}; + // R supports only int and double. 
+  bool is_int =
+      xgboost::DispatchDType(array.type, [](auto t) { return std::is_integral_v<decltype(t)>; });
+  bool is_float = xgboost::DispatchDType(
+      array.type, [](auto v) { return std::is_floating_point_v<decltype(v)>; });
+  CHECK(is_int || is_float) << "Internal error: Invalid DType.";
+  CHECK(array.is_contiguous) << "Internal error: Return by XGBoost should be contiguous";
+
+  // Allocate memory in R
+  SEXP out =
+      Rf_protect(is_int ? SafeAllocInteger(array.n, ctoken) : SafeAllocReal(array.n, ctoken));
+
+  xgboost::DispatchDType(array.type, [&](auto t) {
+    using T = decltype(t);
+    auto in_ptr = static_cast<T const *>(array.data);
+    if (is_int) {
+      auto out_ptr = INTEGER(out);
+      std::copy_n(in_ptr, array.n, out_ptr);
+    } else {
+      auto out_ptr = REAL(out);
+      std::copy_n(in_ptr, array.n, out_ptr);
+    }
+  });
+
+  Rf_unprotect(1);
+  return out;
+}
 }  // namespace
 
 struct RRNGStateController {
@@ -540,6 +597,73 @@ XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle) {
   return ScalarInteger(static_cast<int>(ncol));
 }
 
+XGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle) {
+  const char *out_names[] = {"indptr", "data", ""};
+  SEXP continuation_token = Rf_protect(R_MakeUnwindCont());
+  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
+  R_API_BEGIN();
+  const char *out_indptr;
+  const char *out_data;
+  CHECK_CALL(XGDMatrixGetQuantileCut(R_ExternalPtrAddr(handle), "{}", &out_indptr, &out_data));
+  try {
+    SET_VECTOR_ELT(out, 0, CopyArrayToR(out_indptr, continuation_token));
+    SET_VECTOR_ELT(out, 1, CopyArrayToR(out_data, continuation_token));
+  } catch (ErrorWithUnwind &e) {
+    R_ContinueUnwind(continuation_token);
+  }
+  R_API_END();
+  Rf_unprotect(2);
+  return out;
+}
+
+XGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle) {
+  SEXP out = Rf_protect(Rf_allocVector(REALSXP, 1));
+  R_API_BEGIN();
+  bst_ulong out_;
+  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &out_));
+  REAL(out)[0] = static_cast<double>(out_);
+  R_API_END();
+  Rf_unprotect(1);
+  return out;
+}
+
+XGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle)
{
+  const char *out_names[] = {"indptr", "indices", "data", "ncols", ""};
+  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
+  R_API_BEGIN();
+
+  bst_ulong nrows, ncols, nnz;
+  CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrows));
+  CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncols));
+  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &nnz));
+  if (std::max(nrows, ncols) > std::numeric_limits<int>::max()) {
+    Rf_error("%s", "Error: resulting DMatrix data does not fit into R 'dgRMatrix'.");
+  }
+
+  SET_VECTOR_ELT(out, 0, Rf_allocVector(INTSXP, nrows + 1));
+  SET_VECTOR_ELT(out, 1, Rf_allocVector(INTSXP, nnz));
+  SET_VECTOR_ELT(out, 2, Rf_allocVector(REALSXP, nnz));
+  SET_VECTOR_ELT(out, 3, Rf_ScalarInteger(ncols));
+
+  std::unique_ptr<bst_ulong[]> indptr(new bst_ulong[nrows + 1]);
+  std::unique_ptr<unsigned[]> indices(new unsigned[nnz]);
+  std::unique_ptr<float[]> data(new float[nnz]);
+
+  CHECK_CALL(XGDMatrixGetDataAsCSR(R_ExternalPtrAddr(handle),
+                                   "{}",
+                                   indptr.get(),
+                                   indices.get(),
+                                   data.get()));
+
+  std::copy(indptr.get(), indptr.get() + nrows + 1, INTEGER(VECTOR_ELT(out, 0)));
+  std::copy(indices.get(), indices.get() + nnz, INTEGER(VECTOR_ELT(out, 1)));
+  std::copy(data.get(), data.get() + nnz, REAL(VECTOR_ELT(out, 2)));
+
+  R_API_END();
+  Rf_unprotect(1);
+  return out;
+}
+
 // functions related to booster
 void _BoosterFinalizer(SEXP ext) {
   if (R_ExternalPtrAddr(ext) == NULL) return;
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index 2e874e3a6a2a..4e3458957932 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -143,6 +143,31 @@ XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle);
  */
 XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle);
 
+/*!
+ * \brief return the quantile cuts used for the histogram method
+ * \param handle an instance of data matrix
+ * \return A list with entries 'indptr' and 'data'
+ */
+XGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle);
+
+/*!
+ * \brief get the number of non-missing entries in a dmatrix
+ * \param handle an instance of data matrix
+ * \return the number of non-missing entries
+ */
+XGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle);
+
+/*!
+ * \brief get the data in a dmatrix in CSR format
+ * \param handle an instance of data matrix
+ * \return R list with the following entries in this order:
+ * - 'indptr'
+ * - 'indices'
+ * - 'data'
+ * - 'ncols'
+ */
+XGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle);
+
+/*!
+ * \brief create xgboost learner
+ * \param dmats a list of dmatrix handles that will be cached
diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R
index 55a6996874fb..81ac884d0a1e 100644
--- a/R-package/tests/testthat/test_dmatrix.R
+++ b/R-package/tests/testthat/test_dmatrix.R
@@ -375,3 +375,62 @@ test_that("xgb.DMatrix: can take multi-dimensional 'base_margin'", {
   )
   expect_equal(pred_only_x, pred_w_base - b, tolerance = 1e-5)
 })
+
+test_that("xgb.DMatrix: number of non-missing matches data", {
+  x <- matrix(1:10, nrow = 5)
+  dm1 <- xgb.DMatrix(x)
+  expect_equal(xgb.get.DMatrix.num.non.missing(dm1), 10)
+
+  x[2, 2] <- NA
+  x[4, 1] <- NA
+  dm2 <- xgb.DMatrix(x)
+  expect_equal(xgb.get.DMatrix.num.non.missing(dm2), 8)
+})
+
+test_that("xgb.DMatrix: retrieving data as CSR", {
+  data(mtcars)
+  dm <- xgb.DMatrix(as.matrix(mtcars))
+  csr <- xgb.get.DMatrix.data(dm)
+  expect_equal(dim(csr), dim(mtcars))
+  expect_equal(colnames(csr), colnames(mtcars))
+  expect_equal(unname(as.matrix(csr)), unname(as.matrix(mtcars)), tolerance = 1e-6)
+})
+
+test_that("xgb.DMatrix: quantile cuts look correct", {
+  data(mtcars)
+  y <- mtcars$mpg
+  x <- as.matrix(mtcars[, -1])
+  dm <- xgb.DMatrix(x, label = y)
+  model <- xgb.train(
+    data = dm,
+    params = list(
+      tree_method = "hist",
+      max_bin = 8,
+      nthread = 1
+    ),
+    nrounds = 3
+  )
+  qcut_list <- xgb.get.DMatrix.qcut(dm, "list")
+  qcut_arrays <- xgb.get.DMatrix.qcut(dm, "arrays")
+
+  
expect_equal(length(qcut_arrays), 2) + expect_equal(names(qcut_arrays), c("indptr", "data")) + expect_equal(length(qcut_arrays$indptr), ncol(x) + 1) + expect_true(min(diff(qcut_arrays$indptr)) > 0) + + col_min <- apply(x, 2, min) + col_max <- apply(x, 2, max) + + expect_equal(length(qcut_list), ncol(x)) + expect_equal(names(qcut_list), colnames(x)) + lapply( + seq(1, ncol(x)), + function(col) { + cuts <- qcut_list[[col]] + expect_true(min(diff(cuts)) > 0) + expect_true(col_min[col] > cuts[1]) + expect_true(col_max[col] < cuts[length(cuts)]) + expect_true(length(cuts) <= 9) + } + ) +}) diff --git a/src/data/array_interface.cc b/src/data/array_interface.cc new file mode 100644 index 000000000000..06b9ed00c870 --- /dev/null +++ b/src/data/array_interface.cc @@ -0,0 +1,13 @@ +/** + * Copyright 2019-2024, XGBoost Contributors + */ +#include "array_interface.h" + +#include "../common/common.h" // for AssertGPUSupport + +namespace xgboost { +#if !defined(XGBOOST_USE_CUDA) +void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } +bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } +#endif // !defined(XGBOOST_USE_CUDA) +} // namespace xgboost diff --git a/src/data/array_interface.h b/src/data/array_interface.h index 0170e6a847d3..6f2438f37196 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -375,11 +375,6 @@ struct ToDType { static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8; }; -#if !defined(XGBOOST_USE_CUDA) -inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } -inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } -#endif // !defined(XGBOOST_USE_CUDA) - /** * \brief A type erased view over __array_interface__ protocol defined by numpy *