Skip to content

Commit

Permalink
[backport] Build R 1.7.6 package. (#9845)
Browse files Browse the repository at this point in the history
- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
  • Loading branch information
trivialfis authored Dec 6, 2023
1 parent 36eb41c commit 4362701
Show file tree
Hide file tree
Showing 54 changed files with 1,182 additions and 378 deletions.
2 changes: 2 additions & 0 deletions R-package/.Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
^.*\.Rproj$
^\.Rproj\.user$
README.md
^doc$
^Meta$
2 changes: 1 addition & 1 deletion R-package/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 1.7.6.1
Date: 2023-06-16
Date: 2023-12-05
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "[email protected]"),
Expand Down
22 changes: 14 additions & 8 deletions R-package/R/callbacks.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ cb.print.evaluation <- function(period = 1, showsd = TRUE) {
i == env$begin_iteration ||
i == env$end_iteration) {
stdev <- if (showsd) env$bst_evaluation_err else NULL
msg <- format.eval.string(i, env$bst_evaluation, stdev)
msg <- .format_eval_string(i, env$bst_evaluation, stdev)
cat(msg, '\n')
}
}
Expand Down Expand Up @@ -380,7 +380,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
if ((maximize && score > best_score) ||
(!maximize && score < best_score)) {

best_msg <<- format.eval.string(i, env$bst_evaluation, env$bst_evaluation_err)
best_msg <<- .format_eval_string(
i, env$bst_evaluation, env$bst_evaluation_err
)
best_score <<- score
best_iteration <<- i
best_ntreelimit <<- best_iteration * env$num_parallel_tree
Expand Down Expand Up @@ -555,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' @examples
#' #### Binary classification:
#' #
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
#' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
#' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
Expand Down Expand Up @@ -592,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' #### Multiclass classification:
#' #
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2)
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For the default linear updater 'shotgun' it sometimes is helpful
#' # to use smaller eta to reduce instability
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 70, eta = 0.5,
Expand Down Expand Up @@ -752,7 +758,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
#

# Format the evaluation metric string
format.eval.string <- function(iter, eval_res, eval_err = NULL) {
.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
if (length(eval_res) == 0)
stop('no evaluation results')
enames <- names(eval_res)
Expand Down
26 changes: 21 additions & 5 deletions R-package/R/xgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#'
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
Expand Down Expand Up @@ -329,8 +334,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
object <- xgb.Booster.complete(object, saveraw = FALSE)

if (!inherits(newdata, "xgb.DMatrix"))
newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
if (!inherits(newdata, "xgb.DMatrix")) {
config <- jsonlite::fromJSON(xgb.config(object))
nthread <- strtoi(config$learner$generic_param$nthread)
newdata <- xgb.DMatrix(
newdata,
missing = missing, nthread = NVL(nthread, -1)
)
}
if (!is.null(object[["feature_names"]]) &&
!is.null(colnames(newdata)) &&
!identical(object[["feature_names"]], colnames(newdata)))
Expand Down Expand Up @@ -620,10 +631,15 @@ xgb.attributes <- function(object) {
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
#' )
#' config <- xgb.config(bst)
#'
#' @rdname xgb.config
Expand Down
7 changes: 6 additions & 1 deletion R-package/R/xgb.DMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' dtrain <- with(
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
Expand Down
15 changes: 12 additions & 3 deletions R-package/R/xgb.load.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,23 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
#' nthread = nthread,
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#'
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.load <- function(modelfile) {
if (is.null(modelfile))
Expand Down
5 changes: 4 additions & 1 deletion R-package/R/xgb.model.dt.tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,12 @@
#' # Basic use:
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
#'
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
#'
Expand Down
7 changes: 5 additions & 2 deletions R-package/R/xgb.plot.deepness.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,13 @@
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' # Change max_depth to a higher number to get a more significant result
#' ## Change max_depth to a higher number to get a more significant result
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
#' eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
#' eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
#' subsample = 0.5, min_child_weight = 2)
#'
#' xgb.plot.deepness(bst)
Expand Down
9 changes: 7 additions & 2 deletions R-package/R/xgb.plot.importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,14 @@
#'
#' @examples
#' data(agaricus.train)
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgboost(
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
#' )
#'
#' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
#'
Expand Down
13 changes: 9 additions & 4 deletions R-package/R/xgb.plot.multi.trees.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,15 @@
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#' eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
#' min_child_weight = 50, verbose = 0)
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#' eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
#' min_child_weight = 50, verbose = 0
#' )
#'
#' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
#' print(p)
Expand Down
16 changes: 10 additions & 6 deletions R-package/R/xgb.plot.shap.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,14 @@
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' nrounds <- 20
#'
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
#' eta = 0.1, max_depth = 3, subsample = .5,
#' method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
#' method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
#'
#' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
#' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
Expand All @@ -85,12 +90,11 @@
#'
#' # multiclass example - plots for each class separately:
#' nclass <- 3
#' nrounds <- 20
#' x <- as.matrix(iris[, -5])
#' set.seed(123)
#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
#' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
#' objective = "multi:softprob", num_class = nclass, verbose = 0)
#' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
#' col <- rgb(0, 0, 1, 0.5)
Expand Down Expand Up @@ -193,7 +197,7 @@ xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
#' hence allows us to see which features have a negative / positive contribution
#' on the model prediction, and whether the contribution is different for larger
#' or smaller values of the feature. We effectively try to replicate the
#' \code{summary_plot} function from https://github.com/slundberg/shap.
#' \code{summary_plot} function from https://github.com/shap/shap
#'
#' @inheritParams xgb.plot.shap
#'
Expand All @@ -202,7 +206,7 @@ xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
#'
#' @examples # See \code{\link{xgb.plot.shap}}.
#' @seealso \code{\link{xgb.plot.shap}}, \code{\link{xgb.ggplot.shap.summary}},
#' \url{https://github.com/slundberg/shap}
#' \url{https://github.com/shap/shap}
xgb.plot.shap.summary <- function(data, shap_contrib = NULL, features = NULL, top_n = 10, model = NULL,
trees = NULL, target_class = NULL, approxcontrib = FALSE, subsample = NULL) {
# Only ggplot implementation is available.
Expand Down
14 changes: 11 additions & 3 deletions R-package/R/xgb.save.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,22 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
#' nthread = nthread,
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.save <- function(model, fname) {
if (typeof(fname) != "character")
Expand Down
9 changes: 7 additions & 2 deletions R-package/R/xgb.save.raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,18 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
#'
#' raw <- xgb.save.raw(bst)
#' bst <- xgb.load.raw(raw)
#' pred <- predict(bst, test$data)
#'
#' @export
xgb.save.raw <- function(model, raw_format = "deprecated") {
Expand Down
Loading

0 comments on commit 4362701

Please sign in to comment.