Skip to content

Commit

Permalink
[R] rename Quality -> Gain (#9938)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes authored Dec 31, 2023
1 parent 8b9c98b commit 73713de
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 22 deletions.
10 changes: 5 additions & 5 deletions R-package/R/xgb.model.dt.tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#' - `Yes`: ID of the next node when the split condition is met.
#' - `No`: ID of the next node when the split condition is not met.
#' - `Missing`: ID of the next node when the branch value is missing.
#' - `Quality`: either the split gain (change in loss) or the leaf value.
#' - `Gain`: either the split gain (change in loss) or the leaf value.
#' - `Cover`: metric related to the number of observations either seen by a split
#' or collected by a leaf during training.
#'
Expand Down Expand Up @@ -122,7 +122,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
# parse branch lines
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
td[
isLeaf == FALSE,
(branch_cols) := {
Expand All @@ -132,7 +132,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
if (length(xtr) == 0) {
as.data.table(
list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Quality = "NA", Cover = "NA")
list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Gain = "NA", Cover = "NA")
)
} else {
as.data.table(xtr)
Expand All @@ -152,7 +152,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,

# parse leaf lines
leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
leaf_cols <- c("Feature", "Quality", "Cover")
leaf_cols <- c("Feature", "Gain", "Cover")
td[
isLeaf == TRUE,
(leaf_cols) := {
Expand All @@ -167,7 +167,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
]

# convert some columns to numeric
numeric_cols <- c("Split", "Quality", "Cover")
numeric_cols <- c("Split", "Gain", "Cover")
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
if (use_int_id) {
int_cols <- c("Yes", "No", "Missing")
Expand Down
4 changes: 2 additions & 2 deletions R-package/R/xgb.plot.deepness.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
stop("Model tree columns are not as expected!\n",
" Note that this function works only for tree models.")

dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID")
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Gain)], by = "ID")
setkeyv(dt_depths, c("Tree", "ID"))
# count by depth levels, and also calculate average cover at a depth
dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
Expand Down Expand Up @@ -157,6 +157,6 @@ get.leaf.depth <- function(dt_tree) {
# They are mainly column names that data.table resolves inside `[...]` calls.
globalVariables(
c(
".N", "N", "Depth", "Quality", "Cover", "Tree", "ID", "Yes", "No", "Feature", "Leaf", "Weight"
".N", "N", "Depth", "Gain", "Cover", "Tree", "ID", "Yes", "No", "Feature", "Leaf", "Weight"
)
)
4 changes: 2 additions & 2 deletions R-package/R/xgb.plot.multi.trees.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,13 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]]))

nodes.dt <- tree.matrix[
, .(Quality = sum(Quality))
, .(Gain = sum(Gain))
, by = .(abs.node.position, Feature)
][, .(Text = paste0(
paste0(
Feature[seq_len(min(length(Feature), features_keep))],
" (",
format(Quality[seq_len(min(length(Quality), features_keep))], digits = 5),
format(Gain[seq_len(min(length(Gain), features_keep))], digits = 5),
")"
),
collapse = "\n"
Expand Down
4 changes: 2 additions & 2 deletions R-package/R/xgb.plot.tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot

dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)

dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Quality)]
dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Gain)]
if (show_node_id)
dt[, label := paste0(ID, ": ", label)]
dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
Expand Down Expand Up @@ -199,4 +199,4 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
# Avoid "no visible binding for global variable" notes during CRAN check.
# These names are never declared as variables in the package code:
# they are mainly column names that data.table resolves inside `[...]` calls.
globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
globalVariables(c("Feature", "ID", "Cover", "Gain", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
2 changes: 1 addition & 1 deletion R-package/man/xgb.model.dt.tree.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ test_that("xgb.Booster serializing as R object works", {

test_that("xgb.model.dt.tree works with and without feature names", {
.skip_if_vcd_not_available()
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
expect_equal(names.dt.trees, names(dt.tree))
if (!flag_32bit)
Expand Down Expand Up @@ -341,7 +341,7 @@ test_that("xgb.importance works with and without feature names", {
trees = trees
)[
Feature != "Leaf", .(
Gain = sum(Quality),
Gain = sum(Gain),
Cover = sum(Cover),
Frequency = .N
),
Expand Down
16 changes: 8 additions & 8 deletions R-package/tests/testthat/test_update.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ test_that("updating the model works", {
# should be the same evaluation but different gains and larger cover
expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
if (!win32_flag) {
expect_equal(tr2[Feature == 'Leaf']$Quality, tr2r[Feature == 'Leaf']$Quality)
expect_equal(tr2[Feature == 'Leaf']$Gain, tr2r[Feature == 'Leaf']$Gain)
}
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2r[Feature != 'Leaf']$Quality)), 100)
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2r[Feature != 'Leaf']$Gain)), 100)
expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)

# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
Expand All @@ -72,8 +72,8 @@ test_that("updating the model works", {
tr2u <- xgb.model.dt.tree(model = bst2u)
# should be the same evaluation but different gains and larger cover
expect_equal(bst2$evaluation_log, bst2u$evaluation_log)
expect_equal(tr2[Feature == 'Leaf']$Quality, tr2u[Feature == 'Leaf']$Quality)
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2u[Feature != 'Leaf']$Quality)), 100)
expect_equal(tr2[Feature == 'Leaf']$Gain, tr2u[Feature == 'Leaf']$Gain)
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2u[Feature != 'Leaf']$Gain)), 100)
expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)
# the results should be the same as for the model with an extra 'refresh' updater
expect_equal(bst2r$evaluation_log, bst2u$evaluation_log)
Expand All @@ -87,8 +87,8 @@ test_that("updating the model works", {
tr1ut <- xgb.model.dt.tree(model = bst1ut)
# should be the same evaluations but different gains and smaller cover (test data is smaller)
expect_equal(bst1$evaluation_log, bst1ut$evaluation_log)
expect_equal(tr1[Feature == 'Leaf']$Quality, tr1ut[Feature == 'Leaf']$Quality)
expect_gt(sum(abs(tr1[Feature != 'Leaf']$Quality - tr1ut[Feature != 'Leaf']$Quality)), 100)
expect_equal(tr1[Feature == 'Leaf']$Gain, tr1ut[Feature == 'Leaf']$Gain)
expect_gt(sum(abs(tr1[Feature != 'Leaf']$Gain - tr1ut[Feature != 'Leaf']$Gain)), 100)
expect_lt(sum(tr1ut$Cover) / sum(tr1$Cover), 0.5)
})

Expand All @@ -111,7 +111,7 @@ test_that("updating works for multiclass & multitree", {

# should be the same evaluation but different gains and larger cover
expect_equal(bst0$evaluation_log, bst0u$evaluation_log)
expect_equal(tr0[Feature == 'Leaf']$Quality, tr0u[Feature == 'Leaf']$Quality)
expect_gt(sum(abs(tr0[Feature != 'Leaf']$Quality - tr0u[Feature != 'Leaf']$Quality)), 100)
expect_equal(tr0[Feature == 'Leaf']$Gain, tr0u[Feature == 'Leaf']$Gain)
expect_gt(sum(abs(tr0[Feature != 'Leaf']$Gain - tr0u[Feature != 'Leaf']$Gain)), 100)
expect_gt(sum(tr0u$Cover) / sum(tr0$Cover), 1.5)
})

0 comments on commit 73713de

Please sign in to comment.