Skip to content

Commit 434865f

Browse files
authored
Merge branch 'dmlc:release_2.1.0' into release_2.1.0
2 parents 54ab2e9 + 600be4d commit 434865f

File tree

21 files changed

+114
-48
lines changed

21 files changed

+114
-48
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ if(PLUGIN_SYCL)
44
string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
55
endif()
66

7-
project(xgboost LANGUAGES CXX C VERSION 2.1.2)
7+
project(xgboost LANGUAGES CXX C VERSION 2.1.3)
88
include(cmake/Utils.cmake)
99
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
1010

R-package/DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: xgboost
22
Type: Package
33
Title: Extreme Gradient Boosting
4-
Version: 2.1.2.1
5-
Date: 2024-10-23
4+
Version: 2.1.3.1
5+
Date: 2024-11-26
66
Authors@R: c(
77
person("Tianqi", "Chen", role = c("aut"),
88
email = "[email protected]"),

R-package/configure

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#! /bin/sh
22
# Guess values for system-dependent variables and create Makefiles.
3-
# Generated by GNU Autoconf 2.71 for xgboost 2.1.2.
3+
# Generated by GNU Autoconf 2.71 for xgboost 2.1.3.
44
#
55
#
66
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
607607
# Identity of this package.
608608
PACKAGE_NAME='xgboost'
609609
PACKAGE_TARNAME='xgboost'
610-
PACKAGE_VERSION='2.1.2'
611-
PACKAGE_STRING='xgboost 2.1.2'
610+
PACKAGE_VERSION='2.1.3'
611+
PACKAGE_STRING='xgboost 2.1.3'
612612
PACKAGE_BUGREPORT=''
613613
PACKAGE_URL=''
614614

@@ -1259,7 +1259,7 @@ if test "$ac_init_help" = "long"; then
12591259
# Omit some internal or obsolete options to make the list less imposing.
12601260
# This message is too long to be a string in the A/UX 3.1 sh.
12611261
cat <<_ACEOF
1262-
\`configure' configures xgboost 2.1.2 to adapt to many kinds of systems.
1262+
\`configure' configures xgboost 2.1.3 to adapt to many kinds of systems.
12631263
12641264
Usage: $0 [OPTION]... [VAR=VALUE]...
12651265
@@ -1321,7 +1321,7 @@ fi
13211321

13221322
if test -n "$ac_init_help"; then
13231323
case $ac_init_help in
1324-
short | recursive ) echo "Configuration of xgboost 2.1.2:";;
1324+
short | recursive ) echo "Configuration of xgboost 2.1.3:";;
13251325
esac
13261326
cat <<\_ACEOF
13271327
@@ -1404,7 +1404,7 @@ fi
14041404
test -n "$ac_init_help" && exit $ac_status
14051405
if $ac_init_version; then
14061406
cat <<\_ACEOF
1407-
xgboost configure 2.1.2
1407+
xgboost configure 2.1.3
14081408
generated by GNU Autoconf 2.71
14091409
14101410
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1603,7 +1603,7 @@ cat >config.log <<_ACEOF
16031603
This file contains any messages produced by compilers while
16041604
running configure, to aid debugging if configure makes a mistake.
16051605
1606-
It was created by xgboost $as_me 2.1.2, which was
1606+
It was created by xgboost $as_me 2.1.3, which was
16071607
generated by GNU Autoconf 2.71. Invocation command line was
16081608
16091609
$ $0$ac_configure_args_raw
@@ -3709,7 +3709,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
37093709
# report actual input values of CONFIG_FILES etc. instead of their
37103710
# values after options handling.
37113711
ac_log="
3712-
This file was extended by xgboost $as_me 2.1.2, which was
3712+
This file was extended by xgboost $as_me 2.1.3, which was
37133713
generated by GNU Autoconf 2.71. Invocation command line was
37143714
37153715
CONFIG_FILES = $CONFIG_FILES
@@ -3773,7 +3773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
37733773
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
37743774
ac_cs_config='$ac_cs_config_escaped'
37753775
ac_cs_version="\\
3776-
xgboost config.status 2.1.2
3776+
xgboost config.status 2.1.3
37773777
configured by $0, generated by GNU Autoconf 2.71,
37783778
with options \\"\$ac_cs_config\\"
37793779

R-package/configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
AC_PREREQ(2.69)
44

5-
AC_INIT([xgboost],[2.1.2],[],[xgboost],[])
5+
AC_INIT([xgboost],[2.1.3],[],[xgboost],[])
66

77
: ${R_HOME=`R RHOME`}
88
if test -z "${R_HOME}"; then

include/xgboost/version_config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@
66

77
#define XGBOOST_VER_MAJOR 2 /* NOLINT */
88
#define XGBOOST_VER_MINOR 1 /* NOLINT */
9-
#define XGBOOST_VER_PATCH 2 /* NOLINT */
9+
#define XGBOOST_VER_PATCH 3 /* NOLINT */
1010

1111
#endif // XGBOOST_VERSION_CONFIG_H_

jvm-packages/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
<packaging>pom</packaging>
1111
<name>XGBoost JVM Package</name>
1212
<description>JVM Package for XGBoost</description>

jvm-packages/xgboost4j-example/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
<parent>
77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
</parent>
1111
<name>xgboost4j-example</name>
1212
<artifactId>xgboost4j-example_2.12</artifactId>
13-
<version>2.1.2</version>
13+
<version>2.1.3</version>
1414
<packaging>jar</packaging>
1515
<build>
1616
<plugins>

jvm-packages/xgboost4j-flink/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
<parent>
77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
</parent>
1111

1212
<name>xgboost4j-flink</name>
1313
<artifactId>xgboost4j-flink_2.12</artifactId>
14-
<version>2.1.2</version>
14+
<version>2.1.3</version>
1515
<properties>
1616
<flink-ml.version>2.2.0</flink-ml.version>
1717
</properties>

jvm-packages/xgboost4j-gpu/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
<parent>
77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
</parent>
1111
<artifactId>xgboost4j-gpu_2.12</artifactId>
1212
<name>xgboost4j-gpu</name>
13-
<version>2.1.2</version>
13+
<version>2.1.3</version>
1414
<packaging>jar</packaging>
1515

1616
<dependencies>

jvm-packages/xgboost4j-spark-gpu/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
</parent>
1111
<name>xgboost4j-spark-gpu</name>
1212
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>

jvm-packages/xgboost4j-spark/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
</parent>
1111
<name>xgboost4j-spark</name>
1212
<artifactId>xgboost4j-spark_2.12</artifactId>

jvm-packages/xgboost4j/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
<parent>
77
<groupId>ml.dmlc</groupId>
88
<artifactId>xgboost-jvm_2.12</artifactId>
9-
<version>2.1.2</version>
9+
<version>2.1.3</version>
1010
</parent>
1111
<name>xgboost4j</name>
1212
<artifactId>xgboost4j_2.12</artifactId>
13-
<version>2.1.2</version>
13+
<version>2.1.3</version>
1414
<packaging>jar</packaging>
1515

1616
<dependencies>

python-package/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ authors = [
1414
{ name = "Hyunsu Cho", email = "[email protected]" },
1515
{ name = "Jiaming Yuan", email = "[email protected]" }
1616
]
17-
version = "2.1.2"
17+
version = "2.1.3"
1818
requires-python = ">=3.8"
1919
license = { text = "Apache-2.0" }
2020
classifiers = [

python-package/xgboost/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.1.2
1+
2.1.3

python-package/xgboost/core.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,6 +2489,7 @@ def inplace_predict(
24892489
_arrow_transform,
24902490
_is_arrow,
24912491
_is_cudf_df,
2492+
_is_cudf_pandas,
24922493
_is_cupy_alike,
24932494
_is_list,
24942495
_is_np_array_like,
@@ -2498,6 +2499,9 @@ def inplace_predict(
24982499
_transform_pandas_df,
24992500
)
25002501

2502+
if _is_cudf_pandas(data):
2503+
data = data._fsproxy_fast # pylint: disable=protected-access
2504+
25012505
enable_categorical = True
25022506
if _is_arrow(data):
25032507
data = _arrow_transform(data)

python-package/xgboost/data.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,16 @@ def _is_cudf_df(data: DataType) -> bool:
835835
return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame")
836836

837837

838+
def _is_cudf_pandas(data: DataType) -> bool:
839+
"""Must go before both pandas and cudf checks."""
840+
return (
841+
lazy_isinstance(data, "pandas.core.frame", "DataFrame")
842+
or lazy_isinstance(data, "pandas.core.series", "Series")
843+
) and lazy_isinstance(
844+
type(data), "cudf.pandas.fast_slow_proxy", "_FastSlowProxyMeta"
845+
)
846+
847+
838848
def _get_cudf_cat_predicate() -> Callable[[Any], bool]:
839849
try:
840850
from cudf import CategoricalDtype
@@ -1187,6 +1197,8 @@ def dispatch_data_backend(
11871197
)
11881198
if _is_arrow(data):
11891199
data = _arrow_transform(data)
1200+
if _is_cudf_pandas(data):
1201+
data = data._fsproxy_fast # pylint: disable=protected-access
11901202
if _is_pandas_series(data):
11911203
import pandas as pd
11921204

@@ -1327,6 +1339,8 @@ def dispatch_meta_backend(
13271339
return
13281340
if _is_arrow(data):
13291341
data = _arrow_transform(data)
1342+
if _is_cudf_pandas(data):
1343+
data = data._fsproxy_fast # pylint: disable=protected-access
13301344
if _is_pandas_df(data):
13311345
_meta_from_pandas_df(data, name, dtype=dtype, handle=handle)
13321346
return
@@ -1398,6 +1412,8 @@ def _proxy_transform(
13981412
feature_types: Optional[FeatureTypes],
13991413
enable_categorical: bool,
14001414
) -> TransformedData:
1415+
if _is_cudf_pandas(data):
1416+
data = data._fsproxy_fast # pylint: disable=protected-access
14011417
if _is_cudf_df(data) or _is_cudf_ser(data):
14021418
return _transform_cudf_df(
14031419
data, feature_names, feature_types, enable_categorical

python-package/xgboost/spark/core.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@ def _get_unwrapped_vec_cols(feature_col: Column) -> List[Column]:
597597
)
598598

599599

600+
_MODEL_CHUNK_SIZE = 4096 * 1024
601+
602+
600603
class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
601604
_input_kwargs: Dict[str, Any]
602605

@@ -1091,25 +1094,27 @@ def _train_booster(
10911094
context.barrier()
10921095

10931096
if context.partitionId() == 0:
1094-
yield pd.DataFrame(
1095-
data={
1096-
"config": [booster.save_config()],
1097-
"booster": [booster.save_raw("json").decode("utf-8")],
1098-
}
1099-
)
1097+
config = booster.save_config()
1098+
yield pd.DataFrame({"data": [config]})
1099+
booster_json = booster.save_raw("json").decode("utf-8")
1100+
1101+
for offset in range(0, len(booster_json), _MODEL_CHUNK_SIZE):
1102+
booster_chunk = booster_json[offset : offset + _MODEL_CHUNK_SIZE]
1103+
yield pd.DataFrame({"data": [booster_chunk]})
11001104

11011105
def _run_job() -> Tuple[str, str]:
11021106
rdd = (
11031107
dataset.mapInPandas(
11041108
_train_booster, # type: ignore
1105-
schema="config string, booster string",
1109+
schema="data string",
11061110
)
11071111
.rdd.barrier()
11081112
.mapPartitions(lambda x: x)
11091113
)
11101114
rdd_with_resource = self._try_stage_level_scheduling(rdd)
1111-
ret = rdd_with_resource.collect()[0]
1112-
return ret[0], ret[1]
1115+
ret = rdd_with_resource.collect()
1116+
data = [v[0] for v in ret]
1117+
return data[0], "".join(data[1:])
11131118

11141119
get_logger(_LOG_TAG).info(
11151120
"Running xgboost-%s on %s workers with"
@@ -1690,7 +1695,12 @@ def saveImpl(self, path: str) -> None:
16901695
_SparkXGBSharedReadWrite.saveMetadata(self.instance, path, self.sc, self.logger)
16911696
model_save_path = os.path.join(path, "model")
16921697
booster = xgb_model.get_booster().save_raw("json").decode("utf-8")
1693-
_get_spark_session().sparkContext.parallelize([booster], 1).saveAsTextFile(
1698+
booster_chunks = []
1699+
1700+
for offset in range(0, len(booster), _MODEL_CHUNK_SIZE):
1701+
booster_chunks.append(booster[offset : offset + _MODEL_CHUNK_SIZE])
1702+
1703+
_get_spark_session().sparkContext.parallelize(booster_chunks, 1).saveAsTextFile(
16941704
model_save_path
16951705
)
16961706

@@ -1721,8 +1731,8 @@ def load(self, path: str) -> "_SparkXGBModel":
17211731
)
17221732
model_load_path = os.path.join(path, "model")
17231733

1724-
ser_xgb_model = (
1725-
_get_spark_session().sparkContext.textFile(model_load_path).collect()[0]
1734+
ser_xgb_model = "".join(
1735+
_get_spark_session().sparkContext.textFile(model_load_path).collect()
17261736
)
17271737

17281738
def create_xgb_model() -> "XGBModel":

src/common/random.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ class ColumnSampler {
230230
};
231231

232232
inline auto MakeColumnSampler(Context const* ctx) {
233-
std::uint32_t seed = common::GlobalRandomEngine()();
233+
std::uint32_t seed = common::GlobalRandom()();
234234
auto rc = collective::Broadcast(ctx, linalg::MakeVec(&seed, 1), 0);
235235
collective::SafeColl(rc);
236236
auto cs = std::make_shared<common::ColumnSampler>(seed);

src/tree/updater_gpu_hist.cu

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -867,12 +867,7 @@ class GPUHistMaker : public TreeUpdater {
867867
CHECK_GE(ctx_->Ordinal(), 0) << "Must have at least one device";
868868
info_ = &dmat->Info();
869869

870-
// Synchronise the column sampling seed
871-
uint32_t column_sampling_seed = common::GlobalRandom()();
872-
auto rc = collective::Broadcast(
873-
ctx_, linalg::MakeVec(&column_sampling_seed, sizeof(column_sampling_seed)), 0);
874-
SafeColl(rc);
875-
this->column_sampler_ = std::make_shared<common::ColumnSampler>(column_sampling_seed);
870+
this->column_sampler_ = common::MakeColumnSampler(ctx_);
876871

877872
auto batch_param = BatchParam{param->max_bin, TrainParam::DftSparseThreshold()};
878873
dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
@@ -1012,8 +1007,7 @@ class GPUGlobalApproxMaker : public TreeUpdater {
10121007

10131008
monitor_.Start(__func__);
10141009
CHECK(ctx_->IsCUDA()) << error::InvalidCUDAOrdinal();
1015-
uint32_t column_sampling_seed = common::GlobalRandom()();
1016-
this->column_sampler_ = std::make_shared<common::ColumnSampler>(column_sampling_seed);
1010+
this->column_sampler_ = common::MakeColumnSampler(ctx_);
10171011

10181012
p_last_fmat_ = p_fmat;
10191013
initialised_ = true;

0 commit comments

Comments
 (0)