diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py
index 7e0ae793ba6e..1865ddbb17af 100644
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -252,17 +252,41 @@ def _from_numpy_array(
     _check_data_shape(data)
     data, _ = _ensure_np_dtype(data, data.dtype)
     handle = ctypes.c_void_p()
-    _check_call(
-        _LIB.XGDMatrixCreateFromDense(
-            _array_interface(data),
-            make_jcargs(
-                missing=float(missing),
-                nthread=int(nthread),
-                data_split_mode=int(data_split_mode),
-            ),
-            ctypes.byref(handle),
+    # Fast path: small C-contiguous float32 arrays are handed over as a raw
+    # pointer; everything else goes through the array-interface entry point.
+    # tests/python/microbench_numpy.py compares the two entry points.
+    # NOTE(review): confirm that the native XGDMatrixCreateFromMat_omp accepts
+    # the trailing data_split_mode argument -- TODO verify against c_api.h.
+    use_raw_pointer = (
+        isinstance(data, np.ndarray)
+        and data.dtype == np.float32
+        and data.flags["C_CONTIGUOUS"]
+        and data.size <= 32768
+    )
+    if use_raw_pointer:
+        _check_call(
+            _LIB.XGDMatrixCreateFromMat_omp(
+                data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
+                c_bst_ulong(data.shape[0]),
+                c_bst_ulong(data.shape[1]),
+                ctypes.c_float(float(missing)),
+                ctypes.byref(handle),
+                ctypes.c_int(int(nthread)),
+                ctypes.c_int(int(data_split_mode)),
+            )
+        )
+    else:
+        _check_call(
+            _LIB.XGDMatrixCreateFromDense(
+                _array_interface(data),
+                make_jcargs(
+                    missing=float(missing),
+                    nthread=int(nthread),
+                    data_split_mode=int(data_split_mode),
+                ),
+                ctypes.byref(handle),
+            )
         )
-    )
     return handle, feature_names, feature_types
 
 
diff --git a/tests/python/microbench_numpy.py b/tests/python/microbench_numpy.py
new file mode 100755
index 000000000000..9e302b581fdc
--- /dev/null
+++ b/tests/python/microbench_numpy.py
@@ -0,0 +1,81 @@
+import numpy as np
+import xgboost as xgb
+from collections import defaultdict
+import timeit
+import ctypes
+from xgboost.core import _LIB, DataSplitMode
+from xgboost.data import _check_call, _array_interface, c_bst_ulong, make_jcargs
+
+
+def measure_create_dmatrix(rows, cols, nthread, use_optimization):
+    """Time a single native DMatrix construction and return the seconds taken.
+
+    A random C-contiguous float32 matrix of shape (rows, cols) is generated
+    outside the timed region.  ``use_optimization`` selects
+    ``XGDMatrixCreateFromMat_omp``; otherwise ``XGDMatrixCreateFromDense``
+    is called.
+    """
+    data = np.random.randn(rows, cols).astype(np.float32)
+    data = np.ascontiguousarray(data)
+
+    handle = ctypes.c_void_p()
+    missing = np.nan
+
+    start = timeit.default_timer()
+    if use_optimization:
+        status = _LIB.XGDMatrixCreateFromMat_omp(
+            data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
+            c_bst_ulong(data.shape[0]),
+            c_bst_ulong(data.shape[1]),
+            ctypes.c_float(missing),
+            ctypes.byref(handle),
+            ctypes.c_int(nthread),
+            ctypes.c_int(DataSplitMode.ROW),
+        )
+    else:
+        status = _LIB.XGDMatrixCreateFromDense(
+            _array_interface(data),
+            make_jcargs(
+                missing=float(missing),
+                nthread=int(nthread),
+                data_split_mode=int(DataSplitMode.ROW),
+            ),
+            ctypes.byref(handle),
+        )
+    elapsed = timeit.default_timer() - start
+
+    # Outside the timed region: surface a failed call instead of reporting it
+    # as a timing, and free the matrix so handles do not pile up across repeats.
+    _check_call(status)
+    _check_call(_LIB.XGDMatrixFree(handle))
+    return elapsed
+
+
+COLS = 1000
+
+
+def main():
+    """Print per-call timings of both entry points for several sizes."""
+    print(f"{'Threads':8} | {'Rows':8} | {'Cols':8} | {'Current (sec)':15} | {'Optimized (sec)':15} | {'Ratio':12}")
+
+    for nthread in [1, 2, 4, 8]:
+        for rows in [1, 4, 16, 64, 256, 1024, 4096, 16384]:
+            repeats = 65536 // rows
+
+            current = 0
+            for _ in range(repeats):
+                current += measure_create_dmatrix(
+                    rows=rows, cols=COLS, nthread=nthread, use_optimization=False
+                )
+
+            optimized = 0
+            for _ in range(repeats):
+                optimized += measure_create_dmatrix(
+                    rows=rows, cols=COLS, nthread=nthread, use_optimization=True
+                )
+
+            print(f"{nthread:8} | {rows:8} | {COLS:8} | {current/repeats:15.4g} | {optimized/repeats:15.4g} | {optimized / current:12.1%}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
index cdc571a916df..7d2ab9c57b8d 100644
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -210,6 +210,41 @@ def test_dmatrix_numpy_init_omp(self):
             assert dm.num_row() == row
             assert dm.num_col() == cols
 
+    def _test_dmatrix_numpy_init_omp_contiguous(self, test_contiguous: bool):
+        """Build DMatrix objects from float32 numpy input and verify them.
+
+        ``test_contiguous`` selects C-ordered input; otherwise the same data is
+        passed in Fortran order.
+        """
+        # With 50 columns, 655 and 656 rows straddle the 32768-element cut-off
+        # in _from_numpy_array, so the contiguous case hits both of its branches.
+        rows = [100, 655, 656, 1000, 11326, 15000]
+        cols = 50
+        for row in rows:
+            X = np.random.randn(row, cols).astype(np.float32)
+            y = np.random.randn(row).astype(np.float32)
+
+            # Same dtype for both layouts, so only the memory order differs.
+            if test_contiguous:
+                X = np.ascontiguousarray(X)
+                assert X.flags["C_CONTIGUOUS"]
+            else:
+                X = np.asfortranarray(X)
+                assert not X.flags["C_CONTIGUOUS"]
+
+            for nthread in (0, 1, 10):
+                dm = xgb.DMatrix(X, y, nthread=nthread)
+                np.testing.assert_allclose(dm.get_data().toarray(), X, rtol=1e-7)
+                np.testing.assert_array_equal(dm.get_label(), y)
+                assert dm.num_row() == row
+                assert dm.num_col() == cols
+
+    def test_dmatrix_numpy_init_omp_contiguous(self):
+        self._test_dmatrix_numpy_init_omp_contiguous(True)
+
+    def test_dmatrix_numpy_init_omp_not_contiguous(self):
+        self._test_dmatrix_numpy_init_omp_contiguous(False)
+
     def test_cv(self):
         dm, _ = tm.load_agaricus(__file__)
         params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}