some new features
This commit is contained in:
@ -0,0 +1,77 @@
|
||||
import os
|
||||
from joblib import cpu_count
|
||||
|
||||
|
||||
# Module level cache for cpu_count as we do not expect this to change during
|
||||
# the lifecycle of a Python program. This dictionary is keyed by
|
||||
# only_physical_cores.
|
||||
_CPU_COUNTS = {}
|
||||
|
||||
|
||||
def _openmp_parallelism_enabled():
|
||||
"""Determines whether scikit-learn has been built with OpenMP
|
||||
|
||||
It allows to retrieve at runtime the information gathered at compile time.
|
||||
"""
|
||||
# SKLEARN_OPENMP_PARALLELISM_ENABLED is resolved at compile time and defined
|
||||
# in _openmp_helpers.pxd as a boolean. This function exposes it to Python.
|
||||
return SKLEARN_OPENMP_PARALLELISM_ENABLED
|
||||
|
||||
|
||||
cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True):
|
||||
"""Determine the effective number of threads to be used for OpenMP calls
|
||||
|
||||
- For ``n_threads = None``,
|
||||
- if the ``OMP_NUM_THREADS`` environment variable is set, return
|
||||
``openmp.omp_get_max_threads()``
|
||||
- otherwise, return the minimum between ``openmp.omp_get_max_threads()``
|
||||
and the number of cpus, taking cgroups quotas into account. Cgroups
|
||||
quotas can typically be set by tools such as Docker.
|
||||
The result of ``omp_get_max_threads`` can be influenced by environment
|
||||
variable ``OMP_NUM_THREADS`` or at runtime by ``omp_set_num_threads``.
|
||||
|
||||
- For ``n_threads > 0``, return this as the maximal number of threads for
|
||||
parallel OpenMP calls.
|
||||
|
||||
- For ``n_threads < 0``, return the maximal number of threads minus
|
||||
``|n_threads + 1|``. In particular ``n_threads = -1`` will use as many
|
||||
threads as there are available cores on the machine.
|
||||
|
||||
- Raise a ValueError for ``n_threads = 0``.
|
||||
|
||||
Passing the `only_physical_cores=False` flag makes it possible to use extra
|
||||
threads for SMT/HyperThreading logical cores. It has been empirically
|
||||
observed that using as many threads as available SMT cores can slightly
|
||||
improve the performance in some cases, but can severely degrade
|
||||
performance other times. Therefore it is recommended to use
|
||||
`only_physical_cores=True` unless an empirical study has been conducted to
|
||||
assess the impact of SMT on a case-by-case basis (using various input data
|
||||
shapes, in particular small data shapes).
|
||||
|
||||
If scikit-learn is built without OpenMP support, always return 1.
|
||||
"""
|
||||
if n_threads == 0:
|
||||
raise ValueError("n_threads = 0 is invalid")
|
||||
|
||||
if not SKLEARN_OPENMP_PARALLELISM_ENABLED:
|
||||
# OpenMP disabled at build-time => sequential mode
|
||||
return 1
|
||||
|
||||
if os.getenv("OMP_NUM_THREADS"):
|
||||
# Fall back to user provided number of threads making it possible
|
||||
# to exceed the number of cpus.
|
||||
max_n_threads = omp_get_max_threads()
|
||||
else:
|
||||
try:
|
||||
n_cpus = _CPU_COUNTS[only_physical_cores]
|
||||
except KeyError:
|
||||
n_cpus = cpu_count(only_physical_cores=only_physical_cores)
|
||||
_CPU_COUNTS[only_physical_cores] = n_cpus
|
||||
max_n_threads = min(omp_get_max_threads(), n_cpus)
|
||||
|
||||
if n_threads is None:
|
||||
return max_n_threads
|
||||
elif n_threads < 0:
|
||||
return max(1, max_n_threads + n_threads + 1)
|
||||
|
||||
return n_threads
|
||||
Reference in New Issue
Block a user