'''
Histogram a la ROOT.
'''
import logging
logger = logging.getLogger(__name__)
import numpy as np
from numpy import log2
from copy import copy,deepcopy
import collections
from bctools.exceptions import UserError
[docs]
class Axis:
"""
Bin edges. Optionally labeled
Args:
edges (array-like): Bin edges
label (str): Label for axis. If edges is an Axis object, this will
override its label
scale (str): Bin center mode e.g. `"linear"` or `"log"`.
See `scale()`. If edges is an Axis
object, this will override its mode
Attributes:
edges (array-like): Bin edges
label (str): Label for axis
"""
def __init__(self, edges, label = None, scale = None):
if isinstance(edges, Axis):
self.edges = edges.edges
#Override
if label is None:
self.label = edges.label
else:
self.label = label
if scale is None:
self._scale = edges._scale
else:
self._scale = scale
else:
self.edges = np.array(edges)
if len(self.edges) < 2:
raise ValueError("All edges need at least two edges")
if any(np.diff(self.edges) <= 0):
raise ValueError("All bin edges must be strictly monotonically"
" increasing")
self.label = label
if scale is None:
self._scale = 'linear'
else:
self._scale = scale
[docs]
def scale(self, mode = None):
"""
Control what is considered the center of the bin. This affects
`centers()` and interpolations.
Args:
mode (str or None):
- linear (default): The center is the midpoint between the bin edges
- symmetric: same as linear, except for the first center, which
will correspond to the lower edge. This is, for example, useful
when the histogram is filled with the absolute value of a
variable.
- log: The center is the logarithmic (or geometrical) midpoint between
the bin edges.
Return:
str: The new mode (or current if mode is `None`).
"""
if mode is not None:
if mode not in ['linear', 'symmetric', 'log']:
raise ValueError("Bin center mode '{}' not supported".format(mode))
if mode == 'log' and self.min <= 0:
raise ArithmeticError("Bin center mode 'log' can only be assigned "
"to axes starting at a positive number")
self._scale = mode
return self._scale
def __array__(self):
return np.array(self.edges)
def __len__(self):
return len(self.edges)
def __eq__(self, other):
return (np.array_equal(self.edges, other.edges)
and
self.label == other.label)
def __getitem__(self, key):
return self.edges[key]
[docs]
def find_bin(self, value):
"""
Return the bin `value` corresponds to.
Return:
int: Bin number. -1 for underflow, `nbins` for overflow
"""
return np.digitize(value, self.edges)-1
[docs]
def interp_weights(self, value):
"""
Get the two closest bins to `value`, together with the weights to
linearly interpolate between them. The bin contents are assigned to
the center of the bin.
Values in the edges beyond the center of the first/last bin will
result in no interpolation.
Return:
[int, int]: Bins
[float, float]: Weights
"""
bin0 = self.find_bin(value)
if bin0 < 0 or bin0 == self.nbins:
raise ValueError("Value {} out of bounds [{} - {})"
.format(value,
self.min,
self.max))
# Linear interpolation with two closest bins
center0 = self.centers[bin0]
if value > center0:
bin1 = bin0 + 1
else:
bin1 = bin0 - 1
# Handle histogram edges, beyond center of first/last pixel
bin1 = min(self.nbins-1, max(0, bin1))
if bin0 == bin1:
# No interpolation at the very edge
return ([bin0,bin1],[1,0])
# Sort
if bin0 > bin1:
bin0, bin1 = bin1, bin0
# Weights
center0 = self.centers[bin0]
center1 = self.centers[bin1]
if self._scale == 'log':
center0 = log2(center0)
center1 = log2(center1)
value = log2(value)
norm = center1 - center0
w0 = (center1 - value) / norm
w1 = (value - center0) / norm
return ([bin0,bin1], [w0,w1])
@property
def lower_bounds(self):
'''
Lower bound of each bin
'''
return self.edges[:-1]
@property
def upper_bounds(self):
'''
Upper bound of each bin
'''
return self.edges[1:]
@property
def bounds(self):
'''
Start of [lower_bound, upper_bound] values for each bin.
'''
return np.transpose([self.lower_bounds, self.upper_bounds])
@property
def min(self):
"""
Overall lower bound
"""
return self.edges[0]
@property
def max(self):
"""
Overall upper bound of histogram
"""
return self.edges[-1]
@property
def centers(self):
'''
Center of each bin.
'''
if self._scale == 'linear':
centers = (self.edges[1:] + self.edges[:-1])/2
elif self._scale == 'symmetric':
centers = (self.edges[1:] + self.edges[:-1])/2
centers[0] = self.min
elif self._scale == 'log':
log2_edges = log2(self.edges)
centers = 2**((log2_edges[1:] + log2_edges[:-1])/2)
else:
raise AssertionError("This shouldn't happen, "
"tell maintainers to fix it")
return centers
@property
def widths(self):
'''
Width each bin.
'''
return np.diff(self.edges)
@property
def nbins(self):
"""
Number of elements along each axis. Either an int (1D histogram) or an
array
"""
return len(self.edges)-1
[docs]
class Axes:
"""
Holds a list of axes.
The operator Axes[key] return a subset of these. Key can be either the
index or the label. If the key is a single index, a single Axis object
will be returned
Args:
edges (array or list of arrays or Axis): Definition of bin edges.
labels (array of str): Optionally label the axes for easier indexing.
Will override the labels of edges, if they are Axis objects
scale (str or array): Bin center mode e.g. `"linear"` or `"log"`.
See Axis.scale. If not an array, all axes will have this mode.
"""
def __init__(self, edges, labels=None, scale = None):
# Standarize axes as list of Axis
if isinstance(edges, Axes):
# From another Axes object
self._axes = copy(edges._axes)
elif np.ndim(edges) == 0:
if np.isscalar(edges):
raise TypeError("'edges' can't be a scalar")
elif np.ndim(edges) == 1:
if len(edges) == 0:
raise ValueError("'edges' can't be an empty array")
if all(np.ndim(a) == 0 for a in edges):
#1D histogram
self._axes = [Axis(edges)]
else:
#Multi-dimensional histogram.
self._axes = [Axis(axis) for axis in edges]
elif np.ndim(edges) == 2:
#Multi-dimensional histogram
self._axes = [Axis(axis) for axis in edges]
else:
raise ValueError("'edges' can have at most two dimensions")
#Override labels if nedeed
if labels is not None:
if np.isscalar(labels):
labels = [labels]
if len(labels) != self.ndim:
raise ValueError("Edges - labels size mismatch")
for n,label in enumerate(labels):
self._axes[n] = Axis(self._axes[n], label)
#Maps labels to axes indices. Only keep non-None
labels = np.array([a.label for a in self._axes])
non_none_labels = labels[labels != None]
if len(np.unique(non_none_labels)) != len(non_none_labels):
raise ValueError("Labels can't repeat")
self._labels = {}
for n,label in enumerate(labels):
if label is not None:
self._labels[label] = n
#Override scale if nedeed
if scale is not None:
if np.isscalar(scale):
scale = self.ndim*[scale]
if len(scale) != self.ndim:
raise ValueError("Edges - scale size mismatch")
for mode,ax in zip(scale, self._axes):
ax.scale(mode)
def __len__(self):
return self.ndim
def __iter__(self):
return iter(self._axes)
@property
def labels(self):
"""
Label of axes.
Return:
Either a string or a tuple of string. None if
they are not defined
"""
return [a.label for a in self._axes]
@property
def ndim(self):
"""
Number of axes
"""
return len(self._axes)
[docs]
def key_to_index(self, key):
"""
Turn a key or list of keys, either indices or labels, into indices
Args:
key (int or str): Index or label
Return:
int: Index
"""
if isinstance(key, int):
return key
if (isinstance(key, (np.ndarray, list, tuple, range))
and
not isinstance(key, str)):
return tuple(self.key_to_index(k) for k in key)
else:
#Label
try:
return self._labels[key]
except KeyError:
logger.error("Axis with label {} not found".format(key))
raise
def __getitem__(self, key):
indices = self.key_to_index(key)
if np.isscalar(indices):
return self._axes[indices]
else:
return Axes([self._axes[i] for i in indices])
def __eq__(self, other):
return all([a1 == a2 for a1,a2 in zip(self._axes,other._axes)])
def __array__(self):
return np.array(self._axes)
[docs]
def interp_weights(self, *values):
"""
Get the bins and weights to linearly interpolate between bins.
The bin contents are assigned to the center of the bin.
Args:
values (float or array): Coordinates within the axes to interpolate.
Must have the same size as `ndims`. Input values as
``(1,2,3)`` or ``([1,2,3])``.
Returns:
array of tuples of int, array of floats: Bins and weights to use. Size=2^ndim. Each bin is specified by a tuple containing `ndim` integers
"""
#Standarize
if (len(values) == 1 and
isinstance(values[0], (list, np.ndarray, range, tuple))):
# Got a sequence
values = values[0]
if len(values) != self.ndim:
raise ValueError("Number of values different than number of dimensions")
# Get the bin/weights for each individual axis
dim_bins = []
dim_weights = []
for dim,value in enumerate(values):
bins,weights = self._axes[dim].interp_weights(value)
dim_bins += [bins]
dim_weights += [weights]
bins = []
weights = []
# Combine them. e.g. for 2D this results in
# weights = [dim_weights[0][0]*dim_weights[1][0],
# dim_weights[0][1]*dim_weights[1][0],
# dim_weights[0][0]*dim_weights[1][1],
# dim_weights[0][1]*dim_weights[1][1]]
# bins = [(dim_bins[0][0], dim_bins[1][0]),
# (dim_bins[0][1], dim_bins[1][0]),
# (dim_bins[0][0], dim_bins[1][1]),
# (dim_bins[0][1], dim_bins[1][1])]
# bit_masks = [0b001, 0b010, 0b100, ...]
bit_masks = 2**np.array(range(self.ndim))
for n in range(2**self.ndim):
weight = 1
bin_list = []
# Since there are two weights per axis, we use bit
# masking to loop between them instead of recursion
for dim,bit_mask in enumerate(bit_masks):
index = int(bool(n & bit_mask)) # Either 0 or 1
weight *= dim_weights[dim][index]
bin_list += [dim_bins[dim][index]]
bins += [tuple(bin_list)]
weights += [weight]
return bins,weights
def _get_axis_property(f):
"""
Decorator to retrieve a property for an axis based on an index.
This allows to specify the axis or axes as (x), (x,y), ([x,y])
or even (x,[z,[x,y]]).
The methods need to be reclared as:
@_get_axis_property
def property_name(self, axis):
return self[axis]
This decorator will automatically get the corresponding property based
on the name of the methods
"""
def wrapper(self, *axis):
if len(axis) == 0:
#Default to first axis, useful for 1D histograms
return wrapper(self, 0)
elif len(axis) == 1:
if np.isscalar(axis[0]):
#Standard way, single axis. Do other types recursively
return getattr(f(self, axis[0]), f.__name__)
else:
return [wrapper(self, a) for a in axis[0]]
else:
return [wrapper(self, a) for a in axis]
return wrapper
@_get_axis_property
def edges(self, axis):
"""
Edges for a given axis
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
array or list of arrays
"""
return self[axis]
@_get_axis_property
def lower_bounds(self, axis=0):
'''
Lower bound of each bin
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
array or list of arrays
'''
return self[axis]
@_get_axis_property
def upper_bounds(self, axis=0):
'''
Upper bound of each bin
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
array or list of arrays
'''
return self[axis]
@_get_axis_property
def bounds(self, axis=0):
'''
Start of [lower_bound, upper_bound] values for each bin.
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
array or list of arrays
'''
return self[axis]
@_get_axis_property
def min(self, axis=0):
"""
Overall lower bound
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
float or array
"""
return self[axis]
@_get_axis_property
def max(self, axis=0):
"""
Overall upper bound of histogram
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
float or array
"""
return self[axis]
@_get_axis_property
def centers(self, axis=0):
'''
Center of each bin.
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
array or list of arrays
'''
return self[axis]
@_get_axis_property
def widths(self, axis=0):
'''
Width each bin.
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
array or list of arrays
'''
return self[axis]
@_get_axis_property
def nbins(self, axis=0):
"""
Number of elements along each axis. Either an int (1D histogram) or an
array
Args:
axis (int, str or list): Axis or axes indices or labels
Return:
float or array
"""
return self[axis]
[docs]
class Histogram(object):
"""
This is a wrapper of a numpy array with axes and a fill method.
Like an array, the histogram can have an arbitrary number of dimensions.
Standard numpy array indexing is supported to get the contents
--i.e. :code:`h[:]`, :code:`h[4]`, :code:`h[[1,3,4]]`, :code:`h[:,5:50:2]]`,
etc.--. However, the meaning of the :code:`-1` index is different. Instead of
counting from the end, :code:`-1` corresponds to the underflow bin. Similarly,
an index equal to the number of bins corresponds to the overflow bin.
You can however give relative position with respect to :code:`h.NBINS`
--e.g. :code:`h[0:h.NBINS]` result in all regular bins, :code:`h[-1:h.NBINS+1]`
includes also the underflow/overflow bins and :code:`h[h.NBINS]` gives you the
contents of the overflow bin.
You can also use an Ellipsis object (...) at the end to specify that the
contents from the rest of the dimension are to have the under and overflow
bins included. e.g. for a 3D histogram :code:`h[1,-1:h.NBINS+1,-1:h.NBINS+1]
= h[1,...]`. h[:] returns all contents without under/overflow bins and
h[...] returns everything, including those special bins.
Additionally, you can specify with a dictionary the indices for specific
axes, with all the rest being the default. e.g.
:code:`h[:,:,1:,:,6] <==> h[{2:slice(1,None), 4:6}]`. If you also use the
axes id if you labeled them, e.g. :code:`h[{'x':slice(0,h.NBINS+1)}]`
If :code:`sumw2` is not :code:`None`, then the histogram will keep track of
the sum of the weights squared --i.e. you better use this if you are using
weighted data and are concern about error bars--
You can use the :code:`*=` operator to weight by a number or an array of
weights with the shape of :code:`h.nbins+2` (to include under/overflow bins).
The operators :code:`+`, :code:`-`, :code:`+=` and :code:`-=` are available.
Both operands need to be histograms. An exception will be raised if the axes
are not the same. Note that :code:`h += h0` is more efficient than
:code:`h = h + h0` since latter involves the instantiation of a new histogram.
Args:
edges (Axes or array): Definition of bin edges, Anything that can
be processes by Axes. Lower edge value is included in the bin,
upper edge value is excluded.
contents (array): Initialization of histogram contents. Might or might
not include under/overflow bins. Initialize to 0 by default.
sumw2 (None, bool or array): If True, it will keep track of the sum of
the weights squared. You can also initialize them with an array
labels (array of str): Optionally label the axes for easier indexing
scale (str or array): Bin center mode e.g. `"linear"` or `"log"`.
See ``Axis.scale()``. Not to be confused with the
``Histogram`'s `method ``scale()``, which `scales` the bin contents,
rather than defining what the scale mode of an axis is.
Attributes:
sumw2 (Histogram): An auxiliary histogram whose contents are the sum
of the square of the weight that were used to fill this histogram
"""
def __init__(self, edges, contents = None, sumw2 = None,
labels=None, scale = None):
self._axes = Axes(edges, labels=labels, scale = scale)
self._nbins = np.array([a.nbins for a in self._axes])
# Standarize contents (with under/overflow) or initialize them to zero.
if contents is not None:
if np.array_equal(self._nbins+2, np.shape(contents)):
# Includes under and overflow
self._contents = np.array(contents)
elif np.array_equal(self._nbins, np.shape(contents)):
# Missing under and overflow, but right shape.
# Adding empty under/overflow bins
self._contents = np.zeros(np.array(np.shape(contents)) + 2)
self._contents[tuple(slice(1,-1)
for i in
range(0,np.ndim(contents)))] = contents
else:
raise ValueError("Edges-contents size mismatch")
else:
self._contents = np.zeros([n+2 for n in self._nbins])
#Check if we'll keep track of the sum of the weights
if sumw2 is None or sumw2 is False:
self.sumw2 = None
elif sumw2 is True:
self.sumw2 = Histogram(self._axes)
else:
self.sumw2 = Histogram(self._axes, sumw2)
[docs]
@classmethod
def concatenate(cls, edges, histograms, label = None):
"""
Generate a Histogram from a list os histograms. The axes of all input
histograms must be equal, and the new histogram will have one more
dimension than the input. The new axis has index 0.
Args:
edges (Axes or array): Definition of bin edges of the new dimension
histograms (list of Histogram): List of histogram to fill contents.
Might or might not include under/overflow bins.
labels (str): Label the new dimension
Return:
Histogram
"""
# Check new axis matches number of histograms,
# with or without under/overflow
new_axis = Axis(edges, label = label)
if len(histograms) == new_axis.nbins:
underflow_bin_shift = 1
elif len(histograms) == new_axis.nbins + 2:
underflow_bin_shift = 0
else:
raise ValueError("Mismatch between number of bins and "
"number of histograms")
# Create new axes and new contents
old_axes = histograms[0].axes
new_axes = Axes([new_axis] + [ax for ax in old_axes])
contents = np.zeros([ax.nbins+2 for ax in new_axes])
sumw2 = np.zeros([ax.nbins+2 for ax in new_axes])
for bin,hist in enumerate(histograms):
if hist.axes != old_axes:
raise ValueError("The axes of all input histogram must equal")
bin += underflow_bin_shift # Account for under/overflow bins
contents[bin] = hist[...]
if sumw2 is not None:
if hist.sumw2 is not None:
sumw2[bin] = hist.sumw2[...]
else:
logger.warning("Not all input histogram have sum of weights "
"squared. sumw2 will be dropped")
sumw2 = None
return Histogram(new_axes, contents, sumw2)
@property
def ndim(self):
return self._axes.ndim
[docs]
def clear(self):
"""
Set all counts to 0
"""
self._contents[:] = 0
if self.sumw2 is not None:
self.sumw2.clear()
def __eq__(self, other):
# Histogram is completely defined by axes and contents
return (self._axes == other._axes
and
np.array_equal(self._contents, other._contents)
and
self.sumw2 == other.sumw2)
class _NBINS():
'''
Convenience class that will expand to the number of bins of a
given dimension.
The trick is to overload the -/+ operators such than
h.NBINS +/- offset (h being an instance of Histogram and NBINS an
static instance of Histogram._NBINS) returns an instance of _NBINS
itself, which stores the offset. The [] operator can then detect that
the input is an instance of _NBINS and convert it into an integer with
respect to the size of the appropiate axis.
'''
def __init__(self, offset = 0):
self.offset = offset
def __add__(self, offset):
return self.__class__(offset)
def __sub__(self, offset):
return self + (-offset)
NBINS = _NBINS()
def _prepare_indices(self, indices):
'''
Prepare indices for it use in __getitem__ and __setitem__
--i.e. [] overloading --
See class help.
'''
def _prepare_index(index, dim):
'''
Modify index for a single axis to account for under/overflow,
as well as to catch instances of _NBINS (see description above)
This depend on the number of bins in the axis of a given
dimension (dim)
'''
if isinstance(index,slice):
# Both start and stop can be either None, an instance of
# _NBINS or an integer
index = slice(1
if index.start is None
else
self._nbins[dim] + index.start.offset + 1
if isinstance(index.start, self._NBINS)
else
index.start+1,
self._nbins[dim]+1
if index.stop is None
else
self._nbins[dim] + index.stop.offset + 1
if isinstance(index.stop, self._NBINS)
else
index.stop+1,
index.step)
# Check bounds. Note index is the _contents index at this point
if index.start < 0 or index.stop > self._nbins[dim] + 2:
raise IndexError("Bin index out of bounds")
return index
elif isinstance(index, (np.integer, int)):
if index < -1 or index > self._nbins[dim]:
raise IndexError("Bin index out of bounds")
return index+1
elif isinstance(index, self._NBINS):
# Referece with respect to nbins
return _prepare_index(self._nbins[dim] + index.offset, dim)
elif isinstance(index, (np.ndarray, list, tuple, range)):
# Note: this will return a copy, not a view
# Handle references with respecto to nbins
index = [self._nbins[dim] + i.offset + 1
if isinstance(i,self._NBINS) else
i + 1
for i in index]
# Check bounds. Note index is the _contents index at this point/
for ind in index:
if ind < 0 or ind > self._nbins[dim] + 1:
raise IndexError("Bin index out of bounds")
return index
else:
raise TypeError("Index can only be an int, slice, list or array")
if isinstance(indices, tuple):
# Get the rest of the dimensions with under/overflow user used ...
if indices[-1] is Ellipsis:
extra_indices = tuple(slice(-1, self._nbins[dim]+1)
for dim in range(len(indices)-1, self.ndim))
indices = self._prepare_indices(indices[:-1] + extra_indices)
return indices
# Standard way. All other ways end up here after recursion
indices = tuple(_prepare_index(index, dim)
for dim,index in enumerate(indices))
# Remove under/overflow of the rest of the dimensions
indices += tuple(_prepare_index(slice(None), dim) for dim in
range(len(indices), self.ndim))
return indices
if isinstance(indices, dict):
# Indices for specified axes. Default to all other axes
new_indices = [slice(None)] * self.ndim
for axis,index in indices.items():
axis = self.axes.key_to_index(axis)
new_indices[axis] = index
return self._prepare_indices(tuple(new_indices))
else:
# Single axis
return self._prepare_indices(tuple([indices]))
def __getitem__(self, indices):
return self._contents[self._prepare_indices(indices)]
def __array__(self):
"""
Convert to numpy array. Drops under/overflow
"""
return self[:]
@property
def axes(self):
"""
Underlaying axes object
"""
return self._axes
@property
def axis(self):
"""
Equivalent to `self.axes[0]`, but fails if `ndim > 1`
"""
if self.ndim > 1:
raise UserError("Property 'axis' can only be used with 1D "
"histograms. Use `axes` for multidimensional "
"histograms")
return self.axes[0]
@property
def shape(self):
'''
Tuple with number of bins along each dimensions
'''
return tuple(self._nbins)
[docs]
def find_bin(self, *values, axis = None):
"""
Return one or more indices corresponding to the bin this value or
set of values correspond to.
You can pass either an array, or specified the values as different
arguments. i.e. :code:`h.find_bin(x,y,z)` = :code:`h.find_bin([x,y,z])`
Args:
values (float or array): Vaule or list of values
axis (int or str or list): If set, values correspond to the\
subset of axes listed here
Return:
int or tuple: Bin index
"""
#Standarize
if len(values) == 1 and \
isinstance(values[0], (list, np.ndarray, range, tuple)):
# Got a sequence
values = values[0]
# Either all axes or those specified by user
if axis is not None:
axes = Axes(self._axes[axis])
else:
axes = self._axes
if len(values) != len(axes):
raise ValueError("Different number of values than axes")
# Digitize
bins = tuple(axis.find_bin(val)
for val,axis in zip(values, axes))
if len(bins) == 1:
return bins[0]
else:
return bins
[docs]
def interp(self, *values):
"""
Get a linearly interpolated content for a given set of values
along each axes. The bin contents are assigned to the center of the bin.
Args:
values (float or array): Coordinates within the axes to interpolate.
Must have the same size as `ndims`. Input values as
`(1,2,3)` or `([1,2,3])`
Return:
float
"""
content = 0
bins,weights = self._axes.interp_weights(*values)
for bin,weight in zip(bins, weights):
content += weight*self[bin]
return content
[docs]
def fill(self, *values, weight=1):
'''
And an entry to the histogram. Can be weighted.
Follow same convention as find_bin()
Args:
values (float or array): Value of entry
weight (float): Value weight in histogram.
Note:
Note that weight needs to be specified explicitely by key, otherwise
it will be considered a value an a IndexError will be thrown.
'''
indices = self._prepare_indices(self.find_bin(*values))
self._contents[indices] += weight
if self.sumw2 is not None:
self.sumw2._contents[indices] += weight*weight
[docs]
def project(self, *axis):
"""
Return a histogram consisting on a projection of the current one
Args:
axis (int or str or list): axis or axes onto which the
histogram will be projected --i.e. will sum up over the
other dimensiones--. The axes of the new histogram will
have the same order --i.e. you can transpose axes--
Return:
Histogram: Projected histogram
"""
if self.ndim == 1:
raise ValueError("Can't project a 1D histogram. "
"Consider using np.sum(h[...])")
#Standarize
if len(axis) == 1 and \
isinstance(axis[0], (list, np.ndarray, range, tuple)):
# Got a sequence
axis = axis[0]
axis = self._axes.key_to_index(axis)
if len(np.unique(axis)) != len(axis):
raise ValueError("An axis can't repeat")
sum_axes = tuple(dim for dim in range(0, self.ndim) if dim not in axis)
new_contents = self._contents.sum(axis = sum_axes)
# Transpose the contents to match the order of the axis provided by the
# the user, which are currently sorted
new_contents = np.transpose(new_contents,
axes = np.argsort(np.argsort(axis)))
new_sumw2 = None
if self.sumw2 is not None:
new_sumw2 = self.sumw2.project(axis)._contents
return Histogram(edges = self._axes[axis],
contents = new_contents,
sumw2 = new_sumw2)
[docs]
def slice(self, axis, start = None, stop = None,
underflow = True, overflow = True):
"""
Return a histogram which is a slice of the current one, along a given
dimension.
Follows same indexing convention as [] operator
Args:
axis (int or str): Dimension that will be sliced
start (None or int): Start bin (inclusive)
stop (None or int): Stop bin (exclusive). Must be greater than start
underflow (bool): If True, the contents before the start bin will be
summed up and kept in the underflow bins, otherwise they'll
be discarded
overflow (overflow): same as underflow
Return:
Histogram: Sliced histogram
"""
# If it's a label, turn it into an integer
axis = self._axes.key_to_index(axis)
# Standarize start/stop and checks. These are the indices of the bins,
# without taking into account under/overflow
if isinstance(start, self._NBINS):
start = self._nbins[axis] + start.offset
elif start is None:
start = 0
if start < 0 or not (start < self._nbins[axis]):
raise IndexError("'start' out of bounds")
if isinstance(stop, self._NBINS):
stop = self._nbins[axis] + stop.offset
elif stop is None:
stop = self._nbins[axis]
if not (stop > start):
raise ValueError("'stop' must be greater than 'start'")
if stop > self._nbins[axis]:
raise IndexError("'stop' out of bounds")
# Redefine axis. Include upper bound of last bin in slice
new_axes = [Axis(a.edges[start:stop+1], a.label) if adim == axis
else a
for adim,a in enumerate(self._axes)]
# This is to select every bin from the dimensions we are not cutting on
axis_pre = tuple(slice(None) for i in range(0,axis))
axis_post = tuple(slice(None) for i in range(axis+1, self.ndim))
# Slice, keeping all entries not in the slice in the under/overflow bins
# if needed
new_contents = self._contents[axis_pre +
tuple([slice(start+1,stop+1)]) +
axis_post]
if underflow:
underflow_contents = np.sum(self._contents[axis_pre +
tuple([slice(0,start+1)]) +
axis_post],
axis = axis, keepdims = True)
else:
underflow_contents = np.zeros([1 if d == axis
else self._nbins[d] + 2
for d in range(self.ndim)])
new_contents = np.append(underflow_contents, new_contents, axis = axis)
if overflow:
overflow_contents = np.sum(self._contents[axis_pre +
tuple([slice(stop+1,
self._nbins[axis]+2)]) +
axis_post],
axis = axis, keepdims = True)
else:
overflow_contents = np.zeros([1 if d == axis
else self._nbins[d] + 2
for d in range(self.ndim)])
new_contents = np.append(new_contents, overflow_contents, axis = axis)
new_sumw2 = None
if self.sumw2 is not None:
new_sumw2 = self.sumw2.slice(axis, start, stop)._contents
# Create new histogram
return Histogram(edges = new_axes,
contents = new_contents,
sumw2 = new_sumw2)
def __imul__(self, weight):
"""
Multiply contents by a weight. sumw2 will be updated accordingly
Args:
weights (float or array): Either a number or an array with the
shape of nbins+2 (to include under/overflow bins)
"""
try:
self._contents *= weight
except ValueError:
logger.error("Weights not compatible with contents size. "
"Are you taking into account under/overflow bins")
raise
if self.sumw2 is not None:
self.sumw2 *= weight*weight
return self
def __mul__(self, other):
"""
* operator between histograms of compatible axes
"""
# This copies sumw2 as well
new = deepcopy(self)
new *= other
return new
def __iadd__(self, other):
"""
+= operator
"""
if self._axes != other._axes:
raise ValueError("Can't operate on histogram with different axes")
self._contents += other._contents
if self.sumw2 is not None and other.sumw2 is not None:
self.sumw2 += other.sumw2
elif self.sumw2 is None and other.sumw2 is None:
pass
else:
logger.warning("One of the histogram being added does not"
"track weights squared. sumw2 will be dropped.")
self.sumw2 = None
return self
def __isub__(self, other):
"""
-= operator
Note that defining and using __neg__ will imply using copy() on a
histogram, so we duplicate a little bit of code
"""
if self._axes != other._axes:
raise ValueError("Can't operate on histogram with different axes")
self._contents -= other._contents
if self.sumw2 is not None and other.sumw2 is not None:
self.sumw2 -= other.sumw2
elif self.sumw2 is None and other.sumw2 is None:
pass
else:
logger.warning("One of the histogram in the operation does not"
"track weights squared. sumw2 will be dropped.")
self.sumw2 = None
return self
def __add__(self, other):
"""
+ operator between histograms of compatible axes
"""
# This copies sumw2 as well
new = deepcopy(self)
new += other
return new
def __sub__(self, other):
"""
- operator
"""
# This copies sumw2 as well
new = deepcopy(self)
new -= other
return new
[docs]
def scale(self, weight, indices = Ellipsis, axis = None):
"""
Scale the histogram contents.
By default, :code:`h.scale(weight) <==> h *= weight`
The parameter :code:`indices` can be used to specify the slice that will
be multiplied by the weights. Follow the same index convention as in [].
If axis is specified, then weights and indices correspond to a single
axis.
Args:
weights (array-like): Weights
indices (int or tuple): Bin indices
axis (int or str): Axis index
"""
if axis is None:
indices = self._prepare_indices(indices)
self._contents[indices] *= weight
if self.sumw2 is not None:
self.sumw2._contents[indices] *= weight*weight
else:
# Scale along a dimension
# We just need to reshape weights and indices
axis = self._axes.key_to_index(axis)
if axis < 0 or not (axis < self.ndim):
raise ValueError("Axis out of bounds")
if indices is Ellipsis:
# In this context Ellipsis means all bins inclusing overflow
indices = slice(-1,self.NBINS+1)
# Select all bins from the other dimensions
indices = tuple(axis*[slice(-1,self.NBINS)]
+ [indices]
+ [Ellipsis])
# Change axis shape to be compatible if needed
if not np.isscalar(weight):
weight = np.array(weight)
if weight.ndim != 1:
raise ValueError("When using 'axis', weight can either be "
"scalar or a 1D array")
newaxis = tuple(axis*[np.newaxis]
+ [slice(None,None)]
+ (self.ndim-axis-1)*[np.newaxis])
weight = weight[newaxis]
self.scale(weight,
indices = indices)