Source code for bctools.containers.histogram

'''
Histogram a la ROOT.
'''

import logging
logger = logging.getLogger(__name__)

import numpy as np
from numpy import log2

from copy import copy,deepcopy

import collections

from bctools.exceptions import UserError

[docs] class Axis: """ Bin edges. Optionally labeled Args: edges (array-like): Bin edges label (str): Label for axis. If edges is an Axis object, this will override its label scale (str): Bin center mode e.g. `"linear"` or `"log"`. See `scale()`. If edges is an Axis object, this will override its mode Attributes: edges (array-like): Bin edges label (str): Label for axis """ def __init__(self, edges, label = None, scale = None): if isinstance(edges, Axis): self.edges = edges.edges #Override if label is None: self.label = edges.label else: self.label = label if scale is None: self._scale = edges._scale else: self._scale = scale else: self.edges = np.array(edges) if len(self.edges) < 2: raise ValueError("All edges need at least two edges") if any(np.diff(self.edges) <= 0): raise ValueError("All bin edges must be strictly monotonically" " increasing") self.label = label if scale is None: self._scale = 'linear' else: self._scale = scale
[docs] def scale(self, mode = None): """ Control what is considered the center of the bin. This affects `centers()` and interpolations. Args: mode (str or None): - linear (default): The center is the midpoint between the bin edges - symmetric: same as linear, except for the first center, which will correspond to the lower edge. This is, for example, useful when the histogram is filled with the absolute value of a variable. - log: The center is the logarithmic (or geometrical) midpoint between the bin edges. Return: str: The new mode (or current if mode is `None`). """ if mode is not None: if mode not in ['linear', 'symmetric', 'log']: raise ValueError("Bin center mode '{}' not supported".format(mode)) if mode == 'log' and self.min <= 0: raise ArithmeticError("Bin center mode 'log' can only be assigned " "to axes starting at a positive number") self._scale = mode return self._scale
def __array__(self): return np.array(self.edges) def __len__(self): return len(self.edges) def __eq__(self, other): return (np.array_equal(self.edges, other.edges) and self.label == other.label) def __getitem__(self, key): return self.edges[key]
[docs] def find_bin(self, value): """ Return the bin `value` corresponds to. Return: int: Bin number. -1 for underflow, `nbins` for overflow """ return np.digitize(value, self.edges)-1
[docs] def interp_weights(self, value): """ Get the two closest bins to `value`, together with the weights to linearly interpolate between them. The bin contents are assigned to the center of the bin. Values in the edges beyond the center of the first/last bin will result in no interpolation. Return: [int, int]: Bins [float, float]: Weights """ bin0 = self.find_bin(value) if bin0 < 0 or bin0 == self.nbins: raise ValueError("Value {} out of bounds [{} - {})" .format(value, self.min, self.max)) # Linear interpolation with two closest bins center0 = self.centers[bin0] if value > center0: bin1 = bin0 + 1 else: bin1 = bin0 - 1 # Handle histogram edges, beyond center of first/last pixel bin1 = min(self.nbins-1, max(0, bin1)) if bin0 == bin1: # No interpolation at the very edge return ([bin0,bin1],[1,0]) # Sort if bin0 > bin1: bin0, bin1 = bin1, bin0 # Weights center0 = self.centers[bin0] center1 = self.centers[bin1] if self._scale == 'log': center0 = log2(center0) center1 = log2(center1) value = log2(value) norm = center1 - center0 w0 = (center1 - value) / norm w1 = (value - center0) / norm return ([bin0,bin1], [w0,w1])
@property def lower_bounds(self): ''' Lower bound of each bin ''' return self.edges[:-1] @property def upper_bounds(self): ''' Upper bound of each bin ''' return self.edges[1:] @property def bounds(self): ''' Start of [lower_bound, upper_bound] values for each bin. ''' return np.transpose([self.lower_bounds, self.upper_bounds]) @property def min(self): """ Overall lower bound """ return self.edges[0] @property def max(self): """ Overall upper bound of histogram """ return self.edges[-1] @property def centers(self): ''' Center of each bin. ''' if self._scale == 'linear': centers = (self.edges[1:] + self.edges[:-1])/2 elif self._scale == 'symmetric': centers = (self.edges[1:] + self.edges[:-1])/2 centers[0] = self.min elif self._scale == 'log': log2_edges = log2(self.edges) centers = 2**((log2_edges[1:] + log2_edges[:-1])/2) else: raise AssertionError("This shouldn't happen, " "tell maintainers to fix it") return centers @property def widths(self): ''' Width each bin. ''' return np.diff(self.edges) @property def nbins(self): """ Number of elements along each axis. Either an int (1D histogram) or an array """ return len(self.edges)-1
[docs] class Axes: """ Holds a list of axes. The operator Axes[key] return a subset of these. Key can be either the index or the label. If the key is a single index, a single Axis object will be returned Args: edges (array or list of arrays or Axis): Definition of bin edges. labels (array of str): Optionally label the axes for easier indexing. Will override the labels of edges, if they are Axis objects scale (str or array): Bin center mode e.g. `"linear"` or `"log"`. See Axis.scale. If not an array, all axes will have this mode. """ def __init__(self, edges, labels=None, scale = None): # Standarize axes as list of Axis if isinstance(edges, Axes): # From another Axes object self._axes = copy(edges._axes) elif np.ndim(edges) == 0: if np.isscalar(edges): raise TypeError("'edges' can't be a scalar") elif np.ndim(edges) == 1: if len(edges) == 0: raise ValueError("'edges' can't be an empty array") if all(np.ndim(a) == 0 for a in edges): #1D histogram self._axes = [Axis(edges)] else: #Multi-dimensional histogram. self._axes = [Axis(axis) for axis in edges] elif np.ndim(edges) == 2: #Multi-dimensional histogram self._axes = [Axis(axis) for axis in edges] else: raise ValueError("'edges' can have at most two dimensions") #Override labels if nedeed if labels is not None: if np.isscalar(labels): labels = [labels] if len(labels) != self.ndim: raise ValueError("Edges - labels size mismatch") for n,label in enumerate(labels): self._axes[n] = Axis(self._axes[n], label) #Maps labels to axes indices. Only keep non-None labels = np.array([a.label for a in self._axes]) non_none_labels = labels[labels != None] if len(np.unique(non_none_labels)) != len(non_none_labels): raise ValueError("Labels can't repeat") self._labels = {} for n,label in enumerate(labels): if label is not None: self._labels[label] = n #Override scale if nedeed if scale is not None: if np.isscalar(scale): scale = self.ndim*[scale] if len(scale) != self.ndim: raise ValueError("Edges - scale size mismatch") for mode,ax in zip(scale, self._axes): ax.scale(mode) def __len__(self): return self.ndim def __iter__(self): return iter(self._axes) @property def labels(self): """ Label of axes. Return: Either a string or a tuple of string. None if they are not defined """ return [a.label for a in self._axes] @property def ndim(self): """ Number of axes """ return len(self._axes)
[docs] def key_to_index(self, key): """ Turn a key or list of keys, either indices or labels, into indices Args: key (int or str): Index or label Return: int: Index """ if isinstance(key, int): return key if (isinstance(key, (np.ndarray, list, tuple, range)) and not isinstance(key, str)): return tuple(self.key_to_index(k) for k in key) else: #Label try: return self._labels[key] except KeyError: logger.error("Axis with label {} not found".format(key)) raise
def __getitem__(self, key): indices = self.key_to_index(key) if np.isscalar(indices): return self._axes[indices] else: return Axes([self._axes[i] for i in indices]) def __eq__(self, other): return all([a1 == a2 for a1,a2 in zip(self._axes,other._axes)]) def __array__(self): return np.array(self._axes)
[docs] def interp_weights(self, *values): """ Get the bins and weights to linearly interpolate between bins. The bin contents are assigned to the center of the bin. Args: values (float or array): Coordinates within the axes to interpolate. Must have the same size as `ndims`. Input values as ``(1,2,3)`` or ``([1,2,3])``. Returns: array of tuples of int, array of floats: Bins and weights to use. Size=2^ndim. Each bin is specified by a tuple containing `ndim` integers """ #Standarize if (len(values) == 1 and isinstance(values[0], (list, np.ndarray, range, tuple))): # Got a sequence values = values[0] if len(values) != self.ndim: raise ValueError("Number of values different than number of dimensions") # Get the bin/weights for each individual axis dim_bins = [] dim_weights = [] for dim,value in enumerate(values): bins,weights = self._axes[dim].interp_weights(value) dim_bins += [bins] dim_weights += [weights] bins = [] weights = [] # Combine them. e.g. for 2D this results in # weights = [dim_weights[0][0]*dim_weights[1][0], # dim_weights[0][1]*dim_weights[1][0], # dim_weights[0][0]*dim_weights[1][1], # dim_weights[0][1]*dim_weights[1][1]] # bins = [(dim_bins[0][0], dim_bins[1][0]), # (dim_bins[0][1], dim_bins[1][0]), # (dim_bins[0][0], dim_bins[1][1]), # (dim_bins[0][1], dim_bins[1][1])] # bit_masks = [0b001, 0b010, 0b100, ...] bit_masks = 2**np.array(range(self.ndim)) for n in range(2**self.ndim): weight = 1 bin_list = [] # Since there are two weights per axis, we use bit # masking to loop between them instead of recursion for dim,bit_mask in enumerate(bit_masks): index = int(bool(n & bit_mask)) # Either 0 or 1 weight *= dim_weights[dim][index] bin_list += [dim_bins[dim][index]] bins += [tuple(bin_list)] weights += [weight] return bins,weights
def _get_axis_property(f): """ Decorator to retrieve a property for an axis based on an index. This allows to specify the axis or axes as (x), (x,y), ([x,y]) or even (x,[z,[x,y]]). The methods need to be reclared as: @_get_axis_property def property_name(self, axis): return self[axis] This decorator will automatically get the corresponding property based on the name of the methods """ def wrapper(self, *axis): if len(axis) == 0: #Default to first axis, useful for 1D histograms return wrapper(self, 0) elif len(axis) == 1: if np.isscalar(axis[0]): #Standard way, single axis. Do other types recursively return getattr(f(self, axis[0]), f.__name__) else: return [wrapper(self, a) for a in axis[0]] else: return [wrapper(self, a) for a in axis] return wrapper @_get_axis_property def edges(self, axis): """ Edges for a given axis Args: axis (int, str or list): Axis or axes indices or labels Return: array or list of arrays """ return self[axis] @_get_axis_property def lower_bounds(self, axis=0): ''' Lower bound of each bin Args: axis (int, str or list): Axis or axes indices or labels Return: array or list of arrays ''' return self[axis] @_get_axis_property def upper_bounds(self, axis=0): ''' Upper bound of each bin Args: axis (int, str or list): Axis or axes indices or labels Return: array or list of arrays ''' return self[axis] @_get_axis_property def bounds(self, axis=0): ''' Start of [lower_bound, upper_bound] values for each bin. Args: axis (int, str or list): Axis or axes indices or labels Return: array or list of arrays ''' return self[axis] @_get_axis_property def min(self, axis=0): """ Overall lower bound Args: axis (int, str or list): Axis or axes indices or labels Return: float or array """ return self[axis] @_get_axis_property def max(self, axis=0): """ Overall upper bound of histogram Args: axis (int, str or list): Axis or axes indices or labels Return: float or array """ return self[axis] @_get_axis_property def centers(self, axis=0): ''' Center of each bin. Args: axis (int, str or list): Axis or axes indices or labels Return: array or list of arrays ''' return self[axis] @_get_axis_property def widths(self, axis=0): ''' Width each bin. Args: axis (int, str or list): Axis or axes indices or labels Return: array or list of arrays ''' return self[axis] @_get_axis_property def nbins(self, axis=0): """ Number of elements along each axis. Either an int (1D histogram) or an array Args: axis (int, str or list): Axis or axes indices or labels Return: float or array """ return self[axis]
[docs] class Histogram(object): """ This is a wrapper of a numpy array with axes and a fill method. Like an array, the histogram can have an arbitrary number of dimensions. Standard numpy array indexing is supported to get the contents --i.e. :code:`h[:]`, :code:`h[4]`, :code:`h[[1,3,4]]`, :code:`h[:,5:50:2]]`, etc.--. However, the meaning of the :code:`-1` index is different. Instead of counting from the end, :code:`-1` corresponds to the underflow bin. Similarly, an index equal to the number of bins corresponds to the overflow bin. You can however give relative position with respect to :code:`h.NBINS` --e.g. :code:`h[0:h.NBINS]` result in all regular bins, :code:`h[-1:h.NBINS+1]` includes also the underflow/overflow bins and :code:`h[h.NBINS]` gives you the contents of the overflow bin. You can also use an Ellipsis object (...) at the end to specify that the contents from the rest of the dimension are to have the under and overflow bins included. e.g. for a 3D histogram :code:`h[1,-1:h.NBINS+1,-1:h.NBINS+1] = h[1,...]`. h[:] returns all contents without under/overflow bins and h[...] returns everything, including those special bins. Additionally, you can specify with a dictionary the indices for specific axes, with all the rest being the default. e.g. :code:`h[:,:,1:,:,6] <==> h[{2:slice(1,None), 4:6}]`. If you also use the axes id if you labeled them, e.g. :code:`h[{'x':slice(0,h.NBINS+1)}]` If :code:`sumw2` is not :code:`None`, then the histogram will keep track of the sum of the weights squared --i.e. you better use this if you are using weighted data and are concern about error bars-- You can use the :code:`*=` operator to weight by a number or an array of weights with the shape of :code:`h.nbins+2` (to include under/overflow bins). The operators :code:`+`, :code:`-`, :code:`+=` and :code:`-=` are available. Both operands need to be histograms. An exception will be raised if the axes are not the same. Note that :code:`h += h0` is more efficient than :code:`h = h + h0` since latter involves the instantiation of a new histogram. Args: edges (Axes or array): Definition of bin edges, Anything that can be processes by Axes. Lower edge value is included in the bin, upper edge value is excluded. contents (array): Initialization of histogram contents. Might or might not include under/overflow bins. Initialize to 0 by default. sumw2 (None, bool or array): If True, it will keep track of the sum of the weights squared. You can also initialize them with an array labels (array of str): Optionally label the axes for easier indexing scale (str or array): Bin center mode e.g. `"linear"` or `"log"`. See ``Axis.scale()``. Not to be confused with the ``Histogram`'s `method ``scale()``, which `scales` the bin contents, rather than defining what the scale mode of an axis is. Attributes: sumw2 (Histogram): An auxiliary histogram whose contents are the sum of the square of the weight that were used to fill this histogram """ def __init__(self, edges, contents = None, sumw2 = None, labels=None, scale = None): self._axes = Axes(edges, labels=labels, scale = scale) self._nbins = np.array([a.nbins for a in self._axes]) # Standarize contents (with under/overflow) or initialize them to zero. if contents is not None: if np.array_equal(self._nbins+2, np.shape(contents)): # Includes under and overflow self._contents = np.array(contents) elif np.array_equal(self._nbins, np.shape(contents)): # Missing under and overflow, but right shape. # Adding empty under/overflow bins self._contents = np.zeros(np.array(np.shape(contents)) + 2) self._contents[tuple(slice(1,-1) for i in range(0,np.ndim(contents)))] = contents else: raise ValueError("Edges-contents size mismatch") else: self._contents = np.zeros([n+2 for n in self._nbins]) #Check if we'll keep track of the sum of the weights if sumw2 is None or sumw2 is False: self.sumw2 = None elif sumw2 is True: self.sumw2 = Histogram(self._axes) else: self.sumw2 = Histogram(self._axes, sumw2)
[docs] @classmethod def concatenate(cls, edges, histograms, label = None): """ Generate a Histogram from a list os histograms. The axes of all input histograms must be equal, and the new histogram will have one more dimension than the input. The new axis has index 0. Args: edges (Axes or array): Definition of bin edges of the new dimension histograms (list of Histogram): List of histogram to fill contents. Might or might not include under/overflow bins. labels (str): Label the new dimension Return: Histogram """ # Check new axis matches number of histograms, # with or without under/overflow new_axis = Axis(edges, label = label) if len(histograms) == new_axis.nbins: underflow_bin_shift = 1 elif len(histograms) == new_axis.nbins + 2: underflow_bin_shift = 0 else: raise ValueError("Mismatch between number of bins and " "number of histograms") # Create new axes and new contents old_axes = histograms[0].axes new_axes = Axes([new_axis] + [ax for ax in old_axes]) contents = np.zeros([ax.nbins+2 for ax in new_axes]) sumw2 = np.zeros([ax.nbins+2 for ax in new_axes]) for bin,hist in enumerate(histograms): if hist.axes != old_axes: raise ValueError("The axes of all input histogram must equal") bin += underflow_bin_shift # Account for under/overflow bins contents[bin] = hist[...] if sumw2 is not None: if hist.sumw2 is not None: sumw2[bin] = hist.sumw2[...] else: logger.warning("Not all input histogram have sum of weights " "squared. sumw2 will be dropped") sumw2 = None return Histogram(new_axes, contents, sumw2)
@property def ndim(self): return self._axes.ndim
[docs] def clear(self): """ Set all counts to 0 """ self._contents[:] = 0 if self.sumw2 is not None: self.sumw2.clear()
def __eq__(self, other): # Histogram is completely defined by axes and contents return (self._axes == other._axes and np.array_equal(self._contents, other._contents) and self.sumw2 == other.sumw2) class _NBINS(): ''' Convenience class that will expand to the number of bins of a given dimension. The trick is to overload the -/+ operators such than h.NBINS +/- offset (h being an instance of Histogram and NBINS an static instance of Histogram._NBINS) returns an instance of _NBINS itself, which stores the offset. The [] operator can then detect that the input is an instance of _NBINS and convert it into an integer with respect to the size of the appropiate axis. ''' def __init__(self, offset = 0): self.offset = offset def __add__(self, offset): return self.__class__(offset) def __sub__(self, offset): return self + (-offset) NBINS = _NBINS() def _prepare_indices(self, indices): ''' Prepare indices for it use in __getitem__ and __setitem__ --i.e. [] overloading -- See class help. ''' def _prepare_index(index, dim): ''' Modify index for a single axis to account for under/overflow, as well as to catch instances of _NBINS (see description above) This depend on the number of bins in the axis of a given dimension (dim) ''' if isinstance(index,slice): # Both start and stop can be either None, an instance of # _NBINS or an integer index = slice(1 if index.start is None else self._nbins[dim] + index.start.offset + 1 if isinstance(index.start, self._NBINS) else index.start+1, self._nbins[dim]+1 if index.stop is None else self._nbins[dim] + index.stop.offset + 1 if isinstance(index.stop, self._NBINS) else index.stop+1, index.step) # Check bounds. Note index is the _contents index at this point if index.start < 0 or index.stop > self._nbins[dim] + 2: raise IndexError("Bin index out of bounds") return index elif isinstance(index, (np.integer, int)): if index < -1 or index > self._nbins[dim]: raise IndexError("Bin index out of bounds") return index+1 elif isinstance(index, self._NBINS): # Referece with respect to nbins return _prepare_index(self._nbins[dim] + index.offset, dim) elif isinstance(index, (np.ndarray, list, tuple, range)): # Note: this will return a copy, not a view # Handle references with respecto to nbins index = [self._nbins[dim] + i.offset + 1 if isinstance(i,self._NBINS) else i + 1 for i in index] # Check bounds. Note index is the _contents index at this point/ for ind in index: if ind < 0 or ind > self._nbins[dim] + 1: raise IndexError("Bin index out of bounds") return index else: raise TypeError("Index can only be an int, slice, list or array") if isinstance(indices, tuple): # Get the rest of the dimensions with under/overflow user used ... if indices[-1] is Ellipsis: extra_indices = tuple(slice(-1, self._nbins[dim]+1) for dim in range(len(indices)-1, self.ndim)) indices = self._prepare_indices(indices[:-1] + extra_indices) return indices # Standard way. All other ways end up here after recursion indices = tuple(_prepare_index(index, dim) for dim,index in enumerate(indices)) # Remove under/overflow of the rest of the dimensions indices += tuple(_prepare_index(slice(None), dim) for dim in range(len(indices), self.ndim)) return indices if isinstance(indices, dict): # Indices for specified axes. Default to all other axes new_indices = [slice(None)] * self.ndim for axis,index in indices.items(): axis = self.axes.key_to_index(axis) new_indices[axis] = index return self._prepare_indices(tuple(new_indices)) else: # Single axis return self._prepare_indices(tuple([indices])) def __getitem__(self, indices): return self._contents[self._prepare_indices(indices)] def __array__(self): """ Convert to numpy array. Drops under/overflow """ return self[:] @property def axes(self): """ Underlaying axes object """ return self._axes @property def axis(self): """ Equivalent to `self.axes[0]`, but fails if `ndim > 1` """ if self.ndim > 1: raise UserError("Property 'axis' can only be used with 1D " "histograms. Use `axes` for multidimensional " "histograms") return self.axes[0] @property def shape(self): ''' Tuple with number of bins along each dimensions ''' return tuple(self._nbins)
[docs] def find_bin(self, *values, axis = None): """ Return one or more indices corresponding to the bin this value or set of values correspond to. You can pass either an array, or specified the values as different arguments. i.e. :code:`h.find_bin(x,y,z)` = :code:`h.find_bin([x,y,z])` Args: values (float or array): Vaule or list of values axis (int or str or list): If set, values correspond to the\ subset of axes listed here Return: int or tuple: Bin index """ #Standarize if len(values) == 1 and \ isinstance(values[0], (list, np.ndarray, range, tuple)): # Got a sequence values = values[0] # Either all axes or those specified by user if axis is not None: axes = Axes(self._axes[axis]) else: axes = self._axes if len(values) != len(axes): raise ValueError("Different number of values than axes") # Digitize bins = tuple(axis.find_bin(val) for val,axis in zip(values, axes)) if len(bins) == 1: return bins[0] else: return bins
[docs] def interp(self, *values): """ Get a linearly interpolated content for a given set of values along each axes. The bin contents are assigned to the center of the bin. Args: values (float or array): Coordinates within the axes to interpolate. Must have the same size as `ndims`. Input values as `(1,2,3)` or `([1,2,3])` Return: float """ content = 0 bins,weights = self._axes.interp_weights(*values) for bin,weight in zip(bins, weights): content += weight*self[bin] return content
[docs] def fill(self, *values, weight=1): ''' And an entry to the histogram. Can be weighted. Follow same convention as find_bin() Args: values (float or array): Value of entry weight (float): Value weight in histogram. Note: Note that weight needs to be specified explicitely by key, otherwise it will be considered a value an a IndexError will be thrown. ''' indices = self._prepare_indices(self.find_bin(*values)) self._contents[indices] += weight if self.sumw2 is not None: self.sumw2._contents[indices] += weight*weight
[docs] def project(self, *axis): """ Return a histogram consisting on a projection of the current one Args: axis (int or str or list): axis or axes onto which the histogram will be projected --i.e. will sum up over the other dimensiones--. The axes of the new histogram will have the same order --i.e. you can transpose axes-- Return: Histogram: Projected histogram """ if self.ndim == 1: raise ValueError("Can't project a 1D histogram. " "Consider using np.sum(h[...])") #Standarize if len(axis) == 1 and \ isinstance(axis[0], (list, np.ndarray, range, tuple)): # Got a sequence axis = axis[0] axis = self._axes.key_to_index(axis) if len(np.unique(axis)) != len(axis): raise ValueError("An axis can't repeat") sum_axes = tuple(dim for dim in range(0, self.ndim) if dim not in axis) new_contents = self._contents.sum(axis = sum_axes) # Transpose the contents to match the order of the axis provided by the # the user, which are currently sorted new_contents = np.transpose(new_contents, axes = np.argsort(np.argsort(axis))) new_sumw2 = None if self.sumw2 is not None: new_sumw2 = self.sumw2.project(axis)._contents return Histogram(edges = self._axes[axis], contents = new_contents, sumw2 = new_sumw2)
[docs] def slice(self, axis, start = None, stop = None, underflow = True, overflow = True): """ Return a histogram which is a slice of the current one, along a given dimension. Follows same indexing convention as [] operator Args: axis (int or str): Dimension that will be sliced start (None or int): Start bin (inclusive) stop (None or int): Stop bin (exclusive). Must be greater than start underflow (bool): If True, the contents before the start bin will be summed up and kept in the underflow bins, otherwise they'll be discarded overflow (overflow): same as underflow Return: Histogram: Sliced histogram """ # If it's a label, turn it into an integer axis = self._axes.key_to_index(axis) # Standarize start/stop and checks. These are the indices of the bins, # without taking into account under/overflow if isinstance(start, self._NBINS): start = self._nbins[axis] + start.offset elif start is None: start = 0 if start < 0 or not (start < self._nbins[axis]): raise IndexError("'start' out of bounds") if isinstance(stop, self._NBINS): stop = self._nbins[axis] + stop.offset elif stop is None: stop = self._nbins[axis] if not (stop > start): raise ValueError("'stop' must be greater than 'start'") if stop > self._nbins[axis]: raise IndexError("'stop' out of bounds") # Redefine axis. Include upper bound of last bin in slice new_axes = [Axis(a.edges[start:stop+1], a.label) if adim == axis else a for adim,a in enumerate(self._axes)] # This is to select every bin from the dimensions we are not cutting on axis_pre = tuple(slice(None) for i in range(0,axis)) axis_post = tuple(slice(None) for i in range(axis+1, self.ndim)) # Slice, keeping all entries not in the slice in the under/overflow bins # if needed new_contents = self._contents[axis_pre + tuple([slice(start+1,stop+1)]) + axis_post] if underflow: underflow_contents = np.sum(self._contents[axis_pre + tuple([slice(0,start+1)]) + axis_post], axis = axis, keepdims = True) else: underflow_contents = np.zeros([1 if d == axis else self._nbins[d] + 2 for d in range(self.ndim)]) new_contents = np.append(underflow_contents, new_contents, axis = axis) if overflow: overflow_contents = np.sum(self._contents[axis_pre + tuple([slice(stop+1, self._nbins[axis]+2)]) + axis_post], axis = axis, keepdims = True) else: overflow_contents = np.zeros([1 if d == axis else self._nbins[d] + 2 for d in range(self.ndim)]) new_contents = np.append(new_contents, overflow_contents, axis = axis) new_sumw2 = None if self.sumw2 is not None: new_sumw2 = self.sumw2.slice(axis, start, stop)._contents # Create new histogram return Histogram(edges = new_axes, contents = new_contents, sumw2 = new_sumw2)
def __imul__(self, weight): """ Multiply contents by a weight. sumw2 will be updated accordingly Args: weights (float or array): Either a number or an array with the shape of nbins+2 (to include under/overflow bins) """ try: self._contents *= weight except ValueError: logger.error("Weights not compatible with contents size. " "Are you taking into account under/overflow bins") raise if self.sumw2 is not None: self.sumw2 *= weight*weight return self def __mul__(self, other): """ * operator between histograms of compatible axes """ # This copies sumw2 as well new = deepcopy(self) new *= other return new def __iadd__(self, other): """ += operator """ if self._axes != other._axes: raise ValueError("Can't operate on histogram with different axes") self._contents += other._contents if self.sumw2 is not None and other.sumw2 is not None: self.sumw2 += other.sumw2 elif self.sumw2 is None and other.sumw2 is None: pass else: logger.warning("One of the histogram being added does not" "track weights squared. sumw2 will be dropped.") self.sumw2 = None return self def __isub__(self, other): """ -= operator Note that defining and using __neg__ will imply using copy() on a histogram, so we duplicate a little bit of code """ if self._axes != other._axes: raise ValueError("Can't operate on histogram with different axes") self._contents -= other._contents if self.sumw2 is not None and other.sumw2 is not None: self.sumw2 -= other.sumw2 elif self.sumw2 is None and other.sumw2 is None: pass else: logger.warning("One of the histogram in the operation does not" "track weights squared. sumw2 will be dropped.") self.sumw2 = None return self def __add__(self, other): """ + operator between histograms of compatible axes """ # This copies sumw2 as well new = deepcopy(self) new += other return new def __sub__(self, other): """ - operator """ # This copies sumw2 as well new = deepcopy(self) new -= other return new
[docs] def scale(self, weight, indices = Ellipsis, axis = None): """ Scale the histogram contents. By default, :code:`h.scale(weight) <==> h *= weight` The parameter :code:`indices` can be used to specify the slice that will be multiplied by the weights. Follow the same index convention as in []. If axis is specified, then weights and indices correspond to a single axis. Args: weights (array-like): Weights indices (int or tuple): Bin indices axis (int or str): Axis index """ if axis is None: indices = self._prepare_indices(indices) self._contents[indices] *= weight if self.sumw2 is not None: self.sumw2._contents[indices] *= weight*weight else: # Scale along a dimension # We just need to reshape weights and indices axis = self._axes.key_to_index(axis) if axis < 0 or not (axis < self.ndim): raise ValueError("Axis out of bounds") if indices is Ellipsis: # In this context Ellipsis means all bins inclusing overflow indices = slice(-1,self.NBINS+1) # Select all bins from the other dimensions indices = tuple(axis*[slice(-1,self.NBINS)] + [indices] + [Ellipsis]) # Change axis shape to be compatible if needed if not np.isscalar(weight): weight = np.array(weight) if weight.ndim != 1: raise ValueError("When using 'axis', weight can either be " "scalar or a 1D array") newaxis = tuple(axis*[np.newaxis] + [slice(None,None)] + (self.ndim-axis-1)*[np.newaxis]) weight = weight[newaxis] self.scale(weight, indices = indices)