Logo ROOT   6.10/00
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TDFInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_TDF_TINTERFACE
12 #define ROOT_TDF_TINTERFACE
13 
14 #include "ROOT/TBufferMerger.hxx"
15 #include "ROOT/TResultProxy.hxx"
16 #include "ROOT/TDFNodes.hxx"
18 #include "ROOT/TDFUtils.hxx"
19 #include "TChain.h"
20 #include "TFile.h"
21 #include "TH1.h" // For Histo actions
22 #include "TH2.h" // For Histo actions
23 #include "TH3.h" // For Histo actions
24 #include "TInterpreter.h"
25 #include "TProfile.h" // For Histo actions
26 #include "TProfile2D.h" // For Histo actions
27 #include "TRegexp.h"
28 #include "TROOT.h" // IsImplicitMTEnabled
29 #include "TTreeReader.h"
30 
31 #include <initializer_list>
32 #include <memory>
33 #include <string>
34 #include <sstream>
35 #include <typeinfo>
36 #include <type_traits> // is_same, enable_if
37 
38 namespace ROOT {
39 
40 namespace Internal {
41 namespace TDF {
42 using namespace ROOT::Experimental::TDF;
43 using namespace ROOT::Detail::TDF;
44 
45 using TmpBranchBasePtr_t = std::shared_ptr<TCustomColumnBase>;
46 
47 template <typename TDFNode, typename ActionType, typename... BranchTypes, typename ActionResultType>
48 void CallBuildAndBook(TDFNode *node, const ColumnNames_t &bl, unsigned int nSlots,
49  const std::shared_ptr<ActionResultType> &r)
50 {
51  node->template BuildAndBook<BranchTypes...>(bl, r, nSlots, (ActionType *)nullptr);
52 }
53 
54 std::vector<std::string> GetUsedBranchesNames(const std::string, TObjArray *, const std::vector<std::string> &);
55 
56 Long_t JitTransformation(void *thisPtr, const std::string &methodName, const std::string &nodeTypeName,
57  const std::string &name, const std::string &expression, TObjArray *branches,
58  const std::vector<std::string> &tmpBranches,
59  const std::map<std::string, TmpBranchBasePtr_t> &tmpBookedBranches, TTree *tree);
60 
61 void JitBuildAndBook(const ColumnNames_t &bl, const std::string &nodeTypename, void *thisPtr, const std::type_info &art,
62  const std::type_info &at, const void *r, TTree *tree, unsigned int nSlots,
63  const std::map<std::string, TmpBranchBasePtr_t> &tmpBranches);
64 
65 } // namespace TDF
66 } // namespace Internal
67 
68 namespace Experimental {
69 
70 // forward declarations
71 class TDataFrame;
72 
73 } // namespace Experimental
74 } // namespace ROOT
75 
76 namespace cling {
77 std::string printValue(ROOT::Experimental::TDataFrame *tdf); // For a nice printing at the promp
78 }
79 
80 namespace ROOT {
81 namespace Experimental {
82 namespace TDF {
83 namespace TDFDetail = ROOT::Detail::TDF;
84 namespace TDFInternal = ROOT::Internal::TDF;
85 
86 /**
87 * \class ROOT::Experimental::TDF::TInterface
88 * \ingroup dataframe
89 * \brief The public interface to the TDataFrame federation of classes
90 * \tparam T One of the "node" base types (e.g. TLoopManager, TFilterBase). The user never specifies this type manually.
91 */
92 template <typename Proxied>
93 class TInterface {
94  using ColumnNames_t = TDFDetail::ColumnNames_t;
99  friend std::string cling::printValue(ROOT::Experimental::TDataFrame *tdf); // For a nice printing at the prompt
100  template <typename T>
101  friend class TInterface;
102  template <typename TDFNode, typename ActionType, typename... BranchTypes, typename ActionResultType>
103  friend void TDFInternal::CallBuildAndBook(TDFNode *, const TDFDetail::ColumnNames_t &, unsigned int nSlots,
104  const std::shared_ptr<ActionResultType> &);
105 
106 public:
107  ////////////////////////////////////////////////////////////////////////////
108  /// \brief Append a filter to the call graph.
109  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
110  /// signalling whether the event has passed the selection (true) or not (false).
111  /// \param[in] bn Names of the branches in input to the filter function.
112  /// \param[in] name Optional name of this filter. See `Report`.
113  ///
114  /// Append a filter node at the point of the call graph corresponding to the
115  /// object this method is called on.
116  /// The callable `f` should not have side-effects (e.g. modification of an
117  /// external or static variable) to ensure correct results when implicit
118  /// multi-threading is active.
119  ///
120  /// TDataFrame only evaluates filters when necessary: if multiple filters
121  /// are chained one after another, they are executed in order and the first
122  /// one returning false causes the event to be discarded.
123  /// Even if multiple actions or transformations depend on the same filter,
124  /// it is executed once per entry. If its result is requested more than
125  /// once, the cached result is served.
126  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
127  TInterface<TFilterBase> Filter(F f, const ColumnNames_t &bn = {}, std::string_view name = "")
128  {
129  TDFInternal::CheckFilter(f);
130  auto df = GetDataFrameChecked();
131  const ColumnNames_t &defBl = df->GetDefaultBranches();
132  auto nArgs = TDFInternal::TFunctionTraits<F>::Args_t::fgSize;
133  const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs, bn, defBl);
134  using DFF_t = TDFDetail::TFilter<F, Proxied>;
135  auto FilterPtr = std::make_shared<DFF_t>(std::move(f), actualBl, *fProxiedPtr, name);
136  fProxiedPtr->IncrChildrenCount();
137  df->Book(FilterPtr);
138  TInterface<TFilterBase> tdf_f(FilterPtr, fImplWeakPtr);
139  return tdf_f;
140  }
141 
142  ////////////////////////////////////////////////////////////////////////////
143  /// \brief Append a filter to the call graph.
144  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
145  /// signalling whether the event has passed the selection (true) or not (false).
146  /// \param[in] name Optional name of this filter. See `Report`.
147  ///
148  /// Refer to the first overload of this method for the full documentation.
149  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
150  TInterface<TFilterBase> Filter(F f, std::string_view name)
151  {
152  // The sfinae is there in order to pick up the overloaded method which accepts two strings
153  // rather than this template method.
154  return Filter(f, {}, name);
155  }
156 
157  ////////////////////////////////////////////////////////////////////////////
158  /// \brief Append a filter to the call graph.
159  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
160  /// signalling whether the event has passed the selection (true) or not (false).
161  /// \param[in] bn Names of the branches in input to the filter function.
162  ///
163  /// Refer to the first overload of this method for the full documentation.
164  template <typename F>
165  TInterface<TFilterBase> Filter(F f, const std::initializer_list<std::string> &bn)
166  {
167  return Filter(f, ColumnNames_t{bn});
168  }
169 
170  ////////////////////////////////////////////////////////////////////////////
171  /// \brief Append a filter to the call graph.
172  /// \param[in] expression The filter expression in C++
173  /// \param[in] name Optional name of this filter. See `Report`.
174  ///
175  /// The expression is just in time compiled and used to filter entries. The
176  /// variable names to be used inside are the names of the branches. Only
177  /// valid C++ is accepted.
178  /// Refer to the first overload of this method for the full documentation.
179  TInterface<TFilterBase> Filter(std::string_view expression, std::string_view name = "")
180  {
181  auto df = GetDataFrameChecked();
182  auto tree = df->GetTree();
183  auto branches = tree->GetListOfBranches();
184  auto tmpBranches = fProxiedPtr->GetTmpBranches();
185  auto tmpBookedBranches = df->GetBookedBranches();
186  const std::string expressionInt(expression);
187  const std::string nameInt(name);
188  auto retVal = TDFInternal::JitTransformation(this, "Filter", GetNodeTypeName(), nameInt, expressionInt, branches,
189  tmpBranches, tmpBookedBranches, tree);
190  return *(TInterface<TFilterBase> *)retVal;
191  }
192 
193  ////////////////////////////////////////////////////////////////////////////
194  /// \brief Creates a temporary branch
195  /// \param[in] name The name of the temporary branch.
196  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the
197  /// temporary value. Returns the value that will be assigned to the temporary branch.
198  /// \param[in] bl Names of the branches in input to the producer function.
199  ///
200  /// Create a temporary branch that will be visible from all subsequent nodes
201  /// of the functional chain. The `expression` is only evaluated for entries that pass
202  /// all the preceding filters.
203  /// A new variable is created called `name`, accessible as if it was contained
204  /// in the dataset from subsequent transformations/actions.
205  ///
206  /// Use cases include:
207  ///
208  /// * caching the results of complex calculations for easy and efficient multiple access
209  /// * extraction of quantities of interest from complex objects
210  /// * branch aliasing, i.e. changing the name of a branch
211  ///
212  /// An exception is thrown if the name of the new branch is already in use
213  /// for another branch in the TTree.
214  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
215  TInterface<TCustomColumnBase> Define(std::string_view name, F expression, const ColumnNames_t &bl = {})
216  {
217  auto df = GetDataFrameChecked();
218  TDFInternal::CheckTmpBranch(name, df->GetTree());
219  const ColumnNames_t &defBl = df->GetDefaultBranches();
220  auto nArgs = TDFInternal::TFunctionTraits<F>::Args_t::fgSize;
221  const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs, bl, defBl);
223  const std::string nameInt(name);
224  auto BranchPtr = std::make_shared<DFB_t>(nameInt, std::move(expression), actualBl, *fProxiedPtr);
225  fProxiedPtr->IncrChildrenCount();
226  df->Book(BranchPtr);
227  TInterface<TCustomColumnBase> tdf_b(BranchPtr, fImplWeakPtr);
228  return tdf_b;
229  }
230 
231  ////////////////////////////////////////////////////////////////////////////
232  /// \brief Creates a temporary branch
233  /// \param[in] name The name of the temporary branch.
234  /// \param[in] expression An expression in C++ which represents the temporary value
235  ///
236  /// The expression is just in time compiled and used to produce new values. The
237  /// variable names to be used inside are the names of the branches. Only
238  /// valid C++ is accepted.
239  /// Refer to the first overload of this method for the full documentation.
240  TInterface<TCustomColumnBase> Define(std::string_view name, std::string_view expression)
241  {
242  auto df = GetDataFrameChecked();
243  auto tree = df->GetTree();
244  auto branches = tree->GetListOfBranches();
245  auto tmpBranches = fProxiedPtr->GetTmpBranches();
246  auto tmpBookedBranches = df->GetBookedBranches();
247  const std::string expressionInt(expression);
248  const std::string nameInt(name);
249  auto retVal = TDFInternal::JitTransformation(this, "Define", GetNodeTypeName(), nameInt, expressionInt, branches,tmpBranches, tmpBookedBranches, tree);
250  return *(TInterface<TCustomColumnBase> *)retVal;
251  }
252 
253  ////////////////////////////////////////////////////////////////////////////
254  /// \brief Create a snapshot of the dataset on disk in the form of a TTree
255  /// \tparam BranchTypes variadic list of branch/column types
256  /// \param[in] treename The name of the output TTree
257  /// \param[in] filename The name of the output TFile
258  /// \param[in] bnames The list of names of the branches to be written
259  ///
260  /// This function returns a `TDataFrame` built with the output tree as a source.
261  template <typename... BranchTypes>
262  TInterface<TLoopManager> Snapshot(std::string_view treename, std::string_view filename,
263  const ColumnNames_t &bnames)
264  {
265  using TypeInd_t = typename TDFInternal::TGenStaticSeq<sizeof...(BranchTypes)>::Type_t;
266  return SnapshotImpl<BranchTypes...>(treename, filename, bnames, TypeInd_t());
267  }
268 
269  ////////////////////////////////////////////////////////////////////////////
270  /// \brief Create a snapshot of the dataset on disk in the form of a TTree
271  /// \param[in] treename The name of the output TTree
272  /// \param[in] filename The name of the output TFile
273  /// \param[in] bnames The list of names of the branches to be written
274  ///
275  /// This function returns a `TDataFrame` built with the output tree as a source.
276  /// The types of the branches are automatically inferred and do not need to be specified.
277  TInterface<TLoopManager> Snapshot(std::string_view treename, std::string_view filename,
278  const ColumnNames_t &bnames)
279  {
280  auto df = GetDataFrameChecked();
281  auto tree = df->GetTree();
282  std::stringstream snapCall;
283  // build a string equivalent to
284  // "reinterpret_cast</nodetype/*>(this)->Snapshot<Ts...>(treename,filename,*reinterpret_cast<ColumnNames_t*>(&bnames))"
285  snapCall << "if (gROOTMutex) gROOTMutex->UnLock(); ((" << GetNodeTypeName() << "*)" << this << ")->Snapshot<";
286  bool first = true;
287  for (auto &b : bnames) {
288  if (!first) snapCall << ", ";
289  snapCall << TDFInternal::ColumnName2ColumnTypeName(b, tree, df->GetBookedBranch(b));
290  first = false;
291  };
292  // TODO is there a way to use ColumnNames_t instead of std::vector<std::string> without parsing the whole header?
293  const std::string treeNameInt(treename);
294  const std::string filenameInt(filename);
295  snapCall << ">(\"" << treeNameInt << "\", \"" << filenameInt << "\", "
296  << "*reinterpret_cast<std::vector<std::string>*>(" << &bnames << ")"
297  << ");";
298  // jit snapCall, return result
299  return *reinterpret_cast<TInterface<TLoopManager> *>(gInterpreter->ProcessLine(snapCall.str().c_str()));
300  }
301 
302  ////////////////////////////////////////////////////////////////////////////
303  /// \brief Create a snapshot of the dataset on disk in the form of a TTree
304  /// \param[in] treename The name of the output TTree
305  /// \param[in] filename The name of the output TFile
306  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. See the documentation of TRegexp for more details. An empty string signals the selection of all columns.
307  ///
308  /// This function returns a `TDataFrame` built with the output tree as a source.
309  /// The types of the branches are automatically inferred and do not need to be specified.
310  TInterface<TLoopManager> Snapshot(std::string_view treename, std::string_view filename,
311  std::string_view columnNameRegexp = "")
312  {
313  const auto theRegexSize = columnNameRegexp.size();
314  std::string theRegex(columnNameRegexp);
315 
316  const auto isEmptyRegex = 0 == theRegexSize;
317  // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
318  if (theRegexSize > 0 && theRegex[0] != '^') theRegex = "^" + theRegex;
319  if (theRegexSize > 0 && theRegex[theRegexSize-1] != '$') theRegex = theRegex + "$";
320 
321  ColumnNames_t selectedColumns;
322  selectedColumns.reserve(32);
323 
324  const auto tmpBranches = fProxiedPtr->GetTmpBranches();
325  // Since we support gcc48 and it does not provide in its stl std::regex,
326  // we need to use TRegexp
327  TRegexp regexp(theRegex);
328  int dummy;
329  for (auto &&branchName : tmpBranches) {
330  if (isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) {
331  selectedColumns.emplace_back(branchName);
332  }
333  }
334 
335  auto df = GetDataFrameChecked();
336  auto tree = df->GetTree();
337  if (tree) {
338  const auto branches = tree->GetListOfBranches();
339  for (auto branch : *branches) {
340  auto branchName = branch->GetName();
341  if (isEmptyRegex || -1 != regexp.Index(branchName, &dummy)) {
342  selectedColumns.emplace_back(branchName);
343  }
344  }
345  }
346 
347  return Snapshot(treename, filename, selectedColumns);
348  }
349 
350  ////////////////////////////////////////////////////////////////////////////
351  /// \brief Creates a node that filters entries based on range
352  /// \param[in] start How many entries to discard before resuming processing.
353  /// \param[in] stop Total number of entries that will be processed before stopping. 0 means "never stop".
354  /// \param[in] stride Process one entry every `stride` entries. Must be strictly greater than 0.
355  ///
356  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
357  TInterface<TRangeBase> Range(unsigned int start, unsigned int stop, unsigned int stride = 1)
358  {
359  // check invariants
360  if (stride == 0 || (stop != 0 && stop < start))
361  throw std::runtime_error("Range: stride must be strictly greater than 0 and stop must be greater than start.");
363  throw std::runtime_error("Range was called with ImplicitMT enabled. Multi-thread ranges are not supported.");
364 
365  auto df = GetDataFrameChecked();
367  auto RangePtr = std::make_shared<Range_t>(start, stop, stride, *fProxiedPtr);
368  fProxiedPtr->IncrChildrenCount();
369  df->Book(RangePtr);
370  TInterface<TRangeBase> tdf_r(RangePtr, fImplWeakPtr);
371  return tdf_r;
372  }
373 
374  ////////////////////////////////////////////////////////////////////////////
375  /// \brief Creates a node that filters entries based on range
376  /// \param[in] stop Total number of entries that will be processed before stopping. 0 means "never stop".
377  ///
378  /// See the other Range overload for a detailed description.
379  TInterface<TRangeBase> Range(unsigned int stop) { return Range(0, stop, 1); }
380 
381  ////////////////////////////////////////////////////////////////////////////
382  /// \brief Execute a user-defined function on each entry (*instant action*)
383  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined
384  /// calculations.
385  /// \param[in] bl Names of the branches in input to the user function.
386  ///
387  /// The callable `f` is invoked once per entry. This is an *instant action*:
388  /// upon invocation, an event loop as well as execution of all scheduled actions
389  /// is triggered.
390  /// Users are responsible for the thread-safety of this callable when executing
391  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
392  template <typename F>
393  void Foreach(F f, const ColumnNames_t &bl = {})
394  {
395  using Args_t = typename TDFInternal::TFunctionTraits<decltype(f)>::ArgsNoDecay_t;
396  using Ret_t = typename TDFInternal::TFunctionTraits<decltype(f)>::Ret_t;
397  ForeachSlot(TDFInternal::AddSlotParameter<Ret_t>(f, Args_t()), bl);
398  }
399 
400  ////////////////////////////////////////////////////////////////////////////
401  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
402  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined
403  /// calculations.
404  /// \param[in] bl Names of the branches in input to the user function.
405  ///
406  /// Same as `Foreach`, but the user-defined function takes an extra
407  /// `unsigned int` as its first parameter, the *processing slot index*.
408  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
409  /// for each thread of execution.
410  /// This is meant as a helper in writing thread-safe `Foreach`
411  /// actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`.
412  /// The user-defined processing callable is able to follow different
413  /// *streams of processing* indexed by the first parameter.
414  /// `ForeachSlot` works just as well with single-thread execution: in that
415  /// case `slot` will always be `0`.
416  template <typename F>
417  void ForeachSlot(F f, const ColumnNames_t &bl = {})
418  {
419  auto df = GetDataFrameChecked();
420  const ColumnNames_t &defBl = df->GetDefaultBranches();
421  auto nArgs = TDFInternal::TFunctionTraits<F>::Args_t::fgSize;
422  const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs - 1, bl, defBl);
423  using Op_t = TDFInternal::ForeachSlotHelper<F>;
425  df->Book(std::make_shared<DFA_t>(Op_t(std::move(f)), actualBl, *fProxiedPtr));
426  fProxiedPtr->IncrChildrenCount();
427  df->Run();
428  }
429 
430  ////////////////////////////////////////////////////////////////////////////
431  /// \brief Execute a user-defined reduce operation on the values of a branch
432  /// \tparam F The type of the reduce callable. Automatically deduced.
433  /// \tparam T The type of the branch to apply the reduction to. Automatically deduced.
434  /// \param[in] f A callable with signature `T(T,T)`
435  /// \param[in] branchName The branch to be reduced. If omitted, the default branch is used instead.
436  ///
437  /// A reduction takes two values of a branch and merges them into one (e.g.
438  /// by summing them, taking the maximum, etc). This action performs the
439  /// specified reduction operation on all branch values, returning
440  /// a single value of the same type. The callable f must satisfy the general
441  /// requirements of a *processing function* besides having signature `T(T,T)`
442  /// where `T` is the type of branch.
443  ///
444  /// This action is *lazy*: upon invocation of this method the calculation is
445  /// booked but not executed. See TResultProxy documentation.
446  template <typename F, typename T = typename TDFInternal::TFunctionTraits<F>::Ret_t>
447  TResultProxy<T> Reduce(F f, std::string_view branchName = {})
448  {
449  static_assert(std::is_default_constructible<T>::value,
450  "reduce object cannot be default-constructed. Please provide an initialisation value (initValue)");
451  return Reduce(std::move(f), branchName, T());
452  }
453 
454  ////////////////////////////////////////////////////////////////////////////
455  /// \brief Execute a user-defined reduce operation on the values of a branch
456  /// \tparam F The type of the reduce callable. Automatically deduced.
457  /// \tparam T The type of the branch to apply the reduction to. Automatically deduced.
458  /// \param[in] f A callable with signature `T(T,T)`
459  /// \param[in] branchName The branch to be reduced. If omitted, the default branch is used instead.
460  /// \param[in] initValue The reduced object is initialised to this value rather than being default-constructed
461  ///
462  /// See the description of the other Reduce overload for more information.
463  template <typename F, typename T = typename TDFInternal::TFunctionTraits<F>::Ret_t>
464  TResultProxy<T> Reduce(F f, std::string_view branchName, const T &initValue)
465  {
466  using Args_t = typename TDFInternal::TFunctionTraits<F>::Args_t;
467  TDFInternal::CheckReduce(f, Args_t());
468  auto df = GetDataFrameChecked();
469  unsigned int nSlots = df->GetNSlots();
470  auto bl = GetBranchNames<T>({branchName}, "reduce branch values");
471  auto redObjPtr = std::make_shared<T>(initValue);
472  using Op_t = TDFInternal::ReduceHelper<F, T>;
473  using DFA_t = typename TDFInternal::TAction<Op_t, Proxied>;
474  df->Book(std::make_shared<DFA_t>(Op_t(std::move(f), redObjPtr, nSlots), bl, *fProxiedPtr));
475  fProxiedPtr->IncrChildrenCount();
476  return MakeResultProxy(redObjPtr, df);
477  }
478 
479  ////////////////////////////////////////////////////////////////////////////
480  /// \brief Return the number of entries processed (*lazy action*)
481  ///
482  /// This action is *lazy*: upon invocation of this method the calculation is
483  /// booked but not executed. See TResultProxy documentation.
485  {
486  auto df = GetDataFrameChecked();
487  unsigned int nSlots = df->GetNSlots();
488  auto cSPtr = std::make_shared<unsigned int>(0);
489  using Op_t = TDFInternal::CountHelper;
491  df->Book(std::make_shared<DFA_t>(Op_t(cSPtr, nSlots), ColumnNames_t({}), *fProxiedPtr));
492  fProxiedPtr->IncrChildrenCount();
493  return MakeResultProxy(cSPtr, df);
494  }
495 
496  ////////////////////////////////////////////////////////////////////////////
497  /// \brief Return a collection of values of a branch (*lazy action*)
498  /// \tparam T The type of the branch.
499  /// \tparam COLL The type of collection used to store the values.
500  /// \param[in] branchName The name of the branch of which the values are to be collected
501  ///
502  /// This action is *lazy*: upon invocation of this method the calculation is
503  /// booked but not executed. See TResultProxy documentation.
504  template <typename T, typename COLL = std::vector<T>>
505  TResultProxy<COLL> Take(std::string_view branchName = "")
506  {
507  auto df = GetDataFrameChecked();
508  unsigned int nSlots = df->GetNSlots();
509  auto bl = GetBranchNames<T>({branchName}, "get the values of the branch");
510  auto valuesPtr = std::make_shared<COLL>();
511  using Op_t = TDFInternal::TakeHelper<T, COLL>;
513  df->Book(std::make_shared<DFA_t>(Op_t(valuesPtr, nSlots), bl, *fProxiedPtr));
514  fProxiedPtr->IncrChildrenCount();
515  return MakeResultProxy(valuesPtr, df);
516  }
517 
518  ////////////////////////////////////////////////////////////////////////////
519  /// \brief Fill and return a one-dimensional histogram with the values of a branch (*lazy action*)
520  /// \tparam V The type of the branch used to fill the histogram.
521  /// \param[in] model The returned histogram will be constructed using this as a model.
522  /// \param[in] vName The name of the branch that will fill the histogram.
523  ///
524  /// The default branches, if available, will be used instead of branches whose names are left empty.
525  /// Branches can be of a container type (e.g. std::vector<double>), in which case the histogram
526  /// is filled with each one of the elements of the container. In case multiple branches of container type
527  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
528  /// possibly different lengths between events).
529  /// This action is *lazy*: upon invocation of this method the calculation is
530  /// booked but not executed. See TResultProxy documentation.
531  /// The user gives up ownership of the model histogram.
532  template <typename V = TDFDetail::TInferType>
533  TResultProxy<::TH1F> Histo1D(::TH1F &&model = ::TH1F{"", "", 128u, 0., 0.}, std::string_view vName = "")
534  {
535  auto bl = GetBranchNames<V>({vName}, "fill the histogram");
536  auto h = std::make_shared<::TH1F>(std::move(model));
537  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
538  TDFInternal::HistoUtils<::TH1F>::SetCanExtendAllAxes(*h);
539  return CreateAction<TDFInternal::ActionTypes::Histo1D, V>(bl, h);
540  }
541 
542  template <typename V = TDFDetail::TInferType>
543  TResultProxy<::TH1F> Histo1D(std::string_view vName)
544  {
545  return Histo1D<V>(::TH1F{"", "", 128u, 0., 0.}, vName);
546  }
547 
548  ////////////////////////////////////////////////////////////////////////////
549  /// \brief Fill and return a one-dimensional histogram with the values of a branch (*lazy action*)
550  /// \tparam V The type of the branch used to fill the histogram.
551  /// \param[in] model The returned histogram will be constructed using this as a model.
552  /// \param[in] vName The name of the branch that will fill the histogram.
553  /// \param[in] wName The name of the branch that will provide the weights.
554  ///
555  /// The default branches, if available, will be used instead of branches whose names are left empty.
556  /// Branches can be of a container type (e.g. std::vector<double>), in which case the histogram
557  /// is filled with each one of the elements of the container. In case multiple branches of container type
558  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
559  /// possibly different lengths between events).
560  /// This action is *lazy*: upon invocation of this method the calculation is
561  /// booked but not executed. See TResultProxy documentation.
562  /// The user gives up ownership of the model histogram.
563  template <typename V = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
564  TResultProxy<::TH1F> Histo1D(::TH1F &&model, std::string_view vName, std::string_view wName)
565  {
566  auto bl = GetBranchNames<V, W>({vName, wName}, "fill the histogram");
567  auto h = std::make_shared<::TH1F>(std::move(model));
568  return CreateAction<TDFInternal::ActionTypes::Histo1D, V, W>(bl, h);
569  }
570 
571  template <typename V = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
572  TResultProxy<::TH1F> Histo1D(std::string_view vName, std::string_view wName)
573  {
574  return Histo1D<V, W>(::TH1F{"", "", 128u, 0., 0.}, vName, wName);
575  }
576 
577  template <typename V, typename W>
578  TResultProxy<::TH1F> Histo1D(::TH1F &&model = ::TH1F{"", "", 128u, 0., 0.})
579  {
580  return Histo1D<V, W>(std::move(model), "", "");
581  }
582 
583  ////////////////////////////////////////////////////////////////////////////
584  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
585  /// \tparam V1 The type of the branch used to fill the x axis of the histogram.
586  /// \tparam V2 The type of the branch used to fill the y axis of the histogram.
587  /// \param[in] model The returned histogram will be constructed using this as a model.
588  /// \param[in] v1Name The name of the branch that will fill the x axis.
589  /// \param[in] v2Name The name of the branch that will fill the y axis.
590  ///
591  /// This action is *lazy*: upon invocation of this method the calculation is
592  /// booked but not executed. See TResultProxy documentation.
593  /// The user gives up ownership of the model histogram.
594  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType>
595  TResultProxy<::TH2F> Histo2D(::TH2F &&model, std::string_view v1Name = "", std::string_view v2Name = "")
596  {
597  auto h = std::make_shared<::TH2F>(std::move(model));
598  if (!TDFInternal::HistoUtils<::TH2F>::HasAxisLimits(*h)) {
599  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
600  }
601  auto bl = GetBranchNames<V1, V2>({v1Name, v2Name}, "fill the histogram");
602  return CreateAction<TDFInternal::ActionTypes::Histo2D, V1, V2>(bl, h);
603  }
604 
605  ////////////////////////////////////////////////////////////////////////////
606  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
607  /// \tparam V1 The type of the branch used to fill the x axis of the histogram.
608  /// \tparam V2 The type of the branch used to fill the y axis of the histogram.
609  /// \tparam W The type of the branch used for the weights of the histogram.
610  /// \param[in] model The returned histogram will be constructed using this as a model.
611  /// \param[in] v1Name The name of the branch that will fill the x axis.
612  /// \param[in] v2Name The name of the branch that will fill the y axis.
613  /// \param[in] wName The name of the branch that will provide the weights.
614  ///
615  /// This action is *lazy*: upon invocation of this method the calculation is
616  /// booked but not executed. See TResultProxy documentation.
617  /// The user gives up ownership of the model histogram.
618  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
619  typename W = TDFDetail::TInferType>
620  TResultProxy<::TH2F> Histo2D(::TH2F &&model, std::string_view v1Name, std::string_view v2Name,
621  std::string_view wName)
622  {
623  auto h = std::make_shared<::TH2F>(std::move(model));
624  if (!TDFInternal::HistoUtils<::TH2F>::HasAxisLimits(*h)) {
625  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
626  }
627  auto bl = GetBranchNames<V1, V2, W>({v1Name, v2Name, wName}, "fill the histogram");
628  return CreateAction<TDFInternal::ActionTypes::Histo2D, V1, V2, W>(bl, h);
629  }
630 
631  template <typename V1, typename V2, typename W>
633  {
634  return Histo2D<V1, V2, W>(std::move(model), "", "", "");
635  }
636 
637  ////////////////////////////////////////////////////////////////////////////
638  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
639  /// \tparam V1 The type of the branch used to fill the x axis of the histogram.
640  /// \tparam V2 The type of the branch used to fill the y axis of the histogram.
641  /// \tparam V3 The type of the branch used to fill the z axis of the histogram.
642  /// \param[in] model The returned histogram will be constructed using this as a model.
643  /// \param[in] v1Name The name of the branch that will fill the x axis.
644  /// \param[in] v2Name The name of the branch that will fill the y axis.
645  /// \param[in] v3Name The name of the branch that will fill the z axis.
646  ///
647  /// This action is *lazy*: upon invocation of this method the calculation is
648  /// booked but not executed. See TResultProxy documentation.
649  /// The user gives up ownership of the model histogram.
650  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
651  typename V3 = TDFDetail::TInferType>
652  TResultProxy<::TH3F> Histo3D(::TH3F &&model, std::string_view v1Name = "", std::string_view v2Name = "",
653  std::string_view v3Name = "")
654  {
655  auto h = std::make_shared<::TH3F>(std::move(model));
656  if (!TDFInternal::HistoUtils<::TH3F>::HasAxisLimits(*h)) {
657  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
658  }
659  auto bl = GetBranchNames<V1, V2, V3>({v1Name, v2Name, v3Name}, "fill the histogram");
660  return CreateAction<TDFInternal::ActionTypes::Histo3D, V1, V2, V3>(bl, h);
661  }
662 
663  ////////////////////////////////////////////////////////////////////////////
664  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
665  /// \tparam V1 The type of the branch used to fill the x axis of the histogram.
666  /// \tparam V2 The type of the branch used to fill the y axis of the histogram.
667  /// \tparam V3 The type of the branch used to fill the z axis of the histogram.
668  /// \tparam W The type of the branch used for the weights of the histogram.
669  /// \param[in] model The returned histogram will be constructed using this as a model.
670  /// \param[in] v1Name The name of the branch that will fill the x axis.
671  /// \param[in] v2Name The name of the branch that will fill the y axis.
672  /// \param[in] v3Name The name of the branch that will fill the z axis.
673  /// \param[in] wName The name of the branch that will provide the weights.
674  ///
675  /// This action is *lazy*: upon invocation of this method the calculation is
676  /// booked but not executed. See TResultProxy documentation.
677  /// The user gives up ownership of the model histogram.
678  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
679  typename V3 = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
680  TResultProxy<::TH3F> Histo3D(::TH3F &&model, std::string_view v1Name, std::string_view v2Name,
681  std::string_view v3Name, std::string_view wName)
682  {
683  auto h = std::make_shared<::TH3F>(std::move(model));
684  if (!TDFInternal::HistoUtils<::TH3F>::HasAxisLimits(*h)) {
685  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
686  }
687  auto bl = GetBranchNames<V1, V2, V3, W>({v1Name, v2Name, v3Name, wName}, "fill the histogram");
688  return CreateAction<TDFInternal::ActionTypes::Histo3D, V1, V2, V3, W>(bl, h);
689  }
690 
691  template <typename V1, typename V2, typename V3, typename W>
693  {
694  return Histo3D<V1, V2, V3, W>(std::move(model), "", "", "", "");
695  }
696 
697  ////////////////////////////////////////////////////////////////////////////
698  /// \brief Fill and return a one-dimensional profile (*lazy action*)
699  /// \tparam V1 The type of the branch the values of which are used to fill the profile.
700  /// \tparam V2 The type of the branch the values of which are used to fill the profile.
701  /// \param[in] model The model to be considered to build the new return value.
702  /// \param[in] v1Name The name of the branch that will fill the x axis.
703  /// \param[in] v2Name The name of the branch that will fill the y axis.
704  ///
705  /// This action is *lazy*: upon invocation of this method the calculation is
706  /// booked but not executed. See TResultProxy documentation.
707  /// The user gives up ownership of the model profile object.
708  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType>
709  TResultProxy<::TProfile> Profile1D(::TProfile &&model, std::string_view v1Name = "",
710  std::string_view v2Name = "")
711  {
712  auto h = std::make_shared<::TProfile>(std::move(model));
713  if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
714  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
715  }
716  auto bl = GetBranchNames<V1, V2>({v1Name, v2Name}, "fill the 1D Profile");
717  return CreateAction<TDFInternal::ActionTypes::Profile1D, V1, V2>(bl, h);
718  }
719 
720  ////////////////////////////////////////////////////////////////////////////
721  /// \brief Fill and return a one-dimensional profile (*lazy action*)
722  /// \tparam V1 The type of the branch the values of which are used to fill the profile.
723  /// \tparam V2 The type of the branch the values of which are used to fill the profile.
724  /// \tparam W The type of the branch the weights of which are used to fill the profile.
725  /// \param[in] model The model to be considered to build the new return value.
726  /// \param[in] v1Name The name of the branch that will fill the x axis.
727  /// \param[in] v2Name The name of the branch that will fill the y axis.
728  /// \param[in] wName The name of the branch that will provide the weights.
729  ///
730  /// This action is *lazy*: upon invocation of this method the calculation is
731  /// booked but not executed. See TResultProxy documentation.
732  /// The user gives up ownership of the model profile object.
733  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
734  typename W = TDFDetail::TInferType>
735  TResultProxy<::TProfile> Profile1D(::TProfile &&model, std::string_view v1Name, std::string_view v2Name,
736  std::string_view wName)
737  {
738  auto h = std::make_shared<::TProfile>(std::move(model));
739  if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
740  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
741  }
742  auto bl = GetBranchNames<V1, V2, W>({v1Name, v2Name, wName}, "fill the 1D profile");
743  return CreateAction<TDFInternal::ActionTypes::Profile1D, V1, V2, W>(bl, h);
744  }
745 
746  template <typename V1, typename V2, typename W>
748  {
749  return Profile1D<V1, V2, W>(std::move(model), "", "", "");
750  }
751 
752  ////////////////////////////////////////////////////////////////////////////
753  /// \brief Fill and return a two-dimensional profile (*lazy action*)
754  /// \tparam V1 The type of the branch used to fill the x axis of the histogram.
755  /// \tparam V2 The type of the branch used to fill the y axis of the histogram.
756  /// \tparam V2 The type of the branch used to fill the z axis of the histogram.
757  /// \param[in] model The returned profile will be constructed using this as a model.
758  /// \param[in] v1Name The name of the branch that will fill the x axis.
759  /// \param[in] v2Name The name of the branch that will fill the y axis.
760  /// \param[in] v3Name The name of the branch that will fill the z axis.
761  ///
762  /// This action is *lazy*: upon invocation of this method the calculation is
763  /// booked but not executed. See TResultProxy documentation.
764  /// The user gives up ownership of the model profile.
765  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
766  typename V3 = TDFDetail::TInferType>
767  TResultProxy<::TProfile2D> Profile2D(::TProfile2D &&model, std::string_view v1Name = "",
768  std::string_view v2Name = "", std::string_view v3Name = "")
769  {
770  auto h = std::make_shared<::TProfile2D>(std::move(model));
771  if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
772  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
773  }
774  auto bl = GetBranchNames<V1, V2, V3>({v1Name, v2Name, v3Name}, "fill the 2D profile");
775  return CreateAction<TDFInternal::ActionTypes::Profile2D, V1, V2, V3>(bl, h);
776  }
777 
778  ////////////////////////////////////////////////////////////////////////////
779  /// \brief Fill and return a two-dimensional profile (*lazy action*)
780  /// \tparam V1 The type of the branch used to fill the x axis of the histogram.
781  /// \tparam V2 The type of the branch used to fill the y axis of the histogram.
782  /// \tparam V3 The type of the branch used to fill the z axis of the histogram.
783  /// \tparam W The type of the branch used for the weights of the histogram.
784  /// \param[in] model The returned histogram will be constructed using this as a model.
785  /// \param[in] v1Name The name of the branch that will fill the x axis.
786  /// \param[in] v2Name The name of the branch that will fill the y axis.
787  /// \param[in] v3Name The name of the branch that will fill the z axis.
788  /// \param[in] wName The name of the branch that will provide the weights.
789  ///
790  /// This action is *lazy*: upon invocation of this method the calculation is
791  /// booked but not executed. See TResultProxy documentation.
792  /// The user gives up ownership of the model profile.
793  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
794  typename V3 = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
795  TResultProxy<::TProfile2D> Profile2D(::TProfile2D &&model, std::string_view v1Name, std::string_view v2Name,
796  std::string_view v3Name, std::string_view wName)
797  {
798  auto h = std::make_shared<::TProfile2D>(std::move(model));
799  if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
800  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
801  }
802  auto bl = GetBranchNames<V1, V2, V3, W>({v1Name, v2Name, v3Name, wName}, "fill the histogram");
803  return CreateAction<TDFInternal::ActionTypes::Profile2D, V1, V2, V3, W>(bl, h);
804  }
805 
806  template <typename V1, typename V2, typename V3, typename W>
808  {
809  return Profile2D<V1, V2, V3, W>(std::move(model), "", "", "", "");
810  }
811 
812  ////////////////////////////////////////////////////////////////////////////
813  /// \brief Fill and return any entity with a Fill method (*lazy action*)
814  /// \tparam BranchTypes The types of the branches the values of which are used to fill the object.
815  /// \param[in] model The model to be considered to build the new return value.
816  /// \param[in] bl The name of the branches read to fill the object.
817  ///
818  /// The returned object is independent of the input one.
819  /// This action is *lazy*: upon invocation of this method the calculation is
820  /// booked but not executed. See TResultProxy documentation.
821  /// The user gives up ownership of the model object.
822  /// It is compulsory to express the branches to be considered.
823  template <typename FirstBranch, typename... OtherBranches, typename T> // need FirstBranch to disambiguate overloads
825  {
826  auto h = std::make_shared<T>(std::move(model));
827  if (!TDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
828  throw std::runtime_error("The absence of axes limits is not supported yet.");
829  }
830  return CreateAction<TDFInternal::ActionTypes::Fill, FirstBranch, OtherBranches...>(bl, h);
831  }
832 
833  template <typename T>
835  {
836  auto h = std::make_shared<T>(std::move(model));
837  if (!TDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
838  throw std::runtime_error("The absence of axes limits is not supported yet.");
839  }
840  return CreateAction<TDFInternal::ActionTypes::Fill, TDFDetail::TInferType>(bl, h);
841  }
842 
843  ////////////////////////////////////////////////////////////////////////////
844  /// \brief Return the minimum of processed branch values (*lazy action*)
845  /// \tparam T The type of the branch.
846  /// \param[in] branchName The name of the branch to be treated.
847  ///
848  /// If no branch type is specified, the implementation will try to guess one.
849  ///
850  /// This action is *lazy*: upon invocation of this method the calculation is
851  /// booked but not executed. See TResultProxy documentation.
852  template <typename T = TDFDetail::TInferType>
853  TResultProxy<double> Min(std::string_view branchName = "")
854  {
855  auto bl = GetBranchNames<T>({branchName}, "calculate the minimum");
856  auto minV = std::make_shared<double>(std::numeric_limits<double>::max());
857  return CreateAction<TDFInternal::ActionTypes::Min, T>(bl, minV);
858  }
859 
860  ////////////////////////////////////////////////////////////////////////////
861  /// \brief Return the maximum of processed branch values (*lazy action*)
862  /// \tparam T The type of the branch.
863  /// \param[in] branchName The name of the branch to be treated.
864  ///
865  /// If no branch type is specified, the implementation will try to guess one.
866  ///
867  /// This action is *lazy*: upon invocation of this method the calculation is
868  /// booked but not executed. See TResultProxy documentation.
869  template <typename T = TDFDetail::TInferType>
870  TResultProxy<double> Max(std::string_view branchName = "")
871  {
872  auto bl = GetBranchNames<T>({branchName}, "calculate the maximum");
873  auto maxV = std::make_shared<double>(std::numeric_limits<double>::min());
874  return CreateAction<TDFInternal::ActionTypes::Max, T>(bl, maxV);
875  }
876 
877  ////////////////////////////////////////////////////////////////////////////
878  /// \brief Return the mean of processed branch values (*lazy action*)
879  /// \tparam T The type of the branch.
880  /// \param[in] branchName The name of the branch to be treated.
881  ///
882  /// If no branch type is specified, the implementation will try to guess one.
883  ///
884  /// This action is *lazy*: upon invocation of this method the calculation is
885  /// booked but not executed. See TResultProxy documentation.
886  template <typename T = TDFDetail::TInferType>
887  TResultProxy<double> Mean(std::string_view branchName = "")
888  {
889  auto bl = GetBranchNames<T>({branchName}, "calculate the mean");
890  auto meanV = std::make_shared<double>(0);
891  return CreateAction<TDFInternal::ActionTypes::Mean, T>(bl, meanV);
892  }
893 
894  ////////////////////////////////////////////////////////////////////////////
895  /// \brief Print filtering statistics on screen
896  ///
897  /// Calling `Report` on the main `TDataFrame` object prints stats for
898  /// all named filters in the call graph. Calling this method on a
899  /// stored chain state (i.e. a graph node different from the first) prints
900  /// the stats for all named filters in the chain section between the original
901  /// `TDataFrame` and that node (included). Stats are printed in the same
902  /// order as the named filters have been added to the graph.
903  void Report()
904  {
905  auto df = GetDataFrameChecked();
906  if (!df->HasRunAtLeastOnce()) df->Run();
907  fProxiedPtr->Report();
908  }
909 
910 private:
911  inline const char *GetNodeTypeName() { return ""; };
912 
913  /// Returns the default branches if needed, takes care of the error handling.
914  template <typename T1, typename T2 = void, typename T3 = void, typename T4 = void>
915  ColumnNames_t GetBranchNames(const std::vector<std::string_view>& bl, std::string_view actionNameForErr)
916  {
917  constexpr auto isT2Void = std::is_same<T2, void>::value;
918  constexpr auto isT3Void = std::is_same<T3, void>::value;
919  constexpr auto isT4Void = std::is_same<T4, void>::value;
920 
921  unsigned int neededBranches = 1 + !isT2Void + !isT3Void + !isT4Void;
922 
923  unsigned int providedBranches = 0;
924  std::for_each(bl.begin(), bl.end(), [&providedBranches](std::string_view s) {
925  if (!s.empty()) providedBranches++;
926  });
927 
928  if (neededBranches == providedBranches) {
929  ColumnNames_t bl2(bl.begin(), bl.end());
930  return bl2;
931  }
932 
933  return GetDefaultBranchNames(neededBranches, actionNameForErr);
934  }
935 
936  /// \cond HIDDEN_SYMBOLS
937 
938  /****** BuildAndBook overloads *******/
939  // BuildAndBook builds a TAction with the right operation and book it with the TLoopManager
940 
941  // Generic filling (covers Histo2D, Histo3D, Profile1D and Profile2D actions, with and without weights)
942  template <typename... BranchTypes, typename ActionType, typename ActionResultType>
943  void BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h, unsigned int nSlots,
944  ActionType *)
945  {
946  using Op_t = TDFInternal::FillTOHelper<ActionResultType>;
947  using DFA_t = TDFInternal::TAction<Op_t, Proxied, TDFInternal::TTypeList<BranchTypes...>>;
948  auto df = GetDataFrameChecked();
949  df->Book(std::make_shared<DFA_t>(Op_t(h, nSlots), bl, *fProxiedPtr));
950  }
951 
952  // Histo1D filling (must handle the special case of distinguishing FillTOHelper and FillHelper
953  template <typename... BranchTypes>
954  void BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<::TH1F> &h, unsigned int nSlots,
955  TDFInternal::ActionTypes::Histo1D *)
956  {
957  auto df = GetDataFrameChecked();
958  auto hasAxisLimits = TDFInternal::HistoUtils<::TH1F>::HasAxisLimits(*h);
959 
960  if (hasAxisLimits) {
961  using Op_t = TDFInternal::FillTOHelper<::TH1F>;
962  using DFA_t = TDFInternal::TAction<Op_t, Proxied, TDFInternal::TTypeList<BranchTypes...>>;
963  df->Book(std::make_shared<DFA_t>(Op_t(h, nSlots), bl, *fProxiedPtr));
964  } else {
965  using Op_t = TDFInternal::FillHelper;
966  using DFA_t = TDFInternal::TAction<Op_t, Proxied, TDFInternal::TTypeList<BranchTypes...>>;
967  df->Book(std::make_shared<DFA_t>(Op_t(h, nSlots), bl, *fProxiedPtr));
968  }
969  }
970 
971  // Min action
972  template <typename BranchType>
973  void BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<double> &minV, unsigned int nSlots,
975  {
976  using Op_t = TDFInternal::MinHelper;
978  auto df = GetDataFrameChecked();
979  df->Book(std::make_shared<DFA_t>(Op_t(minV, nSlots), bl, *fProxiedPtr));
980  }
981 
982  // Max action
983  template <typename BranchType>
984  void BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<double> &maxV, unsigned int nSlots,
986  {
987  using Op_t = TDFInternal::MaxHelper;
989  auto df = GetDataFrameChecked();
990  df->Book(std::make_shared<DFA_t>(Op_t(maxV, nSlots), bl, *fProxiedPtr));
991  }
992 
993  // Mean action
994  template <typename BranchType>
995  void BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<double> &meanV, unsigned int nSlots,
997  {
998  using Op_t = TDFInternal::MeanHelper;
1000  auto df = GetDataFrameChecked();
1001  df->Book(std::make_shared<DFA_t>(Op_t(meanV, nSlots), bl, *fProxiedPtr));
1002  }
1003  /****** end BuildAndBook ******/
1004  /// \endcond
1005 
1006  // Type was specified by the user, no need to infer it
1007  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1008  typename std::enable_if<!TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1009  TResultProxy<ActionResultType> CreateAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &r)
1010  {
1011  auto df = GetDataFrameChecked();
1012  unsigned int nSlots = df->GetNSlots();
1013  BuildAndBook<BranchTypes...>(bl, r, nSlots, (ActionType *)nullptr);
1014  fProxiedPtr->IncrChildrenCount();
1015  return MakeResultProxy(r, df);
1016  }
1017 
1018  // User did not specify type, do type inference
1019  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1020  typename std::enable_if<TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1021  TResultProxy<ActionResultType> CreateAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &r)
1022  {
1023  auto df = GetDataFrameChecked();
1024  unsigned int nSlots = df->GetNSlots();
1025  const auto &tmpBranches = df->GetBookedBranches();
1026  auto tree = df->GetTree();
1027  TDFInternal::JitBuildAndBook(bl, GetNodeTypeName(), this, typeid(std::shared_ptr<ActionResultType>),
1028  typeid(ActionType), &r, tree, nSlots, tmpBranches);
1029  fProxiedPtr->IncrChildrenCount();
1030  return MakeResultProxy(r, df);
1031  }
1032 
1033 protected:
1034  /// Get the TLoopManager if reachable. If not, throw.
1035  std::shared_ptr<TLoopManager> GetDataFrameChecked()
1036  {
1037  auto df = fImplWeakPtr.lock();
1038  if (!df) {
1039  throw std::runtime_error("The main TDataFrame is not reachable: did it go out of scope?");
1040  }
1041  return df;
1042  }
1043 
1044  const ColumnNames_t GetDefaultBranchNames(unsigned int nExpectedBranches, std::string_view actionNameForErr)
1045  {
1046  auto df = GetDataFrameChecked();
1047  const ColumnNames_t &defaultBranches = df->GetDefaultBranches();
1048  const auto dBSize = defaultBranches.size();
1049  if (nExpectedBranches > dBSize) {
1050  std::string msg("Trying to deduce the branches from the default list in order to ");
1051  msg += actionNameForErr;
1052  msg += ". A set of branches of size ";
1053  msg += std::to_string(dBSize);
1054  msg += " was found. ";
1055  msg += std::to_string(nExpectedBranches);
1056  msg += 1 != nExpectedBranches ? " are" : " is";
1057  msg += " needed. Please specify the branches explicitly.";
1058  throw std::runtime_error(msg);
1059  }
1060  auto bnBegin = defaultBranches.begin();
1061  return ColumnNames_t(bnBegin, bnBegin + nExpectedBranches);
1062  }
1063 
1064  ////////////////////////////////////////////////////////////////////////////
1065  /// \brief Implementation of snapshot
1066  /// \param[in] treename The name of the TTree
1067  /// \param[in] filename The name of the TFile
1068  /// \param[in] bnames The list of names of the branches to be written
1069  /// The implementation exploits Foreach. The association of the addresses to
1070  /// the branches takes place at the first event. This is possible because
1071  /// since there are no copies, the address of the value passed by reference
1072  /// is the address pointing to the storage of the read/created object in/by
1073  /// the TTreeReaderValue/TemporaryBranch
1074  template <typename... Args, int... S>
1075  TInterface<TLoopManager> SnapshotImpl(std::string_view treename, std::string_view filename,
1076  const ColumnNames_t &bnames, TDFInternal::TStaticSeq<S...> /*dummy*/)
1077  {
1078  const std::string treenameInt(treename);
1079  const std::string filenameInt(filename);
1080  const auto templateParamsN = sizeof...(S);
1081  const auto bNamesN = bnames.size();
1082  if (templateParamsN != bNamesN) {
1083  std::string err_msg = "The number of template parameters specified for the snapshot is ";
1084  err_msg += std::to_string(templateParamsN);
1085  err_msg += " while ";
1086  err_msg += std::to_string(bNamesN);
1087  err_msg += " branches have been specified.";
1088  throw std::runtime_error(err_msg.c_str());
1089  }
1090 
1091  auto df = GetDataFrameChecked();
1092  if (!ROOT::IsImplicitMTEnabled()) {
1093  std::unique_ptr<TFile> ofile(TFile::Open(filenameInt.c_str(), "RECREATE"));
1094  TTree t(treenameInt.c_str(), treenameInt.c_str());
1095 
1096  bool FirstEvent = true;
1097  // TODO move fillTree and initLambda to SnapshotHelper's body
1098  auto fillTree = [&t, &bnames, &FirstEvent](unsigned int /* unused */, Args &... args) {
1099  if (FirstEvent) {
1100  // hack to call TTree::Branch on all variadic template arguments
1101  std::initializer_list<int> expander = {(t.Branch(bnames[S].c_str(), &args), 0)..., 0};
1102  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1103  FirstEvent = false;
1104  }
1105  t.Fill();
1106  };
1107 
1108  auto initLambda = [&t] (TTreeReader *r, unsigned int /* unused */) {
1109  if(r) {
1110  // not an empty-source TDF
1111  auto tree = r->GetTree();
1112  tree->AddClone(&t);
1113  }
1114  };
1115 
1116  using Op_t = TDFInternal::SnapshotHelper<decltype(initLambda), decltype(fillTree)>;
1118  df->Book(std::make_shared<DFA_t>(Op_t(std::move(initLambda), std::move(fillTree)), bnames, *fProxiedPtr));
1119  fProxiedPtr->IncrChildrenCount();
1120  df->Run();
1121  t.Write();
1122  } else {
1123  unsigned int nSlots = df->GetNSlots();
1124  TBufferMerger merger(filenameInt.c_str(), "RECREATE");
1125  std::vector<std::shared_ptr<TBufferMergerFile>> files(nSlots);
1126  std::vector<TTree *> trees(nSlots); // ROOT owns/manages these TTrees
1127  std::vector<int> isFirstEvent(nSlots, 1); // vector<bool> is evil
1128 
1129  auto fillTree = [&](unsigned int slot, Args &... args) {
1130  if (isFirstEvent[slot]) {
1131  // hack to call TTree::Branch on all variadic template arguments
1132  std::initializer_list<int> expander = {(trees[slot]->Branch(bnames[S].c_str(), &args), 0)..., 0};
1133  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1134  isFirstEvent[slot] = 0;
1135  }
1136  trees[slot]->Fill();
1137  auto entries = trees[slot]->GetEntries();
1138  auto autoflush = trees[slot]->GetAutoFlush();
1139  if ((autoflush > 0) && (entries % autoflush == 0)) files[slot]->Write();
1140  };
1141 
1142  // called at the beginning of each task
1143  auto initLambda = [&trees, &merger, &files, &treenameInt, &isFirstEvent] (TTreeReader *r, unsigned int slot) {
1144  if(!trees[slot]) {
1145  // first time this thread executes something, let's create a TBufferMerger output directory
1146  files[slot] = merger.GetFile();
1147  } else {
1148  files[slot]->Write();
1149  }
1150  trees[slot] = new TTree(treenameInt.c_str(), treenameInt.c_str());
1151  trees[slot]->ResetBit(kMustCleanup);
1152  if(r) {
1153  // not an empty-source TDF
1154  auto tree = r->GetTree();
1155  tree->AddClone(trees[slot]);
1156  }
1157  isFirstEvent[slot] = 1;
1158  };
1159 
1160  using Op_t = TDFInternal::SnapshotHelper<decltype(initLambda), decltype(fillTree)>;
1162  df->Book(std::make_shared<DFA_t>(Op_t(std::move(initLambda), std::move(fillTree)), bnames, *fProxiedPtr));
1163  fProxiedPtr->IncrChildrenCount();
1164  df->Run();
1165  for (auto &&file : files) file->Write();
1166  }
1167 
1169  // Now we mimic a constructor for the TDataFrame. We cannot invoke it here
1170  // since this would introduce a cyclic headers dependency.
1171  TInterface<TLoopManager> snapshotTDF(std::make_shared<TLoopManager>(nullptr, bnames));
1172  auto chain = new TChain(treenameInt.c_str());
1173  chain->Add(filenameInt.c_str());
1174  snapshotTDF.fProxiedPtr->SetTree(std::shared_ptr<TTree>(static_cast<TTree *>(chain)));
1175 
1176  return snapshotTDF;
1177  }
1178 
1179  TInterface(const std::shared_ptr<Proxied> &proxied, const std::weak_ptr<TLoopManager> &impl)
1180  : fProxiedPtr(proxied), fImplWeakPtr(impl)
1181  {
1182  }
1183 
1184  /// Only enabled when building a TInterface<TLoopManager>
1185  template <typename T = Proxied, typename std::enable_if<std::is_same<T, TLoopManager>::value, int>::type = 0>
1186  TInterface(const std::shared_ptr<Proxied> &proxied) : fProxiedPtr(proxied), fImplWeakPtr(proxied->GetSharedPtr())
1187  {
1188  }
1189 
1190  std::shared_ptr<Proxied> fProxiedPtr;
1191  std::weak_ptr<TLoopManager> fImplWeakPtr;
1192 };
1193 
1194 template <>
1196 {
1197  return "ROOT::Experimental::TDF::TInterface<ROOT::Detail::TDF::TFilterBase>";
1198 }
1199 
1200 template <>
1202 {
1203  return "ROOT::Experimental::TDF::TInterface<ROOT::Detail::TDF::TCustomColumnBase>";
1204 }
1205 
1206 template <>
1208 {
1209  return "ROOT::Experimental::TDF::TInterface<ROOT::Detail::TDF::TLoopManager>";
1210 }
1211 
1212 template <>
1214 {
1215  return "ROOT::Experimental::TDF::TInterface<ROOT::Detail::TDF::TRangeBase>";
1216 }
1217 
1218 } // end NS TDF
1219 } // end NS Experimental
1220 } // end NS ROOT
1221 
1222 #endif // ROOT_TDF_INTERFACE
TResultProxy<::TProfile2D > Profile2D(::TProfile2D &&model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
TResultProxy<::TH3F > Histo3D(::TH3F &&model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
An array of TObjects.
Definition: TObjArray.h:37
TResultProxy<::TProfile2D > Profile2D(::TProfile2D &&model)
TResultProxy<::TH3F > Histo3D(::TH3F &&model)
TResultProxy<::TH3F > Histo3D(::TH3F &&model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:42
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="")
Create a snapshot of the dataset on disk in the form of a TTree.
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1193
double T(double x)
Definition: ChebyshevPol.h:34
TTree * GetTree() const
Definition: TTreeReader.h:151
TH1 * h
Definition: legend2.C:5
TInterface< TCustomColumnBase > Define(std::string_view name, std::string_view expression)
Creates a temporary branch.
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition: TRegexp.cxx:209
TInterface< TCustomColumnBase > Define(std::string_view name, F expression, const ColumnNames_t &bl={})
Creates a temporary branch.
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &bnames)
Create a snapshot of the dataset on disk in the form of a TTree.
Regular expression class.
Definition: TRegexp.h:31
void Foreach(F f, const ColumnNames_t &bl={})
Execute a user-defined function on each entry (instant action)
const ColumnNames_t & PickBranchNames(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl)
Returns local BranchNames or default BranchNames according to which one should be used...
Definition: TDFUtils.cxx:147
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
#define gInterpreter
Definition: TInterpreter.h:499
void CallBuildAndBook(TDFNode *node, const ColumnNames_t &bl, unsigned int nSlots, const std::shared_ptr< ActionResultType > &r)
Profile Histogram.
Definition: TProfile.h:32
TInterface< TFilterBase > Filter(F f, const std::initializer_list< std::string > &bn)
Append a filter to the call graph.
const ColumnNames_t GetDefaultBranchNames(unsigned int nExpectedBranches, std::string_view actionNameForErr)
TResultProxy<::TH2F > Histo2D(::TH2F &&model)
TResultProxy<::TProfile > Profile1D(::TProfile &&model)
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3909
TInterface< TFilterBase > Filter(F f, const ColumnNames_t &bn={}, std::string_view name="")
Append a filter to the call graph.
TResultProxy<::TH2F > Histo2D(::TH2F &&model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
TInterface< TRangeBase > Range(unsigned int start, unsigned int stop, unsigned int stride=1)
Creates a node that filters entries based on range.
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &bnames)
Create a snapshot of the dataset on disk in the form of a TTree.
TResultProxy<::TH1F > Histo1D(::TH1F &&model=::TH1F{"","", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a branch (lazy action) ...
TResultProxy<::TProfile > Profile1D(::TProfile &&model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
TResultProxy<::TProfile2D > Profile2D(::TProfile2D &&model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
TResultProxy<::TH1F > Histo1D(::TH1F &&model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the values of a branch (lazy action) ...
TResultProxy< T > Reduce(F f, std::string_view branchName={})
Execute a user-defined reduce operation on the values of a branch.
RooArgSet S(const RooAbsArg &v1)
Smart pointer for the return type of actions.
#define F(x, y, z)
std::string printValue(const TDatime *val)
Print a TDatime at the prompt.
Definition: TDatime.cxx:514
TResultProxy<::TH2F > Histo2D(::TH2F &&model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a two-dimensional histogram (lazy action)
TRandom2 r(17)
TResultProxy<::TH1F > Histo1D(std::string_view vName)
TInterface< TFilterBase > Filter(F f, std::string_view name)
Append a filter to the call graph.
TInterface(const std::shared_ptr< Proxied > &proxied, const std::weak_ptr< TLoopManager > &impl)
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Definition: TMath.h:973
void fillTree(TTree &t2)
Definition: testRooFit.cxx:49
std::shared_ptr< TLoopManager > GetDataFrameChecked()
Get the TLoopManager if reachable. If not, throw.
TBufferMerger is a class to facilitate writing data in parallel from multiple threads, while writing to a single output file.
Histogram class for histograms with DIMENSIONS dimensions, where each bin count is stored by a value ...
Definition: THist.hxx:33
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, TCustomColumnBase *tmpBranch)
Return a string containing the type of the given branch.
Definition: TDFUtils.cxx:30
void ForeachSlot(F f, const ColumnNames_t &bl={})
Execute a user-defined function requiring a processing slot index on each entry (instant action) ...
TInterface< TRangeBase > Range(unsigned int stop)
Creates a node that filters entries based on range.
TResultProxy< T > Fill(T &&model, const ColumnNames_t &bl)
Long_t JitTransformation(void *thisPtr, const std::string &methodName, const std::string &nodeTypeName, const std::string &name, const std::string &expression, TObjArray *branches, const std::vector< std::string > &tmpBranches, const std::map< std::string, TmpBranchBasePtr_t > &tmpBookedBranches, TTree *tree)
TResultProxy< ActionResultType > CreateAction(const ColumnNames_t &bl, const std::shared_ptr< ActionResultType > &r)
TInterface< TLoopManager > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &bnames, TDFInternal::TStaticSeq< S...>)
Implementation of snapshot.
long Long_t
Definition: RtypesCore.h:50
TResultProxy< T > MakeResultProxy(const std::shared_ptr< T > &r, const std::shared_ptr< TLoopManager > &df)
double f(double x)
TInterface< TFilterBase > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
TResultProxy< double > Min(std::string_view branchName="")
Return the minimum of processed branch values (lazy action)
int type
Definition: TGX11.cxx:120
void Run(unsigned int slot, Long64_t entry) final
Definition: TDFNodes.hxx:239
TResultProxy< double > Max(std::string_view branchName="")
Return the maximum of processed branch values (lazy action)
void Report()
Print filtering statistics on screen.
static RooMathCoreReg dummy
TResultProxy<::TH1F > Histo1D(::TH1F &&model=::TH1F{"","", 128u, 0., 0.})
std::shared_ptr< Proxied > fProxiedPtr
Profile2D histograms are used to display the mean value of Z and its RMS for each cell in X...
Definition: TProfile2D.h:27
typedef void((*Func_t)())
TResultProxy< T > Fill(T &&model, const ColumnNames_t &bl)
Fill and return any entity with a Fill method (lazy action)
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:552
TResultProxy<::TH1F > Histo1D(std::string_view vName, std::string_view wName)
ROOT&#39;s TDataFrame offers a high level interface for analyses of data stored in TTrees.
Definition: TDataFrame.hxx:36
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
void CheckTmpBranch(std::string_view branchName, TTree *treePtr)
Definition: TDFUtils.cxx:134
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
ColumnNames_t GetBranchNames(const std::vector< std::string_view > &bl, std::string_view actionNameForErr)
Returns the default branches if needed, takes care of the error handling.
void JitBuildAndBook(const ColumnNames_t &bl, const std::string &nodeTypename, void *thisPtr, const std::type_info &art, const std::type_info &at, const void *r, TTree *tree, unsigned int nSlots, const std::map< std::string, TmpBranchBasePtr_t > &tmpBranches)
TResultProxy< T > Reduce(F f, std::string_view branchName, const T &initValue)
Execute a user-defined reduce operation on the values of a branch.
std::shared_ptr< TCustomColumnBase > TmpBranchBasePtr_t
std::vector< std::string > GetUsedBranchesNames(const std::string, TObjArray *, const std::vector< std::string > &)
TResultProxy< double > Mean(std::string_view branchName="")
Return the mean of processed branch values (lazy action)
std::weak_ptr< TLoopManager > fImplWeakPtr
static void Fill(TTree *tree, int init, int count)
char name[80]
Definition: TGX11.cxx:109
The public interface to the TDataFrame federation of classes.
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:220
TResultProxy<::TProfile > Profile1D(::TProfile &&model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
TInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a TInterface&lt;TLoopManager&gt;
TResultProxy< COLL > Take(std::string_view branchName="")
Return a collection of values of a branch (lazy action)
TResultProxy< unsigned int > Count()
Return the number of entries processed (lazy action)