Logo ROOT   6.10/00
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
DataSetFactory.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSetFactory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2006: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_DataSetFactory
30 #define ROOT_TMVA_DataSetFactory
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // DataSetFactory //
35 // //
36 // Class that contains all the data information //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <stdlib.h>
42 
43 #include "TString.h"
44 #include "TTree.h"
45 #include "TCut.h"
46 #include "TTreeFormula.h"
47 #include "TMatrixDfwd.h"
48 #include "TPrincipal.h"
49 #include "TRandom3.h"
50 
51 #include "TMVA/Types.h"
52 #include "TMVA/VariableInfo.h"
53 #include "TMVA/Event.h"
54 
55 namespace TMVA {
56 
57  class DataSet;
58  class DataSetInfo;
59  class DataInputHandler;
60  class TreeInfo;
61  class MsgLogger;
62 
63  // =============== maybe move these elsewhere (e.g. into the tools)
64 
65  // =============== functors =======================
66 
67 
69  public:
71  fRandom.SetSeed( seed );
72  }
74  return fRandom.Integer(n);
75  }
76  private:
77  TRandom3 fRandom; // random generator
78  };
79 
80 
81  // delete-functor (to be used in e.g. for_each algorithm)
82  template<class T>
84  {
86  delete p;
87  return *this;
88  }
89  };
90 
91  template<class T>
93  {
94  return DeleteFunctor_t<const T>();
95  }
96 
97 
98  template< typename T >
99  class Increment {
101  public:
102  Increment( T start ) : value( start ){ }
104  return value++;
105  }
106  };
107 
108 
109 
// Identity functor: operator() hands back a copy of whatever it is given.
// It exposes argument_type so it can be plugged in as one of the inner
// functions of the compose_* adapters.
template <typename F>
class null_t
{
public:
   typedef F argument_type;

   // returns the argument unchanged
   F operator()(const F& value) const { return value; }
};
122 
123  template <typename F>
124  inline null_t<F> null() {
125  return null_t<F>();
126  }
127 
128 
129 
// Binary composition adapter: calling it with (x, y) evaluates f(g(x), h(y)).
//
// Requirements on the template arguments:
//   - G and H each provide a nested argument_type,
//   - F provides a nested result_type and is callable with the results of g and h.
//
// The adapter only keeps *references* to the three functors, so the objects
// handed to the constructor must stay alive for as long as the adapter is used.
//
// The nested typedefs below are spelled out explicitly instead of being
// inherited from std::binary_function, which was deprecated in C++11 and
// removed from the standard library in C++17.
template <typename F, typename G, typename H>
class compose_binary_t
{
public:
   typedef typename G::argument_type first_argument_type;
   typedef typename H::argument_type second_argument_type;
   typedef typename F::result_type   result_type;

private:
   const F& f;    // outer function: f(g(argG),h(argH))
   const G& g;    // applied to the first argument
   const H& h;    // applied to the second argument

public:
   // Binds the adapter to the given functors (no copies are made).
   compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
   {
   }

   // Returns f(g(argG), h(argH)).
   result_type operator()(const first_argument_type& argG,
                          const second_argument_type& argH) const
   {
      return f(g(argG),h(argH));
   }
};
150 
151  template <typename F, typename G, typename H>
152  inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
153  return compose_binary_t<F,G,H>(_f,_g,_h);
154  }
155 
156 
157 
158 
// Unary composition adapter: calling it with x evaluates f(g(x)).
//
// Requirements on the template arguments:
//   - G provides a nested argument_type,
//   - F provides a nested result_type and is callable with the result of g.
//
// The adapter only keeps *references* to the two functors, so the objects
// handed to the constructor must stay alive for as long as the adapter is used.
//
// The nested typedefs below are spelled out explicitly instead of being
// inherited from std::unary_function, which was deprecated in C++11 and
// removed from the standard library in C++17.
template <typename F, typename G>
class compose_unary_t
{
public:
   typedef typename G::argument_type argument_type;
   typedef typename F::result_type   result_type;

private:
   const F& f;    // outer function: f(g(argG))
   const G& g;    // inner function, applied first

public:
   // Binds the adapter to the given functors (no copies are made).
   compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
   {
   }

   // Returns f(g(argG)).
   result_type operator()(const argument_type& argG) const
   {
      return f(g(argG));
   }
};
176 
177  template <typename F, typename G>
178  inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
179  return compose_unary_t<F,G>(_f,_g);
180  }
181 
182  // =============== functors =======================
183 
184 
185  // =========================================================
186 
187 
188  class DataSetFactory:public TObject {
189 
190  typedef std::vector<Event* > EventVector;
191  typedef std::vector< EventVector > EventVectorOfClasses;
192  typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
193  typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
194 
195  typedef std::vector< Double_t > ValuePerClass;
196  typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
197 
198  class EventStats {
199  public:
214  nInitialEvents(0),
215  nEvBeforeCut(0),
216  nEvAfterCut(0),
217  nWeEvBeforeCut(0),
218  nWeEvAfterCut(0),
219  nNegWeights(0),
220  varAvLength(0)
221  {}
222  ~EventStats() { delete[] varAvLength; }
224  };
225 
226  typedef std::vector< int > NumberPerClass;
227  typedef std::vector< EventStats > EvtStatsPerClass;
228 
229  public:
230 
231  ~DataSetFactory();
232 
233  DataSetFactory();
234 
236  protected:
237 
238 
241 
242  // ---------- new versions
243  void BuildEventVector ( DataSetInfo& dsi,
244  DataInputHandler& dataInput,
246  EvtStatsPerClass& eventCounts);
247 
248  DataSet* MixEvents ( DataSetInfo& dsi,
250  EvtStatsPerClass& eventCounts,
251  const TString& splitMode,
252  const TString& mixMode,
253  const TString& normMode,
254  UInt_t splitSeed);
255 
256  void RenormEvents ( DataSetInfo& dsi,
258  const EvtStatsPerClass& eventCounts,
259  const TString& normMode );
260 
261  void InitOptions ( DataSetInfo& dsi,
262  EvtStatsPerClass& eventsmap,
263  TString& normMode, UInt_t& splitSeed,
264  TString& splitMode, TString& mixMode);
265 
266 
267  // ------------------------
268 
269  // auxiliary functions to compute correlations
270  TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
271  TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
272  void CalcMinMax ( DataSet*, DataSetInfo& dsi );
273 
274  // resets branch addresses to current event
277  void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
278  Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
279 
280  // verbosity
281  Bool_t Verbose() { return fVerbose; }
282 
283  // data members
284 
285  // verbosity
286  Bool_t fVerbose; // Verbosity
287  TString fVerboseLevel; // VerboseLevel
288 
289  Bool_t fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts
290 
291  // the event
292  TTree* fCurrentTree; // the tree, events are currently read from
293  UInt_t fCurrentEvtIdx; // the current event (to avoid reading of the same event)
294 
295  // the formulas for reading the original tree
296  std::vector<TTreeFormula*> fInputFormulas; // input variables
297  std::vector<TTreeFormula*> fTargetFormulas; // targets
298  std::vector<TTreeFormula*> fCutFormulas; // cuts
299  std::vector<TTreeFormula*> fWeightFormula; // weights
300  std::vector<TTreeFormula*> fSpectatorFormulas; // spectators
301 
302  MsgLogger* fLogger; //! message logger
303  MsgLogger& Log() const { return *fLogger; }
304  public:
305 
307  };
308 }
309 
310 #endif
void ResetBranchAndEventAddresses(TTree *)
std::vector< EventVector > EventVectorOfClasses
Random number generator class based on M.
Definition: TRandom3.h:27
std::vector< TTreeFormula * > fInputFormulas
float Float_t
Definition: RtypesCore.h:53
double T(double x)
Definition: ChebyshevPol.h:34
std::vector< TTreeFormula * > fCutFormulas
std::vector< Double_t > ValuePerClass
F operator()(const F &argF) const
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:201
#define H(x, y, z)
compose_unary_t< F, G > compose_unary(const F &_f, const G &_g)
F::result_type operator()(const typename G::argument_type &argG, const typename H::argument_type &argH) const
Basic string class.
Definition: TString.h:129
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors distributes events between kTraining/kTesting/kMaxTreeType ...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
compose_unary_t(const F &_f, const G &_g)
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
#define G(x, y, z)
DeleteFunctor_t & operator()(const T *p)
compose_binary_t(const F &_f, const G &_g, const H &_h)
std::vector< int > NumberPerClass
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
null_t< F > null()
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
DataSet * BuildDynamicDataSet(DataSetInfo &)
#define ClassDef(name, id)
Definition: Rtypes.h:297
std::vector< TTreeFormula * > fWeightFormula
F::result_type operator()(const typename G::argument_type &argG) const
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
Definition: TRandom.cxx:320
MsgLogger & Log() const
message logger
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
Class that contains all the data information.
DeleteFunctor_t< const T > DeleteFunctor()
Class that contains all the data information.
Definition: DataSetInfo.h:60
Used to pass a selection expression to the Tree drawing routine.
Definition: TTreeFormula.h:58
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
Class that contains all the data information.
Definition: DataSet.h:69
#define F(x, y, z)
DataSetFactory()
constructor
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
Class that contains all the data information.
std::vector< TTreeFormula * > fSpectatorFormulas
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
unsigned int UInt_t
Definition: RtypesCore.h:42
UInt_t operator()(UInt_t n)
std::vector< TTreeFormula * > fTargetFormulas
std::vector< Event * > EventVector
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, then create a DataSet with one Event which uses dynamic variables (pointers to variabl...
~DataSetFactory()
destructor
RandomGenerator(UInt_t seed)
double Double_t
Definition: RtypesCore.h:55
compose_binary_t< F, G, H > compose_binary(const F &_f, const G &_g, const H &_h)
std::map< Types::ETreeType, EventVector > EventVectorOfTreeType
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Mother of all ROOT objects.
Definition: TObject.h:37
std::vector< EventStats > EvtStatsPerClass
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
A TTree object has a header with a name and a title.
Definition: TTree.h:78
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
std::map< Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType
const Int_t n
Definition: legend1.C:16
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix