133 TMVA::
MethodBase( jobName,
Types::kDT, methodTitle, theData, theOption)
140 , fNodePurityLimit(0)
146 , fRandomisedTrees(kFALSE)
148 , fUsePoissonNvars(0)
149 , fDeltaPruneStrength(0)
151 fPruneBeforeBoost =
kFALSE;
158 const TString& theWeightFile) :
166 , fNodePurityLimit(0)
172 , fRandomisedTrees(
kFALSE)
174 , fDeltaPruneStrength(0)
216 DeclareOptionRef(fRandomisedTrees,
"UseRandomisedTrees",
"Choose at each node splitting a random set of variables and *bagging*");
217 DeclareOptionRef(fUseNvars,
"UseNvars",
"Number of variables used if randomised Tree option is chosen");
218 DeclareOptionRef(fUsePoissonNvars,
"UsePoissonNvars",
"Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
219 DeclareOptionRef(fUseYesNoLeaf=
kTRUE,
"UseYesNoLeaf",
220 "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
221 DeclareOptionRef(fNodePurityLimit=0.5,
"NodePurityLimit",
"In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
222 DeclareOptionRef(fSepTypeS=
"GiniIndex",
"SeparationType",
"Separation criterion for node splitting");
223 AddPreDefVal(
TString(
"MisClassificationError"));
224 AddPreDefVal(
TString(
"GiniIndex"));
225 AddPreDefVal(
TString(
"CrossEntropy"));
226 AddPreDefVal(
TString(
"SDivSqrtSPlusB"));
227 DeclareOptionRef(fMinNodeEvents=-1,
"nEventsMin",
"deprecated !!! Minimum number of events required in a leaf node");
228 DeclareOptionRef(fMinNodeSizeS,
"MinNodeSize",
"Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
229 DeclareOptionRef(fNCuts,
"nCuts",
"Number of steps during node cut optimisation");
230 DeclareOptionRef(fPruneStrength,
"PruneStrength",
"Pruning strength (negative value == automatic adjustment)");
231 DeclareOptionRef(fPruneMethodS=
"NoPruning",
"PruneMethod",
"Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
233 AddPreDefVal(
TString(
"NoPruning"));
234 AddPreDefVal(
TString(
"ExpectedError"));
235 AddPreDefVal(
TString(
"CostComplexity"));
237 if (DoRegression()) {
238 DeclareOptionRef(fMaxDepth=50,
"MaxDepth",
"Max depth of the decision tree allowed");
240 DeclareOptionRef(fMaxDepth=3,
"MaxDepth",
"Max depth of the decision tree allowed");
251 DeclareOptionRef(fPruneBeforeBoost=
kFALSE,
"PruneBeforeBoost",
252 "--> removed option .. only kept for reader backward compatibility");
262 else if (fSepTypeS ==
"giniindex") fSepType =
new GiniIndex();
263 else if (fSepTypeS ==
"crossentropy") fSepType =
new CrossEntropy();
264 else if (fSepTypeS ==
"sdivsqrtsplusb") fSepType =
new SdivSqrtSplusB();
266 Log() << kINFO << GetOptions() <<
Endl;
267 Log() << kFATAL <<
"<ProcessOptions> unknown Separation Index option called" <<
Endl;
272 fPruneMethodS.ToLower();
277 Log() << kINFO << GetOptions() <<
Endl;
278 Log() << kFATAL <<
"<ProcessOptions> unknown PruneMethod option:" << fPruneMethodS <<
" called" <<
Endl;
281 if (fPruneStrength < 0) fAutomatic =
kTRUE;
285 <<
"Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" <<
Endl;
289 if (this->
Data()->HasNegativeEventWeights()){
290 Log() << kINFO <<
" You are using a Monte Carlo that has also negative weights. "
291 <<
"That should in principle be fine as long as on average you end up with "
292 <<
"something positive. For this you have to make sure that the minimal number "
293 <<
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
295 <<
", (or the deprecated equivalent nEventsMin) you can set this via the "
296 <<
"MethodDT option string when booking the "
297 <<
"classifier) is large enough to allow for reasonable averaging!!! "
298 <<
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
299 <<
"which ignores events with negative weight in the training. " <<
Endl
300 <<
Endl <<
"Note: You'll get a WARNING message during the training if that should ever happen" <<
Endl;
303 if (fRandomisedTrees){
304 Log() << kINFO <<
" Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost* ? . Here I can enforce only the *no pruning*" <<
Endl;
309 if (fMinNodeEvents > 0){
310 fMinNodeSize = fMinNodeEvents /
Data()->GetNTrainingEvents() * 100;
311 Log() << kWARNING <<
"You have explicitly set *nEventsMin*, the min absolute number \n"
312 <<
"of events in a leaf node. This is DEPRECATED, please use the option \n"
313 <<
"*MinNodeSize* giving the relative number as percentage of training \n"
314 <<
"events instead. \n"
315 <<
"nEventsMin="<<fMinNodeEvents<<
"--> MinNodeSize="<<fMinNodeSize<<
"%"
318 SetMinNodeSize(fMinNodeSizeS);
323 if (sizeInPercent > 0 && sizeInPercent < 50){
324 fMinNodeSize=sizeInPercent;
327 Log() << kERROR <<
"you have demanded a minimal node size of "
328 << sizeInPercent <<
"% of the training events.. \n"
329 <<
" that somehow does not make sense "<<
Endl;
335 if (sizeInPercent.
IsAlnum()) SetMinNodeSize(sizeInPercent.
Atof());
337 Log() << kERROR <<
"I had problems reading the option MinNodeEvents, which\n"
338 <<
"after removing a possible % sign now reads " << sizeInPercent <<
Endl;
349 fMinNodeSizeS =
"5%";
353 fDeltaPruneStrength=0.1;
355 fUseNvars = GetNvar();
356 fUsePoissonNvars =
kTRUE;
359 SetSignalReferenceCut( 0 );
380 fTree =
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), 0,
381 fRandomisedTrees, fUseNvars, fUsePoissonNvars,fMaxDepth,0 );
382 fTree->SetNVars(GetNvar());
383 if (fRandomisedTrees)
Log()<<kWARNING<<
" randomised Trees do not work yet in this framework,"
384 <<
" as I do not know how to give each tree a new random seed, now they"
385 <<
" will be all the same and that is not good " <<
Endl;
386 fTree->SetAnalysisType( GetAnalysisType() );
390 UInt_t nevents =
Data()->GetNTrainingEvents();
391 std::vector<const TMVA::Event*> tmp;
392 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
393 const Event *
event = GetEvent(ievt);
394 tmp.push_back(event);
396 fTree->BuildTree(tmp);
418 for(
UInt_t i = 0; i < nodes.size(); i++)
419 fTree->PruneNode(nodes[i]);
503 return fPruneStrength;
513 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++)
515 const Event * ev =
Data()->GetEvent(ievt);
520 return SumCorrect / (SumCorrect + SumWrong);
527 fTree->AddXMLTo(parent);
538 fTree->ReadXML(wghtnode,GetTrainingTMVAVersionCode());
556 NoErrorCalc(err, errUpper);
558 return fTree->CheckEvent(GetEvent(),fUseYesNoLeaf);
void Optimize()
determine the pruning sequence
MsgLogger & Endl(MsgLogger &ml)
void GetHelpMessage() const
Singleton class for Global types used by TMVA.
void Init(void)
common initialisation with defaults for the DT-Method
TString & ReplaceAll(const TString &s1, const TString &s2)
Double_t Atof() const
Return floating-point value contained in string.
Double_t GetNodePurityLimit() const
Bool_t IsAlnum() const
Returns true if all characters in string are alphanumeric.
Virtual base Class for all MVA method.
Ranking for variables in method (implementation)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
std::vector< TMVA::DecisionTreeNode * > GetOptimalPruneSequence() const
return the prune strength (=alpha) corresponding to the prune sequence
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out...
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void SetMinNodeSize(Double_t sizeInPercent)
Implementation of the CrossEntropy as separation criterion.
void ReadWeightsFromStream(std::istream &istr)
void DeclareOptions()
Define the options (their key words) that can be set in the option string.
std::vector< std::vector< double > > Data
Class that contains all the data information.
Implementation of the SdivSqrtSplusB as separation criterion.
Implementation of the MisClassificationError as separation criterion.
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
void SetPruneStrength(Float_t alpha=-1.0)
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Implementation of the GiniIndex as separation criterion.
Implementation of a Decision Tree.
Double_t TestTreeQuality(DecisionTree *dt)
void AddWeightsXMLTo(void *parent) const
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
#define REGISTER_METHOD(CLASS)
for example
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual ~MethodDT(void)
destructor
void ReadWeightsFromXML(void *wghtnode)
Float_t GetOptimalPruneStrength() const
Analysis of Boosted Decision Trees.
A helper class to prune a decision tree using the Cost Complexity method (see Classification and Regr...
const Ranking * CreateRanking()