MethodMLP.cxx
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 /*! \class TMVA::MethodMLP
39 \ingroup TMVA
40 
41 Multilayer Perceptron class built off of MethodANNBase
42 
43 */
44 
45 #include "TMVA/MethodMLP.h"
46 
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/ConvergenceTest.h"
50 #include "TMVA/ClassifierFactory.h"
51 #include "TMVA/DataSet.h"
52 #include "TMVA/DataSetInfo.h"
53 #include "TMVA/FitterBase.h"
54 #include "TMVA/GeneticFitter.h"
55 #include "TMVA/IFitterTarget.h"
56 #include "TMVA/IMethod.h"
57 #include "TMVA/Interval.h"
58 #include "TMVA/MethodANNBase.h"
59 #include "TMVA/MsgLogger.h"
60 #include "TMVA/TNeuron.h"
61 #include "TMVA/TSynapse.h"
62 #include "TMVA/Timer.h"
63 #include "TMVA/Tools.h"
64 #include "TMVA/Types.h"
65 
66 #include "TH1.h"
67 #include "TString.h"
68 #include "TTree.h"
69 #include "Riostream.h"
70 #include "TFitter.h"
71 #include "TMatrixD.h"
72 #include "TMath.h"
73 #include "TFile.h"
74 
75 #include <cmath>
76 #include <vector>
77 
78 #ifdef MethodMLP_UseMinuit__
79 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
80 Bool_t MethodMLP_UseMinuit = kTRUE;
81 #endif
82 
83 REGISTER_METHOD(MLP)
84 
85 ClassImp(TMVA::MethodMLP)
86 
87  using std::vector;
88 
89 ////////////////////////////////////////////////////////////////////////////////
90 /// standard constructor
91 
92 TMVA::MethodMLP::MethodMLP( const TString& jobName,
93  const TString& methodTitle,
94  DataSetInfo& theData,
95  const TString& theOption)
96  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
97  fUseRegulator(false), fCalculateErrors(false),
98  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
100  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101  fSamplingTraining(false), fSamplingTesting(false),
102  fLastAlpha(0.0), fTau(0.),
103  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104  fBPMode(kSequential), fBpModeS("None"),
105  fBatchSize(0), fTestRate(0), fEpochMon(false),
106  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107  fGA_SC_rate(0), fGA_SC_factor(0.0),
108  fDeviationsFromTargets(0),
109  fWeightRange (1.0)
110 {
111 
112 }
113 
114 ////////////////////////////////////////////////////////////////////////////////
115 /// constructor from a weight file
116 
117 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
118  const TString& theWeightFile)
119  : MethodANNBase( Types::kMLP, theData, theWeightFile),
120  fUseRegulator(false), fCalculateErrors(false),
121  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
122  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
123  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
124  fSamplingTraining(false), fSamplingTesting(false),
125  fLastAlpha(0.0), fTau(0.),
126  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127  fBPMode(kSequential), fBpModeS("None"),
128  fBatchSize(0), fTestRate(0), fEpochMon(false),
129  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
130  fGA_SC_rate(0), fGA_SC_factor(0.0),
131  fDeviationsFromTargets(0),
132  fWeightRange (1.0)
133 {
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// destructor
138 /// nothing to be done
139 
140 TMVA::MethodMLP::~MethodMLP()
141 {
142 }
143 
144 void TMVA::MethodMLP::Train()
145 {
146  Train(NumCycles());
147 }
148 
149 
150 
151 ////////////////////////////////////////////////////////////////////////////////
152 /// MLP can handle classification with 2 classes, multiclass classification, and regression with one regression-target
153 
154 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
155 {
156  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
157  if (type == Types::kMulticlass ) return kTRUE;
158  if (type == Types::kRegression ) return kTRUE;
159 
160  return kFALSE;
161 }
162 
163 ////////////////////////////////////////////////////////////////////////////////
164 /// default initializations
165 
166 void TMVA::MethodMLP::Init()
167 {
168  // the minimum requirement to declare an event signal-like
169  SetSignalReferenceCut( 0.5 );
170 #ifdef MethodMLP_UseMinuit__
171  fgThis = this;
172 #endif
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// define the options (their key words) that can be set in the option string
177 ///
178 /// known options (a booking sketch follows this function):
179 ///
180 /// - TrainingMethod <string> Training method
181 /// available values are:
182 /// - BP Back-Propagation <default>
183 /// - BFGS quasi-Newton minimization (Broyden-Fletcher-Goldfarb-Shanno)
184 /// - GA Genetic Algorithm (takes a LONG time)
185 /// - LearningRate <float> NN learning rate parameter
186 /// - DecayRate <float> Decay rate for learning parameter
187 /// - TestRate <int> Test for overtraining performed at each #th epoch
188 ///
189 /// - BPMode <string> Back-propagation learning mode
190 /// available values are:
191 /// - sequential <default>
192 /// - batch
193 ///
194 /// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
195 /// - -1 for BatchSize=number_of_events
196 
197 void TMVA::MethodMLP::DeclareOptions()
198 {
199  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
200  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
201  AddPreDefVal(TString("BP"));
202  AddPreDefVal(TString("GA"));
203  AddPreDefVal(TString("BFGS"));
204 
205  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
206  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
207  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
208  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
209 
210  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
211  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance","The sampling weights of events in epochs which are successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
213 
214  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
215  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
216 
217  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
218  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
219 
220  DeclareOptionRef(fBpModeS="sequential", "BPMode",
221  "Back-propagation learning mode: sequential or batch");
222  AddPreDefVal(TString("sequential"));
223  AddPreDefVal(TString("batch"));
224 
225  DeclareOptionRef(fBatchSize=-1, "BatchSize",
226  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
227 
228  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
229  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
230 
231  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
232  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
233 
234  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
235  "Use regulator to avoid over-training"); //zjh
236  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
237  "Maximum times of regulator update"); //zjh
238  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
239  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
240 
241  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
242  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
243 
244 }
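The options declared above map one-to-one onto the tokens of the usual TMVA option string. A minimal booking sketch, assuming a TMVA::Factory named factory and a TMVA::DataLoader named loader, configured as in the standard TMVA tutorial macros; the values shown are the defaults from this function:

   // Book an MLP classifier with explicit training options
   // (all values here equal the defaults from DeclareOptions()).
   factory->BookMethod( loader, TMVA::Types::kMLP, "MLP",
                        "TrainingMethod=BP:LearningRate=0.02:DecayRate=0.01:"
                        "TestRate=10:BPMode=sequential:BatchSize=-1" );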
245 
246 ////////////////////////////////////////////////////////////////////////////////
247 /// process user options
248 
249 void TMVA::MethodMLP::ProcessOptions()
250 {
251  MethodANNBase::ProcessOptions();
252 
253 
254  if (IgnoreEventsWithNegWeightsInTraining()) {
255  Log() << kINFO
256  << "Will ignore events with negative weights in training!"
257  << Endl;
258  }
259 
260 
261  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
262  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
263  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
264 
265  if (fBpModeS == "sequential") fBPMode = kSequential;
266  else if (fBpModeS == "batch") fBPMode = kBatch;
267 
268  // InitializeLearningRates();
269 
270  if (fBPMode == kBatch) {
271  Data()->SetCurrentType(Types::kTraining);
272  Int_t numEvents = Data()->GetNEvents();
273  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
274  }
275 }
276 
277 ////////////////////////////////////////////////////////////////////////////////
278 /// initialize learning rates of synapses, used only by back propagation
279 
280 void TMVA::MethodMLP::InitializeLearningRates()
281 {
282  Log() << kDEBUG << "Initialize learning rates" << Endl;
283  TSynapse *synapse;
284  Int_t numSynapses = fSynapses->GetEntriesFast();
285  for (Int_t i = 0; i < numSynapses; i++) {
286  synapse = (TSynapse*)fSynapses->At(i);
287  synapse->SetLearningRate(fLearnRate);
288  }
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 /// calculate the estimator that training is attempting to minimize
293 
294 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
295 {
296  // sanity check
297  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
298  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
299  }
300 
301  Types::ETreeType saveType = Data()->GetCurrentType();
302  Data()->SetCurrentType(treeType);
303 
304  // if epochs are counted create monitoring histograms (only available for classification)
305  TString type = (treeType == Types::kTraining ? "train" : "test");
306  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
307  TString nameB = name + "_B";
308  TString nameS = name + "_S";
309  Int_t nbin = 100;
310  Float_t limit = 2;
311  TH1* histS = 0;
312  TH1* histB = 0;
313  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
315  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
316  }
317 
318  Double_t estimator = 0;
319 
320  // loop over all training events
321  Int_t nEvents = GetNEvents();
322  UInt_t nClasses = DataInfo().GetNClasses();
323  UInt_t nTgts = DataInfo().GetNTargets();
324 
325 
326  Float_t sumOfWeights = 0.f;
327  if( fWeightRange < 1.f ){
328  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
329  }
330 
331  for (Int_t i = 0; i < nEvents; i++) {
332 
333  const Event* ev = GetEvent(i);
334 
335  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
336  && (saveType == Types::kTraining)){
337  continue;
338  }
339 
340  Double_t w = ev->GetWeight();
341 
342  ForceNetworkInputs( ev );
343  ForceNetworkCalculations();
344 
345  Double_t d = 0, v = 0;
346  if (DoRegression()) {
347  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
348  v = GetOutputNeuron( itgt )->GetActivationValue();
349  Double_t targetValue = ev->GetTarget( itgt );
350  Double_t dt = v - targetValue;
351  d += (dt*dt);
352  }
353  estimator += d*w;
354  } else if (DoMulticlass() ) {
355  UInt_t cls = ev->GetClass();
356  if (fEstimator==kCE){
357  Double_t norm(0);
358  for (UInt_t icls = 0; icls < nClasses; icls++) {
359  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360  norm += exp( activationValue );
361  if(icls==cls)
362  d = exp( activationValue );
363  }
364  d = -TMath::Log(d/norm);
365  }
366  else{
367  for (UInt_t icls = 0; icls < nClasses; icls++) {
368  Double_t desired = (icls==cls) ? 1.0 : 0.0;
369  v = GetOutputNeuron( icls )->GetActivationValue();
370  d = (desired-v)*(desired-v);
371  }
372  }
373  estimator += d*w; //zjh
374  } else {
375  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
376  v = GetOutputNeuron()->GetActivationValue();
377  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
378  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
379  estimator += d*w; //zjh
380  }
381 
382  if( fDeviationsFromTargets )
383  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
384 
385  sumOfWeights += w;
386 
387 
388  // fill monitoring histograms
389  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
390  else if (histB != 0) histB->Fill( float(v), float(w) );
391  }
392 
393 
394  if( fDeviationsFromTargets ) {
395  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
396 
397  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
398  estimator = 0.f;
399 
400  Float_t weightRangeCut = fWeightRange*sumOfWeights;
401  Float_t weightSum = 0.f;
402  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403  float deviation = (*itDev).first;
404  float devWeight = (*itDev).second;
405  weightSum += devWeight; // add the weight of this event
406  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
407  estimator += devWeight*deviation;
408  }
409  }
410 
411  sumOfWeights = sumOfWeightsInRange;
412  delete fDeviationsFromTargets;
413  }
414 
415  if (histS != 0) fEpochMonHistS.push_back( histS );
416  if (histB != 0) fEpochMonHistB.push_back( histB );
417 
418  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
419  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
420  //else estimator = estimator*0.5/Float_t(nEvents);
421  if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
422  else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
423  else estimator = estimator/Float_t(sumOfWeights);
424 
425 
426  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
427 
428  Data()->SetCurrentType( saveType );
429 
430  // provide epoch-wise monitoring
431  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
432  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
433  }
434 
435  return estimator;
436 }
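Written out, the estimator returned above is the weighted mean of a per-event deviation d (restricted to the smallest-deviation events holding a fraction fWeightRange of the total weight when that option is below 1):

\[ E = \frac{\sum_i w_i\, d_i}{\sum_i w_i}, \qquad d_i^{\mathrm{MSE}} = \sum_t (\hat y_{i,t} - y_{i,t})^2, \qquad d_i^{\mathrm{CE}} = -2\,\bigl[\, y_i \ln \hat y_i + (1-y_i)\ln(1-\hat y_i) \,\bigr] \]

with \hat y the output-neuron activation and y the target; for multiclass cross-entropy the code instead uses the softmax form d = -\ln\!\bigl(e^{a_c}/\sum_j e^{a_j}\bigr).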
437 
438 ////////////////////////////////////////////////////////////////////////////////
439 
440 void TMVA::MethodMLP::Train(Int_t nEpochs)
441 {
442  if (fNetwork == 0) {
443  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
444  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
445  SetAnalysisType(GetAnalysisType());
446  }
447  Log() << kDEBUG << "reinitialize learning rates" << Endl;
448  InitializeLearningRates();
449  Log() << kHEADER;
450  PrintMessage("Training Network");
451  Log() << Endl;
452  Int_t nEvents=GetNEvents();
453  Int_t nSynapses=fSynapses->GetEntriesFast();
454  if (nSynapses>nEvents)
455  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
456 
457  fIPyMaxIter = nEpochs;
458  if (fInteractive && fInteractive->NotInitialized()){
459  std::vector<TString> titles = {"Error on training set", "Error on test set"};
460  fInteractive->Init(titles);
461  }
462 
463 #ifdef MethodMLP_UseMinuit__
464  if (useMinuit) MinuitMinimize();
465 #else
466  if (fTrainingMethod == kGA) GeneticMinimize();
467  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
468  else BackPropagationMinimize(nEpochs);
469 #endif
470 
471  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
472  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
473  if (fUseRegulator){
474  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
475  UpdateRegulators();
476  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
477  }
478 
479  if( fCalculateErrors || fUseRegulator )
480  {
481  Int_t numSynapses=fSynapses->GetEntriesFast();
482  fInvHessian.ResizeTo(numSynapses,numSynapses);
483  GetApproxInvHessian( fInvHessian ,false);
484  }
485  ExitFromTraining();
486 }
487 
488 ////////////////////////////////////////////////////////////////////////////////
489 /// train network with BFGS algorithm
490 
491 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
492 {
493  Timer timer( (fSteps>0?100:nEpochs), GetName() );
494 
495  // create histograms for overtraining monitoring
496  Int_t nbinTest = Int_t(nEpochs/fTestRate);
497  if(!IsSilentFile())
498  {
499  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
500  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
501  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
502  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
503  }
504 
505  Int_t nSynapses = fSynapses->GetEntriesFast();
506  Int_t nWeights = nSynapses;
507 
508  for (Int_t i=0;i<nSynapses;i++) {
509  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
510  synapse->SetDEDw(0.0);
511  }
512 
513  std::vector<Double_t> buffer( nWeights );
514  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
515 
516  TMatrixD Dir ( nWeights, 1 );
517  TMatrixD Hessian ( nWeights, nWeights );
518  TMatrixD Gamma ( nWeights, 1 );
519  TMatrixD Delta ( nWeights, 1 );
520  Int_t RegUpdateCD=0; //zjh
521  Int_t RegUpdateTimes=0; //zjh
522  Double_t AccuError=0;
523 
524  Double_t trainE = -1;
525  Double_t testE = -1;
526 
527  fLastAlpha = 0.;
528 
529  if(fSamplingTraining || fSamplingTesting)
530  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
531 
532  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
533  timer.DrawProgressBar( 0 );
534 
535  // start training cycles (epochs)
536  for (Int_t i = 0; i < nEpochs; i++) {
537 
538  if (fExitFromTraining) break;
539  fIPyCurrentIter = i;
540  if (Float_t(i)/nEpochs < fSamplingEpoch) {
541  if ((i+1)%fTestRate == 0 || (i == 0)) {
542  if (fSamplingTraining) {
543  Data()->SetCurrentType( Types::kTraining );
544  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
545  Data()->CreateSampling();
546  }
547  if (fSamplingTesting) {
548  Data()->SetCurrentType( Types::kTesting );
549  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
550  Data()->CreateSampling();
551  }
552  }
553  }
554  else {
555  Data()->SetCurrentType( Types::kTraining );
556  Data()->InitSampling(1.0,1.0);
557  Data()->SetCurrentType( Types::kTesting );
558  Data()->InitSampling(1.0,1.0);
559  }
560  Data()->SetCurrentType( Types::kTraining );
561 
562  //zjh
563  if (fUseRegulator) {
564  UpdatePriors();
565  RegUpdateCD++;
566  }
567  //zjh
568 
569  SetGammaDelta( Gamma, Delta, buffer );
570 
571  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
572  SteepestDir( Dir );
573  Hessian.UnitMatrix();
574  RegUpdateCD=0; //zjh
575  }
576  else {
577  if (GetHessian( Hessian, Gamma, Delta )) {
578  SteepestDir( Dir );
579  Hessian.UnitMatrix();
580  RegUpdateCD=0; //zjh
581  }
582  else SetDir( Hessian, Dir );
583  }
584 
585  Double_t dError=0; //zjh
586  if (DerivDir( Dir ) > 0) {
587  SteepestDir( Dir );
588  Hessian.UnitMatrix();
589  RegUpdateCD=0; //zjh
590  }
591  if (LineSearch( Dir, buffer, &dError )) { //zjh
592  Hessian.UnitMatrix();
593  SteepestDir( Dir );
594  RegUpdateCD=0; //zjh
595  if (LineSearch(Dir, buffer, &dError)) { //zjh
596  i = nEpochs;
597  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
598  }
599  }
600 
601  //zjh+
602  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
603  AccuError+=dError;
604 
605  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
606  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
607  UpdateRegulators();
608  Hessian.UnitMatrix();
609  RegUpdateCD=0;
610  RegUpdateTimes++;
611  AccuError=0;
612  }
613  //zjh-
614 
615  // monitor convergence of training and control sample
616  if ((i+1)%fTestRate == 0) {
617  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
618  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
619  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
620  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
621  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
622  if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
623  {
624  fEstimatorHistTrain->Fill( i+1, trainE );
625  fEstimatorHistTest ->Fill( i+1, testE );
626  }
627  Bool_t success = kFALSE;
628  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
629  success = kTRUE;
630  }
631  Data()->EventResult( success );
632 
633  SetCurrentValue( testE );
634  if (HasConverged()) {
635  if (Float_t(i)/nEpochs < fSamplingEpoch) {
636  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
637  i = newEpoch;
638  ResetConvergenceCounter();
639  }
640  else break;
641  }
642  }
643 
644  // draw progress
645  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
646  if (fSteps > 0) {
647  Float_t progress = 0;
648  if (Float_t(i)/nEpochs < fSamplingEpoch)
649  // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
650  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
651  else
652  {
653  // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
654  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
655  }
656  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
657  if (progress2>progress) progress=progress2; //zjh
658  timer.DrawProgressBar( Int_t(progress), convText );
659  }
660  else {
661  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
662  if (progress<i) progress=i; //zjh
663  timer.DrawProgressBar( progress, convText ); //zjh
664  }
665 
666  // some verbose output
667  if (fgPRINT_SEQ) {
668  PrintNetwork();
669  WaitForKeyboard();
670  }
671  }
672 }
673 
674 ////////////////////////////////////////////////////////////////////////////////
675 
676 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
677 {
678  Int_t nWeights = fSynapses->GetEntriesFast();
679 
680  Int_t IDX = 0;
681  Int_t nSynapses = fSynapses->GetEntriesFast();
682  for (Int_t i=0;i<nSynapses;i++) {
683  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
684  Gamma[IDX++][0] = -synapse->GetDEDw();
685  }
686 
687  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
688 
689  ComputeDEDw();
690 
691  IDX = 0;
692  for (Int_t i=0;i<nSynapses;i++)
693  {
694  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
695  Gamma[IDX++][0] += synapse->GetDEDw();
696  }
697 }
698 
699 ////////////////////////////////////////////////////////////////////////////////
700 
701 void TMVA::MethodMLP::ComputeDEDw()
702 {
703  Int_t nSynapses = fSynapses->GetEntriesFast();
704  for (Int_t i=0;i<nSynapses;i++) {
705  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
706  synapse->SetDEDw( 0.0 );
707  }
708 
709  Int_t nEvents = GetNEvents();
710  Int_t nPosEvents = nEvents;
711  for (Int_t i=0;i<nEvents;i++) {
712 
713  const Event* ev = GetEvent(i);
714  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
715  && (Data()->GetCurrentType() == Types::kTraining)){
716  --nPosEvents;
717  continue;
718  }
719 
720  SimulateEvent( ev );
721 
722  for (Int_t j=0;j<nSynapses;j++) {
723  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
724  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
725  }
726  }
727 
728  for (Int_t i=0;i<nSynapses;i++) {
729  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
730  Double_t DEDw=synapse->GetDEDw(); //zjh
731  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
732  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
733  }
734 }
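In formula form, the gradient stored on each synapse k is the per-event delta summed over the accepted events, plus the regulator prior derivative when UseRegulator is on, normalized by the number of usable events:

\[ \frac{\partial E}{\partial w_k} = \frac{1}{N_{+}} \Bigl( \sum_{i=1}^{N} \Delta_k^{(i)} + \frac{\partial P}{\partial w_k} \Bigr) \]

where \Delta_k^{(i)} is the delta computed by SimulateEvent for synapse k on event i, P is the prior maintained by UpdatePriors, and N_{+} counts the events not skipped for negative weights.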
735 
736 ////////////////////////////////////////////////////////////////////////////////
737 
738 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
739 {
740  Double_t eventWeight = ev->GetWeight();
741 
742  ForceNetworkInputs( ev );
743  ForceNetworkCalculations();
744 
745  if (DoRegression()) {
746  UInt_t ntgt = DataInfo().GetNTargets();
747  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
748  Double_t desired = ev->GetTarget(itgt);
749  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
750  GetOutputNeuron( itgt )->SetError(error);
751  }
752  } else if (DoMulticlass()) {
753  UInt_t nClasses = DataInfo().GetNClasses();
754  UInt_t cls = ev->GetClass();
755  for (UInt_t icls = 0; icls < nClasses; icls++) {
756  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
757  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
758  GetOutputNeuron( icls )->SetError(error);
759  }
760  } else {
761  Double_t desired = GetDesiredOutput( ev );
762  Double_t error=-1; //zjh
763  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
764  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
765  GetOutputNeuron()->SetError(error);
766  }
767 
768  CalculateNeuronDeltas();
769  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
770  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
771  synapse->InitDelta();
772  synapse->CalculateDelta();
773  }
774 }
775 
776 ////////////////////////////////////////////////////////////////////////////////
777 
778 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
779 {
780  Int_t IDX = 0;
781  Int_t nSynapses = fSynapses->GetEntriesFast();
782 
783  for (Int_t i=0;i<nSynapses;i++) {
784  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
785  Dir[IDX++][0] = -synapse->GetDEDw();
786  }
787 }
788 
789 ////////////////////////////////////////////////////////////////////////////////
790 
791 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
792 {
793  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
794  if ((Double_t) gd[0][0] == 0.) return kTRUE;
795  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
796  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
797  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
798  Double_t a = 1 / (Double_t) gd[0][0];
799  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
800  TMatrixD res(TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed,Delta)));
801  res *= f;
802  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
803  TMatrixD(TMatrixD::kTransposed,Delta)));
804  res *= a;
805  Hessian += res;
806 
807  return kFALSE;
808 }
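With \delta the last weight step (Delta) and \gamma the change of the gradient (Gamma), the update applied above is the standard BFGS formula for the approximate inverse Hessian:

\[ H \leftarrow H + \frac{1}{\delta^{T}\gamma}\Bigl[\Bigl(1+\frac{\gamma^{T}H\gamma}{\delta^{T}\gamma}\Bigr)\,\delta\delta^{T} - \delta\,(\gamma^{T}H) - (H\gamma)\,\delta^{T}\Bigr] \]

The early kTRUE return on \delta^{T}\gamma = 0 tells the caller to fall back to the steepest-descent direction and reset H to the unit matrix.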
809 
810 ////////////////////////////////////////////////////////////////////////////////
811 
812 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
813 {
814  Int_t IDX = 0;
815  Int_t nSynapses = fSynapses->GetEntriesFast();
816  TMatrixD DEDw(nSynapses, 1);
817 
818  for (Int_t i=0;i<nSynapses;i++) {
819  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
820  DEDw[IDX++][0] = synapse->GetDEDw();
821  }
822 
823  dir = Hessian * DEDw;
824  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
825 }
826 
827 ////////////////////////////////////////////////////////////////////////////////
828 
829 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
830 {
831  Int_t IDX = 0;
832  Int_t nSynapses = fSynapses->GetEntriesFast();
833  Double_t Result = 0.0;
834 
835  for (Int_t i=0;i<nSynapses;i++) {
836  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
837  Result += Dir[IDX++][0] * synapse->GetDEDw();
838  }
839  return Result;
840 }
841 
842 ////////////////////////////////////////////////////////////////////////////////
843 
844 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
845 {
846  Int_t IDX = 0;
847  Int_t nSynapses = fSynapses->GetEntriesFast();
848  Int_t nWeights = nSynapses;
849 
850  std::vector<Double_t> Origin(nWeights);
851  for (Int_t i=0;i<nSynapses;i++) {
852  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
853  Origin[i] = synapse->GetWeight();
854  }
855 
856  Double_t err1 = GetError();
857  Double_t errOrigin=err1;//zjh
858  Double_t alpha1 = 0.;
859  Double_t alpha2 = fLastAlpha;
860 
861 
862  if (alpha2 < 0.01) alpha2 = 0.01;
863  else if (alpha2 > 2.0) alpha2 = 2.0;
864  Double_t alpha_original = alpha2;
865  Double_t alpha3 = alpha2;
866 
867  SetDirWeights( Origin, Dir, alpha2 );
868  Double_t err2 = GetError();
869  //Double_t err2 = err1;
870  Double_t err3 = err2;
871  Bool_t bingo = kFALSE;
872 
873 
874  if (err1 > err2) {
875  for (Int_t i=0;i<100;i++) {
876  alpha3 *= fTau;
877  SetDirWeights(Origin, Dir, alpha3);
878  err3 = GetError();
879  if (err3 > err2) {
880  bingo = kTRUE;
881  break;
882  }
883  alpha1 = alpha2;
884  err1 = err2;
885  alpha2 = alpha3;
886  err2 = err3;
887  }
888  if (!bingo) {
889  SetDirWeights(Origin, Dir, 0.);
890  return kTRUE;
891  }
892  }
893  else {
894  for (Int_t i=0;i<100;i++) {
895  alpha2 /= fTau;
896  if (i==50) {
897  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
898  alpha2 = -alpha_original;
899  }
900  SetDirWeights(Origin, Dir, alpha2);
901  err2 = GetError();
902  if (err1 > err2) {
903  bingo = kTRUE;
904  break;
905  }
906  alpha3 = alpha2;
907  err3 = err2;
908  }
909  if (!bingo) {
910  SetDirWeights(Origin, Dir, 0.);
911  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
912  fLastAlpha = 0.05;
913  return kTRUE;
914  }
915  }
916 
917  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
918  fLastAlpha = 0.5 * (alpha1 + alpha3 -
919  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
920  - ( err2 - err1 ) / (alpha2 - alpha1 )));
921  }
922  else {
923  fLastAlpha = alpha2;
924  }
925 
926  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
927 
928  SetDirWeights(Origin, Dir, fLastAlpha);
929 
930  // leaving these lines uncommented is a heavy price to pay for only a warning message
931  // (which shouldn't appear anyway)
932  // --> about 15% of time is spent in the final GetError().
933  //
934  Double_t finalError = GetError();
935  if (finalError > err1) {
936  Log() << kWARNING << "Line search increased error! Something is wrong."
937  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
938  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
939  }
940 
941  for (Int_t i=0;i<nSynapses;i++) {
942  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
943  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
944  IDX++;
945  }
946 
947  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
948 
949  return kFALSE;
950 }
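The fLastAlpha expression above is the location of the minimum of the parabola through the three bracketing points (\alpha_1,e_1), (\alpha_2,e_2), (\alpha_3,e_3):

\[ \alpha^{*} = \frac{1}{2}\Biggl[\alpha_1+\alpha_3 - \frac{e_3-e_1}{\dfrac{e_3-e_2}{\alpha_3-\alpha_2}-\dfrac{e_2-e_1}{\alpha_2-\alpha_1}}\Biggr] \]

capped at 10000 before the weights are set along Dir for the final error evaluation.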
951 
952 ////////////////////////////////////////////////////////////////////////////////
953 
954 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
955 {
956  Int_t IDX = 0;
957  Int_t nSynapses = fSynapses->GetEntriesFast();
958 
959  for (Int_t i=0;i<nSynapses;i++) {
960  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
961  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
962  IDX++;
963  }
964  if (fUseRegulator) UpdatePriors();//zjh
965 }
966 
967 
968 ////////////////////////////////////////////////////////////////////////////////
969 
970 Double_t TMVA::MethodMLP::GetError()
971 {
972  Int_t nEvents = GetNEvents();
973  UInt_t ntgts = GetNTargets();
974  Double_t Result = 0.;
975 
976  for (Int_t i=0;i<nEvents;i++) {
977  const Event* ev = GetEvent(i);
978 
979  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
980  && (Data()->GetCurrentType() == Types::kTraining)){
981  continue;
982  }
983  SimulateEvent( ev );
984 
985  Double_t error = 0.;
986  if (DoRegression()) {
987  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
988  error += GetMSEErr( ev, itgt );//zjh
989  }
990  } else if ( DoMulticlass() ){
991  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
992  error += GetMSEErr( ev, icls );
993  }
994  } else {
995  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
996  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
997  }
998  Result += error * ev->GetWeight();
999  }
1000  if (fUseRegulator) Result+=fPrior; //zjh
1001  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
1002  return Result;
1003 }
1004 
1005 ////////////////////////////////////////////////////////////////////////////////
1006 
1007 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
1008 {
1009  Double_t error = 0;
1010  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1011  Double_t target = 0;
1012  if (DoRegression()) target = ev->GetTarget( index );
1013  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1014  else target = GetDesiredOutput( ev );
1015 
1016  error = 0.5*(output-target)*(output-target); //zjh
1017 
1018  return error;
1019 
1020 }
1021 
1022 ////////////////////////////////////////////////////////////////////////////////
1023 
1024 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
1025 {
1026  Double_t error = 0;
1027  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1028  Double_t target = 0;
1029  if (DoRegression()) target = ev->GetTarget( index );
1030  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1031  else target = GetDesiredOutput( ev );
1032 
1033  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1034 
1035  return error;
1036 }
1037 
1038 ////////////////////////////////////////////////////////////////////////////////
1039 /// minimize estimator / train network with back propagation algorithm
1040 
1041 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1042 {
1043  // Timer timer( nEpochs, GetName() );
1044  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1045  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1046 
1047  // create histograms for overtraining monitoring
1048  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1049  if(!IsSilentFile())
1050  {
1051  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1052  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1053  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1054  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1055  }
1056  if(fSamplingTraining || fSamplingTesting)
1057  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1058 
1059  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1060  timer.DrawProgressBar(0);
1061 
1062  // estimators
1063  Double_t trainE = -1;
1064  Double_t testE = -1;
1065 
1066  // start training cycles (epochs)
1067  for (Int_t i = 0; i < nEpochs; i++) {
1068 
1069  if (fExitFromTraining) break;
1070  fIPyCurrentIter = i;
1071  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1072  if ((i+1)%fTestRate == 0 || (i == 0)) {
1073  if (fSamplingTraining) {
1074  Data()->SetCurrentType( Types::kTraining );
1075  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1076  Data()->CreateSampling();
1077  }
1078  if (fSamplingTesting) {
1079  Data()->SetCurrentType( Types::kTesting );
1080  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1081  Data()->CreateSampling();
1082  }
1083  }
1084  }
1085  else {
1086  Data()->SetCurrentType( Types::kTraining );
1087  Data()->InitSampling(1.0,1.0);
1088  Data()->SetCurrentType( Types::kTesting );
1089  Data()->InitSampling(1.0,1.0);
1090  }
1091  Data()->SetCurrentType( Types::kTraining );
1092 
1093  TrainOneEpoch();
1094  DecaySynapseWeights(i >= lateEpoch);
1095 
1096  // monitor convergence of training and control sample
1097  if ((i+1)%fTestRate == 0) {
1098  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1099  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1100  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1101  if(!IsSilentFile())
1102  {
1103  fEstimatorHistTrain->Fill( i+1, trainE );
1104  fEstimatorHistTest ->Fill( i+1, testE );
1105  }
1106  Bool_t success = kFALSE;
1107  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1108  success = kTRUE;
1109  }
1110  Data()->EventResult( success );
1111 
1112  SetCurrentValue( testE );
1113  if (HasConverged()) {
1114  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1115  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1116  i = newEpoch;
1117  ResetConvergenceCounter();
1118  }
1119  else {
1120  if (lateEpoch > i) lateEpoch = i;
1121  else break;
1122  }
1123  }
1124  }
1125 
1126  // draw progress bar (add convergence value)
1127  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1128  if (fSteps > 0) {
1129  Float_t progress = 0;
1130  if (Float_t(i)/nEpochs < fSamplingEpoch)
1131  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1132  else
1133  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1134 
1135  timer.DrawProgressBar( Int_t(progress), convText );
1136  }
1137  else {
1138  timer.DrawProgressBar( i, convText );
1139  }
1140  }
1141 }
1142 
1143 ////////////////////////////////////////////////////////////////////////////////
1144 /// train network over a single epoch/cycle of events
1145 
1146 void TMVA::MethodMLP::TrainOneEpoch()
1147 {
1148  Int_t nEvents = Data()->GetNEvents();
1149 
1150  // randomize the order events will be presented, important for sequential mode
1151  Int_t* index = new Int_t[nEvents];
1152  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1153  Shuffle(index, nEvents);
1154 
1155  // loop over all training events
1156  for (Int_t i = 0; i < nEvents; i++) {
1157 
1158  const Event * ev = GetEvent(index[i]);
1159  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1160  && (Data()->GetCurrentType() == Types::kTraining)){
1161  continue;
1162  }
1163 
1164  TrainOneEvent(index[i]);
1165 
1166  // do adjustments if in batch mode
1167  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1168  AdjustSynapseWeights();
1169  if (fgPRINT_BATCH) {
1170  PrintNetwork();
1171  WaitForKeyboard();
1172  }
1173  }
1174 
1175  // debug in sequential mode
1176  if (fgPRINT_SEQ) {
1177  PrintNetwork();
1178  WaitForKeyboard();
1179  }
1180  }
1181 
1182  delete[] index;
1183 }
1184 
1185 ////////////////////////////////////////////////////////////////////////////////
1186 /// Input:
1187 /// - index: the array to shuffle
1188 /// - n: the size of the array
1189 /// Output:
1190 /// - index: the shuffled indexes
1191 ///
1192 /// This method is used for sequential training
1193 
1194 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1195 {
1196  Int_t j, k;
1197  Int_t a = n - 1;
1198  for (Int_t i = 0; i < n; i++) {
1199  j = (Int_t) (frgen->Rndm() * a);
1200  if (j<n){ // address the 'worries' of coverity
1201  k = index[j];
1202  index[j] = index[i];
1203  index[i] = k;
1204  }
1205  }
1206 }
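For readers who want to test the shuffle outside the class, a self-contained sketch of the same swap scheme; TRandom3 stands in for the private member frgen (an assumption, any TRandom would do):

   #include "TRandom3.h"

   // Swap every position with a random partner drawn in [0, n-1),
   // mirroring TMVA::MethodMLP::Shuffle above.
   void ShuffleSketch(Int_t* index, Int_t n, TRandom3& rng)
   {
      for (Int_t i = 0; i < n; i++) {
         Int_t j = (Int_t)(rng.Rndm() * (n - 1)); // partner index
         Int_t tmp = index[j];
         index[j]  = index[i];
         index[i]  = tmp;
      }
   }

Note this is not a textbook Fisher-Yates shuffle (the partner range does not shrink with i), but it reproduces what the method implements.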
1207 
1208 ////////////////////////////////////////////////////////////////////////////////
1209 /// decay synapse weights
1210 /// in last 10 epochs, lower learning rate even more to find a good minimum
1211 
1212 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1213 {
1214  TSynapse* synapse;
1215  Int_t numSynapses = fSynapses->GetEntriesFast();
1216  for (Int_t i = 0; i < numSynapses; i++) {
1217  synapse = (TSynapse*)fSynapses->At(i);
1218  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1219  else synapse->DecayLearningRate(fDecayRate);
1220  }
1221 }
1222 
1223 ////////////////////////////////////////////////////////////////////////////////
1224 /// fast per-event training
1225 
1226 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1227 {
1228  GetEvent(ievt);
1229 
1230  // as soon as we know how to get event weights, get that here
1231 
1232  // note: the normalization of event weights will affect the choice
1233  // of learning rate, one will have to experiment to get the right value.
1234  // in general, if the "average" event weight is 1, the learning rate
1235  // should be good if set around 0.02 (a good value if all event weights are 1)
1236  Double_t eventWeight = 1.0;
1237 
1238  // get the desired output of this event
1239  Double_t desired;
1240  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1241  else desired = fOutput->GetMax(); // signal //zjh
1242 
1243  // force the value for each input neuron
1244  Double_t x;
1245  TNeuron* neuron;
1246 
1247  for (UInt_t j = 0; j < GetNvar(); j++) {
1248  x = branchVar[j];
1249  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1250  neuron = GetInputNeuron(j);
1251  neuron->ForceValue(x);
1252  }
1253 
1254  ForceNetworkCalculations();
1255  UpdateNetwork(desired, eventWeight);
1256 }
1257 
1258 ////////////////////////////////////////////////////////////////////////////////
1259 /// train network over a single event
1260 /// this uses the new event model
1261 
1262 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1263 {
1264  // note: the normalization of event weights will affect the choice
1265  // of learning rate, one will have to experiment to get the right value.
1266  // in general, if the "average" event weight is 1, the learning rate
1267  // should be good if set around 0.02 (a good value if all event weights are 1)
1268 
1269  const Event * ev = GetEvent(ievt);
1270  Double_t eventWeight = ev->GetWeight();
1271  ForceNetworkInputs( ev );
1272  ForceNetworkCalculations();
1273  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1274  if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1275  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1276 }
1277 
1278 ////////////////////////////////////////////////////////////////////////////////
1279 /// get the desired output of this event
1280 
1281 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1282 {
1283  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1284 }
1285 
1286 ////////////////////////////////////////////////////////////////////////////////
1287 /// update the network based on how closely
1288 /// the output matched the desired output
1289 
1290 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1291 {
1292  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1293  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1294  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1295  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1296  error *= eventWeight;
1297  GetOutputNeuron()->SetError(error);
1298  CalculateNeuronDeltas();
1299  UpdateSynapses();
1300 }
1301 
1302 ////////////////////////////////////////////////////////////////////////////////
1303 /// update the network based on how closely
1304 /// the output matched the desired output
1305 
1306 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1307 {
1308  // Norm for softmax
1309  Double_t norm = 0.;
1310  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1311  Double_t act = GetOutputNeuron(i)->GetActivationValue();
1312  norm += TMath::Exp(act);
1313  }
1314 
1315  // Get output of network, and apply softmax
1316  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1317  Double_t act = GetOutputNeuron(i)->GetActivationValue();
1318  Double_t output = TMath::Exp(act) / norm;
1319  Double_t error = output - desired.at(i);
1320  error *= eventWeight;
1321  GetOutputNeuron(i)->SetError(error);
1322  }
1323 
1324  // Do backpropagation
1325  CalculateNeuronDeltas();
1326  UpdateSynapses();
1327 }
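The error assigned to each output neuron above is exactly the gradient of the softmax cross-entropy with respect to the pre-softmax activations a_i, which is why no separate activation derivative appears here:

\[ p_i = \frac{e^{a_i}}{\sum_j e^{a_j}}, \qquad \frac{\partial}{\partial a_i}\Bigl(-\sum_j t_j \ln p_j\Bigr) = p_i - t_i \]

scaled by the event weight before the deltas are back-propagated.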
1328 
1329 ////////////////////////////////////////////////////////////////////////////////
1330 /// have each neuron calculate its delta by back propagation
1331 
1332 void TMVA::MethodMLP::CalculateNeuronDeltas()
1333 {
1334  TNeuron* neuron;
1335  Int_t numNeurons;
1336  Int_t numLayers = fNetwork->GetEntriesFast();
1337  TObjArray* curLayer;
1338 
1339  // step backwards through the network (back propagation)
1340  // deltas calculated starting at output layer
1341  for (Int_t i = numLayers-1; i >= 0; i--) {
1342  curLayer = (TObjArray*)fNetwork->At(i);
1343  numNeurons = curLayer->GetEntriesFast();
1344 
1345  for (Int_t j = 0; j < numNeurons; j++) {
1346  neuron = (TNeuron*) curLayer->At(j);
1347  neuron->CalculateDelta();
1348  }
1349  }
1350 }
1351 
1352 ////////////////////////////////////////////////////////////////////////////////
1353 /// create genetics class similar to GeneticCut
1354 /// give it vector of parameter ranges (parameters = weights)
1355 /// link fitness function of this class to ComputeEstimator
1356 /// instantiate GA (see MethodCuts)
1357 /// run it
1358 /// then this should exist for GA, Minuit and random sampling
1359 
1360 void TMVA::MethodMLP::GeneticMinimize()
1361 {
1362  PrintMessage("Minimizing Estimator with GA");
1363 
1364  // define GA parameters
1365  fGA_preCalc = 1;
1366  fGA_SC_steps = 10;
1367  fGA_SC_rate = 5;
1368  fGA_SC_factor = 0.95;
1369  fGA_nsteps = 30;
1370 
1371  // ranges
1372  std::vector<Interval*> ranges;
1373 
1374  Int_t numWeights = fSynapses->GetEntriesFast();
1375  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1376  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1377  }
1378 
1379  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1380  gf->Run();
1381 
1382  Double_t estimator = CalculateEstimator();
1383  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1384 }
1385 
1386 ////////////////////////////////////////////////////////////////////////////////
1387 /// interface to the estimate
1388 
1389 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1390 {
1391  return ComputeEstimator( parameters );
1392 }
1393 
1394 ////////////////////////////////////////////////////////////////////////////////
1395 /// this function is called by GeneticANN for GA optimization
1396 
1397 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1398 {
1399  TSynapse* synapse;
1400  Int_t numSynapses = fSynapses->GetEntriesFast();
1401 
1402  for (Int_t i = 0; i < numSynapses; i++) {
1403  synapse = (TSynapse*)fSynapses->At(i);
1404  synapse->SetWeight(parameters.at(i));
1405  }
1406  if (fUseRegulator) UpdatePriors(); //zjh
1407 
1408  Double_t estimator = CalculateEstimator();
1409 
1410  return estimator;
1411 }
1412 
1413 ////////////////////////////////////////////////////////////////////////////////
1414 /// update synapse error fields and adjust the weights (if in sequential mode)
1415 
1416 void TMVA::MethodMLP::UpdateSynapses()
1417 {
1418  TNeuron* neuron;
1419  Int_t numNeurons;
1420  TObjArray* curLayer;
1421  Int_t numLayers = fNetwork->GetEntriesFast();
1422 
1423  for (Int_t i = 0; i < numLayers; i++) {
1424  curLayer = (TObjArray*)fNetwork->At(i);
1425  numNeurons = curLayer->GetEntriesFast();
1426 
1427  for (Int_t j = 0; j < numNeurons; j++) {
1428  neuron = (TNeuron*) curLayer->At(j);
1429  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1430  else neuron->UpdateSynapsesSequential();
1431  }
1432  }
1433 }
1434 
1435 ////////////////////////////////////////////////////////////////////////////////
1436 /// just adjust the synapse weights (should be called in batch mode)
1437 
1438 void TMVA::MethodMLP::AdjustSynapseWeights()
1439 {
1440  TNeuron* neuron;
1441  Int_t numNeurons;
1442  TObjArray* curLayer;
1443  Int_t numLayers = fNetwork->GetEntriesFast();
1444 
1445  for (Int_t i = numLayers-1; i >= 0; i--) {
1446  curLayer = (TObjArray*)fNetwork->At(i);
1447  numNeurons = curLayer->GetEntriesFast();
1448 
1449  for (Int_t j = 0; j < numNeurons; j++) {
1450  neuron = (TNeuron*) curLayer->At(j);
1451  neuron->AdjustSynapseWeights();
1452  }
1453  }
1454 }
1455 
1456 ////////////////////////////////////////////////////////////////////////////////
1457 
1458 void TMVA::MethodMLP::UpdatePriors()
1459 {
1460  fPrior=0;
1461  fPriorDev.clear();
1462  Int_t nSynapses = fSynapses->GetEntriesFast();
1463  for (Int_t i=0;i<nSynapses;i++) {
1464  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1465  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1466  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1467  }
1468 }
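The two quantities maintained here correspond to a Gaussian prior on the weights, with one regulator constant per synapse class c(k) (this is the Bayesian regularization behind the UseRegulator option):

\[ P = \frac{1}{2}\sum_k \alpha_{c(k)}\, w_k^{2}, \qquad \frac{\partial P}{\partial w_k} = \alpha_{c(k)}\, w_k \]

with the \alpha values stored in fRegulators and the class index in fRegulatorIdx.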
1469 
1470 ////////////////////////////////////////////////////////////////////////////////
1471 
1472 void TMVA::MethodMLP::UpdateRegulators()
1473 {
1474  TMatrixD InvH(0,0);
1475  GetApproxInvHessian(InvH);
1476  Int_t numSynapses=fSynapses->GetEntriesFast();
1477  Int_t numRegulators=fRegulators.size();
1478  Float_t gamma=0,
1479  variance=1.; // Gaussian noise
1480  std::vector<Int_t> nWDP(numRegulators);
1481  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1482  for (int i=0;i<numSynapses;i++) {
1483  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1484  Int_t idx=fRegulatorIdx[i];
1485  nWDP[idx]++;
1486  trace[idx]+=InvH[i][i];
1487  gamma+=1-fRegulators[idx]*InvH[i][i];
1488  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1489  }
1490  if (fEstimator==kMSE) {
1491  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1492  else variance=CalculateEstimator( Types::kTraining, 0 );
1493  }
1494 
1495  //Log() << kDEBUG << Endl;
1496  for (int i=0;i<numRegulators;i++)
1497  {
1498  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1499  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1500  if (fRegulators[i]<0) fRegulators[i]=0;
1501  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1502  }
1503  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1504  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1505 
1506  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1507 
1508 }
1509 
1510 ////////////////////////////////////////////////////////////////////////////////
1511 
1512 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1513 {
1514  Int_t numSynapses=fSynapses->GetEntriesFast();
1515  InvHessian.ResizeTo( numSynapses, numSynapses );
1516  InvHessian=0;
1517  TMatrixD sens(numSynapses,1);
1518  TMatrixD sensT(1,numSynapses);
1519  Int_t nEvents = GetNEvents();
1520  for (Int_t i=0;i<nEvents;i++) {
1521  GetEvent(i);
1522  double outputValue=GetMvaValue(); // force calculation
1523  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1524  CalculateNeuronDeltas();
1525  for (Int_t j = 0; j < numSynapses; j++){
1526  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1527  synapses->InitDelta();
1528  synapses->CalculateDelta();
1529  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1530  }
1531  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1532  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1533  }
1534 
1535  // TVectorD eValue(numSynapses);
1536  if (regulate) {
1537  for (Int_t i = 0; i < numSynapses; i++){
1538  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1539  }
1540  }
1541  else {
1542  for (Int_t i = 0; i < numSynapses; i++){
1543  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1544  }
1545  }
1546 
1547  InvHessian.Invert();
1548 
1549 }
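The matrix assembled above is an outer-product (Gauss-Newton style) approximation of the error Hessian, stabilized on the diagonal and then inverted in place:

\[ H \approx \sum_{\text{events}} c_i\, s_i s_i^{T} + D, \qquad c_i = \begin{cases} 1 & \text{MSE} \\ \hat y_i (1-\hat y_i) & \text{CE} \end{cases} \]

where s_i is the per-event vector of synapse deltas and D is either diag(fRegulators) or 10^{-6} times the unit matrix, added to keep the matrix positive definite.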
1550 
1551 ////////////////////////////////////////////////////////////////////////////////
1552 
1553 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1554 {
1555  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1556 
1557  // no hessian (old training file) or no error requested
1558  if (!fCalculateErrors || errLower==0 || errUpper==0)
1559  return MvaValue;
1560 
1561  Double_t MvaUpper,MvaLower,median,variance;
1562  Int_t numSynapses=fSynapses->GetEntriesFast();
1563  if (fInvHessian.GetNcols()!=numSynapses) {
1564  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1565  }
1566  TMatrixD sens(numSynapses,1);
1567  TMatrixD sensT(1,numSynapses);
1568  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1569  //GetOutputNeuron()->SetError(1.);
1570  CalculateNeuronDeltas();
1571  for (Int_t i = 0; i < numSynapses; i++){
1572  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1573  synapses->InitDelta();
1574  synapses->CalculateDelta();
1575  sensT[0][i]=synapses->GetDelta();
1576  }
1577  sens.Transpose(sensT);
1578  TMatrixD sig=sensT*fInvHessian*sens;
1579  variance=sig[0][0];
1580  median=GetOutputNeuron()->GetValue();
1581 
1582  if (variance<0) {
1583  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1584  variance=0;
1585  }
1586  variance=sqrt(variance);
1587 
1588  //upper
1589  MvaUpper=fOutput->Eval(median+variance);
1590  if(errUpper)
1591  *errUpper=MvaUpper-MvaValue;
1592 
1593  //lower
1594  MvaLower=fOutput->Eval(median-variance);
1595  if(errLower)
1596  *errLower=MvaValue-MvaLower;
1597 
1598  return MvaValue;
1599 }
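The error band computed above is linear error propagation through the trained network: with s the sensitivity vector and H^{-1} the inverse Hessian stored at the end of training,

\[ \sigma^{2} = s^{T} H^{-1} s, \qquad \mathrm{errUpper} = f(m+\sigma) - \mathrm{MVA}, \qquad \mathrm{errLower} = \mathrm{MVA} - f(m-\sigma) \]

where m is the raw value of the output neuron and f the output activation function (fOutput->Eval).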
1600 
1601 
1602 #ifdef MethodMLP_UseMinuit__
1603 
1604 ////////////////////////////////////////////////////////////////////////////////
1605 /// minimize using Minuit
1606 
1607 void TMVA::MethodMLP::MinuitMinimize()
1608 {
1609  fNumberOfWeights = fSynapses->GetEntriesFast();
1610 
1611  TFitter* tfitter = new TFitter( fNumberOfWeights );
1612 
1613  // minuit-specific settings
1614  Double_t args[10];
1615 
1616  // output level
1617  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1618  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1619  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1620 
1621  double w[54];
1622 
1623  // init parameters
1624  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1625  TString parName = Form("w%i", ipar);
1626  tfitter->SetParameter( ipar,
1627  parName, w[ipar], 0.1, 0, 0 );
1628  }
1629 
1630  // define the CFN function
1631  tfitter->SetFCN( &IFCN );
1632 
1633  // define fit strategy
1634  args[0] = 2;
1635  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1636 
1637  // now do the fit !
1638  args[0] = 1e-04;
1639  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1640 
1641  Bool_t doBetter = kFALSE;
1642  Bool_t doEvenBetter = kFALSE;
1643  if (doBetter) {
1644  args[0] = 1e-04;
1645  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1646 
1647  if (doEvenBetter) {
1648  args[0] = 500;
1649  tfitter->ExecuteCommand( "MINOS", args, 1 );
1650  }
1651  }
1652 }
1653 
1654 ////////////////////////////////////////////////////////////////////////////////
1655 /// Evaluate the minimisation function
1656 ///
1657 /// Input parameters:
1658 /// - npars: number of currently variable parameters
1659 /// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1660 /// - fitPars: array of (constant and variable) parameters
1661 /// - iflag: indicates what is to be calculated (see example below)
1662 /// - grad: array of gradients
1663 ///
1664 /// Output parameters:
1665 /// - f: the calculated function value.
1666 /// - grad: the (optional) vector of first derivatives).
1667 
1668 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1669 {
1670  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1671 }
1672 
1673 TTHREAD_TLS(Int_t) nc = 0;
1674 TTHREAD_TLS(double) minf = 1000000;
1675 
1676 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1677 {
1678  // first update the weights
1679  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1680  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1681  synapse->SetWeight(fitPars[ipar]);
1682  }
1683 
1684  // now compute the estimator
1685  f = CalculateEstimator();
1686 
1687  nc++;
1688  if (f < minf) minf = f;
1689  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1690  Log() << kDEBUG << Endl;
1691  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1692 }
1693 
1694 ////////////////////////////////////////////////////////////////////////////////
1695 /// global "this" pointer to be used in minuit
1696 
1697 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1698 {
1699  return fgThis;
1700 }
1701 
1702 #endif
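IFCN() and GetThisPtr() implement the usual trampoline for C-style callbacks:
Minuit can only invoke a plain function, so a static pointer to the active
instance (fgThis) lets the static hook forward to the member function. A
generic, self-contained sketch of the same pattern (all names hypothetical;
like fgThis it assumes a single active instance and is not thread-safe):

   // route a C-style callback, which cannot capture `this`, to a member function
   class Minimizable {
   public:
      Minimizable() { fgInstance = this; }  // register the active instance
      // signature dictated by the C API (here: a Minuit-style FCN)
      static void Trampoline(int& npar, double* grad, double& f, double* par, int flag)
      {
         fgInstance->Evaluate(npar, grad, f, par, flag);
      }
   private:
      void Evaluate(int& /*npar*/, double* /*grad*/, double& f, double* par, int /*flag*/)
      {
         f = par[0] * par[0];  // placeholder objective
      }
      static Minimizable* fgInstance;
   };
   Minimizable* Minimizable::fgInstance = nullptr;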
1703 
1704 
1705 ////////////////////////////////////////////////////////////////////////////////
1706 /// write specific classifier response
1707 
1708 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1709 {
1710  MethodANNBase::MakeClassSpecific(fout, className);
1711 }
1712 
1713 ////////////////////////////////////////////////////////////////////////////////
1714 /// get help message text
1715 ///
1716 /// typical length of text line:
1717 /// "|--------------------------------------------------------------|"
1718 
1719 void TMVA::MethodMLP::GetHelpMessage() const
1720 {
1721  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1722  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1723 
1724  Log() << Endl;
1725  Log() << col << "--- Short description:" << colres << Endl;
1726  Log() << Endl;
1727  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1728  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1729  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1730  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1731  Log() << "signal and background). " << Endl;
1732  Log() << Endl;
1733  Log() << col << "--- Performance optimisation:" << colres << Endl;
1734  Log() << Endl;
1735  Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
1736  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1737  Log() << "to (e.g.) boosted decision trees, the user is advised to remove " << Endl;
1738  Log() << "input variables that carry only little discrimination power. " << Endl;
1739  Log() << "" << Endl;
1740  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1741  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1742  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1743  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1744  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1745  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1746  Log() << "competitive results)." << Endl;
1747  Log() << Endl;
1748  Log() << col << "Overtraining: " << colres
1749  << "only the TMlpANN performs an explicit separation of the" << Endl;
1750  Log() << "full training sample into independent training and validation samples." << Endl;
1751  Log() << "We have found that in most high-energy physics applications the " << Endl;
1752  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1753  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1754  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1755  Log() << "the use of validation samples would only reduce the available training" << Endl;
1756  Log() << "information. However, if the performance on the training sample is " << Endl;
1757  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1758  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1759  Log() << "are printed to standard output at the end of each training job." << Endl;
1760  Log() << Endl;
1761  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1762  Log() << Endl;
1763  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1764  Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1765  Log() << "neurons, the second N neurons (and so on), and where N is the number " << Endl;
1766  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1767  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1768  Log() << "" << Endl;
1769  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1770  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1771  Log() << "using a large number of training cycles should not lead to overtraining." << Endl;
1772 }
1773 
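As a usage footnote to the help text: the architecture and training options it
describes are passed as an option string when the method is booked. A minimal
sketch of such a booking call in a ROOT-6.10-era TMVA macro (outputFile, the
data-loading steps, and the option values are illustrative assumptions, not
recommendations made by this file):

   TFile* outputFile = TFile::Open("TMVA.root", "RECREATE");
   TMVA::Factory factory("TMVAClassification", outputFile, "AnalysisType=Classification");
   TMVA::DataLoader loader("dataset");
   // ... loader.AddVariable(...), loader.AddSignalTree(...), loader.AddBackgroundTree(...) ...
   factory.BookMethod(&loader, TMVA::Types::kMLP, "MLP",
                      "HiddenLayers=N+1,N:NCycles=600:NeuronType=tanh:TrainingMethod=BP");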