78 #ifdef MethodMLP_UseMinuit__
97 fUseRegulator(false), fCalculateErrors(false),
98 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
100 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101 fSamplingTraining(false), fSamplingTesting(false),
102 fLastAlpha(0.0), fTau(0.),
103 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104 fBPMode(kSequential), fBpModeS("
None"),
105 fBatchSize(0), fTestRate(0), fEpochMon(false),
106 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107 fGA_SC_rate(0), fGA_SC_factor(0.0),
108 fDeviationsFromTargets(0),
120 fUseRegulator(false), fCalculateErrors(false),
121 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
122 fTrainingMethod(kBFGS), fTrainMethodS(
"BFGS"),
123 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
124 fSamplingTraining(false), fSamplingTesting(false),
125 fLastAlpha(0.0), fTau(0.),
126 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127 fBPMode(kSequential), fBpModeS(
"None"),
128 fBatchSize(0), fTestRate(0), fEpochMon(false),
129 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
130 fGA_SC_rate(0), fGA_SC_factor(0.0),
131 fDeviationsFromTargets(0),
169 SetSignalReferenceCut( 0.5 );
170 #ifdef MethodMLP_UseMinuit__
199 DeclareOptionRef(fTrainMethodS=
"BP",
"TrainingMethod",
200 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
205 DeclareOptionRef(fLearnRate=0.02,
"LearningRate",
"ANN learning rate parameter");
206 DeclareOptionRef(fDecayRate=0.01,
"DecayRate",
"Decay rate for learning parameter");
207 DeclareOptionRef(fTestRate =10,
"TestRate",
"Test for overtraining performed at each #th epochs");
208 DeclareOptionRef(fEpochMon =
kFALSE,
"EpochMonitoring",
"Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
210 DeclareOptionRef(fSamplingFraction=1.0,
"Sampling",
"Only 'Sampling' (randomly selected) events are trained each epoch");
211 DeclareOptionRef(fSamplingEpoch=1.0,
"SamplingEpoch",
"Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212 DeclareOptionRef(fSamplingWeight=1.0,
"SamplingImportance",
" The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
214 DeclareOptionRef(fSamplingTraining=
kTRUE,
"SamplingTraining",
"The training sample is sampled");
215 DeclareOptionRef(fSamplingTesting=
kFALSE,
"SamplingTesting" ,
"The testing sample is sampled");
217 DeclareOptionRef(fResetStep=50,
"ResetStep",
"How often BFGS should reset history");
218 DeclareOptionRef(fTau =3.0,
"Tau",
"LineSearch \"size step\"");
220 DeclareOptionRef(fBpModeS=
"sequential",
"BPMode",
221 "Back-propagation learning mode: sequential or batch");
222 AddPreDefVal(
TString(
"sequential"));
223 AddPreDefVal(
TString(
"batch"));
225 DeclareOptionRef(fBatchSize=-1,
"BatchSize",
226 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
228 DeclareOptionRef(fImprovement=1
e-30,
"ConvergenceImprove",
229 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
231 DeclareOptionRef(fSteps=-1,
"ConvergenceTests",
232 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
234 DeclareOptionRef(fUseRegulator=
kFALSE,
"UseRegulator",
235 "Use regulator to avoid over-training");
236 DeclareOptionRef(fUpdateLimit=10000,
"UpdateLimit",
237 "Maximum times of regulator update");
238 DeclareOptionRef(fCalculateErrors=
kFALSE,
"CalculateErrors",
239 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");
241 DeclareOptionRef(fWeightRange=1.0,
"WeightRange",
242 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
254 if (IgnoreEventsWithNegWeightsInTraining()) {
256 <<
"Will ignore negative events in training!"
261 if (fTrainMethodS ==
"BP" ) fTrainingMethod = kBP;
262 else if (fTrainMethodS ==
"BFGS") fTrainingMethod = kBFGS;
263 else if (fTrainMethodS ==
"GA" ) fTrainingMethod = kGA;
265 if (fBpModeS ==
"sequential") fBPMode = kSequential;
266 else if (fBpModeS ==
"batch") fBPMode = kBatch;
270 if (fBPMode == kBatch) {
273 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
282 Log() << kDEBUG <<
"Initialize learning rates" <<
Endl;
284 Int_t numSynapses = fSynapses->GetEntriesFast();
285 for (
Int_t i = 0; i < numSynapses; i++) {
286 synapse = (
TSynapse*)fSynapses->At(i);
298 Log() << kFATAL <<
"<CalculateEstimator> fatal error: wrong tree type: " << treeType <<
Endl;
302 Data()->SetCurrentType(treeType);
313 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314 histS =
new TH1F( nameS, nameS, nbin, -limit, limit );
315 histB =
new TH1F( nameB, nameB, nbin, -limit, limit );
322 UInt_t nClasses = DataInfo().GetNClasses();
323 UInt_t nTgts = DataInfo().GetNTargets();
327 if( fWeightRange < 1.
f ){
328 fDeviationsFromTargets =
new std::vector<std::pair<Float_t,Float_t> >(
nEvents);
333 const Event* ev = GetEvent(i);
335 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
342 ForceNetworkInputs( ev );
343 ForceNetworkCalculations();
346 if (DoRegression()) {
347 for (
UInt_t itgt = 0; itgt < nTgts; itgt++) {
348 v = GetOutputNeuron( itgt )->GetActivationValue();
354 }
else if (DoMulticlass() ) {
356 if (fEstimator==kCE){
358 for (
UInt_t icls = 0; icls < nClasses; icls++) {
359 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360 norm +=
exp( activationValue );
362 d =
exp( activationValue );
367 for (
UInt_t icls = 0; icls < nClasses; icls++) {
368 Double_t desired = (icls==cls) ? 1.0 : 0.0;
369 v = GetOutputNeuron( icls )->GetActivationValue();
370 d = (desired-
v)*(desired-
v);
375 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
376 v = GetOutputNeuron()->GetActivationValue();
377 if (fEstimator==kMSE) d = (desired-
v)*(desired-
v);
382 if( fDeviationsFromTargets )
383 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
389 if (DataInfo().IsSignal(ev) && histS != 0) histS->
Fill(
float(
v),
float(w) );
390 else if (histB != 0) histB->
Fill(
float(
v),
float(w) );
394 if( fDeviationsFromTargets ) {
395 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
397 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
400 Float_t weightRangeCut = fWeightRange*sumOfWeights;
402 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403 float deviation = (*itDev).first;
404 float devWeight = (*itDev).second;
405 weightSum += devWeight;
406 if( weightSum <= weightRangeCut ) {
407 estimator += devWeight*deviation;
411 sumOfWeights = sumOfWeightsInRange;
412 delete fDeviationsFromTargets;
415 if (histS != 0) fEpochMonHistS.push_back( histS );
416 if (histB != 0) fEpochMonHistB.push_back( histB );
421 if (DoRegression()) estimator = estimator/
Float_t(sumOfWeights);
422 else if (DoMulticlass()) estimator = estimator/
Float_t(sumOfWeights);
423 else estimator = estimator/
Float_t(sumOfWeights);
428 Data()->SetCurrentType( saveType );
431 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType ==
Types::kTraining) {
432 CreateWeightMonitoringHists(
Form(
"epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
444 Log() << kFATAL <<
"ANN Network is not initialized, doing it now!"<<
Endl;
445 SetAnalysisType(GetAnalysisType());
447 Log() << kDEBUG <<
"reinitialize learning rates" <<
Endl;
448 InitializeLearningRates();
450 PrintMessage(
"Training Network");
453 Int_t nSynapses=fSynapses->GetEntriesFast();
454 if (nSynapses>nEvents)
455 Log()<<kWARNING<<
"ANN too complicated: #events="<<nEvents<<
"\t#synapses="<<nSynapses<<
Endl;
457 fIPyMaxIter = nEpochs;
458 if (fInteractive && fInteractive->NotInitialized()){
459 std::vector<TString> titles = {
"Error on training set",
"Error on test set"};
460 fInteractive->Init(titles);
463 #ifdef MethodMLP_UseMinuit__
464 if (useMinuit) MinuitMinimize();
466 if (fTrainingMethod == kGA) GeneticMinimize();
467 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
468 else BackPropagationMinimize(nEpochs);
474 Log()<<kINFO<<
"Finalizing handling of Regulator terms, trainE="<<trainE<<
" testE="<<testE<<
Endl;
476 Log()<<kINFO<<
"Done with handling of Regulator terms"<<
Endl;
479 if( fCalculateErrors || fUseRegulator )
481 Int_t numSynapses=fSynapses->GetEntriesFast();
482 fInvHessian.ResizeTo(numSynapses,numSynapses);
483 GetApproxInvHessian( fInvHessian ,
false);
499 fEstimatorHistTrain =
new TH1F(
"estimatorHistTrain",
"training estimator",
500 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
501 fEstimatorHistTest =
new TH1F(
"estimatorHistTest",
"test estimator",
502 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
505 Int_t nSynapses = fSynapses->GetEntriesFast();
506 Int_t nWeights = nSynapses;
508 for (
Int_t i=0;i<nSynapses;i++) {
513 std::vector<Double_t> buffer( nWeights );
514 for (
Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
517 TMatrixD Hessian ( nWeights, nWeights );
521 Int_t RegUpdateTimes=0;
529 if(fSamplingTraining || fSamplingTesting)
530 Data()->InitSampling(1.0,1.0,fRandomSeed);
532 if (fSteps > 0)
Log() << kINFO <<
"Inaccurate progress timing for MLP... " <<
Endl;
536 for (
Int_t i = 0; i < nEpochs; i++) {
538 if (fExitFromTraining)
break;
540 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
541 if ((i+1)%fTestRate == 0 || (i == 0)) {
542 if (fSamplingTraining) {
544 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
545 Data()->CreateSampling();
547 if (fSamplingTesting) {
549 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
550 Data()->CreateSampling();
556 Data()->InitSampling(1.0,1.0);
558 Data()->InitSampling(1.0,1.0);
569 SetGammaDelta( Gamma, Delta, buffer );
571 if (i % fResetStep == 0 && i<0.5*nEpochs) {
577 if (GetHessian( Hessian, Gamma, Delta )) {
582 else SetDir( Hessian, Dir );
586 if (DerivDir( Dir ) > 0) {
591 if (LineSearch( Dir, buffer, &dError )) {
595 if (LineSearch(Dir, buffer, &dError)) {
597 Log() << kFATAL <<
"Line search failed! Huge troubles somewhere..." <<
Endl;
602 if (dError<0)
Log()<<kWARNING<<
"\nnegative dError=" <<dError<<
Endl;
605 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 &&
fabs(dError)<0.1*AccuError) {
606 Log()<<kDEBUG<<
"\n\nUpdate regulators "<<RegUpdateTimes<<
" on epoch "<<i<<
"\tdError="<<dError<<
Endl;
616 if ((i+1)%fTestRate == 0) {
621 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
624 fEstimatorHistTrain->Fill( i+1, trainE );
625 fEstimatorHistTest ->Fill( i+1, testE );
628 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1
e-100)) {
631 Data()->EventResult( success );
633 SetCurrentValue( testE );
634 if (HasConverged()) {
635 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
636 Int_t newEpoch =
Int_t(fSamplingEpoch*nEpochs);
638 ResetConvergenceCounter();
645 TString convText =
Form(
"<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i );
648 if (
Float_t(i)/nEpochs < fSamplingEpoch)
650 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
654 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
656 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit;
657 if (progress2>progress) progress=progress2;
662 if (progress<i) progress=i;
678 Int_t nWeights = fSynapses->GetEntriesFast();
681 Int_t nSynapses = fSynapses->GetEntriesFast();
682 for (
Int_t i=0;i<nSynapses;i++) {
684 Gamma[IDX++][0] = -synapse->
GetDEDw();
687 for (
Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
692 for (
Int_t i=0;i<nSynapses;i++)
695 Gamma[IDX++][0] += synapse->
GetDEDw();
703 Int_t nSynapses = fSynapses->GetEntriesFast();
704 for (
Int_t i=0;i<nSynapses;i++) {
713 const Event* ev = GetEvent(i);
714 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
722 for (
Int_t j=0;j<nSynapses;j++) {
728 for (
Int_t i=0;i<nSynapses;i++) {
731 if (fUseRegulator) DEDw+=fPriorDev[i];
732 synapse->
SetDEDw( DEDw / nPosEvents );
742 ForceNetworkInputs( ev );
743 ForceNetworkCalculations();
745 if (DoRegression()) {
746 UInt_t ntgt = DataInfo().GetNTargets();
747 for (
UInt_t itgt = 0; itgt < ntgt; itgt++) {
749 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
750 GetOutputNeuron( itgt )->SetError(error);
752 }
else if (DoMulticlass()) {
753 UInt_t nClasses = DataInfo().GetNClasses();
755 for (
UInt_t icls = 0; icls < nClasses; icls++) {
756 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
757 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
758 GetOutputNeuron( icls )->SetError(error);
761 Double_t desired = GetDesiredOutput( ev );
763 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
764 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
765 GetOutputNeuron()->SetError(error);
768 CalculateNeuronDeltas();
769 for (
Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
781 Int_t nSynapses = fSynapses->GetEntriesFast();
783 for (
Int_t i=0;i<nSynapses;i++) {
785 Dir[IDX++][0] = -synapse->
GetDEDw();
815 Int_t nSynapses = fSynapses->GetEntriesFast();
818 for (
Int_t i=0;i<nSynapses;i++) {
820 DEDw[IDX++][0] = synapse->
GetDEDw();
823 dir = Hessian * DEDw;
824 for (
Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
832 Int_t nSynapses = fSynapses->GetEntriesFast();
835 for (
Int_t i=0;i<nSynapses;i++) {
837 Result += Dir[IDX++][0] * synapse->
GetDEDw();
847 Int_t nSynapses = fSynapses->GetEntriesFast();
848 Int_t nWeights = nSynapses;
850 std::vector<Double_t> Origin(nWeights);
851 for (
Int_t i=0;i<nSynapses;i++) {
862 if (alpha2 < 0.01) alpha2 = 0.01;
863 else if (alpha2 > 2.0) alpha2 = 2.0;
867 SetDirWeights( Origin, Dir, alpha2 );
875 for (
Int_t i=0;i<100;i++) {
877 SetDirWeights(Origin, Dir, alpha3);
889 SetDirWeights(Origin, Dir, 0.);
894 for (
Int_t i=0;i<100;i++) {
897 Log() << kWARNING <<
"linesearch, starting to investigate direction opposite of steepestDIR" <<
Endl;
898 alpha2 = -alpha_original;
900 SetDirWeights(Origin, Dir, alpha2);
910 SetDirWeights(Origin, Dir, 0.);
911 Log() << kWARNING <<
"linesearch, failed even in opposite direction of steepestDIR" <<
Endl;
917 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
918 fLastAlpha = 0.5 * (alpha1 + alpha3 -
919 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
920 - ( err2 - err1 ) / (alpha2 - alpha1 )));
926 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
928 SetDirWeights(Origin, Dir, fLastAlpha);
935 if (finalError > err1) {
936 Log() << kWARNING <<
"Line search increased error! Something is wrong."
937 <<
"fLastAlpha=" << fLastAlpha <<
"al123=" << alpha1 <<
" "
938 << alpha2 <<
" " << alpha3 <<
" err1="<< err1 <<
" errfinal=" << finalError <<
Endl;
941 for (
Int_t i=0;i<nSynapses;i++) {
943 buffer[IDX] = synapse->
GetWeight() - Origin[IDX];
947 if (dError) (*dError)=(errOrigin-finalError)/finalError;
957 Int_t nSynapses = fSynapses->GetEntriesFast();
959 for (
Int_t i=0;i<nSynapses;i++) {
961 synapse->
SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
964 if (fUseRegulator) UpdatePriors();
973 UInt_t ntgts = GetNTargets();
977 const Event* ev = GetEvent(i);
979 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
986 if (DoRegression()) {
987 for (
UInt_t itgt = 0; itgt < ntgts; itgt++) {
988 error += GetMSEErr( ev, itgt );
990 }
else if ( DoMulticlass() ){
991 for(
UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
992 error += GetMSEErr( ev, icls );
995 if (fEstimator==kMSE) error = GetMSEErr( ev );
996 else if (fEstimator==kCE) error= GetCEErr( ev );
1000 if (fUseRegulator) Result+=fPrior;
1001 if (Result<0)
Log()<<kWARNING<<
"\nNegative Error!!! :"<<Result-fPrior<<
"+"<<fPrior<<
Endl;
1010 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1012 if (DoRegression()) target = ev->
GetTarget( index );
1013 else if (DoMulticlass()) target = (ev->
GetClass() == index ? 1.0 : 0.0 );
1014 else target = GetDesiredOutput( ev );
1016 error = 0.5*(output-target)*(output-target);
1027 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1029 if (DoRegression()) target = ev->
GetTarget( index );
1030 else if (DoMulticlass()) target = (ev->
GetClass() == index ? 1.0 : 0.0 );
1031 else target = GetDesiredOutput( ev );
1051 fEstimatorHistTrain =
new TH1F(
"estimatorHistTrain",
"training estimator",
1052 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
1053 fEstimatorHistTest =
new TH1F(
"estimatorHistTest",
"test estimator",
1054 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
1056 if(fSamplingTraining || fSamplingTesting)
1057 Data()->InitSampling(1.0,1.0,fRandomSeed);
1059 if (fSteps > 0)
Log() << kINFO <<
"Inaccurate progress timing for MLP... " <<
Endl;
1067 for (
Int_t i = 0; i < nEpochs; i++) {
1069 if (fExitFromTraining)
break;
1070 fIPyCurrentIter = i;
1071 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
1072 if ((i+1)%fTestRate == 0 || (i == 0)) {
1073 if (fSamplingTraining) {
1075 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1076 Data()->CreateSampling();
1078 if (fSamplingTesting) {
1080 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1081 Data()->CreateSampling();
1087 Data()->InitSampling(1.0,1.0);
1089 Data()->InitSampling(1.0,1.0);
1094 DecaySynapseWeights(i >= lateEpoch);
1097 if ((i+1)%fTestRate == 0) {
1100 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1103 fEstimatorHistTrain->Fill( i+1, trainE );
1104 fEstimatorHistTest ->Fill( i+1, testE );
1107 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1
e-100)) {
1110 Data()->EventResult( success );
1112 SetCurrentValue( testE );
1113 if (HasConverged()) {
1114 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
1115 Int_t newEpoch =
Int_t(fSamplingEpoch*nEpochs);
1117 ResetConvergenceCounter();
1120 if (lateEpoch > i) lateEpoch = i;
1127 TString convText =
Form(
"<D^2> (train/test): %.4g/%.4g", trainE, testE );
1130 if (
Float_t(i)/nEpochs < fSamplingEpoch)
1131 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1133 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1153 Shuffle(index, nEvents);
1158 const Event * ev = GetEvent(index[i]);
1159 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1164 TrainOneEvent(index[i]);
1167 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1168 AdjustSynapseWeights();
1169 if (fgPRINT_BATCH) {
1198 for (
Int_t i = 0; i <
n; i++) {
1199 j = (
Int_t) (frgen->Rndm() *
a);
1202 index[j] = index[i];
1215 Int_t numSynapses = fSynapses->GetEntriesFast();
1216 for (
Int_t i = 0; i < numSynapses; i++) {
1217 synapse = (
TSynapse*)fSynapses->At(i);
1240 if (type == 0) desired = fOutput->GetMin();
1241 else desired = fOutput->GetMax();
1247 for (
UInt_t j = 0; j < GetNvar(); j++) {
1250 neuron = GetInputNeuron(j);
1254 ForceNetworkCalculations();
1255 UpdateNetwork(desired, eventWeight);
1269 const Event * ev = GetEvent(ievt);
1271 ForceNetworkInputs( ev );
1272 ForceNetworkCalculations();
1273 if (DoRegression()) UpdateNetwork( ev->
GetTargets(), eventWeight );
1274 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1275 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1283 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin();
1292 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1293 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ;
1294 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired);
1295 else Log() << kFATAL <<
"Estimator type unspecified!!" <<
Endl;
1296 error *= eventWeight;
1297 GetOutputNeuron()->SetError(error);
1298 CalculateNeuronDeltas();
1310 for (
UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1311 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1316 for (
UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1317 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1319 Double_t error = output - desired.at(i);
1320 error *= eventWeight;
1321 GetOutputNeuron(i)->SetError(error);
1325 CalculateNeuronDeltas();
1336 Int_t numLayers = fNetwork->GetEntriesFast();
1341 for (
Int_t i = numLayers-1; i >= 0; i--) {
1345 for (
Int_t j = 0; j < numNeurons; j++) {
1362 PrintMessage(
"Minimizing Estimator with GA");
1368 fGA_SC_factor = 0.95;
1372 std::vector<Interval*> ranges;
1374 Int_t numWeights = fSynapses->GetEntriesFast();
1375 for (
Int_t ivar=0; ivar< numWeights; ivar++) {
1376 ranges.push_back(
new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1382 Double_t estimator = CalculateEstimator();
1383 Log() << kINFO <<
"GA: estimator after optimization: " << estimator <<
Endl;
1391 return ComputeEstimator( parameters );
1400 Int_t numSynapses = fSynapses->GetEntriesFast();
1402 for (
Int_t i = 0; i < numSynapses; i++) {
1403 synapse = (
TSynapse*)fSynapses->At(i);
1406 if (fUseRegulator) UpdatePriors();
1408 Double_t estimator = CalculateEstimator();
1423 for (
Int_t i = 0; i < numLayers; i++) {
1427 for (
Int_t j = 0; j < numNeurons; j++) {
1445 for (
Int_t i = numLayers-1; i >= 0; i--) {
1449 for (
Int_t j = 0; j < numNeurons; j++) {
1462 Int_t nSynapses = fSynapses->GetEntriesFast();
1463 for (
Int_t i=0;i<nSynapses;i++) {
1465 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->
GetWeight())*(synapse->
GetWeight());
1466 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->
GetWeight()));
1475 GetApproxInvHessian(InvH);
1476 Int_t numSynapses=fSynapses->GetEntriesFast();
1477 Int_t numRegulators=fRegulators.size();
1480 std::vector<Int_t> nWDP(numRegulators);
1481 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1482 for (
int i=0;i<numSynapses;i++) {
1484 Int_t idx=fRegulatorIdx[i];
1486 trace[idx]+=InvH[i][i];
1487 gamma+=1-fRegulators[idx]*InvH[i][i];
1490 if (fEstimator==kMSE) {
1491 if (GetNEvents()>
gamma) variance=CalculateEstimator(
Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1496 for (
int i=0;i<numRegulators;i++)
1499 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1500 if (fRegulators[i]<0) fRegulators[i]=0;
1501 Log()<<kDEBUG<<
"R"<<i<<
":"<<fRegulators[i]<<
"\t";
1506 Log()<<kDEBUG<<
"\n"<<
"trainE:"<<trainE<<
"\ttestE:"<<testE<<
"\tvariance:"<<variance<<
"\tgamma:"<<gamma<<
Endl;
1514 Int_t numSynapses=fSynapses->GetEntriesFast();
1515 InvHessian.
ResizeTo( numSynapses, numSynapses );
1522 double outputValue=GetMvaValue();
1523 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1524 CalculateNeuronDeltas();
1525 for (
Int_t j = 0; j < numSynapses; j++){
1529 sens[j][0]=sensT[0][j]=synapses->
GetDelta();
1531 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1532 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1537 for (
Int_t i = 0; i < numSynapses; i++){
1538 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1542 for (
Int_t i = 0; i < numSynapses; i++){
1543 InvHessian[i][i]+=1
e-6;
1558 if (!fCalculateErrors || errLower==0 || errUpper==0)
1561 Double_t MvaUpper,MvaLower,median,variance;
1562 Int_t numSynapses=fSynapses->GetEntriesFast();
1563 if (fInvHessian.GetNcols()!=numSynapses) {
1564 Log() << kWARNING <<
"inconsistent dimension " << fInvHessian.GetNcols() <<
" vs " << numSynapses <<
Endl;
1568 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1570 CalculateNeuronDeltas();
1571 for (
Int_t i = 0; i < numSynapses; i++){
1578 TMatrixD sig=sensT*fInvHessian*sens;
1580 median=GetOutputNeuron()->GetValue();
1583 Log()<<kWARNING<<
"Negative variance!!! median=" << median <<
"\tvariance(sigma^2)=" << variance <<
Endl;
1586 variance=
sqrt(variance);
1589 MvaUpper=fOutput->Eval(median+variance);
1591 *errUpper=MvaUpper-MvaValue;
1594 MvaLower=fOutput->Eval(median-variance);
1596 *errLower=MvaValue-MvaLower;
1602 #ifdef MethodMLP_UseMinuit__
1607 void TMVA::MethodMLP::MinuitMinimize()
1609 fNumberOfWeights = fSynapses->GetEntriesFast();
1624 for (
Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1627 parName, w[ipar], 0.1, 0, 0 );
1631 tfitter->
SetFCN( &IFCN );
1670 ((MethodMLP*)GetThisPtr())->
FCN( npars, grad, f, fitPars, iflag );
1673 TTHREAD_TLS(
Int_t) nc = 0;
1674 TTHREAD_TLS(
double) minf = 1000000;
1679 for (
Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1685 f = CalculateEstimator();
1688 if (f < minf) minf =
f;
1689 for (
Int_t ipar=0; ipar<fNumberOfWeights; ipar++)
Log() << kDEBUG << fitPars[ipar] <<
" ";
1691 Log() << kDEBUG <<
"***** New estimator: " << f <<
" min: " << minf <<
" --> ncalls: " << nc <<
Endl;
1725 Log() << col <<
"--- Short description:" << colres <<
Endl;
1727 Log() <<
"The MLP artificial neural network (ANN) is a traditional feed-" <<
Endl;
1728 Log() <<
"forward multilayer perceptron implementation. The MLP has a user-" <<
Endl;
1729 Log() <<
"defined hidden layer architecture, while the number of input (output)" <<
Endl;
1730 Log() <<
"nodes is determined by the input variables (output classes, i.e., " <<
Endl;
1731 Log() <<
"signal and one background). " <<
Endl;
1733 Log() << col <<
"--- Performance optimisation:" << colres <<
Endl;
1735 Log() <<
"Neural networks are stable and performing for a large variety of " <<
Endl;
1736 Log() <<
"linear and non-linear classification problems. However, in contrast" <<
Endl;
1737 Log() <<
"to (e.g.) boosted decision trees, the user is advised to reduce the " <<
Endl;
1738 Log() <<
"number of input variables that have only little discrimination power. " <<
Endl;
1740 Log() <<
"In the tests we have carried out so far, the MLP and ROOT networks" <<
Endl;
1741 Log() <<
"(TMlpANN, interfaced via TMVA) performed equally well, with however" <<
Endl;
1742 Log() <<
"a clear speed advantage for the MLP. The Clermont-Ferrand neural " <<
Endl;
1743 Log() <<
"net (CFMlpANN) exhibited worse classification performance in these" <<
Endl;
1744 Log() <<
"tests, which is partly due to the slow convergence of its training" <<
Endl;
1745 Log() <<
"(at least 10k training cycles are required to achieve approximately" <<
Endl;
1746 Log() <<
"competitive results)." <<
Endl;
1748 Log() << col <<
"Overtraining: " << colres
1749 <<
"only the TMlpANN performs an explicit separation of the" <<
Endl;
1750 Log() <<
"full training sample into independent training and validation samples." <<
Endl;
1751 Log() <<
"We have found that in most high-energy physics applications the " <<
Endl;
1752 Log() <<
"available degrees of freedom (training events) are sufficient to " <<
Endl;
1753 Log() <<
"constrain the weights of the relatively simple architectures required" <<
Endl;
1754 Log() <<
"to achieve good performance. Hence no overtraining should occur, and " <<
Endl;
1755 Log() <<
"the use of validation samples would only reduce the available training" <<
Endl;
1756 Log() <<
"information. However, if the performance on the training sample is " <<
Endl;
1757 Log() <<
"found to be significantly better than the one found with the inde-" <<
Endl;
1758 Log() <<
"pendent test sample, caution is needed. The results for these samples " <<
Endl;
1759 Log() <<
"are printed to standard output at the end of each training job." <<
Endl;
1761 Log() << col <<
"--- Performance tuning via configuration options:" << colres <<
Endl;
1763 Log() <<
"The hidden layer architecture for all ANNs is defined by the option" <<
Endl;
1764 Log() <<
"\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" <<
Endl;
1765 Log() <<
"neurons and the second N neurons (and so on), and where N is the number " <<
Endl;
1766 Log() <<
"of input variables. Excessive numbers of hidden layers should be avoided," <<
Endl;
1767 Log() <<
"in favour of more neurons in the first hidden layer." <<
Endl;
1769 Log() <<
"The number of cycles should be above 500. As said, if the number of" <<
Endl;
1770 Log() <<
"adjustable weights is small compared to the training sample size," <<
Endl;
1771 Log() <<
"using a large number of training samples should not lead to overtraining." <<
Endl;
virtual TMatrixTBase< Element > & UnitMatrix()
Make a unit matrix (matrix need not be a square one).
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
std::string GetName(const std::string &scope_name)
void GetHelpMessage() const
get help message text
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
MsgLogger & Endl(MsgLogger &ml)
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Singleton class for Global types used by TMVA.
virtual ~MethodMLP()
destructor nothing to be done
Base class for TMVA fitters.
void Init()
default initializations
void DecayLearningRate(Double_t rate)
void SetDEDw(Double_t DEDw)
Synapse class used by TMVA artificial neural network methods.
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
void ForceValue(Double_t value)
force the value, typically for input and bias neurons
void TrainOneEpoch()
train network over a single epoch/cycle of events
void SteepestDir(TMatrixD &Dir)
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
virtual Int_t ExecuteCommand(const char *command, Double_t *args, Int_t nargs)
Execute a fitter command; command : command string args : list of nargs command arguments.
void DeclareOptions()
define the options (their key words) that can be set in the option string
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Int_t GetEntriesFast() const
void UpdateSynapsesSequential()
update the pre-synapses for each neuron (input neuron has no pre-synapse) this method should only be ...
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
const char * Data() const
void Shuffle(Int_t *index, Int_t n)
Input:
void CalculateDelta()
calculate error field
Double_t Run()
estimator function interface for fitting
Neuron class used by TMVA artificial neural network methods.
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
std::vector< std::vector< double > > Data
virtual void ProcessOptions()
do nothing specific at this moment
Class that contains all the data information.
void TrainOneEvent(Int_t ievt)
train network over a single event this uses the new event model
Multilayer Perceptron class built off of MethodANNBase.
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
void AdjustSynapseWeights()
adjust the pre-synapses' weights for each neuron (input neuron has no pre-synapse) this method should...
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
virtual void SetFCN(void(*fcn)(Int_t &, Double_t *, Double_t &f, Double_t *, Int_t))
Specify the address of the fitting algorithm.
std::vector< Float_t > & GetTargets()
Double_t GetCEErr(const Event *ev, UInt_t index=0)
void GeneticMinimize()
create genetics class similar to GeneticCut give it vector of parameter ranges (parameters = weights)...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
TMatrixT< Double_t > TMatrixD
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum ...
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
void SetLearningRate(Double_t rate)
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
void SetWeight(Double_t weight)
set synapse weight
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
The TMVA::Interval Class.
void UpdateSynapsesBatch()
update and adjust the pre-synapses for each neuron (input neuron has no pre-synapse) this method shou...
char * Form(const char *fmt,...)
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=0)
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
void(* FCN)(Int_t &npar, Double_t *gin, Double_t &f, Double_t *u, Int_t flag)
void SetWeight(Double_t w)
Sets the weight of the synapse.
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
void SimulateEvent(const Event *ev)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
MLP can handle classification with 2 classes and regression with one regression-target.
Float_t GetTarget(UInt_t itgt) const
#define REGISTER_METHOD(CLASS)
for example
void ProcessOptions()
process user options
void CalculateDelta()
calculate/adjust the error field for this synapse
Double_t EstimatorFunction(std::vector< Double_t > ¶meters)
interface to the estimate
Double_t ComputeEstimator(std::vector< Double_t > ¶meters)
this function is called by GeneticANN for GA optimization
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Bool_t WriteOptionsReference() const
Double_t Sqrt(Double_t x)
TObject * At(Int_t idx) const
Double_t DerivDir(TMatrixD &Dir)
double norm(double *x, double *p)
Base class for all TMVA methods using artificial neural networks.
Fitter using a Genetic Algorithm.
virtual void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Timing information for training and evaluation of MVA methods.
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
virtual Int_t SetParameter(Int_t ipar, const char *parname, Double_t value, Double_t verr, Double_t vlow, Double_t vhigh)
set initial values for a parameter ipar : parameter number parname : parameter name value : initial p...