48 template<
typename Architecture_t,
typename Layer_t = TLayer<Architecture_t>>
52 using Matrix_t =
typename Architecture_t::Matrix_t;
53 using Scalar_t =
typename Architecture_t::Scalar_t;
70 template<
typename OtherArchitecture_t>
74 TNet(
size_t batchSize,
91 template <
typename SharedLayer>
158 template<
typename Architecture_t,
typename Layer_t>
160 : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
168 template<
typename Architecture_t,
typename Layer_t>
170 : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
171 fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
172 fWeightDecay(other.fWeightDecay)
178 template<
typename Architecture_t,
typename Layer_t>
179 template<
typename OtherArchitecture_t>
182 : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
183 fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
184 fWeightDecay(other.GetWeightDecay())
187 for (
size_t i = 0; i < other.
GetDepth(); i++) {
189 other.
GetLayer(i).GetActivationFunction(),
190 other.
GetLayer(i).GetDropoutProbability());
197 template<
typename Architecture_t,
typename Layer_t>
203 : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
204 fJ(J), fR(R), fWeightDecay(weightDecay)
210 template<
typename Architecture_t,
typename Layer_t>
216 for (
auto &
l : fLayers) {
223 template<
typename Architecture_t,
typename Layer_t>
228 if (fLayers.size() == 0) {
229 fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
231 size_t prevWidth = fLayers.back().GetWidth();
232 fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
237 template<
typename Architecture_t,
typename Layer_t>
244 template<
typename Architecture_t,
typename Layer_t>
245 template<
typename SharedLayer_t>
248 fLayers.emplace_back(fBatchSize, layer);
252 template<
typename Architecture_t,
typename Layer_t>
255 for (
auto &
l : fLayers) {
261 template<
typename Architecture_t,
typename Layer_t>
264 for (
auto &
l : fLayers) {
271 template<
typename Architecture_t,
typename Layer_t>
275 fLayers.front().Forward(input, applyDropout);
277 for (
size_t i = 1; i < fLayers.size(); i++) {
278 fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
283 template<
typename Architecture_t,
typename Layer_t>
288 evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(),
289 fJ,
Y, fLayers.back().GetOutput());
291 for (
size_t i = fLayers.size()-1; i > 0; i--) {
292 auto & activation_gradient_backward
293 = fLayers[i-1].GetActivationGradients();
294 auto & activations_backward
295 = fLayers[i-1].GetOutput();
296 fLayers[i].Backward(activation_gradient_backward,
297 activations_backward, fR, fWeightDecay);
299 fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
304 template<
typename Architecture_t,
typename Layer_t>
306 bool includeRegularization)
const
309 auto loss = evaluate<Architecture_t>(fJ,
Y, fLayers.back().GetOutput());
311 if (includeRegularization) {
312 for (
auto &
l : fLayers) {
313 loss += fWeightDecay * regularization<Architecture_t>(
l.GetWeights(), fR);
320 template<
typename Architecture_t,
typename Layer_t>
326 Forward(
X, applyDropout);
331 template<
typename Architecture_t,
typename Layer_t>
337 evaluate<Architecture_t>(Yhat,
f, fLayers.back().GetOutput());
341 template<
typename Architecture_t,
typename Layer_t>
345 evaluate<Architecture_t>(Y_hat,
f, fLayers.back().GetOutput());
349 template<
typename Architecture_t,
typename Layer_t>
358 for(
size_t i = 0; i < fLayers.size(); i++) {
359 Layer_t & layer = fLayers[i];
363 flops += nb * nl * (2.0 * nlp - 1);
365 flops += 2 * nb * nl;
369 flops += nlp * nl * (2.0 * nb - 1.0);
370 flops += nl * (nb - 1);
372 flops += nlp * nb * (2.0 * nl - 1.0);
380 template<
typename Architecture_t,
typename Layer_t>
382 const std::vector<Double_t> & probabilities)
384 for (
size_t i = 0; i < fLayers.size(); i++) {
385 if (i < probabilities.size()) {
386 fLayers[i].SetDropoutProbability(probabilities[i]);
388 fLayers[i].SetDropoutProbability(1.0);
394 template<
typename Architecture_t,
typename Layer_t>
397 std::cout <<
"DEEP NEURAL NETWORK:";
398 std::cout <<
" Loss function = " <<
static_cast<char>(fJ);
399 std::cout <<
", Depth = " << fLayers.size() << std::endl;
402 for (
auto &
l : fLayers) {
403 std::cout <<
"DNN Layer " << i <<
":" << std::endl;
size_t GetOutputWidth() const
virtual void Print(Option_t *option="") const
Dump this line with its attributes.
Matrix_t fDummy
Empty matrix for last step in back propagation.
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
const Layer_t & GetLayer(size_t i) const
size_t GetBatchSize() const
LayerIterator_t LayersBegin()
Iterator to the first layer of the net.
std::vector< Layer_t > fLayers
Layers in the network.
Scalar_t Loss(const Matrix_t &Y, bool includeRegularization=true) const
Evaluate the loss function of the net using the activations that are currently stored in the output layer.
void Backward(const Matrix_t &X, const Matrix_t &Y)
Compute the weight gradients in the net from the given training samples X and training labels Y...
ELossFunction GetLossFunction() const
void SetWeightDecay(Scalar_t weightDecay)
ELossFunction fJ
The loss function of the network.
void AddLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Add a layer of the given size to the neural net.
size_t fInputWidth
Number of features in a single input event.
void Initialize(EInitialization m)
Initialize the weights in the net with the initialization method.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
typename std::vector< TLayer< Architecture_t > >::iterator LayerIterator_t
Generic neural network class.
void Forward(Matrix_t &X, bool applyDropout=false)
Forward a given input through the neural net.
size_t fBatchSize
Batch size for training and evaluation of the Network.
Scalar_t fWeightDecay
The weight decay factor.
LayerIterator_t LayersEnd()
Iterator to the last layer of the net.
typename Architecture_t::Matrix_t Matrix_t
void SetRegularization(ERegularization R)
ERegularization GetRegularization() const
void Clear()
Remove all layers from the network.
size_t GetInputWidth() const
TNet< Architecture_t, TSharedLayer< Architecture_t > > CreateClone(size_t batchSize)
Create a clone that uses the same weight and bias matrices but potentially a different batch size.
typename Architecture_t::Scalar_t Scalar_t
ERegularization fR
The regularization used for the network.
EOutputFunction
Enum that represents output functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
Scalar_t GetWeightDecay() const
void InitializeGradients()
Initialize the gradients in the net to zero.
void SetLossFunction(ELossFunction J)
void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f)
Compute the neural network prediction obtained from forwarding the batch X through the neural network.
void SetInputWidth(size_t inputWidth)
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
Layer_t & GetLayer(size_t i)
void SetBatchSize(size_t batchSize)