Logo ROOT   6.10/00
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
Namespaces
tdf007_snapshot.py File Reference

Namespaces

 tdf007_snapshot
 

Detailed Description

This tutorial shows how to write out datasets in ROOT format using the TDataFrame.

1 
2 import ROOT
3 
4 fill_tree_code ='''
5 void fill_tree(const char *filename, const char *treeName)
6 {
7  TFile f(filename, "RECREATE");
8  TTree t(treeName, treeName);
9  int b1;
10  float b2;
11  t.Branch("b1", &b1);
12  t.Branch("b2", &b2);
13  for (int i = 0; i < 10000; ++i) {
14  b1 = i;
15  b2 = i * i;
16  t.Fill();
17  }
18  t.Write();
19  f.Close();
20  return;
21 }
22 '''
23 
24 # We prepare an input tree to run on
25 fileName = "tdf007_snapshot_py.root"
26 outFileName = "tdf007_snapshot_output_py.root"
27 outFileNameAllColumns = "tdf007_snapshot_output_allColumns_py.root"
28 treeName = "myTree"
29 ROOT.gInterpreter.Declare(fill_tree_code)
30 ROOT.fill_tree(fileName, treeName)
31 
32 # We read the tree from the file and create a TDataFrame.
33 TDF = ROOT.ROOT.Experimental.TDataFrame
34 d = TDF(treeName, fileName)
35 
36 # ## Select entries
37 # We now select some entries in the dataset
38 d_cut = d.Filter("b1 % 2 == 0")
39 # ## Enrich the dataset
40 # Build some temporary columns: we'll write them out
41 
42 getVector_code ='''
43 std::vector<float> getVector (float b2)
44 {
45  std::vector<float> v;
46  for (int i = 0; i < 3; i++) v.push_back(b2*i);
47  return v;
48 }
49 '''
50 ROOT.gInterpreter.Declare(getVector_code)
51 
52 d2 = d_cut.Define("b1_square", "b1 * b1") \
53  .Define("b2_vector", "getVector( b2 )")
54 
55 # ## Write it to disk in ROOT format
56 # We now write to disk a new dataset with one of the variables originally
57 # present in the tree and the new variables.
58 # The user can explicitly specify the types of the columns as template
59 # arguments of the Snapshot method, otherwise they will be automatically
60 # inferred.
61 branchList = ROOT.vector('string')()
62 for branchName in ["b1", "b1_square", "b2_vector"]:
63  branchList.push_back(branchName)
64 d2.Snapshot(treeName, outFileName, branchList)
65 
66 # Open the new file and list the columns of the tree
67 f1 = ROOT.TFile(outFileName)
68 t = f1.myTree
69 print("These are the columns b1, b1_square and b2_vector:")
70 for branch in t.GetListOfBranches():
71  print("Branch: %s" %branch.GetName())
72 
73 f1.Close()
74 
75 # We are not forced to write the full set of column names. We can also
76 # specify a regular expression for that. In case nothing is specified, all
77 # columns are persistified.
78 d2.Snapshot(treeName, outFileNameAllColumns)
79 
80 # Open the new file and list the columns of the tree
81 f2 = ROOT.TFile(outFileNameAllColumns)
82 t = f2.myTree
83 print("These are all the columns available to this tdf:")
84 for branch in t.GetListOfBranches():
85  print("Branch: %s" %branch.GetName())
86 
87 f2.Close()
88 
89 # We can also get a fresh TDataFrame out of the snapshot and restart the
90 # analysis chain from it.
91 
92 branchList.clear()
93 branchList.push_back("b1_square")
94 snapshot_tdf = d2.Snapshot(treeName, outFileName, branchList);
95 h = snapshot_tdf.Histo1D("b1_square")
96 c = ROOT.TCanvas()
97 h.Draw()
98 
Date
April 2017
Author
Danilo Piparo

Definition in file tdf007_snapshot.py.