Skip to content

Commit 93b8e09

Browse files
committed
A tool to combine multiple TTrees into one (union of branches)
Use cases: - store TPC clusters (per sector) in separate files and later provide a single file that TPC tracking expects.
1 parent 5bc2219 commit 93b8e09

2 files changed

Lines changed: 162 additions & 0 deletions

File tree

Common/Utils/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,8 @@ o2_add_test(MemFileHelper
6767
LABELS utils
6868
SOURCES test/testMemFileHelper.cxx
6969
PUBLIC_LINK_LIBRARIES O2::CommonUtils)
70+
71+
o2_add_executable(treemergertool
72+
COMPONENT_NAME CommonUtils
73+
SOURCES src/TreeMergerTool.cxx
74+
PUBLIC_LINK_LIBRARIES O2::CommonUtils Boost::program_options ROOT::Core)
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
11+
// A utility to produce a global merged TTree
12+
// from multiple TTrees (each containing a subset of the branches).
13+
// A typical example is TPC clusterization/digitization: Clusters per TPC
14+
// sector may sit in different files and we want to produce an aggregate TTree
15+
// for further processing. The utility offers options to use TFriends or to make
16+
// a deep copy.
17+
18+
#include <TTree.h>
19+
#include <TFile.h>
20+
#include <ROOT/RDataFrame.hxx>
21+
#include <boost/program_options.hpp>
22+
#include <set>
23+
#include <vector>
24+
#include <iostream>
25+
26+
// Settings of the tool as filled from the command line.
struct Options {
  // all input files to be merged
  std::vector<std::string> infilenames{};
  // name of the tree (assumed to be the same in all input files)
  std::string treename{};
  // file to be created with the merged tree
  std::string outfilename{};
  // whether merging is done using the friend mechanism; off unless asked for
  bool asfriend{false};
};
32+
33+
bool parseOptions(int argc, char* argv[], Options& optvalues)
34+
{
35+
namespace bpo = boost::program_options;
36+
bpo::options_description options(
37+
"A tool to create a single TTree from a list of TTrees (each in its own file).\nMerging is "
38+
"done vertically - over branches - instead over entries (like in a TChain).\nIt corresponds to the TFriend mechanism but makes a deep copy\n"
39+
"(unless the friend is asked).\n\n"
40+
"Allowed options");
41+
42+
options.add_options()(
43+
"infiles,i", bpo::value<std::vector<std::string>>(&optvalues.infilenames)->multitoken(), "All input files to be merged")(
44+
"treename,t", bpo::value<std::string>(&optvalues.treename), "Name of tree (assumed same in all files).")(
45+
"outfile,o", bpo::value<std::string>(&optvalues.outfilename)->default_value(""), "Outfile to be created with merged tree.")(
46+
"asfriend", "If merging is done using the friend mechanism.");
47+
options.add_options()("help,h", "Produce help message.");
48+
49+
bpo::variables_map vm;
50+
try {
51+
bpo::store(bpo::command_line_parser(argc, argv).options(options).run(), vm);
52+
bpo::notify(vm);
53+
54+
// help
55+
if (vm.count("help")) {
56+
std::cout << options << std::endl;
57+
return false;
58+
}
59+
if (vm.count("asfriend")) {
60+
optvalues.asfriend = true;
61+
}
62+
63+
} catch (const bpo::error& e) {
64+
std::cerr << e.what() << "\n\n";
65+
std::cerr << "Error parsing options; Available options:\n";
66+
std::cerr << options << std::endl;
67+
return false;
68+
}
69+
return true;
70+
}
71+
72+
// Checks if all given files have a TTree of this name
73+
// and if all entries are the same
74+
// TODO: add more checks such as for non-overlapping branch names etc.
75+
bool checkFiles(std::vector<std::string> const& filenames, std::string const& treename)
76+
{
77+
bool ok = true;
78+
int entries = -1;
79+
for (auto& f : filenames) {
80+
TFile _tmpfile(f.c_str(), "OPEN");
81+
auto tree = (TTree*)_tmpfile.Get(treename.c_str());
82+
if (tree == nullptr) {
83+
ok = false;
84+
std::cerr << "File " << f << " doesn't have a tree of name " << treename;
85+
} else {
86+
if (entries == -1) {
87+
entries = tree->GetEntries();
88+
} else {
89+
if (entries != tree->GetEntries()) {
90+
std::cerr << "Trees have inconsistent number of entries ";
91+
ok = false;
92+
}
93+
}
94+
}
95+
}
96+
return ok;
97+
}
98+
99+
// Creates the merged tree described by `options`.
//
// - options.asfriend == true: writes a new, empty tree of name options.treename
//   into options.outfilename and registers the tree of every input file as a
//   friend of it (no data is copied).
// - otherwise: takes the tree of the first input file, adds the trees of the
//   remaining files as friends and writes a deep copy of the combination into
//   options.outfilename via RDataFrame::Snapshot.
//
// Problems (no input files, output file cannot be created, first input tree
// cannot be read) are reported on stderr and the function returns without
// merging.
void merge(Options const& options)
{
  if (options.infilenames.empty()) {
    std::cerr << "No input files given; nothing to merge\n";
    return;
  }
  const char* treename = options.treename.c_str();

  if (options.asfriend) {
    // open the output file
    auto newfile = TFile::Open(options.outfilename.c_str(), "RECREATE");
    if (newfile == nullptr || newfile->IsZombie()) {
      std::cerr << "Could not create output file " << options.outfilename << "\n";
      delete newfile;
      return;
    }
    // NOTE: per ROOT's ownership rules the new tree is attached to the current
    // directory (the file just opened), which deletes it when the file is
    // closed - hence no delete for the tree
    auto newtree = new TTree(treename, "");
    // add all inputs as friends
    for (auto const& infile : options.infilenames) {
      newtree->AddFriend(treename, infile.c_str());
    }
    newfile->Write();
    newfile->Close();
    // TFile::Open hands over ownership of the file object
    delete newfile;

    // P. Canal suggests that this can be done in the following way to fix the branch names
    // in the merged file and to keep only the final file:
    //auto mainfile = TFile::Open(firsttreefilename, "UPDATE");
    //auto friendfile = TFile::Open(secondtreefilename, "READ");
    //auto friendtree = friendfile->Get<TTree>(secondtreename);
    //mainfile->cd();
    //auto friendcopy = friendtree->CloneTree(-1, "fast");
    //auto maintree = mainfile->Get<TTree>(firsttreename);
    //maintree->AddFriend(friendcopy);
    //mainfile->Write();
  } else {
    // NOTE: This is functional but potentially slow solution.
    // We should adapt this function as soon as more performant
    // ways are known.
    // See also: https://root-forum.cern.ch/t/make-a-new-ttree-from-a-deep-vertical-union-of-existing-ttrees/44250

    // open the first Tree
    TFile firstfile(options.infilenames[0].c_str(), "READ");
    auto t1 = dynamic_cast<TTree*>(firstfile.Get(treename));
    if (t1 == nullptr) {
      std::cerr << "File " << options.infilenames[0] << " doesn't have a tree of name " << options.treename << "\n";
      return;
    }

    // add remaining stuff as friend
    for (std::size_t i = 1; i < options.infilenames.size(); ++i) {
      t1->AddFriend(treename, options.infilenames[i].c_str());
    }
    // deep copy of the first tree together with its friends
    ROOT::RDataFrame df(*t1);
    df.Snapshot(options.treename, options.outfilename);
  }
}
140+
141+
int main(int argc, char* argv[])
142+
{
143+
Options optvalues;
144+
if (!parseOptions(argc, argv, optvalues)) {
145+
return 0;
146+
}
147+
148+
auto ok = checkFiles(optvalues.infilenames, optvalues.treename);
149+
if (!ok) {
150+
return 1;
151+
}
152+
153+
// merge files
154+
merge(optvalues);
155+
156+
return 0;
157+
}

0 commit comments

Comments
 (0)