|
| 1 | +// Copyright CERN and copyright holders of ALICE O2. This software is |
| 2 | +// distributed under the terms of the GNU General Public License v3 (GPL |
| 3 | +// Version 3), copied verbatim in the file "COPYING". |
| 4 | +// |
| 5 | +// See http://alice-o2.web.cern.ch/license for full licensing information. |
| 6 | +// |
| 7 | +// In applying this license CERN does not waive the privileges and immunities |
| 8 | +// granted to it by virtue of its status as an Intergovernmental Organization |
| 9 | +// or submit itself to any jurisdiction. |
| 10 | + |
// A utility to produce a single merged TTree
// from multiple TTrees (each containing a subset of the branches).
// A typical example is TPC clusterization/digitization: clusters per TPC
// sector may sit in different files and we want to produce an aggregate TTree
// for further processing. The utility offers the option to either use the
// TTree friend mechanism or to make a deep copy.
| 17 | + |
| 18 | +#include <TTree.h> |
| 19 | +#include <TFile.h> |
| 20 | +#include <ROOT/RDataFrame.hxx> |
| 21 | +#include <boost/program_options.hpp> |
| 22 | +#include <set> |
| 23 | +#include <vector> |
| 24 | +#include <iostream> |
| 25 | + |
// Command-line settings: filled by parseOptions and read by checkFiles/merge.
struct Options {
  // paths of all input files to be merged
  std::vector<std::string> infilenames;
  // name of the tree, assumed to be the same in every input file
  std::string treename;
  // path of the output file to be created
  std::string outfilename;
  // true when merging should use the friend mechanism
  bool asfriend{false};
};
| 32 | + |
| 33 | +bool parseOptions(int argc, char* argv[], Options& optvalues) |
| 34 | +{ |
| 35 | + namespace bpo = boost::program_options; |
| 36 | + bpo::options_description options( |
| 37 | + "A tool to create a single TTree from a list of TTrees (each in its own file).\nMerging is " |
| 38 | + "done vertically - over branches - instead over entries (like in a TChain).\nIt corresponds to the TFriend mechanism but makes a deep copy\n" |
| 39 | + "(unless the friend is asked).\n\n" |
| 40 | + "Allowed options"); |
| 41 | + |
| 42 | + options.add_options()( |
| 43 | + "infiles,i", bpo::value<std::vector<std::string>>(&optvalues.infilenames)->multitoken(), "All input files to be merged")( |
| 44 | + "treename,t", bpo::value<std::string>(&optvalues.treename), "Name of tree (assumed same in all files).")( |
| 45 | + "outfile,o", bpo::value<std::string>(&optvalues.outfilename)->default_value(""), "Outfile to be created with merged tree.")( |
| 46 | + "asfriend", "If merging is done using the friend mechanism."); |
| 47 | + options.add_options()("help,h", "Produce help message."); |
| 48 | + |
| 49 | + bpo::variables_map vm; |
| 50 | + try { |
| 51 | + bpo::store(bpo::command_line_parser(argc, argv).options(options).run(), vm); |
| 52 | + bpo::notify(vm); |
| 53 | + |
| 54 | + // help |
| 55 | + if (vm.count("help")) { |
| 56 | + std::cout << options << std::endl; |
| 57 | + return false; |
| 58 | + } |
| 59 | + if (vm.count("asfriend")) { |
| 60 | + optvalues.asfriend = true; |
| 61 | + } |
| 62 | + |
| 63 | + } catch (const bpo::error& e) { |
| 64 | + std::cerr << e.what() << "\n\n"; |
| 65 | + std::cerr << "Error parsing options; Available options:\n"; |
| 66 | + std::cerr << options << std::endl; |
| 67 | + return false; |
| 68 | + } |
| 69 | + return true; |
| 70 | +} |
| 71 | + |
| 72 | +// Checks if all given files have a TTree of this name |
| 73 | +// and if all entries are the same |
| 74 | +// TODO: add more checks such as for non-overlapping branch names etc. |
| 75 | +bool checkFiles(std::vector<std::string> const& filenames, std::string const& treename) |
| 76 | +{ |
| 77 | + bool ok = true; |
| 78 | + int entries = -1; |
| 79 | + for (auto& f : filenames) { |
| 80 | + TFile _tmpfile(f.c_str(), "OPEN"); |
| 81 | + auto tree = (TTree*)_tmpfile.Get(treename.c_str()); |
| 82 | + if (tree == nullptr) { |
| 83 | + ok = false; |
| 84 | + std::cerr << "File " << f << " doesn't have a tree of name " << treename; |
| 85 | + } else { |
| 86 | + if (entries == -1) { |
| 87 | + entries = tree->GetEntries(); |
| 88 | + } else { |
| 89 | + if (entries != tree->GetEntries()) { |
| 90 | + std::cerr << "Trees have inconsistent number of entries "; |
| 91 | + ok = false; |
| 92 | + } |
| 93 | + } |
| 94 | + } |
| 95 | + } |
| 96 | + return ok; |
| 97 | +} |
| 98 | + |
| 99 | +void merge(Options const& options) |
| 100 | +{ |
| 101 | + if (options.asfriend) { |
| 102 | + // open the output file |
| 103 | + auto newfile = TFile::Open(options.outfilename.c_str(), "RECREATE"); |
| 104 | + auto newtree = new TTree(options.treename.c_str(), ""); |
| 105 | + // add remaining stuff as friend |
| 106 | + for (int i = 0; i < options.infilenames.size(); ++i) { |
| 107 | + newtree->AddFriend(options.treename.c_str(), options.infilenames[i].c_str()); |
| 108 | + } |
| 109 | + newfile->Write(); |
| 110 | + newfile->Close(); |
| 111 | + |
| 112 | + // P. Canal suggests that this can be done in the following way to fix the branch names |
| 113 | + // in the merged file and to keep only the final file: |
| 114 | + //auto mainfile = TFile::Open(firsttreefilename, "UPDATE"); |
| 115 | + //auto friendfile = TFile::Open(secondtreefilename, "READ"); |
| 116 | + //auto friendtree = ffriendfile>Get<Tree>(secondtreename); |
| 117 | + //mainfile->cd(); |
| 118 | + //auto friendcopy = friendtree->CloneTree(-1, "fast"); |
| 119 | + //auto maintree = mainfile->Get<TTree>(firsttreename); |
| 120 | + //maintree->AddFriend(friendcopy); |
| 121 | + //mainfile->Write(); |
| 122 | + } else { |
| 123 | + // NOTE: This is functional but potentially slow solution. |
| 124 | + // We should adapt this function as soon as more performant |
| 125 | + // ways are known. |
| 126 | + // See also: https://root-forum.cern.ch/t/make-a-new-ttree-from-a-deep-vertical-union-of-existing-ttrees/44250 |
| 127 | + |
| 128 | + // open the first Tree |
| 129 | + TFile _tmpfile(options.infilenames[0].c_str(), "OPEN"); |
| 130 | + auto t1 = (TTree*)_tmpfile.Get(options.treename.c_str()); |
| 131 | + |
| 132 | + // add remaining stuff as friend |
| 133 | + for (int i = 1; i < options.infilenames.size(); ++i) { |
| 134 | + t1->AddFriend(options.treename.c_str(), options.infilenames[i].c_str()); |
| 135 | + } |
| 136 | + ROOT::RDataFrame df(*t1); |
| 137 | + df.Snapshot(options.treename, options.outfilename); |
| 138 | + } |
| 139 | +} |
| 140 | + |
| 141 | +int main(int argc, char* argv[]) |
| 142 | +{ |
| 143 | + Options optvalues; |
| 144 | + if (!parseOptions(argc, argv, optvalues)) { |
| 145 | + return 0; |
| 146 | + } |
| 147 | + |
| 148 | + auto ok = checkFiles(optvalues.infilenames, optvalues.treename); |
| 149 | + if (!ok) { |
| 150 | + return 1; |
| 151 | + } |
| 152 | + |
| 153 | + // merge files |
| 154 | + merge(optvalues); |
| 155 | + |
| 156 | + return 0; |
| 157 | +} |
0 commit comments