Skip to content

Commit acd6b46

Browse files
committed
Improve handling of multiple input files
Osm2pgsql can handle any number of input files. The old code just read the files one after the other, which does not work if there is any overlap between the files, i.e. if the same object is in two input files. The new code reads the files in parallel. We construct a priority queue that is fed the next object from each input file and take off the "smallest" object one at a time. If the same object is in multiple files, we only process it once. If there is only a single input file, a shortcut is taken which basically behaves like the old code. Note that the input files have to be from the same point in time. If there are multiple versions of the same object in the input, this will still not magically work. This commit removes support for unsorted input files, which were already deprecated. See #1167. This commit also removes support for negative ids, which were already deprecated. Fixes #1097.
1 parent f2a866c commit acd6b46

10 files changed

Lines changed: 437 additions & 158 deletions

src/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11

22
set(osm2pgsql_lib_SOURCES
3-
check-order.cpp
43
db-check.cpp
54
db-copy.cpp
65
dependency-manager.cpp

src/check-order.cpp

Lines changed: 0 additions & 80 deletions
This file was deleted.

src/check-order.hpp

Lines changed: 0 additions & 40 deletions
This file was deleted.

src/osm2pgsql.cpp

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,40 @@ static std::shared_ptr<middle_t> create_middle(options_t const &options)
4949
return std::make_shared<middle_ram_t>(&options);
5050
}
5151

52+
/**
53+
* Prepare input file(s). Does format checks as far as this is possible
54+
* without actually opening the files.
55+
*/
56+
static std::vector<osmium::io::File>
57+
prepare_input_files(options_t const &options)
58+
{
59+
std::vector<osmium::io::File> files;
60+
61+
for (auto const &filename : options.input_files) {
62+
osmium::io::File file{filename, options.input_format};
63+
64+
if (file.format() == osmium::io::file_format::unknown) {
65+
if (options.input_format.empty()) {
66+
throw std::runtime_error{
67+
"Cannot detect file format. Try using -r."};
68+
}
69+
throw std::runtime_error{
70+
"Unknown file format '{}'."_format(options.input_format)};
71+
}
72+
73+
if (!options.append && file.has_multiple_object_versions()) {
74+
throw std::runtime_error{
75+
"Reading an OSM change file only works in append mode."};
76+
}
77+
78+
log_info("Reading file: {}", filename);
79+
80+
files.emplace_back(file);
81+
}
82+
83+
return files;
84+
}
85+
5286
int main(int argc, char *argv[])
5387
{
5488
try {
@@ -61,6 +95,8 @@ int main(int argc, char *argv[])
6195

6296
check_db(options);
6397

98+
auto const files = prepare_input_files(options);
99+
64100
auto middle = create_middle(options);
65101
middle->start();
66102

@@ -81,28 +117,14 @@ int main(int argc, char *argv[])
81117
// Processing: In this phase the input file(s) are read and parsed,
82118
// populating some of the tables.
83119
progress_display_t progress;
84-
for (auto const &filename : options.input_files) {
85-
log_info("Reading file: {}", filename);
86-
util::timer_t timer_parse;
87-
88-
osmium::io::File file{filename, options.input_format};
89-
if (file.format() == osmium::io::file_format::unknown) {
90-
if (options.input_format.empty()) {
91-
throw std::runtime_error{
92-
"Cannot detect file format. Try using -r."};
93-
}
94-
throw std::runtime_error{
95-
"Unknown file format '{}'."_format(options.input_format)};
96-
}
97120

98-
progress.update(osmdata.process_file(file, options.bbox));
121+
util::timer_t timer_parse;
99122

100-
if (get_logger().show_progress()) {
101-
progress.print_status(std::time(nullptr));
102-
fmt::print(stderr, " parse time: {}\n",
103-
util::human_readable_duration(timer_parse.stop()));
104-
}
105-
}
123+
progress.update(osmdata.process_files(files, options.bbox));
124+
125+
progress.print_status(std::time(nullptr));
126+
fmt::print(stderr, " parse time: {}\n",
127+
util::human_readable_duration(timer_parse.stop()));
106128

107129
progress.print_summary();
108130

0 commit comments

Comments
 (0)