Skip to content

Commit 25d448b

Browse files
committed
Fix code review comments
1 parent fb6dade commit 25d448b

6 files changed

Lines changed: 131 additions & 76 deletions

File tree

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
*.db
22
!sample_data/*.db
33
test_sqlite_blaster
4-
test_sqlite_blaster.dSYM
4+
*.dSYM
55
*.txt
66
!sample_data/*.txt
77
test.cpp
88
a.out
9+
test
10+

README.md

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,16 @@ In this mode, a table is created with just 2 columns, `key` and `value` as shown
5151

5252
```c++
5353
#include "sqlite_index_blaster.h"
54+
#include <string>
55+
#include <vector>
5456

5557
int main() {
5658

57-
// Use new and delete to have control of when the database is closed
58-
sqlite_index_blaster *sqib = new sqlite_index_blaster(2, 1,
59-
(const char *[]) {"key", "value"}, "kv_index", 4096, 40, "kv_idx.db");
60-
sqib->put("hello", 5, "world", 5);
61-
delete sqib; // Close kv_kdx.db
59+
std::vector<std::string> col_names = {"key", "value"}; // -std >= c++11
60+
sqlite_index_blaster sqib(2, 1, col_names, "kv_index", 4096, 40, "kv_idx.db");
61+
sqib.put_string("hello", "world");
6262
return 0;
63+
// db file is flushed and closed when sqib is destroyed
6364

6465
}
6566
```
@@ -84,18 +85,14 @@ To retrieve the inserted values, use `get` method as shown below
8485

8586
```c++
8687
#include "sqlite_index_blaster.h"
88+
#include <string>
89+
#include <vector>
8790

8891
int main() {
89-
sqlite_index_blaster *sqib = new sqlite_index_blaster(2, 1,
90-
(const char *[]) {"key", "value"}, "kv_index", 4096, 40, "kv_idx.db");
91-
sqib->put("hello", 5, "world", 5);
92-
char out_val[10];
93-
int out_val_len = 10;
94-
if (sqib->get("hello", 5, &out_val_len, out_val)) {
95-
out_val[out_val_len] = '\0';
96-
cout << "Value of hello is " << out_val << endl;
97-
}
98-
delete sqib;
92+
std::vector<std::string> col_names = {"key", "value"}; // -std >= c++11
93+
sqlite_index_blaster sqib(2, 1, col_names, "kv_index", 4096, 40, "kv_idx.db");
94+
sqib.put_string("hello", "world");
95+
std::cout << "Value of hello is " << sqib.get_string("hello", "not_found") << std::endl;
9996
return 0;
10097
}
10198
```
@@ -106,16 +103,17 @@ In this mode, a table is created with just 2 columns, `key` and `doc` as shown b
106103

107104
```c++
108105
#include "sqlite_index_blaster.h"
106+
#include <string>
107+
#include <vector>
109108

110109
const char * json1 = "{\"name\": \"Alice\", \"age\": 25, \"email\": \"alice@example.com\"}";
111110
const char * json2 = "{\"name\": \"George\", \"age\": 32, \"email\": \"george@example.com\"}";
112111

113112
int main() {
114-
sqlite_index_blaster *sqib = new sqlite_index_blaster(2, 1,
115-
(const char *[]) {"key", "doc"}, "doc_index", 4096, 40, "doc_store.db");
116-
sqib->put("primary_contact", 15, json1, strlen(json1));
117-
sqib->put("secondary_contact", 17, json2, strlen(json2));
118-
delete sqib;
113+
std::vector<std::string> col_names = {"key", "doc"}; // -std >= c++11
114+
sqlite_index_blaster sqib(2, 1, col_names, "doc_index", 4096, 40, "doc_store.db");
115+
sqib.put_string("primary_contact", json1);
116+
sqib.put_string("secondary_contact", json2);
119117
return 0;
120118
}
121119
```
@@ -134,34 +132,35 @@ This repo can be used to create regular tables with primary key(s) as shown belo
134132

135133
```c++
136134
#include <cmath>
135+
#include <string>
136+
#include <vector>
137+
137138
#include "sqlite_index_blaster.h"
138139

139140
const uint8_t col_types[] = {SQLT_TYPE_TEXT, SQLT_TYPE_INT8, SQLT_TYPE_INT8, SQLT_TYPE_INT8, SQLT_TYPE_INT8, SQLT_TYPE_REAL};
140141

141142
int main() {
142143

143-
sqlite_index_blaster *sqib = new sqlite_index_blaster(6, 2,
144-
(const char *[]) {"student_name", "age", "maths_marks", "physics_marks", "chemistry_marks", "average_marks"},
145-
"student_marks", 4096, 40, "student_marks.db");
144+
std::vector<std::string> col_names = {"student_name", "age", "maths_marks", "physics_marks", "chemistry_marks", "average_marks"};
145+
sqlite_index_blaster sqib(6, 2, col_names, "student_marks", 4096, 40, "student_marks.db");
146146

147147
int8_t maths, physics, chemistry, age;
148148
double average;
149149
uint8_t rec_buf[500];
150150
int rec_len;
151151

152152
age = 19; maths = 80; physics = 69; chemistry = 98; average = round((maths + physics + chemistry) * 100 / 3) / 100;
153-
rec_len = sqib->make_new_rec(rec_buf, 6, (const void *[]) {"Robert", &age, &maths, &physics, &chemistry, &average}, NULL, col_types);
154-
sqib->put(rec_buf, -rec_len, NULL, 0);
153+
rec_len = sqib.make_new_rec(rec_buf, 6, (const void *[]) {"Robert", &age, &maths, &physics, &chemistry, &average}, NULL, col_types);
154+
sqib.put(rec_buf, -rec_len, NULL, 0);
155155

156156
age = 20; maths = 82; physics = 99; chemistry = 83; average = round((maths + physics + chemistry) * 100 / 3) / 100;
157-
rec_len = sqib->make_new_rec(rec_buf, 6, (const void *[]) {"Barry", &age, &maths, &physics, &chemistry, &average}, NULL, col_types);
158-
sqib->put(rec_buf, -rec_len, NULL, 0);
157+
rec_len = sqib.make_new_rec(rec_buf, 6, (const void *[]) {"Barry", &age, &maths, &physics, &chemistry, &average}, NULL, col_types);
158+
sqib.put(rec_buf, -rec_len, NULL, 0);
159159

160160
age = 23; maths = 84; physics = 89; chemistry = 74; average = round((maths + physics + chemistry) * 100 / 3) / 100;
161-
rec_len = sqib->make_new_rec(rec_buf, 6, (const void *[]) {"Elizabeth", &age, &maths, &physics, &chemistry, &average}, NULL, col_types);
162-
sqib->put(rec_buf, -rec_len, NULL, 0);
161+
rec_len = sqib.make_new_rec(rec_buf, 6, (const void *[]) {"Elizabeth", &age, &maths, &physics, &chemistry, &average}, NULL, col_types);
162+
sqib.put(rec_buf, -rec_len, NULL, 0);
163163

164-
delete sqib;
165164
return 0;
166165
}
167166
```
@@ -233,7 +232,7 @@ Valentine's Day,Comedy,Warner Bros.
233232

234233
This code has been tested with more than 200 million records, so it is expected to be quite stable, but bear in mind that it is this fast because there is no crash recovery.
235234

236-
So this repo is best suited for one time inserts of large datasets, power backed systems such as those hosted in Cloud and battery backed systems.
235+
So this repo is best suited for one-time inserts of large datasets. It may also be suitable for systems with reliable backup power, such as cloud-hosted systems and battery-backed systems.
237236

238237
# License
239238

src/btree_handler.h

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
#include <stdint.h>
77
#include "lru_cache.h"
88

9-
using namespace std;
10-
119
#define BPT_LEAF0_LVL 14
1210
#define BPT_STAGING_LVL 15
1311
#define BPT_PARENT0_LVL 16
@@ -41,8 +39,6 @@ class util {
4139
}
4240
};
4341

44-
#define descendant static_cast<T*>(this)
45-
4642
union page_ptr {
4743
unsigned long page;
4844
uint8_t *ptr;
@@ -83,6 +79,7 @@ class btree_handler {
8379
int max_key_len;
8480
int is_block_given;
8581
int root_page_num;
82+
bool is_closed;
8683

8784
public:
8885
lru_cache *cache;
@@ -105,6 +102,7 @@ class btree_handler {
105102
block_size (block_sz), cache_size (cache_sz_kb), filename (fname) {
106103
descendant->init_derived();
107104
init_stats();
105+
is_closed = false;
108106
is_block_given = 0;
109107
root_page_num = start_page_num;
110108
is_btree = whether_btree;
@@ -129,6 +127,7 @@ class btree_handler {
129127
btree_handler(uint32_t block_sz, uint8_t *block, bool is_leaf, bool should_init = true) :
130128
block_size (block_sz), cache_size (0), filename (NULL) {
131129
is_block_given = 1;
130+
is_closed = false;
132131
root_block = current_block = block;
133132
if (should_init) {
134133
descendant->set_leaf(is_leaf ? 1 : 0);
@@ -139,11 +138,17 @@ class btree_handler {
139138
}
140139

141140
~btree_handler() {
141+
if (!is_closed)
142+
close();
143+
}
144+
145+
void close() {
142146
descendant->cleanup();
143147
if (cache_size > 0)
144148
delete cache;
145149
else if (!is_block_given)
146150
free(root_block);
151+
is_closed = true;
147152
}
148153

149154
void init_current_block() {
@@ -166,6 +171,17 @@ class btree_handler {
166171
return current_block;
167172
}
168173

174+
std::string get_string(std::string key, std::string not_found_value) {
175+
bool ret = get(key.c_str(), key.length(), NULL, NULL);
176+
if (ret) {
177+
uint8_t *val = (uint8_t *) malloc(key_at_len);
178+
int val_len;
179+
descendant->copy_value(val, &val_len);
180+
return std::string((const char *) val, val_len);
181+
}
182+
return not_found_value;
183+
}
184+
169185
bool get(const char *key, int key_len, int *in_size_out_val_len = NULL,
170186
char *val = NULL, bptree_iter_ctx *ctx = NULL) {
171187
return get((uint8_t *) key, key_len, in_size_out_val_len, (uint8_t *) val, ctx);
@@ -208,6 +224,9 @@ class btree_handler {
208224
return descendant->search_current_block(ctx);
209225
}
210226

227+
bool put_string(std::string key, std::string value) {
228+
return put(key.c_str(), key.length(), value.c_str(), value.length());
229+
}
211230
bool put(const char *key, int key_len, const char *value,
212231
int value_len, bptree_iter_ctx *ctx = NULL) {
213232
return put((const uint8_t *) key, key_len, (const uint8_t *) value, value_len, ctx);

src/lru_cache.h

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,11 @@
1515
#include <errno.h>
1616
#include <cstring>
1717
#include <time.h>
18-
#include <chrono>
19-
#include <brotli/encode.h>
18+
//#include <brotli/encode.h>
2019
//#include <snappy.h>
2120

2221
#define USE_FOPEN 1
2322

24-
using namespace std;
25-
using namespace chrono;
26-
2723
typedef struct dbl_lnklst_st {
2824
int disk_page;
2925
int cache_loc;
@@ -52,7 +48,7 @@ class lru_cache {
5248
dbl_lnklst **disk_to_cache_map;
5349
size_t disk_to_cache_map_size;
5450
dbl_lnklst *llarr;
55-
set<int> new_pages;
51+
std::set<int> new_pages;
5652
char filename[100];
5753
#if USE_FOPEN == 1
5854
FILE *fp;
@@ -65,10 +61,8 @@ class lru_cache {
6561
void *(*malloc_fn)(size_t);
6662
bool (*const is_changed_fn)(uint8_t *, int);
6763
void (*const set_changed_fn)(uint8_t *, int, bool);
68-
void write_pages(set<int>& pages_to_write) {
69-
time_point<steady_clock> start;
70-
start = steady_clock::now();
71-
for (set<int>::iterator it = pages_to_write.begin(); it != pages_to_write.end(); it++) {
64+
void write_pages(std::set<int>& pages_to_write) {
65+
for (std::set<int>::iterator it = pages_to_write.begin(); it != pages_to_write.end(); it++) {
7266
uint8_t *block = &page_cache[page_size * disk_to_cache_map[*it]->cache_loc];
7367
set_changed_fn(block, page_size, false);
7468
//if (page_size < 65537 && block[5] < 255)
@@ -147,7 +141,7 @@ if (page_size == 4096) {
147141
if (lnklst_last_entry == NULL)
148142
return;
149143
stats.cache_flush_count++;
150-
set<int> pages_to_write(new_pages);
144+
std::set<int> pages_to_write(new_pages);
151145
calc_flush_count();
152146
int pages_to_check = stats.last_pages_to_flush * 3;
153147
dbl_lnklst *cur_entry = lnklst_last_entry;
@@ -157,7 +151,7 @@ if (page_size == 4096) {
157151
if (is_changed_fn(block, page_size)) {
158152
pages_to_write.insert(cur_entry->disk_page);
159153
if (cur_entry->disk_page == 0 || !disk_to_cache_map[cur_entry->disk_page])
160-
cout << "Disk cache map entry missing" << endl;
154+
std::cout << "Disk cache map entry missing" << std::endl;
161155
}
162156
if (pages_to_write.size() > (stats.last_pages_to_flush + new_pages.size()))
163157
break;
@@ -243,7 +237,7 @@ if (page_size == 4096) {
243237
if (file_page_count > 0)
244238
file_page_count /= page_size;
245239
//cout << "File page count: " << file_page_count << endl;
246-
disk_to_cache_map_size = max(file_page_count + 1000, (size_t) cache_size_in_pages);
240+
disk_to_cache_map_size = std::max(file_page_count + 1000, (size_t) cache_size_in_pages);
247241
disk_to_cache_map = (dbl_lnklst **) alloc_fn(disk_to_cache_map_size * sizeof(dbl_lnklst *));
248242
memset(disk_to_cache_map, '\0', disk_to_cache_map_size * sizeof(dbl_lnklst *));
249243
empty = 0;
@@ -274,7 +268,7 @@ if (page_size == 4096) {
274268
}
275269
~lru_cache() {
276270
flush_pages_in_seq(0);
277-
set<int> pages_to_write;
271+
std::set<int> pages_to_write;
278272
for (size_t ll = 0; ll < cache_size_in_pages; ll++) {
279273
if (llarr[ll].disk_page == 0)
280274
continue;
@@ -307,7 +301,7 @@ if (page_size == 4096) {
307301
}
308302
return read_count;
309303
} else {
310-
cout << "file_pos: " << file_pos << errno << endl;
304+
std::cout << "file_pos: " << file_pos << errno << std::endl;
311305
}
312306
#else
313307
if (lseek(fd, file_pos, SEEK_SET) != -1) {
@@ -343,7 +337,7 @@ if (page_size == 4096) {
343337
}
344338
uint8_t *get_disk_page_in_cache(int disk_page, uint8_t *block_to_keep = NULL, bool is_new = false) {
345339
if (disk_page < skip_page_count)
346-
cout << "WARNING: asking disk_page: " << disk_page << endl;
340+
std::cout << "WARNING: asking disk_page: " << disk_page << std::endl;
347341
if (disk_page == skip_page_count)
348342
return root_block;
349343
int cache_pos = 0;
@@ -436,12 +430,12 @@ if (page_size == 4096) {
436430
off_t file_pos = page_size;
437431
file_pos *= disk_page;
438432
if (read_page(&page_cache[page_size * cache_pos], file_pos, page_size) != page_size)
439-
cout << "Unable to read: " << disk_page << endl;
433+
std::cout << "Unable to read: " << disk_page << std::endl;
440434
stats.pages_read++;
441435
}
442436
} else {
443437
if (is_new)
444-
cout << "WARNING: How was new page found in cache?" << endl;
438+
std::cout << "WARNING: How was new page found in cache?" << std::endl;
445439
dbl_lnklst *current_entry = disk_to_cache_map[disk_page];
446440
if (lnklst_last_free == current_entry && lnklst_last_free != NULL
447441
&& lnklst_last_free->prev != NULL)

0 commit comments

Comments
 (0)