Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : trivial database library
5 :
6 : Copyright (C) Andrew Tridgell 1999-2005
7 : Copyright (C) Paul `Rusty' Russell 2000
8 : Copyright (C) Jeremy Allison 2000-2003
9 :
10 : ** NOTE! The following LGPL license applies to the tdb
11 : ** library. This does NOT imply that all of Samba is released
12 : ** under the LGPL
13 :
14 : This library is free software; you can redistribute it and/or
15 : modify it under the terms of the GNU Lesser General Public
16 : License as published by the Free Software Foundation; either
17 : version 3 of the License, or (at your option) any later version.
18 :
19 : This library is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 : Lesser General Public License for more details.
23 :
24 : You should have received a copy of the GNU Lesser General Public
25 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 : */
27 :
28 : #include "tdb_private.h"
29 :
30 : #define TDB_NEXT_LOCK_ERR ((tdb_off_t)-1)
31 :
32 : /* Uses traverse lock: 0 = finish, TDB_NEXT_LOCK_ERR = error,
33 : other = record offset */
34 574475382 : static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock,
35 : struct tdb_record *rec)
36 : {
37 574475382 : int want_next = (tlock->off != 0);
38 :
39 : /* Lock each chain from the start one. */
40 833804007 : for (; tlock->list < tdb->hash_size; tlock->list++) {
41 827665894 : if (!tlock->off && tlock->list != 0) {
42 : /* this is an optimisation for the common case where
43 : the hash chain is empty, which is particularly
44 : common for the use of tdb with ldb, where large
45 : hashes are used. In that case we spend most of our
46 : time in tdb_brlock(), locking empty hash chains.
47 :
48 : To avoid this, we do an unlocked pre-check to see
49 : if the hash chain is empty before starting to look
50 : inside it. If it is empty then we can avoid that
51 : hash chain. If it isn't empty then we can't believe
52 : the value we get back, as we read it without a
53 : lock, so instead we get the lock and re-fetch the
54 : value below.
55 :
56 : Notice that not doing this optimisation on the
57 : first hash chain is critical. We must guarantee
58 : that we have done at least one fcntl lock at the
59 : start of a search to guarantee that memory is
60 : coherent on SMP systems. If records are added by
61 : others during the search then that's OK, and we
62 : could possibly miss those with this trick, but we
63 : could miss them anyway without this trick, so the
64 : semantics don't change.
65 :
66 : With a non-indexed ldb search this trick gains us a
67 : factor of around 80 in speed on a linux 2.6.x
68 : system (testing using ldbtest).
69 : */
70 253190512 : tdb->methods->next_hash_chain(tdb, &tlock->list);
71 253190512 : if (tlock->list == tdb->hash_size) {
72 5551006 : continue;
73 : }
74 : }
75 :
76 822114888 : if (tdb_lock(tdb, tlock->list, tlock->lock_rw) == -1)
77 0 : return TDB_NEXT_LOCK_ERR;
78 :
79 : /* No previous record? Start at top of chain. */
80 822114888 : if (!tlock->off) {
81 253782418 : if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->list),
82 253782418 : &tlock->off) == -1)
83 0 : goto fail;
84 : } else {
85 : /* Otherwise unlock the previous record. */
86 568332470 : if (tdb_unlock_record(tdb, tlock->off) != 0)
87 0 : goto fail;
88 : }
89 :
90 822114888 : if (want_next) {
91 : /* We have offset of old record: grab next */
92 568332470 : if (tdb_rec_read(tdb, tlock->off, rec) == -1)
93 0 : goto fail;
94 568332470 : tlock->off = rec->next;
95 : }
96 :
97 : /* Iterate through chain */
98 838388117 : while( tlock->off) {
99 584610498 : if (tdb_rec_read(tdb, tlock->off, rec) == -1)
100 0 : goto fail;
101 :
102 : /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
103 584610498 : if (tlock->off == rec->next) {
104 0 : tdb->ecode = TDB_ERR_CORRUPT;
105 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: loop detected.\n"));
106 0 : goto fail;
107 : }
108 :
109 584610498 : if (!TDB_DEAD(rec)) {
110 : /* Woohoo: we found one! */
111 568337269 : if (tdb_lock_record(tdb, tlock->off) != 0)
112 0 : goto fail;
113 568337269 : return tlock->off;
114 : }
115 :
116 16273229 : tlock->off = rec->next;
117 : }
118 253777619 : tdb_unlock(tdb, tlock->list, tlock->lock_rw);
119 253777619 : want_next = 0;
120 : }
121 : /* We finished iteration without finding anything */
122 6138113 : tdb->ecode = TDB_SUCCESS;
123 6138113 : return 0;
124 :
125 0 : fail:
126 0 : tlock->off = 0;
127 0 : if (tdb_unlock(tdb, tlock->list, tlock->lock_rw) != 0)
128 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n"));
129 0 : return TDB_NEXT_LOCK_ERR;
130 : }
131 :
132 : /* traverse the entire database - calling fn(tdb, key, data) on each element.
133 : return -1 on error or the record count traversed
134 : if fn is NULL then it is not called
135 : a non-zero return value from fn() indicates that the traversal should stop
136 : */
137 6142878 : static int tdb_traverse_internal(struct tdb_context *tdb,
138 : tdb_traverse_func fn, void *private_data,
139 : struct tdb_traverse_lock *tl)
140 : {
141 166160 : TDB_DATA key, dbuf;
142 166160 : struct tdb_record rec;
143 6142878 : int ret = 0, count = 0;
144 166160 : tdb_off_t off;
145 166160 : size_t recbuf_len;
146 :
147 6142878 : recbuf_len = 4096;
148 6142878 : key.dptr = malloc(recbuf_len);
149 6142878 : if (key.dptr == NULL) {
150 0 : return -1;
151 : }
152 :
153 : /* This was in the initialization, above, but the IRIX compiler
154 : * did not like it. crh
155 : */
156 6142878 : tl->next = tdb->travlocks.next;
157 :
158 : /* fcntl locks don't stack: beware traverse inside traverse */
159 6142878 : tdb->travlocks.next = tl;
160 :
161 : /* tdb_next_lock places locks on the record returned, and its chain */
162 574475150 : while ((off = tdb_next_lock(tdb, tl, &rec)) != 0) {
163 5462612 : tdb_len_t full_len;
164 5462612 : int nread;
165 :
166 568337071 : if (off == TDB_NEXT_LOCK_ERR) {
167 0 : ret = -1;
168 0 : goto out;
169 : }
170 :
171 568337071 : full_len = rec.key_len + rec.data_len;
172 :
173 568337071 : if (full_len > recbuf_len) {
174 268941 : recbuf_len = full_len;
175 :
176 : /*
177 : * No realloc, we don't need the old data and thus can
178 : * do without the memcpy
179 : */
180 268941 : free(key.dptr);
181 268941 : key.dptr = malloc(recbuf_len);
182 :
183 268941 : if (key.dptr == NULL) {
184 0 : ret = -1;
185 0 : if (tdb_unlock(tdb, tl->list, tl->lock_rw)
186 : != 0) {
187 0 : goto out;
188 : }
189 0 : if (tdb_unlock_record(tdb, tl->off) != 0) {
190 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL,
191 : "tdb_traverse: malloc "
192 : "failed and unlock_record "
193 : "failed!\n"));
194 : }
195 0 : goto out;
196 : }
197 : }
198 :
199 568337071 : count++;
200 : /* now read the full record */
201 568337071 : nread = tdb->methods->tdb_read(tdb, tl->off + sizeof(rec),
202 562874459 : key.dptr, full_len, 0);
203 568337071 : if (nread == -1) {
204 0 : ret = -1;
205 0 : if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0)
206 0 : goto out;
207 0 : if (tdb_unlock_record(tdb, tl->off) != 0)
208 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
209 0 : goto out;
210 : }
211 568337071 : key.dsize = rec.key_len;
212 568337071 : dbuf.dptr = key.dptr + rec.key_len;
213 568337071 : dbuf.dsize = rec.data_len;
214 :
215 5462612 : tdb_trace_1rec_retrec(tdb, "traverse", key, dbuf);
216 :
217 : /* Drop chain lock, call out */
218 568337071 : if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0) {
219 0 : ret = -1;
220 0 : goto out;
221 : }
222 568337071 : if (fn && fn(tdb, key, dbuf, private_data)) {
223 : /* They want us to terminate traversal */
224 9 : tdb_trace_ret(tdb, "tdb_traverse_end", count);
225 4795 : if (tdb_unlock_record(tdb, tl->off) != 0) {
226 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: unlock_record failed!\n"));;
227 0 : ret = -1;
228 : }
229 4795 : goto out;
230 : }
231 : }
232 166156 : tdb_trace(tdb, "tdb_traverse_end");
233 6138079 : out:
234 6142874 : SAFE_FREE(key.dptr);
235 6142874 : tdb->travlocks.next = tl->next;
236 6142874 : if (ret < 0)
237 0 : return -1;
238 : else
239 6142874 : return count;
240 : }
241 :
242 :
243 : /*
244 : a read style traverse - temporarily marks each record read only
245 : */
246 413213 : _PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb,
247 : tdb_traverse_func fn, void *private_data)
248 : {
249 413213 : struct tdb_traverse_lock tl = { NULL, 0, 0, F_RDLCK };
250 1239 : int ret;
251 :
252 413213 : tdb->traverse_read++;
253 1239 : tdb_trace(tdb, "tdb_traverse_read_start");
254 413213 : ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
255 413209 : tdb->traverse_read--;
256 :
257 413209 : return ret;
258 : }
259 :
260 : /*
261 : a write style traverse - needs to get the transaction lock to
262 : prevent deadlocks
263 :
264 : WARNING: The data buffer given to the callback fn does NOT meet the
265 : alignment guarantees malloc gives you.
266 : */
267 5730236 : _PUBLIC_ int tdb_traverse(struct tdb_context *tdb,
268 : tdb_traverse_func fn, void *private_data)
269 : {
270 5730236 : struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK };
271 164972 : enum tdb_lock_flags lock_flags;
272 164972 : int ret;
273 :
274 5730236 : if (tdb->read_only || tdb->traverse_read) {
275 569 : return tdb_traverse_read(tdb, fn, private_data);
276 : }
277 :
278 5729667 : lock_flags = TDB_LOCK_WAIT;
279 :
280 5729667 : if (tdb->allrecord_lock.count != 0) {
281 : /*
282 : * This avoids a deadlock between tdb_lockall() and
283 : * tdb_traverse(). See
284 : * https://bugzilla.samba.org/show_bug.cgi?id=11381
285 : */
286 89023 : lock_flags = TDB_LOCK_NOWAIT;
287 : }
288 :
289 5729667 : if (tdb_transaction_lock(tdb, F_WRLCK, lock_flags)) {
290 2 : return -1;
291 : }
292 :
293 5729665 : tdb->traverse_write++;
294 164921 : tdb_trace(tdb, "tdb_traverse_start");
295 5729665 : ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
296 5729665 : tdb->traverse_write--;
297 :
298 5729665 : tdb_transaction_unlock(tdb, F_WRLCK);
299 :
300 5729665 : return ret;
301 : }
302 :
303 :
304 : /* find the first entry in the database and return its key */
305 34 : _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
306 : {
307 22 : TDB_DATA key;
308 22 : struct tdb_record rec;
309 22 : tdb_off_t off;
310 :
311 : /* release any old lock */
312 34 : if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0)
313 0 : return tdb_null;
314 34 : tdb->travlocks.off = tdb->travlocks.list = 0;
315 34 : tdb->travlocks.lock_rw = F_RDLCK;
316 :
317 : /* Grab first record: locks chain and returned record. */
318 34 : off = tdb_next_lock(tdb, &tdb->travlocks, &rec);
319 34 : if (off == 0 || off == TDB_NEXT_LOCK_ERR) {
320 4 : tdb_trace_retrec(tdb, "tdb_firstkey", tdb_null);
321 8 : return tdb_null;
322 : }
323 : /* now read the key */
324 26 : key.dsize = rec.key_len;
325 26 : key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
326 :
327 18 : tdb_trace_retrec(tdb, "tdb_firstkey", key);
328 :
329 : /* Unlock the hash chain of the record we just read. */
330 26 : if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0)
331 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
332 26 : return key;
333 : }
334 :
335 : /* find the next entry in the database, returning its key */
336 198 : _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
337 : {
338 184 : uint32_t oldlist;
339 198 : TDB_DATA key = tdb_null;
340 184 : struct tdb_record rec;
341 198 : unsigned char *k = NULL;
342 184 : tdb_off_t off;
343 :
344 : /* Is locked key the old key? If so, traverse will be reliable. */
345 198 : if (tdb->travlocks.off) {
346 198 : if (tdb_lock(tdb,tdb->travlocks.list,tdb->travlocks.lock_rw))
347 0 : return tdb_null;
348 198 : if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1
349 198 : || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
350 : rec.key_len))
351 198 : || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
352 : /* No, it wasn't: unlock it and start from scratch */
353 0 : if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) {
354 : tdb_trace_1rec_retrec(tdb, "tdb_nextkey",
355 0 : oldkey, tdb_null);
356 0 : SAFE_FREE(k);
357 0 : return tdb_null;
358 : }
359 0 : if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0) {
360 0 : SAFE_FREE(k);
361 0 : return tdb_null;
362 : }
363 0 : tdb->travlocks.off = 0;
364 : }
365 :
366 198 : SAFE_FREE(k);
367 : }
368 :
369 198 : if (!tdb->travlocks.off) {
370 : /* No previous element: do normal find, and lock record */
371 0 : tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec);
372 0 : if (!tdb->travlocks.off) {
373 0 : tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, tdb_null);
374 0 : return tdb_null;
375 : }
376 0 : tdb->travlocks.list = BUCKET(rec.full_hash);
377 0 : if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) {
378 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
379 0 : return tdb_null;
380 : }
381 : }
382 198 : oldlist = tdb->travlocks.list;
383 :
384 : /* Grab next record: locks chain and returned record,
385 : unlocks old record */
386 198 : off = tdb_next_lock(tdb, &tdb->travlocks, &rec);
387 198 : if (off != TDB_NEXT_LOCK_ERR && off != 0) {
388 172 : key.dsize = rec.key_len;
389 178 : key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
390 6 : key.dsize);
391 : /* Unlock the chain of this new record */
392 172 : if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0)
393 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
394 : }
395 : /* Unlock the chain of old record */
396 198 : if (tdb_unlock(tdb, oldlist, tdb->travlocks.lock_rw) != 0)
397 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
398 184 : tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, key);
399 198 : return key;
400 : }
401 :
402 222882 : _PUBLIC_ int tdb_traverse_chain(struct tdb_context *tdb,
403 : unsigned chain,
404 : tdb_traverse_func fn,
405 : void *private_data)
406 : {
407 20417 : tdb_off_t rec_ptr;
408 20417 : struct tdb_chainwalk_ctx chainwalk;
409 222882 : int count = 0;
410 20417 : int ret;
411 :
412 222882 : if (chain >= tdb->hash_size) {
413 0 : tdb->ecode = TDB_ERR_EINVAL;
414 0 : return -1;
415 : }
416 :
417 222882 : if (tdb->traverse_read != 0) {
418 0 : tdb->ecode = TDB_ERR_LOCK;
419 0 : return -1;
420 : }
421 :
422 222882 : ret = tdb_lock(tdb, chain, F_RDLCK);
423 222882 : if (ret == -1) {
424 0 : return -1;
425 : }
426 :
427 222882 : tdb->traverse_read += 1;
428 :
429 222882 : ret = tdb_ofs_read(tdb, TDB_HASH_TOP(chain), &rec_ptr);
430 222882 : if (ret == -1) {
431 0 : goto fail;
432 : }
433 :
434 222882 : tdb_chainwalk_init(&chainwalk, rec_ptr);
435 :
436 269952 : while (rec_ptr != 0) {
437 1739 : struct tdb_record rec;
438 1739 : bool ok;
439 :
440 47070 : ret = tdb_rec_read(tdb, rec_ptr, &rec);
441 47070 : if (ret == -1) {
442 0 : goto fail;
443 : }
444 :
445 47070 : if (!TDB_DEAD(&rec)) {
446 : /* no overflow checks, tdb_rec_read checked it */
447 46919 : tdb_off_t key_ofs = rec_ptr + sizeof(rec);
448 46919 : size_t full_len = rec.key_len + rec.data_len;
449 46919 : uint8_t *buf = NULL;
450 :
451 46919 : TDB_DATA key = { .dsize = rec.key_len };
452 46919 : TDB_DATA data = { .dsize = rec.data_len };
453 :
454 46919 : if ((tdb->transaction == NULL) &&
455 46919 : (tdb->map_ptr != NULL)) {
456 46919 : ret = tdb_oob(tdb, key_ofs, full_len, 0);
457 45180 : if (ret == -1) {
458 0 : goto fail;
459 : }
460 46919 : key.dptr = (uint8_t *)tdb->map_ptr + key_ofs;
461 : } else {
462 0 : buf = tdb_alloc_read(tdb, key_ofs, full_len);
463 0 : if (buf == NULL) {
464 0 : goto fail;
465 : }
466 0 : key.dptr = buf;
467 : }
468 46919 : data.dptr = key.dptr + key.dsize;
469 :
470 46919 : ret = fn(tdb, key, data, private_data);
471 46919 : free(buf);
472 :
473 46919 : count += 1;
474 :
475 46919 : if (ret != 0) {
476 0 : break;
477 : }
478 : }
479 :
480 47070 : rec_ptr = rec.next;
481 :
482 47070 : ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
483 47070 : if (!ok) {
484 0 : goto fail;
485 : }
486 : }
487 222882 : tdb->traverse_read -= 1;
488 222882 : tdb_unlock(tdb, chain, F_RDLCK);
489 222882 : return count;
490 :
491 0 : fail:
492 0 : tdb->traverse_read -= 1;
493 0 : tdb_unlock(tdb, chain, F_RDLCK);
494 0 : return -1;
495 : }
496 :
497 222882 : _PUBLIC_ int tdb_traverse_key_chain(struct tdb_context *tdb,
498 : TDB_DATA key,
499 : tdb_traverse_func fn,
500 : void *private_data)
501 : {
502 20417 : uint32_t hash, chain;
503 20417 : int ret;
504 :
505 222882 : hash = tdb->hash_fn(&key);
506 222882 : chain = BUCKET(hash);
507 222882 : ret = tdb_traverse_chain(tdb, chain, fn, private_data);
508 :
509 222882 : return ret;
510 : }
|