Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : main select loop and event handling - epoll implementation
5 :
6 : Copyright (C) Andrew Tridgell 2003-2005
7 : Copyright (C) Stefan Metzmacher 2005-2013
8 : Copyright (C) Jeremy Allison 2013
9 :
10 : ** NOTE! The following LGPL license applies to the tevent
11 : ** library. This does NOT imply that all of Samba is released
12 : ** under the LGPL
13 :
14 : This library is free software; you can redistribute it and/or
15 : modify it under the terms of the GNU Lesser General Public
16 : License as published by the Free Software Foundation; either
17 : version 3 of the License, or (at your option) any later version.
18 :
19 : This library is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 : Lesser General Public License for more details.
23 :
24 : You should have received a copy of the GNU Lesser General Public
25 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 : */
27 :
#include "replace.h"
#include <limits.h>
#include "system/filesys.h"
#include "system/select.h"
#include "tevent.h"
#include "tevent_internal.h"
#include "tevent_util.h"
34 :
/*
 * Private state of the epoll backend. One instance is allocated per
 * tevent_context and stored in its additional_data pointer.
 */
struct epoll_event_context {
	/* back pointer to the generic event context */
	struct tevent_context *ev;

	/* the epoll instance, as returned by epoll_create1(2) */
	int epoll_fd;

	/* the pid recorded when epoll_fd was (re)created */
	pid_t pid;

	/* when true, epoll_panic() always passes replay=true on */
	bool panic_force_replay;
	/* optional caller owned flag, set to true by epoll_panic() */
	bool *panic_state;
	/* handler invoked by epoll_panic(), may be NULL */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
48 :
/*
 * Backend private bits kept in tevent_fd->additional_flags.
 */
/* set while the fd is registered with the epoll instance */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1 << 0)
/* set once epoll reported EPOLLHUP, EPOLLERR or EPOLLRDHUP */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1 << 1)
51 :
52 : #ifdef TEST_PANIC_FALLBACK
53 :
54 : static int epoll_create1_panic_fallback(struct epoll_event_context *epoll_ev,
55 : int flags)
56 : {
57 : if (epoll_ev->panic_fallback == NULL) {
58 : return epoll_create1(flags);
59 : }
60 :
61 : /* 50% of the time, fail... */
62 : if ((random() % 2) == 0) {
63 : errno = EINVAL;
64 : return -1;
65 : }
66 :
67 : return epoll_create1(flags);
68 : }
69 :
70 : static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
71 : int epfd, int op, int fd,
72 : struct epoll_event *event)
73 : {
74 : if (epoll_ev->panic_fallback == NULL) {
75 : return epoll_ctl(epfd, op, fd, event);
76 : }
77 :
78 : /* 50% of the time, fail... */
79 : if ((random() % 2) == 0) {
80 : errno = EINVAL;
81 : return -1;
82 : }
83 :
84 : return epoll_ctl(epfd, op, fd, event);
85 : }
86 :
87 : static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
88 : int epfd,
89 : struct epoll_event *events,
90 : int maxevents,
91 : int timeout)
92 : {
93 : if (epoll_ev->panic_fallback == NULL) {
94 : return epoll_wait(epfd, events, maxevents, timeout);
95 : }
96 :
97 : /* 50% of the time, fail... */
98 : if ((random() % 2) == 0) {
99 : errno = EINVAL;
100 : return -1;
101 : }
102 :
103 : return epoll_wait(epfd, events, maxevents, timeout);
104 : }
105 :
/*
 * From here on, every epoll_create1(), epoll_ctl() and epoll_wait()
 * call in this file is routed through the failure injecting wrappers
 * above. The macros pick up a variable named 'epoll_ev' from the
 * scope of the caller.
 */
#define epoll_create1(_flags) \
	epoll_create1_panic_fallback(epoll_ev, _flags)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
112 : #endif
113 :
114 : /*
115 : called to set the panic fallback function.
116 : */
117 82971730 : _PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
118 : bool (*panic_fallback)(struct tevent_context *ev,
119 : bool replay))
120 : {
121 3090324 : struct epoll_event_context *epoll_ev =
122 82971730 : talloc_get_type_abort(ev->additional_data,
123 : struct epoll_event_context);
124 :
125 82971730 : epoll_ev->panic_fallback = panic_fallback;
126 82971730 : }
127 :
128 : /*
129 : called when a epoll call fails
130 : */
131 5 : static void epoll_panic(struct epoll_event_context *epoll_ev,
132 : const char *reason, bool replay)
133 : {
134 5 : struct tevent_context *ev = epoll_ev->ev;
135 0 : bool (*panic_fallback)(struct tevent_context *ev, bool replay);
136 :
137 5 : panic_fallback = epoll_ev->panic_fallback;
138 :
139 5 : if (epoll_ev->panic_state != NULL) {
140 0 : *epoll_ev->panic_state = true;
141 : }
142 :
143 5 : if (epoll_ev->panic_force_replay) {
144 0 : replay = true;
145 : }
146 :
147 5 : TALLOC_FREE(ev->additional_data);
148 :
149 5 : if (panic_fallback == NULL) {
150 0 : tevent_debug(ev, TEVENT_DEBUG_FATAL,
151 : "%s (%s) replay[%u] - calling abort()\n",
152 0 : reason, strerror(errno), (unsigned)replay);
153 0 : abort();
154 : }
155 :
156 5 : tevent_debug(ev, TEVENT_DEBUG_ERROR,
157 : "%s (%s) replay[%u] - calling panic_fallback\n",
158 5 : reason, strerror(errno), (unsigned)replay);
159 :
160 5 : if (!panic_fallback(ev, replay)) {
161 : /* Fallback failed. */
162 0 : tevent_debug(ev, TEVENT_DEBUG_FATAL,
163 : "%s (%s) replay[%u] - calling abort()\n",
164 0 : reason, strerror(errno), (unsigned)replay);
165 0 : abort();
166 : }
167 5 : }
168 :
169 : /*
170 : map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
171 : */
172 21430574 : static uint32_t epoll_map_flags(uint16_t flags)
173 : {
174 21430574 : uint32_t ret = 0;
175 :
176 : /*
177 : * we do not need to specify EPOLLERR | EPOLLHUP
178 : * they are always reported.
179 : */
180 :
181 21430574 : if (flags & TEVENT_FD_READ) {
182 : /*
183 : * Note that EPOLLRDHUP always
184 : * returns EPOLLIN in addition,
185 : * so EPOLLRDHUP is not strictly needed,
186 : * but we want to make it explicit.
187 : */
188 21336358 : ret |= EPOLLIN | EPOLLRDHUP;
189 : }
190 21430574 : if (flags & TEVENT_FD_WRITE) {
191 2534148 : ret |= EPOLLOUT;
192 : }
193 21430574 : if (flags & TEVENT_FD_ERROR) {
194 3725866 : ret |= EPOLLRDHUP;
195 : }
196 21430574 : return ret;
197 : }
198 :
199 : /*
200 : free the epoll fd
201 : */
202 82946166 : static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
203 : {
204 82946166 : close(epoll_ev->epoll_fd);
205 82946166 : epoll_ev->epoll_fd = -1;
206 82946166 : return 0;
207 : }
208 :
209 : /*
210 : init the epoll fd
211 : */
212 82971739 : static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
213 : {
214 82971739 : epoll_ev->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
215 82971739 : if (epoll_ev->epoll_fd == -1) {
216 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
217 : "Failed to create epoll handle (%s).\n",
218 0 : strerror(errno));
219 0 : return -1;
220 : }
221 :
222 82971739 : epoll_ev->pid = tevent_cached_getpid();
223 82971737 : talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
224 :
225 82971737 : return 0;
226 : }
227 :
/* defined further down, but already needed by epoll_check_reopen() */
static void epoll_update_event(struct epoll_event_context *epoll_ev,
			       struct tevent_fd *fde);
229 :
230 : /*
231 : reopen the epoll handle when our pid changes
232 : see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
233 : demonstration of why this is needed
234 : */
235 72558 : static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
236 : {
237 1854 : struct tevent_fd *fde;
238 72558 : bool *caller_panic_state = epoll_ev->panic_state;
239 72558 : bool panic_triggered = false;
240 72558 : pid_t pid = tevent_cached_getpid();
241 :
242 72558 : if (epoll_ev->pid == pid) {
243 0 : return;
244 : }
245 :
246 72558 : close(epoll_ev->epoll_fd);
247 72558 : epoll_ev->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
248 72558 : if (epoll_ev->epoll_fd == -1) {
249 0 : epoll_panic(epoll_ev, "epoll_create() failed", false);
250 0 : return;
251 : }
252 :
253 72558 : epoll_ev->pid = pid;
254 72558 : epoll_ev->panic_state = &panic_triggered;
255 669299 : for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
256 : /*
257 : * We leave the mpx mappings alive
258 : * so that we'll just re-add events for
259 : * the existing primary events in the loop
260 : * below.
261 : */
262 596741 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
263 : }
264 669299 : for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
265 596741 : epoll_update_event(epoll_ev, fde);
266 :
267 596741 : if (panic_triggered) {
268 0 : if (caller_panic_state != NULL) {
269 0 : *caller_panic_state = true;
270 : }
271 0 : return;
272 : }
273 : }
274 72558 : epoll_ev->panic_state = NULL;
275 : }
276 :
277 : /*
278 : epoll cannot add the same file descriptor twice, once
279 : with read, once with write which is allowed by the
280 : tevent poll backend. Multiplex the existing fde, flag it
281 : as such so we can search for the correct fde on
282 : event triggering.
283 : */
284 :
285 37421 : static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
286 : struct tevent_fd *add_fde)
287 : {
288 37421 : struct tevent_fd *primary = NULL;
289 966 : uint16_t effective_flags;
290 966 : struct epoll_event event;
291 37421 : uint64_t clear_flags = 0;
292 37421 : uint64_t add_flags = 0;
293 966 : int ret;
294 :
295 : /*
296 : * Check if there is another fde we can attach to
297 : */
298 37421 : primary = tevent_common_fd_mpx_add(add_fde);
299 37421 : if (primary == NULL) {
300 : /* the caller calls epoll_panic() */
301 0 : return -1;
302 : }
303 :
304 : /*
305 : * First propagate the HAS_EVENT flag from
306 : * the primary to all others (mainly add_fde)
307 : */
308 37421 : if (primary->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
309 37421 : add_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
310 37421 : tevent_common_fd_mpx_additional_flags(primary, 0, add_flags);
311 : }
312 :
313 : /*
314 : * Update the mpx internals and check if
315 : * there is an update needed.
316 : */
317 37421 : primary = tevent_common_fd_mpx_update(primary);
318 37421 : if (primary == NULL) {
319 : /*
320 : * It seems the primary was already
321 : * watching (at least) the same flags
322 : * as add_fde, so we are done.
323 : */
324 0 : return 0;
325 : }
326 :
327 : /*
328 : * Before me modify the low level epoll state,
329 : * we clear HAS_EVENT on all fdes.
330 : */
331 37387 : clear_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
332 37387 : tevent_common_fd_mpx_additional_flags(primary, clear_flags, 0);
333 :
334 37387 : effective_flags = tevent_common_fd_mpx_flags(primary);
335 :
336 : /*
337 : * Modify the low level epoll state to reflect
338 : * the effective flags we want to monitor.
339 : */
340 37387 : ZERO_STRUCT(event);
341 37387 : event.events = epoll_map_flags(effective_flags);
342 37387 : event.data.ptr = primary;
343 37387 : ret = epoll_ctl(epoll_ev->epoll_fd,
344 : EPOLL_CTL_MOD,
345 : primary->fd,
346 : &event);
347 37387 : if (ret != 0 && errno == EBADF) {
348 0 : struct tevent_common_fd_buf pbuf = {};
349 0 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_ERROR,
350 : "EPOLL_CTL_MOD EBADF for "
351 : "%s - disabling\n",
352 : tevent_common_fd_str(&pbuf, "primary", primary));
353 0 : tevent_common_fd_mpx_disarm_all(primary);
354 0 : return 0;
355 37387 : } else if (ret != 0) {
356 0 : struct tevent_common_fd_buf pbuf = {};
357 0 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_FATAL,
358 : "EPOLL_CTL_MOD for %s - failed - %s",
359 : tevent_common_fd_str(&pbuf, "primary", primary),
360 : strerror(errno));
361 : /* the caller calls epoll_panic() */
362 0 : return ret;
363 : }
364 :
365 : /*
366 : * Finally re-add HAS_EVENT to all fdes
367 : */
368 37387 : add_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
369 37387 : tevent_common_fd_mpx_additional_flags(primary, 0, add_flags);
370 :
371 36455 : return 0;
372 : }
373 :
374 : /*
375 : add the epoll event to the given fd_event
376 : */
377 16901799 : static void epoll_add_event(struct epoll_event_context *epoll_ev,
378 : struct tevent_fd *_primary)
379 : {
380 16901799 : struct tevent_fd *primary = tevent_common_fd_mpx_primary(_primary);
381 16901799 : uint16_t effective_flags = tevent_common_fd_mpx_flags(primary);
382 63424 : struct epoll_event event;
383 16901799 : uint64_t clear_flags = 0;
384 16901799 : uint64_t add_flags = 0;
385 63424 : int ret;
386 :
387 : /*
388 : * Before me modify the low level epoll state,
389 : * we clear HAS_EVENT on all fdes.
390 : */
391 16901799 : clear_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
392 16901799 : tevent_common_fd_mpx_additional_flags(primary, clear_flags, 0);
393 :
394 : /*
395 : * Modify the low level epoll state to reflect
396 : * the effective flags we want to monitor.
397 : *
398 : * Most likely we won't trigger the EEXIST
399 : * case, so it's much cheaper to try and
400 : * react on EEXIST if needed, than to always
401 : * scan the list of all existing events.
402 : */
403 16901799 : ZERO_STRUCT(event);
404 16901797 : event.events = epoll_map_flags(effective_flags);
405 16901797 : event.data.ptr = primary;
406 16901797 : ret = epoll_ctl(epoll_ev->epoll_fd,
407 : EPOLL_CTL_ADD,
408 : primary->fd,
409 : &event);
410 16901799 : if (ret != 0 && errno == EBADF) {
411 0 : struct tevent_common_fd_buf pbuf = {};
412 0 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_ERROR,
413 : "EPOLL_CTL_ADD EBADF for "
414 : "%s - disabling\n",
415 : tevent_common_fd_str(&pbuf, "primary", primary));
416 0 : tevent_common_fd_mpx_disarm_all(primary);
417 0 : return;
418 16901799 : } else if (ret != 0 && errno == EEXIST) {
419 37421 : ret = epoll_add_multiplex_fd(epoll_ev, primary);
420 37421 : if (ret != 0) {
421 0 : epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
422 : false);
423 0 : return;
424 : }
425 : /*
426 : * epoll_add_multiplex_fd() already
427 : * added EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
428 : */
429 36455 : return;
430 16864378 : } else if (ret != 0) {
431 5 : epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
432 5 : return;
433 : }
434 :
435 : /*
436 : * Finally re-add HAS_EVENT to all fdes
437 : */
438 16864373 : add_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
439 16926831 : tevent_common_fd_mpx_additional_flags(primary, 0, add_flags);
440 : }
441 :
442 : /*
443 : delete the epoll event for given fd_event
444 : */
445 15194481 : static void epoll_del_event(struct epoll_event_context *epoll_ev,
446 : struct tevent_fd *_primary)
447 : {
448 15194481 : struct tevent_fd *primary = tevent_common_fd_mpx_primary(_primary);
449 47926 : struct epoll_event event;
450 15194481 : uint64_t clear_flags = 0;
451 47926 : int ret;
452 :
453 : /*
454 : * Before me delete the low level epoll state,
455 : * we clear HAS_EVENT on all fdes.
456 : */
457 15194481 : clear_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
458 15194481 : tevent_common_fd_mpx_additional_flags(primary, clear_flags, 0);
459 :
460 : /*
461 : * Delete the low level epoll state to reflect
462 : * the effective flags we want to monitor.
463 : */
464 15194481 : ZERO_STRUCT(event);
465 15194481 : ret = epoll_ctl(epoll_ev->epoll_fd,
466 : EPOLL_CTL_DEL,
467 : primary->fd,
468 : &event);
469 15194481 : if (ret != 0 && errno == ENOENT) {
470 76759 : struct tevent_common_fd_buf pbuf = {};
471 : /*
472 : * This can happen after a epoll_check_reopen
473 : * within epoll_event_fd_destructor.
474 : */
475 76759 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_TRACE,
476 : "EPOLL_CTL_DEL ignoring ENOENT for %s\n",
477 : tevent_common_fd_str(&pbuf, "primary", primary));
478 76759 : return;
479 15117722 : } else if (ret != 0 && errno == EBADF) {
480 1 : struct tevent_common_fd_buf pbuf = {};
481 1 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_WARNING,
482 : "EPOLL_CTL_DEL EBADF for %s - disabling\n",
483 : tevent_common_fd_str(&pbuf, "primary", primary));
484 1 : tevent_common_fd_mpx_disarm_all(primary);
485 1 : return;
486 15117721 : } else if (ret != 0) {
487 0 : struct tevent_common_fd_buf pbuf = {};
488 0 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_FATAL,
489 : "EPOLL_CTL_DEL for %s - failed - %s",
490 : tevent_common_fd_str(&pbuf, "primary", primary),
491 : strerror(errno));
492 0 : epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
493 0 : return;
494 : }
495 : }
496 :
497 : /*
498 : change the epoll event to the given fd_event
499 : */
500 4491390 : static void epoll_mod_event(struct epoll_event_context *epoll_ev,
501 : struct tevent_fd *_primary)
502 : {
503 4491390 : struct tevent_fd *primary = tevent_common_fd_mpx_primary(_primary);
504 4491390 : uint16_t effective_flags = tevent_common_fd_mpx_flags(primary);
505 10459 : struct epoll_event event;
506 4491390 : uint64_t clear_flags = 0;
507 4491390 : uint64_t add_flags = 0;
508 10459 : int ret;
509 :
510 : /*
511 : * Before me modify the low level epoll state,
512 : * we clear HAS_EVENT on all fdes.
513 : */
514 4491390 : clear_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
515 4491390 : tevent_common_fd_mpx_additional_flags(primary, clear_flags, 0);
516 :
517 : /*
518 : * Modify the low level epoll state to reflect
519 : * the effective flags we want to monitor.
520 : */
521 4491390 : ZERO_STRUCT(event);
522 4491390 : event.events = epoll_map_flags(effective_flags);
523 4491390 : event.data.ptr = primary;
524 4491390 : ret = epoll_ctl(epoll_ev->epoll_fd,
525 : EPOLL_CTL_MOD,
526 : primary->fd,
527 : &event);
528 4491390 : if (ret != 0 && errno == EBADF) {
529 0 : struct tevent_common_fd_buf pbuf = {};
530 0 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_ERROR,
531 : "EPOLL_CTL_MOD EBADF for %s - disabling\n",
532 : tevent_common_fd_str(&pbuf, "primary", primary));
533 0 : tevent_common_fd_mpx_disarm_all(primary);
534 0 : return;
535 4491390 : } else if (ret != 0) {
536 0 : struct tevent_common_fd_buf pbuf = {};
537 0 : TEVENT_DEBUG(epoll_ev->ev, TEVENT_DEBUG_FATAL,
538 : "EPOLL_CTL_MOD for %s - failed - %s",
539 : tevent_common_fd_str(&pbuf, "primary", primary),
540 : strerror(errno));
541 0 : epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
542 0 : return;
543 : }
544 :
545 : /*
546 : * Finally re-add HAS_EVENT to all fdes
547 : */
548 4491390 : add_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
549 4501849 : tevent_common_fd_mpx_additional_flags(primary, 0, add_flags);
550 : }
551 :
552 21803734 : static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
553 : {
554 21803734 : struct tevent_fd *primary = tevent_common_fd_mpx_primary(fde);
555 21803734 : uint64_t _paf = primary->additional_flags;
556 21803734 : bool got_error = (_paf & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
557 21803734 : uint16_t effective_flags = tevent_common_fd_mpx_flags(primary);
558 21803734 : bool want_read = (effective_flags & TEVENT_FD_READ);
559 21803734 : bool want_write= (effective_flags & TEVENT_FD_WRITE);
560 21803734 : bool want_error= (effective_flags & TEVENT_FD_ERROR);
561 :
562 : /* there's already an event */
563 21803734 : if (primary->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
564 4622921 : if (want_read || want_error || (want_write && !got_error)) {
565 4491390 : epoll_mod_event(epoll_ev, primary);
566 4491390 : return;
567 : }
568 : /*
569 : * if we want to match the select behavior, we need to remove the epoll_event
570 : * when the caller isn't interested in events.
571 : *
572 : * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
573 : */
574 131531 : epoll_del_event(epoll_ev, primary);
575 131531 : return;
576 : }
577 :
578 : /* there's no epoll_event attached to the fde */
579 17180813 : if (want_read || want_error || (want_write && !got_error)) {
580 16901799 : epoll_add_event(epoll_ev, primary);
581 16901799 : return;
582 : }
583 : }
584 :
585 : /*
586 : event loop handling using epoll
587 : */
588 185398481 : static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
589 : {
590 14558059 : int ret, i;
591 : #define MAXEVENTS 1
592 14558059 : struct epoll_event events[MAXEVENTS];
593 185398481 : int timeout = -1;
594 14558059 : int wait_errno;
595 :
596 185398481 : if (tvalp) {
597 : /* it's better to trigger timed events a bit later than too early */
598 185398481 : timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
599 : }
600 :
601 334897218 : if (epoll_ev->ev->signal_events &&
602 149498737 : tevent_common_check_signal(epoll_ev->ev)) {
603 0 : return 0;
604 : }
605 :
606 185398481 : tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
607 185398481 : ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
608 185398481 : wait_errno = errno;
609 185398481 : tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
610 :
611 185398481 : if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
612 35195130 : if (tevent_common_check_signal(epoll_ev->ev)) {
613 34935771 : return 0;
614 : }
615 : }
616 :
617 150203393 : if (ret == -1 && wait_errno != EINTR) {
618 0 : epoll_panic(epoll_ev, "epoll_wait() failed", true);
619 0 : return -1;
620 : }
621 :
622 150203393 : if (ret == 0 && tvalp) {
623 : /* we don't care about a possible delay here */
624 1883465 : tevent_common_loop_timer_delay(epoll_ev->ev);
625 1883431 : return 0;
626 : }
627 :
628 148319928 : for (i=0;i<ret;i++) {
629 148319755 : struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
630 : struct tevent_fd);
631 148319755 : struct tevent_fd *selected = NULL;
632 14287714 : uint16_t effective_flags;
633 148319755 : uint16_t flags = 0;
634 148319755 : bool got_error = false;
635 :
636 148319755 : if (fde == NULL) {
637 0 : epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
638 0 : return -1;
639 : }
640 148319755 : effective_flags = tevent_common_fd_mpx_flags(fde);
641 148319755 : if (events[i].events & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) {
642 10222693 : uint64_t add_flags = 0;
643 :
644 10222693 : add_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
645 10222693 : tevent_common_fd_mpx_additional_flags(fde,
646 : 0,
647 : add_flags);
648 :
649 10222693 : if (effective_flags & TEVENT_FD_ERROR) {
650 317667 : flags |= TEVENT_FD_ERROR;
651 : }
652 10222693 : if (effective_flags & TEVENT_FD_READ) {
653 10222688 : flags |= TEVENT_FD_READ;
654 : }
655 : }
656 148319755 : if (events[i].events & EPOLLIN) {
657 113945776 : if (effective_flags & TEVENT_FD_READ) {
658 113945776 : flags |= TEVENT_FD_READ;
659 : }
660 : }
661 148319755 : if (events[i].events & EPOLLOUT) {
662 24542104 : if (effective_flags & TEVENT_FD_WRITE) {
663 24542104 : flags |= TEVENT_FD_WRITE;
664 : }
665 : }
666 :
667 148319755 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR)
668 : {
669 10222693 : got_error = true;
670 : }
671 :
672 148319755 : selected = tevent_common_fd_mpx_select(fde, flags, got_error);
673 148319755 : if (selected == NULL) {
674 0 : if (got_error) {
675 : /*
676 : * if we only wait for TEVENT_FD_WRITE, we
677 : * should not tell the event handler about it,
678 : * and remove the epoll_event, as we only
679 : * report errors when waiting for read events,
680 : * to match the select() behavior
681 : *
682 : * Do the same as the poll backend and
683 : * remove the writeable flag.
684 : */
685 0 : tevent_common_fd_mpx_clear_writeable(fde);
686 0 : epoll_update_event(epoll_ev, fde);
687 : }
688 0 : continue;
689 : }
690 :
691 : /*
692 : * make sure we only pass the flags
693 : * the handler is expecting.
694 : */
695 148319755 : flags &= selected->flags;
696 148319755 : return tevent_common_invoke_fd_handler(selected,
697 : flags,
698 : NULL);
699 : }
700 :
701 173 : return 0;
702 : }
703 :
704 : /*
705 : create a epoll_event_context structure.
706 : */
707 82971739 : static int epoll_event_context_init(struct tevent_context *ev)
708 : {
709 3090333 : int ret;
710 3090333 : struct epoll_event_context *epoll_ev;
711 :
712 : /*
713 : * We might be called during tevent_re_initialise()
714 : * which means we need to free our old additional_data.
715 : */
716 82971739 : TALLOC_FREE(ev->additional_data);
717 :
718 82971739 : epoll_ev = talloc_zero(ev, struct epoll_event_context);
719 82971739 : if (!epoll_ev) return -1;
720 82971739 : epoll_ev->ev = ev;
721 82971739 : epoll_ev->epoll_fd = -1;
722 :
723 82971739 : ret = epoll_init_ctx(epoll_ev);
724 82971737 : if (ret != 0) {
725 0 : talloc_free(epoll_ev);
726 0 : return ret;
727 : }
728 :
729 82971737 : ev->additional_data = epoll_ev;
730 82971737 : return 0;
731 : }
732 :
733 : /*
734 : destroy an fd_event
735 : */
736 24716309 : static int epoll_event_fd_destructor(struct tevent_fd *fde)
737 : {
738 24716309 : struct tevent_fd *old_primary = NULL;
739 24716309 : struct tevent_fd *new_primary = NULL;
740 24716309 : struct tevent_fd *update_primary = NULL;
741 24716309 : struct tevent_context *ev = fde->event_ctx;
742 24716309 : struct epoll_event_context *epoll_ev = NULL;
743 24716309 : bool panic_triggered = false;
744 :
745 24716309 : if (ev == NULL) {
746 9620532 : tevent_common_fd_mpx_reinit(fde);
747 9620532 : return tevent_common_fd_destructor(fde);
748 : }
749 :
750 15095777 : epoll_ev = talloc_get_type_abort(ev->additional_data,
751 : struct epoll_event_context);
752 :
753 : /*
754 : * we must remove the event from the list
755 : * otherwise a panic fallback handler may
756 : * reuse invalid memory
757 : */
758 15095777 : DLIST_REMOVE(ev->fd_events, fde);
759 :
760 15095777 : epoll_ev->panic_state = &panic_triggered;
761 15095777 : if (epoll_ev->pid != tevent_cached_getpid()) {
762 64784 : epoll_check_reopen(epoll_ev);
763 64784 : if (panic_triggered) {
764 0 : tevent_common_fd_mpx_reinit(fde);
765 0 : return tevent_common_fd_destructor(fde);
766 : }
767 : }
768 :
769 15095777 : old_primary = tevent_common_fd_mpx_primary(fde);
770 :
771 15095777 : if (old_primary == fde) {
772 15062950 : epoll_del_event(epoll_ev, fde);
773 15062950 : if (panic_triggered) {
774 0 : tevent_common_fd_mpx_reinit(fde);
775 0 : return tevent_common_fd_destructor(fde);
776 : }
777 : }
778 :
779 15095777 : new_primary = tevent_common_fd_mpx_remove(fde);
780 15095777 : if (new_primary == NULL) {
781 15058390 : epoll_ev->panic_state = NULL;
782 15058390 : return tevent_common_fd_destructor(fde);
783 : }
784 37387 : update_primary = tevent_common_fd_mpx_update(new_primary);
785 37387 : if (update_primary == NULL) {
786 2 : epoll_ev->panic_state = NULL;
787 2 : return tevent_common_fd_destructor(fde);
788 : }
789 :
790 37385 : epoll_update_event(epoll_ev, update_primary);
791 37385 : if (panic_triggered) {
792 0 : return tevent_common_fd_destructor(fde);
793 : }
794 37385 : epoll_ev->panic_state = NULL;
795 :
796 37385 : return tevent_common_fd_destructor(fde);
797 : }
798 :
799 : /*
800 : add a fd based event
801 : return NULL on failure (memory allocation error)
802 : */
803 16188746 : static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
804 : int fd, uint16_t flags,
805 : tevent_fd_handler_t handler,
806 : void *private_data,
807 : const char *handler_name,
808 : const char *location)
809 : {
810 57368 : struct epoll_event_context *epoll_ev =
811 16188746 : talloc_get_type_abort(ev->additional_data,
812 : struct epoll_event_context);
813 57366 : struct tevent_fd *fde;
814 16188744 : bool panic_triggered = false;
815 16188744 : pid_t old_pid = epoll_ev->pid;
816 :
817 16188744 : fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
818 : handler, private_data,
819 : handler_name, location);
820 16188746 : if (!fde) return NULL;
821 :
822 16188746 : talloc_set_destructor(fde, epoll_event_fd_destructor);
823 :
824 : /*
825 : * prepare for tevent_common_fd_mpx_flags()
826 : * in epoll_update_event()
827 : */
828 16188746 : tevent_common_fd_mpx_update_flags(fde);
829 :
830 16188746 : if (epoll_ev->pid != tevent_cached_getpid()) {
831 410 : epoll_ev->panic_state = &panic_triggered;
832 410 : epoll_check_reopen(epoll_ev);
833 410 : if (panic_triggered) {
834 0 : return fde;
835 : }
836 410 : epoll_ev->panic_state = NULL;
837 : }
838 :
839 16188746 : if (epoll_ev->pid == old_pid) {
840 16188336 : epoll_update_event(epoll_ev, fde);
841 : }
842 :
843 16131378 : return fde;
844 : }
845 :
846 : /*
847 : set the fd event flags
848 : */
849 25195873 : static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
850 : {
851 76998 : struct tevent_context *ev;
852 76998 : struct epoll_event_context *epoll_ev;
853 25195873 : bool panic_triggered = false;
854 76998 : pid_t old_pid;
855 :
856 25195873 : if (fde->flags == flags) return;
857 :
858 4988636 : ev = fde->event_ctx;
859 4988636 : epoll_ev = talloc_get_type_abort(ev->additional_data,
860 : struct epoll_event_context);
861 4988636 : old_pid = epoll_ev->pid;
862 :
863 4988636 : fde->flags = flags;
864 : /*
865 : * prepare for tevent_common_fd_mpx_flags()
866 : * in epoll_update_event()
867 : */
868 4988636 : tevent_common_fd_mpx_update_flags(fde);
869 :
870 4988636 : if (epoll_ev->pid != tevent_cached_getpid()) {
871 7364 : epoll_ev->panic_state = &panic_triggered;
872 7364 : epoll_check_reopen(epoll_ev);
873 7364 : if (panic_triggered) {
874 0 : return;
875 : }
876 7364 : epoll_ev->panic_state = NULL;
877 : }
878 :
879 4988636 : if (epoll_ev->pid == old_pid) {
880 4981272 : epoll_update_event(epoll_ev, fde);
881 : }
882 : }
883 :
884 : /*
885 : do a single event loop using the events defined in ev
886 : */
887 421992805 : static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
888 : {
889 20942691 : struct epoll_event_context *epoll_ev =
890 421992805 : talloc_get_type_abort(ev->additional_data,
891 : struct epoll_event_context);
892 20942691 : struct timeval tval;
893 421992805 : bool panic_triggered = false;
894 :
895 589367078 : if (ev->signal_events &&
896 167374286 : tevent_common_check_signal(ev)) {
897 329290 : return 0;
898 : }
899 :
900 421292159 : if (ev->threaded_contexts != NULL) {
901 1723668 : tevent_common_threaded_activate_immediate(ev);
902 : }
903 :
904 450794423 : if (ev->immediate_events &&
905 29518637 : tevent_common_loop_immediate(ev)) {
906 29234199 : return 0;
907 : }
908 :
909 391773522 : tval = tevent_common_loop_timer_delay(ev);
910 391773474 : if (tevent_timeval_is_zero(&tval)) {
911 200629840 : return 0;
912 : }
913 :
914 185398481 : if (epoll_ev->pid != tevent_cached_getpid()) {
915 0 : epoll_ev->panic_state = &panic_triggered;
916 0 : epoll_ev->panic_force_replay = true;
917 0 : epoll_check_reopen(epoll_ev);
918 0 : if (panic_triggered) {
919 0 : errno = EINVAL;
920 0 : return -1;
921 : }
922 0 : epoll_ev->panic_force_replay = false;
923 0 : epoll_ev->panic_state = NULL;
924 : }
925 :
926 185398481 : return epoll_event_loop(epoll_ev, &tval);
927 : }
928 :
929 : static const struct tevent_ops epoll_event_ops = {
930 : .context_init = epoll_event_context_init,
931 : .add_fd = epoll_event_add_fd,
932 : .set_fd_close_fn = tevent_common_fd_set_close_fn,
933 : .get_fd_flags = tevent_common_fd_get_flags,
934 : .set_fd_flags = epoll_event_set_fd_flags,
935 : .add_timer = tevent_common_add_timer_v2,
936 : .schedule_immediate = tevent_common_schedule_immediate,
937 : .add_signal = tevent_common_add_signal,
938 : .loop_once = epoll_event_loop_once,
939 : .loop_wait = tevent_common_loop_wait,
940 : };
941 :
942 77927 : _PRIVATE_ bool tevent_epoll_init(void)
943 : {
944 77927 : return tevent_register_backend("epoll", &epoll_event_ops);
945 : }
|