1 | /***************************************
2 | $Revision: 1.8 $
3 |
4 | Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the
5 | socket for new data and discards it. If the
6 | socket is closed, it triggers predefined
7 | functions - executes a function and/or
8 | cancels a thread.
9 |
10 | Status: NOT REVUED, TESTED
11 |
12 | Design and implementation by Marek Bukowy.
13 |
14 | Modification history:
15 | marek (August 2000) Created the watchdog part
16 | marek (December 2000) Modified watchdog deactivation -
17 | replaced signals by pthread cancellation.
18 | ******************/ /******************
19 | Copyright (c) 1999,2000,2001,2002 RIPE NCC
20 |
21 | All Rights Reserved
22 |
23 | Permission to use, copy, modify, and distribute this software and its
24 | documentation for any purpose and without fee is hereby granted,
25 | provided that the above copyright notice appear in all copies and that
26 | both that copyright notice and this permission notice appear in
27 | supporting documentation, and that the name of the author not be
28 | used in advertising or publicity pertaining to distribution of the
29 | software without specific, written prior permission.
30 |
31 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
32 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
33 | AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
34 | DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
35 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
36 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37 | ***************************************/
38 |
39 | #include "rip.h"
40 |
41 | /*+ Uncomment this to use watchdog deactivation by signal (may be risky)
42 |
43 | #define WATCHDOG_BY_SIGNAL
44 | +*/
45 |
/* One-time initialisation guard: SK_init() hands this to pthread_once()
   together with sk_real_init(), so repeated SK_init() calls are harmless. */
static pthread_once_t sk_init_once = PTHREAD_ONCE_INIT;
47 |
48 | #ifdef WATCHDOG_BY_SIGNAL
49 |
50 | /*+ The signal version is complicated to cope with all timing situations.
51 | It uses a thread specific flag to see if the signal handler was invoked
52 | in case the signal arrives before select(3) is called in watchdog.
53 | +*/
54 |
/* thread specific flag: the key is created in sk_real_init(); the watchdog
   resets the value to NULL before it starts selecting and func_sigusr()
   sets it to a non-NULL value when the signal handler has been invoked */
static pthread_key_t sk_watch_tsd;
57 |
58 | /*++++++++++++++++++++++++++++++++++++++
59 | initialisation for the SIGNAL cancellation mode
60 | - initialises the thread specific flag.
61 | ++++++++++++++++++++++++++++++++++++++*/
62 | static void sk_real_init(void)
63 | {
64 | dieif( pthread_key_create( &sk_watch_tsd, NULL) != 0 );
65 | }
66 |
67 |
68 | /*++++++++++++++++++++++++++++++++++++++
69 | sk_watchdog signal handler - sets the thread-specific flag.
70 |
71 | int n signal received. (not used)
72 | ++++++++++++++++++++++++++++++++++++++*/
73 | static void func_sigusr(int n) {
74 | #if 0
75 | /* just for debugging - we don't check the value here */
76 | int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd);
77 | #endif
78 |
79 | /* 2000/12/18 MB:
80 | DEADLOCK has happened - the watchdog was just getting a mutex
81 | for the ER rwlock when a signal arrived and the execution of the
82 | pthread_mutex_lock function was interrupted AFTER the lock was
83 | grabbed. The this handler was invoked and tried to get that mutex
84 | again. As a result, everything stopped.
85 |
86 | Cures:
87 | 1. Not invoke this here:
88 | ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n);
89 |
90 | 2. Not accept any signals during any pthread calls so that this
91 | does not happen again. Must be reimplemented with pthread_cancel
92 | and all the signal stuff must go away. (Done, 2000/12/19).
93 | */
94 | /* set a thread-specific flag that the handler was invoked */
95 |
96 | pthread_setspecific(sk_watch_tsd, (void *)1 );
97 | }
98 |
99 | /*++++++++++++++++++++++++++++++++++++++
100 | watchdog (SIGNAL VERSION) - started as a separate thread.
101 |
102 | Selects on the given socket; discards all input.
103 | whenever it sees end of file (socket closed), it
104 | * sets a corresponding flag in the condat structure,
105 | * triggers the predefined actions (by SK_watchtrigger).
106 |
107 | void *arg - pointer to the connection data structure
108 | ++++++++++++++++++++++++++++++++++++++*/
109 | static
110 | void *sk_watchdog(void *arg)
111 | {
112 | sk_conn_st *condat = (sk_conn_st *) arg;
113 | int nready;
114 | int n;
115 | fd_set rset;
116 | char buff[STR_S];
117 | int socket = condat->sock;
118 | sigset_t sset;
119 | struct sigaction act;
120 |
121 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
122 |
123 | FD_ZERO(&rset);
124 | FD_SET(socket, &rset);
125 |
126 | sigemptyset(&sset);
127 | sigaddset(&sset, SIGUSR2);
128 |
129 | act.sa_handler = func_sigusr;
130 | act.sa_flags = 0;
131 | dieif(sigaction(SIGUSR2, &act, NULL) != 0);
132 |
133 | /* XXX in fact, it's unblocked already. Should be blocked on startup */
134 | dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0);
135 |
136 | /* clear the handler's flag */
137 | pthread_setspecific(sk_watch_tsd, NULL);
138 |
139 | /* now ready for signal */
140 | pthread_mutex_unlock( & condat->watchmutex );
141 |
142 | /* hey, viva threaded signal handling! There is no way for select
143 | to unblock a blocked signal, It must be done by "hand" (above).
144 |
145 | Consequently, every once in a while, the signal will be delivered
146 | before the select starts :-/. So, we have to introduce a timeout
147 | for select and check if the signal was delivered anyway....aARGH!!!
148 |
149 | This adds a <timeout interval> to unlucky queries, about 0.1% of all.
150 | */
151 |
152 | while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) {
153 |
154 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
155 |
156 | /* don't even try to read if we have been killed */
157 | if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) {
158 | break;
159 | }
160 |
161 | /* retry if the timeout has triggered */
162 | if( nready == 0 ) {
163 | continue;
164 | }
165 |
166 | /* There was some input or client half of connection was closed */
167 | /* Check for the latter */
168 | if (( n=read(socket, buff, sizeof(buff))) == 0) {
169 | /* Connection was closed by client */
170 | /* Now send a cancellation request to the whois thread. */
171 | /* mysql thread will be terminated by thread cleanup routine */
172 |
173 | /* call the actions: kill and exec (the SK_ functions called
174 | check if the action is defined. Will set the RTC flag on condat
175 | */
176 | SK_watchtrigger(condat);
177 |
178 | /* quit */
179 | break;
180 | }
181 | /* Otherwise dump input and continue */
182 |
183 | }
184 |
185 | /* Exit the watchdog thread, passing NULL as we don't expect a join */
186 | pthread_exit(NULL);
187 |
188 | /* oh yes. Shouldn't compilers _recognize_ library functions ? */
189 | return NULL;
190 | }
191 |
192 |
193 | #else /* not WATCHDOG_BY_SIGNAL */
194 |
195 |
196 | /*++++++++++++++++++++++++++++++++++++++
197 | watchdog (CANCEL VERSION) - started as a separate thread.
198 |
199 | Selects on the given socket; discards all input.
200 | whenever it sees end of file (socket closed), it
201 | * sets a corresponding flag in the condat structure,
202 | * triggers the predefined actions (by SK_watchtrigger).
203 |
204 | void *arg - pointer to the connection data structure
205 | ++++++++++++++++++++++++++++++++++++++*/
206 | static
207 | void *sk_watchdog(void *arg)
208 | {
209 | sk_conn_st *condat = (sk_conn_st *) arg;
210 | int nready;
211 | int n;
212 | char buff[STR_S];
213 | int socket = condat->sock;
214 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
215 | fd_set rset;
216 |
217 | /* this is to allow cancellation of the select(3) call */
218 | pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
219 |
220 | /* now ready for the cancellation */
221 | pthread_mutex_unlock( & condat->watchmutex );
222 |
223 | FD_ZERO(&rset);
224 | FD_SET(socket, &rset);
225 | do {
226 | /* run the select exposed to cancellation */
227 | pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
228 | nready=select(socket+1, &rset, NULL, NULL, &timeout);
229 | pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
230 |
231 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
232 | /* quit on error */
233 | if( nready < 0 ) {
234 | break;
235 | }
236 |
237 | /* retry if the timeout has triggered */
238 | if( nready == 0 ) {
239 | continue;
240 | }
241 |
242 | /* There was some input or client half of connection was closed */
243 | /* Check for the latter */
244 | if (( n=read(socket, buff, sizeof(buff))) == 0) {
245 | /* Connection was closed by client */
246 | /* Now send a cancellation request to the whois thread. */
247 | /* mysql thread will be terminated by thread cleanup routine */
248 |
249 | /* call the actions: kill and exec (the SK_ functions called
250 | check if the action is defined. Will set the RTC flag on condat
251 | */
252 | SK_watchtrigger(condat);
253 |
254 | /* quit */
255 | break;
256 | }
257 | /* Otherwise dump input and continue */
258 |
259 | } while(nready != -1);
260 |
261 | return NULL; /* quit */
262 | }
263 |
264 |
/*++++++++++++++++++++++++++++++++++++++
  One-time set-up for the PTHREAD_CANCEL deactivation mode.

  This mode needs no initialisation, so the function is deliberately
  empty; it exists so that SK_init can refer to sk_real_init in both
  modes.
  ++++++++++++++++++++++++++++++++++++++*/
static void
sk_real_init(void)
{
  /* nothing to do */
}
271 |
272 | #endif /* WATCHDOG_BY_SIGNAL */
273 |
274 |
275 | /*++++++++++++++++++++++++++++++++++++++
276 | starts sk_watchdog thread unless already started,
277 | and registers its threadid in the condat structure
278 |
279 | dies if watchdog already running
280 |
281 | er_ret_t SK_watchstart Returns SK_OK on success.
282 |
283 | sk_conn_st *condat pointer to the connection data structure
284 |
285 | The structure may (and normally, should) contain the predefined actions
286 | set by SK_watch_set... functions.
287 | ++++++++++++++++++++++++++++++++++++++*/
288 | er_ret_t
289 | SK_watchstart(sk_conn_st *condat)
290 | {
291 | pthread_attr_t attr;
292 | size_t ssize;
293 |
294 | dieif( condat->watchdog != 0 );
295 |
296 | dieif(pthread_attr_init(&attr) != 0);
297 |
298 | #if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
299 | defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE)
300 | /*********
301 | For SCO, we need to increase the stack size, because the default is
302 | exceedingly small. This also works on FreeBSD. In Solaris, the
303 | stack size is 0, which is interpreted as the default, meaning 1
304 | Mbyte for 32-bit processes or 2 Mbyte for 64-bit processes.
305 | However, trying to *set* the stack size to 0 results in an error.
306 | Therefore, we don't want to set the size to 0. Probably not a good
307 | idea in any event. :) Linux doesn't support this function (as of
308 | the 2.4.2 kernel).
309 |
310 | Note: see also modules/th/thread.c
311 | *********/
312 | dieif(pthread_attr_getstacksize(&attr, &ssize) != 0);
313 | if (ssize > 0) {
314 | dieif(pthread_attr_setstacksize(&attr, ssize * 4) != 0);
315 | }
316 | #endif
317 |
318 | /* init the mutex in locked state, watchdog will unlock it when
319 | it's ready for signal/cancellation */
320 | pthread_mutex_init( & condat->watchmutex, NULL );
321 | pthread_mutex_lock( & condat->watchmutex );
322 |
323 | /*
324 | Linux doesn't seem to like getting signals in select(), which isn't
325 | technically allowed by POSIX. The workaround in this case is simply
326 | to not create a watchdog for Linux. This is probably acceptable
327 | because we will be changing the query path to perform queries in small
328 | chunks, so if a disconnect occurs it won't consume a lot of database
329 | resources in any case, even without a watchdog.
330 | */
331 | #ifndef __linux__
332 | /* NOT DETACHED! */
333 | pthread_create(&condat->watchdog, &attr, sk_watchdog, (void *) condat );
334 | #endif /* __linux__ */
335 |
336 | dieif(pthread_attr_destroy(&attr) != 0);
337 |
338 | return SK_OK;
339 | }
340 |
341 |
342 | /*++++++++++++++++++++++++++++++++++++++
343 |
344 | stops running sk_watchdog thread.
345 | If it is not running ( == not registered in the connection struct),
346 | it does nothing.
347 |
348 | er_ret_t SK_watchstop always succeeds (returns SK_OK)
349 |
350 | sk_conn_st *condat pointer to the connection data structure
351 | ++++++++++++++++++++++++++++++++++++++*/
352 | er_ret_t
353 | SK_watchstop(sk_conn_st *condat)
354 | {
355 | void *res;
356 |
357 | if(condat->watchdog > 0) {
358 | int ret;
359 |
360 | /* wait until the watchdog is ready for signal */
361 | pthread_mutex_lock( & condat->watchmutex );
362 |
363 | #ifdef WATCHDOG_BY_SIGNAL
364 | ret = pthread_kill(condat->watchdog, SIGUSR2);
365 | #else
366 | ret = pthread_cancel(condat->watchdog);
367 | #endif
368 |
369 | ret = pthread_join(condat->watchdog, &res);
370 |
371 | pthread_mutex_destroy( & condat->watchmutex );
372 | condat->watchdog = 0;
373 | }
374 | return SK_OK;
375 | }
376 |
377 |
378 | /*++++++++++++++++++++++++++++++++++++++
379 |
380 | void SK_watch_setkill sets the thread id of the thread to be
381 | cancelled by the watchdog watching this socket.
382 | 0 (default) means do not cancel anything.
383 |
384 | sk_conn_st *condat pointer to the connection data structure.
385 |
386 | pthread_t killthis thread id of the thread to be cancelled, or 0.
387 | ++++++++++++++++++++++++++++++++++++++*/
388 | void
389 | SK_watch_setkill(sk_conn_st *condat, pthread_t killthis)
390 | {
391 | condat->killthis = killthis;
392 | }
393 |
394 |
395 | /*++++++++++++++++++++++++++++++++++++++
396 |
397 | void SK_watch_setexec sets the function to be invoked by the watchdog
398 | watching this socket. NULL (default) means do
399 | not invoke anything.
400 |
401 | sk_conn_st *condat pointer to the connection data structure.
402 |
403 | void *(*function)(void *) function to be invoked
404 |
405 | void *args argument to be passed to the function.
406 |
407 | ++++++++++++++++++++++++++++++++++++++*/
408 | void
409 | SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args)
410 | {
411 | condat->execthis = function;
412 | condat->execargs = args;
413 | }
414 |
415 |
416 | /*++++++++++++++++++++++++++++++++++++++
417 |
418 | void SK_watch_setclear clears the function and thread id fields so that
419 | nothing gets cancelled or invoked by the
420 | watchdog.
421 |
422 | sk_conn_st *condat pointer to the connection data structure.
423 |
424 | ++++++++++++++++++++++++++++++++++++++*/
425 | void
426 | SK_watch_setclear(sk_conn_st *condat)
427 | {
428 | condat->execthis = NULL;
429 | condat->execargs = NULL;
430 | condat->killthis = 0;
431 | }
432 |
433 | /* call the function to be called if defined */
434 |
435 |
436 | /*++++++++++++++++++++++++++++++++++++++
437 |
438 | void SK_watchexec invokes the predefined function if defined.
439 | (usually called from the watchdog).
440 | Also sets the reason-to-close
441 | flag on this connection to SK_INTERRUPT.
442 |
443 | sk_conn_st *condat pointer to the connection data structure.
444 |
445 | ++++++++++++++++++++++++++++++++++++++*/
446 | void
447 | SK_watchexec(sk_conn_st *condat)
448 | {
449 | /* set the reason-to-close flag on this connection */
450 | condat->rtc |= SK_INTERRUPT;
451 |
452 | if( condat->execthis != NULL ) {
453 | condat->execthis(condat->execargs);
454 | }
455 | }
456 |
457 | /* cancel the thread to be cancelled if defined */
458 |
459 |
460 | /*++++++++++++++++++++++++++++++++++++++
461 |
462 | void SK_watchkill cancels the predefined thread if defined.
463 | (usually called from the watchdog).
464 | Also sets the reason-to-close
465 | flag on this connection to SK_INTERRUPT.
466 |
467 | sk_conn_st *condat pointer to the connection data structure.
468 |
469 | ++++++++++++++++++++++++++++++++++++++*/
470 | void
471 | SK_watchkill(sk_conn_st *condat) {
472 |
473 | /* set the reason-to-close flag on this connection */
474 | condat->rtc |= SK_INTERRUPT;
475 |
476 | /* cancel thread if defined */
477 | if( condat->killthis != 0 ) {
478 | pthread_cancel(condat->killthis);
479 | /* The only possible error is ESRCH, so we do not care about it*/
480 | }
481 | }
482 |
483 |
484 | /*++++++++++++++++++++++++++++++++++++++
485 |
486 | void SK_watchtrigger Wrapper around SK_watchkill and SK_watchexec.
487 | First executes the function, then cancels the
488 | thread.
489 |
490 | sk_conn_st *condat pointer to the connection data structure.
491 |
492 | ++++++++++++++++++++++++++++++++++++++*/
493 | void SK_watchtrigger(sk_conn_st *condat)
494 | {
495 | SK_watchexec(condat);
496 | SK_watchkill(condat);
497 | }
498 |
499 |
500 | /*++++++++++++++++++++++++++++++++++++++
501 | Initialisation function, should be called exactly once
502 | (well, it ignores repeated calls). The actions depend on cancellation
503 | mode (signal or pthread_cancel).
504 | ++++++++++++++++++++++++++++++++++++++*/
505 | void SK_init(void)
506 | {
507 | /* can be called only once */
508 | pthread_once( &sk_init_once, sk_real_init);
509 | }