|
@@ -37,8 +37,10 @@ int responder_loop()
|
37
|
37
|
struct timespec timeout;
|
38
|
38
|
/**@brief watchdog timeout */
|
39
|
39
|
struct timespec pool_timeout;
|
40
|
|
- short idle;
|
|
40
|
+ time_t idle_start, busy_start;
|
|
41
|
+ short idle, busy;
|
41
|
42
|
struct sigaction act;
|
|
43
|
+ char *statusstr;
|
42
|
44
|
|
43
|
45
|
act.sa_handler = pool_sighandler;
|
44
|
46
|
sigemptyset(&act.sa_mask);
|
|
@@ -57,7 +59,7 @@ int responder_loop()
|
57
|
59
|
sop.sem_flg = 0;
|
58
|
60
|
timeout.tv_sec = 0;
|
59
|
61
|
timeout.tv_nsec = 100000000;
|
60
|
|
- idle = 0;
|
|
62
|
+ idle = busy = 0;
|
61
|
63
|
|
62
|
64
|
pyfcgi_logger_set_ident("Workpool");
|
63
|
65
|
|
|
@@ -126,53 +128,32 @@ int responder_loop()
|
126
|
128
|
ret);
|
127
|
129
|
continue;
|
128
|
130
|
}
|
129
|
|
- idle=0;
|
130
|
|
- sop.sem_op = -1;
|
131
|
|
- ret = semtimedop(semid, &sop, 1, &timeout);
|
132
|
|
- sop.sem_op = 0;
|
133
|
|
- if(ret < 0)
|
134
|
|
- {
|
135
|
|
- err = errno;
|
136
|
|
- if(err != EAGAIN) //can fail if wrokers timeout
|
137
|
|
- {
|
138
|
|
- pyfcgi_log(LOG_ALERT,
|
139
|
|
- "Unable to dec sem after child exit : %s",
|
140
|
|
- strerror(err));
|
141
|
|
- clean_exit(err);
|
142
|
|
- }
|
143
|
|
- }
|
144
|
|
- if(status)
|
|
131
|
+ if(WIFSIGNALED(status))
|
145
|
132
|
{
|
146
|
|
- if(WIFSIGNALED(status))
|
147
|
|
- {
|
148
|
|
- if(WTERMSIG(status) == 11)
|
149
|
|
- {
|
150
|
|
- pyfcgi_log(LOG_ALERT,
|
151
|
|
- "Worker[%d] segfault !",
|
152
|
|
- n);
|
153
|
|
- }
|
154
|
|
- else
|
155
|
|
- {
|
156
|
|
- pyfcgi_log(LOG_ALERT,
|
157
|
|
- "Worker[%d] terminated by signal %d",
|
158
|
|
- n, WTERMSIG(status));
|
159
|
|
- }
|
160
|
|
- }
|
161
|
|
- if(WEXITSTATUS(status) & PYFCGI_FATAL)
|
|
133
|
+ if(WTERMSIG(status) == 11)
|
162
|
134
|
{
|
163
|
135
|
pyfcgi_log(LOG_ALERT,
|
164
|
|
- "Worker[%d] exited with status FATAL",
|
|
136
|
+ "Worker[%d] segfault !",
|
165
|
137
|
n);
|
166
|
|
- //TODO : restart ?
|
167
|
138
|
}
|
168
|
139
|
else
|
169
|
140
|
{
|
170
|
|
- pyfcgi_log(LOG_WARNING,
|
171
|
|
- "Worker[%d] exited with status %d",
|
172
|
|
- n, WEXITSTATUS(status));
|
|
141
|
+ pyfcgi_log(LOG_ALERT,
|
|
142
|
+ "Worker[%d] terminated by signal %s(%d)",
|
|
143
|
+ n, strsignal(WTERMSIG(status)),
|
|
144
|
+ WTERMSIG(status));
|
173
|
145
|
}
|
174
|
146
|
}
|
175
|
|
- else
|
|
147
|
+ if(WEXITSTATUS(status))
|
|
148
|
+ {
|
|
149
|
+ statusstr = status2str(WEXITSTATUS(status));
|
|
150
|
+ pyfcgi_log((WEXITSTATUS(status)&PYFCGI_FATAL)?
|
|
151
|
+ LOG_ALERT:LOG_WARNING,
|
|
152
|
+ "Worker[%d] exited with status %s",
|
|
153
|
+ n, statusstr);
|
|
154
|
+ free(statusstr);
|
|
155
|
+ }
|
|
156
|
+ if(!status)
|
176
|
157
|
{
|
177
|
158
|
pyfcgi_log(LOG_INFO,
|
178
|
159
|
"Worker[%d] PID %d exited normally",
|
|
@@ -180,73 +161,88 @@ int responder_loop()
|
180
|
161
|
}
|
181
|
162
|
|
182
|
163
|
// respawn on same slot
|
183
|
|
- pyfcgi_log(LOG_INFO, "respawn #%d", n);
|
|
164
|
+ pyfcgi_log(LOG_DEBUG, "respawning worker #%d", n);
|
184
|
165
|
wrk_pids[n] = spawn(n);
|
185
|
166
|
|
186
|
|
-
|
187
|
|
- }
|
188
|
|
- // Stopping & deleting useless childs
|
189
|
|
- if(wanted_n < n_wrk)
|
190
|
|
- { // need to shift the list and dec n_wrk
|
191
|
|
- pyfcgi_log(LOG_DEBUG, "GC Workers");
|
192
|
|
- n_wrk--;
|
193
|
|
- kill(wrk_pids[n_wrk], SIGTERM);
|
194
|
|
- nanosleep(&timeout, NULL);
|
195
|
|
- kill(wrk_pids[n_wrk], SIGKILL);
|
196
|
|
- nanosleep(&timeout, NULL);
|
197
|
|
- if( (ret = waitpid(wrk_pids[n_wrk], &status, WNOHANG)) < 0 )
|
198
|
|
- {
|
199
|
|
- pyfcgi_log(LOG_ERR, "Unable to kill child %d (PID %d)",
|
200
|
|
- n_wrk, wrk_pids[n_wrk]);
|
201
|
|
- }
|
202
|
|
- else
|
203
|
|
- {
|
204
|
|
- pyfcgi_log(LOG_INFO, "worker[%d](%d) killed",
|
205
|
|
- n_wrk, wrk_pids[n_wrk]);
|
206
|
|
- }
|
207
|
167
|
continue;
|
208
|
168
|
}
|
209
|
169
|
|
|
170
|
+ // Check if the pool is idle or busy
|
210
|
171
|
ret = semtimedop(semid, &sop, 1, &timeout);
|
211
|
|
-//pyfcgi_log( LOG_DEBUG, "semtimeop ret=%d want %d have %d", ret, wanted_n, n_wrk);
|
212
|
172
|
if(ret < 0)
|
213
|
173
|
{
|
214
|
174
|
err = errno;
|
215
|
|
- if(err == EAGAIN)
|
|
175
|
+ if(err != EAGAIN)
|
216
|
176
|
{
|
217
|
|
-//pyfcgi_log(LOG_DEBUG, "IDLE want %d have %d\t min=%d", wanted_n, n_wrk, min_wrk);
|
218
|
|
- // workers idle
|
219
|
|
- if(!idle)
|
220
|
|
- {
|
221
|
|
- idle = 1;
|
222
|
|
- }
|
223
|
|
- else if(wanted_n > PyFCGI_conf.min_wrk
|
224
|
|
- && n_wrk - wanted_n < 2)
|
225
|
|
- {
|
226
|
|
- wanted_n--;
|
227
|
|
- }
|
228
|
|
- continue;
|
|
177
|
+ pyfcgi_log(LOG_ERR, "Unable to read semaphore : %s",
|
|
178
|
+ strerror(err));
|
|
179
|
+ exit(PYFCGI_FATAL);
|
|
180
|
+ }
|
|
181
|
+ // workers idle
|
|
182
|
+ busy = 0;
|
|
183
|
+ if(!idle)
|
|
184
|
+ {
|
|
185
|
+ idle = 1;
|
|
186
|
+ idle_start = time(NULL);
|
|
187
|
+ }
|
|
188
|
+ else if((time(NULL) - idle_start) > PyFCGI_conf.worker_gc_timeout &&
|
|
189
|
+ wanted_n > PyFCGI_conf.min_wrk
|
|
190
|
+ && n_wrk - wanted_n < 2)
|
|
191
|
+ {
|
|
192
|
+ wanted_n--;
|
|
193
|
+ idle = 0;
|
229
|
194
|
}
|
230
|
|
- pyfcgi_log(LOG_ERR, "Unable to read semaphore : %s",
|
231
|
|
- strerror(err));
|
232
|
195
|
}
|
233
|
|
- if(!ret)
|
|
196
|
+ else if(!ret)
|
234
|
197
|
{
|
235
|
|
- if(n_wrk < PyFCGI_conf.max_wrk)
|
|
198
|
+ idle = 0;
|
|
199
|
+ if(!busy)
|
|
200
|
+ {
|
|
201
|
+ busy = 1;
|
|
202
|
+ busy_start = time(NULL);
|
|
203
|
+ }
|
|
204
|
+ else if(time(NULL) - busy_start > 0 &&
|
|
205
|
+ wanted_n <= PyFCGI_conf.max_wrk)
|
236
|
206
|
{
|
237
|
|
- idle=0;
|
238
|
207
|
pyfcgi_log( LOG_DEBUG,
|
239
|
208
|
"All workers busy, spawning a new one");
|
240
|
209
|
n = n_wrk;
|
241
|
210
|
n_wrk++;
|
242
|
211
|
wanted_n = n_wrk;
|
243
|
212
|
wrk_pids[n] = spawn(n);
|
|
213
|
+ if(!PyFCGI_conf.worker_fast_spawn)
|
|
214
|
+ {
|
|
215
|
+ busy_start = time(NULL);
|
|
216
|
+ }
|
|
217
|
+ }
|
|
218
|
+ }
|
|
219
|
+
|
|
220
|
+ // Stopping & deleting useless children
|
|
221
|
+ if(wanted_n < n_wrk && idle)
|
|
222
|
+ { // need to shift the list and dec n_wrk
|
|
223
|
+ busy = 0;
|
|
224
|
+ n_wrk--;
|
|
225
|
+ kill(wrk_pids[n_wrk], SIGTERM);
|
|
226
|
+ nanosleep(&timeout, NULL);
|
|
227
|
+ if( (ret = waitpid(wrk_pids[n_wrk], &status, WNOHANG)) < 0 )
|
|
228
|
+ {
|
|
229
|
+ pyfcgi_log(LOG_ERR, "Pool idle since %ds but unable to kill child %d (PID %d)",
|
|
230
|
+ PyFCGI_conf.worker_gc_timeout,
|
|
231
|
+ n_wrk, wrk_pids[n_wrk]);
|
|
232
|
+ kill(wrk_pids[n_wrk], SIGKILL);
|
244
|
233
|
}
|
245
|
234
|
else
|
246
|
235
|
{
|
247
|
|
- nanosleep(&timeout, NULL);
|
|
236
|
+ pyfcgi_log(LOG_INFO, "Pool idle since %ds : worker[%d](%d) killed",
|
|
237
|
+ PyFCGI_conf.worker_gc_timeout,
|
|
238
|
+ n_wrk, wrk_pids[n_wrk]);
|
248
|
239
|
}
|
|
240
|
+ idle = 0;
|
|
241
|
+ continue;
|
249
|
242
|
}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+ nanosleep(&timeout, NULL);
|
250
|
246
|
}
|
251
|
247
|
|
252
|
248
|
pyfcgi_wd_arm();
|
|
@@ -267,17 +263,12 @@ int responder_loop()
|
267
|
263
|
pid_t spawn(int wrk_id)
|
268
|
264
|
{
|
269
|
265
|
pid_t res;
|
270
|
|
- struct timespec timeout;
|
271
|
266
|
struct timespec wd_timeout;
|
272
|
267
|
struct sigaction act;
|
273
|
268
|
char ident[128];
|
274
|
269
|
|
275
|
|
- timeout.tv_sec = 0;
|
276
|
|
- timeout.tv_nsec = 100000000;
|
277
|
|
-
|
278
|
270
|
act.sa_handler = worker_sighandler;
|
279
|
271
|
sigemptyset(&act.sa_mask);
|
280
|
|
- sigaddset(&act.sa_mask, SIGTERM);
|
281
|
272
|
act.sa_flags = 0;
|
282
|
273
|
act.sa_restorer = NULL;
|
283
|
274
|
|
|
@@ -292,6 +283,8 @@ pid_t spawn(int wrk_id)
|
292
|
283
|
else if(!res)
|
293
|
284
|
{
|
294
|
285
|
// Child process
|
|
286
|
+ PyFCGI_conf.context.ppid = PyFCGI_conf.context.pid;
|
|
287
|
+ PyFCGI_conf.context.pid = getpid();
|
295
|
288
|
snprintf(ident, 128, "Worker%2d", wrk_id);
|
296
|
289
|
pyfcgi_logger_set_ident(ident);
|
297
|
290
|
// Set handler for SIGINT & SIGTERM
|
|
@@ -300,6 +293,11 @@ pid_t spawn(int wrk_id)
|
300
|
293
|
perror("Sigaction error for pool process");
|
301
|
294
|
exit(PYFCGI_FATAL);
|
302
|
295
|
}
|
|
296
|
+ if(sigaction(SIGTERM, &act, NULL))
|
|
297
|
+ {
|
|
298
|
+ perror("Sigaction2 error for pool process");
|
|
299
|
+ exit(PYFCGI_FATAL);
|
|
300
|
+ }
|
303
|
301
|
// Set watchdog
|
304
|
302
|
if(PyFCGI_conf.worker_timeout)
|
305
|
303
|
{
|
|
@@ -320,7 +318,7 @@ pid_t spawn(int wrk_id)
|
320
|
318
|
// Sleep to avoid spawning like hell thinking all workers are
|
321
|
319
|
// busy. Give this one some time to come up...
|
322
|
320
|
// TODO: find a better way to avoid spawning to max_wrk
|
323
|
|
- nanosleep(&timeout, NULL);
|
|
321
|
+ //nanosleep(&timeout, NULL);
|
324
|
322
|
pyfcgi_log( LOG_INFO,
|
325
|
323
|
"Worker #%d spawned with PID %d", wrk_id, res);
|
326
|
324
|
return res;
|