From 3cfb30225123e56e7ba85f5c92c79c16ffff995f Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 20 Mar 2014 12:47:17 -0400 Subject: fix crashes in mira --- mteval/external_scorer.cc | 10 +++------- mteval/ns_ext.cc | 45 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/mteval/external_scorer.cc b/mteval/external_scorer.cc index c7c3de1a..efd880fe 100644 --- a/mteval/external_scorer.cc +++ b/mteval/external_scorer.cc @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -16,7 +15,7 @@ using namespace std; extern const char* meteor_jar_path; -extern void metric_child_signal_handler(int); +extern void setup_child_process_handler(); map > ScoreServerManager::servers_; @@ -48,11 +47,7 @@ ScoreServer* ScoreServerManager::Instance(const string& score_type) { } ScoreServer::ScoreServer(const string& cmd) { - static bool need_init = true; - if (need_init) { - need_init = false; - signal(SIGCHLD, metric_child_signal_handler); - } + setup_child_process_handler(); cerr << "Invoking " << cmd << " ..." << endl; if (pipe(p2c) < 0) { perror("pipe"); exit(1); } if (pipe(c2p) < 0) { perror("pipe"); exit(1); } @@ -83,6 +78,7 @@ ScoreServer::ScoreServer(const string& cmd) { } ScoreServer::~ScoreServer() { + cerr << "ScoreServer::~ScoreServer()\n"; // TODO close stuff, join stuff } diff --git a/mteval/ns_ext.cc b/mteval/ns_ext.cc index 9d2c75c6..efe48afe 100644 --- a/mteval/ns_ext.cc +++ b/mteval/ns_ext.cc @@ -10,17 +10,49 @@ #include #include +#include "filelib.h" #include "stringlib.h" #include "tdict.h" using namespace std; +static volatile bool child_need_init = true; + void metric_child_signal_handler(int signo) { int status = 0; - cerr << "Received SIGCHLD(" << signo << ") ... aborting.\n"; + string cmd; + { + ReadFile rf("/proc/self/cmdline"); + if (rf) getline(*rf.stream(), cmd); + } + for (unsigned i = 0; i < cmd.size(); ++i) + if (cmd[i] == 0) cmd[i] = ' '; + cerr << "Received SIGCHLD(" << signo << ")\n"; + if (cmd.size()) + cerr << " Parent command line: " << cmd << endl; + else + cerr << " Parent command line not available!\n"; // reap zombies - while (waitpid(-1, &status, WNOHANG) > 0) {} - abort(); + bool should_exit = false; + while (waitpid(-1, &status, WNOHANG) > 0) { + cerr << " Child status: " << status << (status ? " [FAILURE]" : " [OK]") << endl; + if (status) should_exit = true; + } + if (should_exit) { + cerr << "Exiting on account of non-zero child exit code...\n"; + exit(1); + } +} + +void setup_child_process_handler() { + if (child_need_init == true) { + child_need_init = false; + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_NOCLDSTOP; + sa.sa_handler = metric_child_signal_handler; + sigaction(SIGCHLD, &sa, NULL); + } } struct NScoreServer { @@ -37,12 +69,7 @@ struct NScoreServer { }; NScoreServer::NScoreServer(const string& cmd) { - static bool need_init = true; - if (need_init) { - need_init = false; - signal(SIGCHLD, metric_child_signal_handler); - } - + setup_child_process_handler(); cerr << "Invoking " << cmd << " ..." << endl; if (pipe(p2c) < 0) { perror("pipe"); exit(1); } if (pipe(c2p) < 0) { perror("pipe"); exit(1); } -- cgit v1.2.3