summaryrefslogtreecommitdiff
path: root/utils/synutils/srl_sentence.h
diff options
context:
space:
mode:
authorWu, Ke <wuke@cs.umd.edu>2014-10-07 18:44:05 -0400
committerWu, Ke <wuke@cs.umd.edu>2014-10-07 18:44:05 -0400
commit8c26c195213805face566a6407597ba2a871a122 (patch)
tree378301ff345bf465f407f1447ad5fe126b3cd47c /utils/synutils/srl_sentence.h
parent6c7bf8cf49db88ca47e5b08aa449032995736854 (diff)
Move synutils under utils
Diffstat (limited to 'utils/synutils/srl_sentence.h')
-rw-r--r--utils/synutils/srl_sentence.h214
1 files changed, 0 insertions, 214 deletions
diff --git a/utils/synutils/srl_sentence.h b/utils/synutils/srl_sentence.h
deleted file mode 100644
index 1afdec45..00000000
--- a/utils/synutils/srl_sentence.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * srl_sentence.h
- *
- * Created on: May 26, 2013
- * Author: junhuili
- */
-
-#ifndef SRL_SENTENCE_H_
-#define SRL_SENTENCE_H_
-
-
-#include <sstream>
-#include <vector>
-
-#include "tree.h"
-#include "utils/stringlib.h"
-
-using namespace std;
-
-struct SArgument {
- SArgument(const char* pszRole, int iBegin, int iEnd, float fProb) {
- m_pszRole = new char[strlen(pszRole) + 1];
- strcpy(m_pszRole, pszRole);
- m_iBegin = iBegin;
- m_iEnd = iEnd;
- m_fProb = fProb;
- m_pTreeItem = NULL;
- }
- ~SArgument() { delete[] m_pszRole; }
-
- void fnSetTreeItem(STreeItem* pTreeItem) {
- m_pTreeItem = pTreeItem;
- if (m_pTreeItem != NULL && m_pTreeItem->m_iBegin != -1) {
- assert(m_pTreeItem->m_iBegin == m_iBegin);
- assert(m_pTreeItem->m_iEnd == m_iEnd);
- }
- }
-
- char* m_pszRole; // argument rule, e.g., ARG0, ARGM-TMP
- int m_iBegin;
- int m_iEnd; // the span of the argument, [m_iBegin, m_iEnd]
- float m_fProb; // the probability of this role,
- STreeItem* m_pTreeItem;
-};
-
-struct SPredicate {
- SPredicate(const char* pszLemma, int iPosition) {
- if (pszLemma != NULL) {
- m_pszLemma = new char[strlen(pszLemma) + 1];
- strcpy(m_pszLemma, pszLemma);
- } else
- m_pszLemma = NULL;
- m_iPosition = iPosition;
- }
- ~SPredicate() {
- if (m_pszLemma != NULL) delete[] m_pszLemma;
- for (size_t i = 0; i < m_vecArgt.size(); i++) delete m_vecArgt[i];
- }
- int fnAppend(const char* pszRole, int iBegin, int iEnd) {
- SArgument* pArgt = new SArgument(pszRole, iBegin, iEnd, 1.0);
- return fnAppend(pArgt);
- }
- int fnAppend(SArgument* pArgt) {
- m_vecArgt.push_back(pArgt);
- int iPosition = m_vecArgt.size() - 1;
- return iPosition;
- }
-
- char* m_pszLemma; // lemma of the predicate, for Chinese, it's always as same
- // as the predicate itself
- int m_iPosition; // the position in sentence
- vector<SArgument*> m_vecArgt; // arguments associated to the predicate
-};
-
-struct SSrlSentence {
- SSrlSentence() { m_pTree = NULL; }
- ~SSrlSentence() {
- if (m_pTree != NULL) delete m_pTree;
-
- for (size_t i = 0; i < m_vecPred.size(); i++) delete m_vecPred[i];
- }
- int fnAppend(const char* pszLemma, int iPosition) {
- SPredicate* pPred = new SPredicate(pszLemma, iPosition);
- return fnAppend(pPred);
- }
- int fnAppend(SPredicate* pPred) {
- m_vecPred.push_back(pPred);
- int iPosition = m_vecPred.size() - 1;
- return iPosition;
- }
- int GetPredicateNum() { return m_vecPred.size(); }
-
- SParsedTree* m_pTree;
- vector<SPredicate*> m_vecPred;
-};
-
-struct SSrlSentenceReader {
- SSrlSentenceReader(const char* pszSrlFname) {
- m_fpIn = fopen(pszSrlFname, "r");
- assert(m_fpIn != NULL);
- }
- ~SSrlSentenceReader() {
- if (m_fpIn != NULL) fclose(m_fpIn);
- }
-
- inline void fnReplaceAll(std::string& str, const std::string& from,
- const std::string& to) {
- size_t start_pos = 0;
- while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
- str.replace(start_pos, from.length(), to);
- start_pos += to.length(); // In case 'to' contains 'from', like replacing
- // 'x' with 'yx'
- }
- }
-
- // TODO: here only considers flat predicate-argument structure
- // i.e., no overlap among them
- SSrlSentence* fnReadNextSrlSentence() {
- vector<vector<string> > vecContent;
- if (fnReadNextContent(vecContent) == false) return NULL;
-
- SSrlSentence* pSrlSentence = new SSrlSentence();
- int iSize = vecContent.size();
- // put together syntactic text
- std::ostringstream ostr;
- for (int i = 0; i < iSize; i++) {
- string strSynSeg =
- vecContent[i][5]; // the 5th column is the syntactic segment
- size_t iPosition = strSynSeg.find_first_of('*');
- assert(iPosition != string::npos);
- ostringstream ostrTmp;
- ostrTmp << "(" << vecContent[i][2] << " " << vecContent[i][0]
- << ")"; // the 2th column is POS-tag, and the 0th column is word
- strSynSeg.replace(iPosition, 1, ostrTmp.str());
- fnReplaceAll(strSynSeg, "(", " (");
- ostr << strSynSeg;
- }
- string strSyn = ostr.str();
- pSrlSentence->m_pTree = SParsedTree::fnConvertFromString(strSyn.c_str());
- pSrlSentence->m_pTree->fnSetHeadWord();
- pSrlSentence->m_pTree->fnSetSpanInfo();
-
- // read predicate-argument structure
- int iNumPred = vecContent[0].size() - 8;
- for (int i = 0; i < iNumPred; i++) {
- vector<string> vecRole;
- vector<int> vecBegin;
- vector<int> vecEnd;
- int iPred = -1;
- for (int j = 0; j < iSize; j++) {
- const char* p = vecContent[j][i + 8].c_str();
- const char* q;
- if (p[0] == '(') {
- // starting position of an argument(or predicate)
- vecBegin.push_back(j);
- q = strchr(p, '*');
- assert(q != NULL);
- vecRole.push_back(vecContent[j][i + 8].substr(1, q - p - 1));
- if (vecRole.back().compare("V") == 0) {
- assert(iPred == -1);
- iPred = vecRole.size() - 1;
- }
- }
- if (p[strlen(p) - 1] == ')') {
- // end position of an argument(or predicate)
- vecEnd.push_back(j);
- assert(vecBegin.size() == vecEnd.size());
- }
- }
- assert(iPred != -1);
- SPredicate* pPred = new SPredicate(
- pSrlSentence->m_pTree->m_vecTerminals[vecBegin[iPred]]->m_pszTerm,
- vecBegin[iPred]);
- pSrlSentence->fnAppend(pPred);
- for (size_t j = 0; j < vecBegin.size(); j++) {
- if (j == iPred) continue;
- pPred->fnAppend(vecRole[j].c_str(), vecBegin[j], vecEnd[j]);
- pPred->m_vecArgt.back()->fnSetTreeItem(
- pSrlSentence->m_pTree->fnFindNodeForSpan(vecBegin[j], vecEnd[j],
- false));
- }
- }
- return pSrlSentence;
- }
-
- private:
- bool fnReadNextContent(vector<vector<string> >& vecContent) {
- vecContent.clear();
- if (feof(m_fpIn) == true) return false;
- char* pszLine;
- pszLine = new char[100001];
- pszLine[0] = '\0';
- int iLen;
- while (!feof(m_fpIn)) {
- fgets(pszLine, 10001, m_fpIn);
- iLen = strlen(pszLine);
- while (iLen > 0 && pszLine[iLen - 1] > 0 && pszLine[iLen - 1] < 33) {
- pszLine[iLen - 1] = '\0';
- iLen--;
- }
- if (iLen == 0) break; // end of this sentence
-
- vector<string> terms = SplitOnWhitespace(string(pszLine));
- assert(terms.size() > 7);
- vecContent.push_back(terms);
- }
- delete[] pszLine;
- return true;
- }
-
- private:
- FILE* m_fpIn;
-};
-#endif /* SRL_SENTENCE_H_ */