From 9fd2929a0921e7a46b7822c20118eb5e6a14378c Mon Sep 17 00:00:00 2001 From: moetheneuron Date: Tue, 10 May 2022 15:39:05 +0200 Subject: [PATCH 1/4] Added initial .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..73ae6a6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# jupyter notebooks meta files +.ipynb_checkpoints/ + +# OS irrelevant files +.DS_Store From 9ba56c57bd55d42eb19ceed1c0a9194723e9b240 Mon Sep 17 00:00:00 2001 From: moetheneuron Date: Tue, 10 May 2022 15:42:35 +0200 Subject: [PATCH 2/4] Fixed misspelling in negex triggers --- negex.python/negex_triggers.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) mode change 100755 => 100644 negex.python/negex_triggers.txt diff --git a/negex.python/negex_triggers.txt b/negex.python/negex_triggers.txt old mode 100755 new mode 100644 index b9f3712..a8a4122 --- a/negex.python/negex_triggers.txt +++ b/negex.python/negex_triggers.txt @@ -114,10 +114,10 @@ can rule her out [PREN] can rule the patient out [PREN] can rule him out for [PREN] can rule her out for [PREN] -can rule the patinet out for [PREN] +can rule the patient out for [PREN] can rule him out against [PREN] can rule her out against [PREN] -can rule the patinet out against [PREN] +can rule the patient out against [PREN] adequate to rule out [PREN] adequate to rule him out [PREN] adequate to rule her out [PREN] @@ -148,7 +148,7 @@ rule the patient out [PREP] rule out for [PREP] rule him out for [PREP] rule her out for [PREP] -rule the patinet out for [PREP] +rule the patient out for [PREP] be ruled out for [PREP] should be ruled out for [PREP] ought to be ruled out for [PREP] From fd2cd4a262ceadf31300bcda8ca2d544c12c2108 Mon Sep 17 00:00:00 2001 From: moetheneuron Date: Tue, 10 May 2022 15:43:33 +0200 Subject: [PATCH 3/4] Enabled tagging for possible/speculated terms --- negex.python/wrapper.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) mode change 100755 => 100644 negex.python/wrapper.py diff --git a/negex.python/wrapper.py b/negex.python/wrapper.py old mode 100755 new mode 100644 index cbbeadd..d7996f2 --- a/negex.python/wrapper.py +++ b/negex.python/wrapper.py @@ -5,26 +5,30 @@ def main(): rfile = open(r'negex_triggers.txt') irules = sortRules(rfile.readlines()) - reports = csv.reader(open(r'Annotations-1-120.txt','rb'), delimiter = '\t') + reports = csv.reader(open(r'Annotations-1-120.txt', 'rb'), delimiter='\t') reports.next() reportNum = 0 correctNum = 0 ofile = open(r'negex_output.txt', 'w') output = [] - outputfile = csv.writer(ofile, delimiter = '\t') + outputfile = csv.writer(ofile, delimiter='\t') for report in reports: - tagger = negTagger(sentence = report[2], phrases = [report[1]], rules = irules, negP=False) + tagger = negTagger(sentence=report[2], phrases=[ + report[1]], rules=irules, negP=True) report.append(tagger.getNegTaggedSentence()) report.append(tagger.getNegationFlag()) report = report + tagger.getScopes() reportNum += 1 if report[3].lower() == report[5]: - correctNum +=1 + correctNum += 1 output.append(report) - outputfile.writerow(['Percentage correct:', float(correctNum)/float(reportNum)]) + outputfile.writerow( + ['Percentage correct:', float(correctNum)/float(reportNum)]) for row in output: if row: outputfile.writerow(row) ofile.close() -if __name__ == '__main__': main() \ No newline at end of file + +if __name__ == '__main__': + main() From 424e6dfe251e730efb290e393d856edf62b56f57 Mon Sep 17 00:00:00 2001 From: moetheneuron Date: Tue, 10 May 2022 15:44:55 +0200 Subject: [PATCH 4/4] Fixed a bug causing TypeError when negP enabled --- negex.python/negex.py | 113 +++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 51 deletions(-) mode change 100755 => 100644 negex.python/negex.py diff --git a/negex.python/negex.py b/negex.python/negex.py old mode 100755 new mode 100644 index 6166270..caf21e1 --- a/negex.python/negex.py +++ b/negex.python/negex.py @@ -1,13 +1,14 @@ import re -def sortRules (ruleList): + +def sortRules(ruleList): """Return sorted list of rules. - + Rules should be in a tab-delimited format: 'rule\t\t[four letter negation tag]' Sorts list of rules descending based on length of the rule, splits each rule into components, converts pattern to regular expression, and appends it to the end of the rule. """ - ruleList.sort(key = len, reverse = True) + ruleList.sort(key=len, reverse=True) sortedList = [] for rule in ruleList: s = rule.strip().split('\t') @@ -18,61 +19,63 @@ def sortRules (ruleList): sortedList.append(s) return sortedList + class negTagger(object): '''Take a sentence and tag negation terms and negated phrases. - + Keyword arguments: sentence -- string to be tagged phrases -- list of phrases to check for negation rules -- list of negation trigger terms from the sortRules function negP -- tag 'possible' terms as well (default = True) ''' - def __init__(self, sentence = '', phrases = None, rules = None, - negP = True): + + def __init__(self, sentence='', phrases=None, rules=None, + negP=True): self.__sentence = sentence self.__phrases = phrases self.__rules = rules self.__negTaggedSentence = '' self.__scopesToReturn = [] self.__negationFlag = None - + filler = '_' - + for rule in self.__rules: reformatRule = re.sub(r'\s+', filler, rule[0].strip()) - self.__sentence = rule[3].sub (' ' + rule[2].strip() - + reformatRule - + rule[2].strip() + ' ', self.__sentence) + self.__sentence = rule[3].sub(' ' + rule[2].strip() + + reformatRule + + rule[2].strip() + ' ', self.__sentence) for phrase in self.__phrases: phrase = re.sub(r'([.^$*+?{\\|()[\]])', r'\\\1', phrase) splitPhrase = phrase.split() joiner = r'\W+' - joinedPattern = r'\b' + joiner.join(splitPhrase) + r'\b' + joinedPattern = r'\b' + joiner.join(splitPhrase) + r'\b' reP = re.compile(joinedPattern, re.IGNORECASE) m = reP.search(self.__sentence) if m: self.__sentence = self.__sentence.replace(m.group(0), '[PHRASE]' + re.sub(r'\s+', filler, m.group(0).strip()) + '[PHRASE]') - -# Exchanges the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED] -# based on PREN, POST rules and if negPoss is set to True then based on + +# Exchanges the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED] +# based on PREN, POST rules and if negPoss is set to True then based on # PREP and POSP, as well. # Because PRENEGATION [PREN} is checked first it takes precedent over # POSTNEGATION [POST]. Similarly POSTNEGATION [POST] takes precedent over -# POSSIBLE PRENEGATION [PREP] and [PREP] takes precedent over POSSIBLE +# POSSIBLE PRENEGATION [PREP] and [PREP] takes precedent over POSSIBLE # POSTNEGATION [POSP]. - + overlapFlag = 0 prenFlag = 0 postFlag = 0 prePossibleFlag = 0 postPossibleFlag = 0 - + sentenceTokens = self.__sentence.split() sentencePortion = '' aScopes = [] sb = [] - #check for [PREN] + # check for [PREN] for i in range(len(sentenceTokens)): if sentenceTokens[i][:6] == '[PREN]': prenFlag = 1 @@ -80,23 +83,24 @@ def __init__(self, sentence = '', phrases = None, rules = None, if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[POST]', '[PREP]', '[POSP]']: overlapFlag = 1 - + if i+1 < len(sentenceTokens): if sentenceTokens[i+1][:6] == '[PREN]': overlapFlag = 1 if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) sentencePortion = '' - + if prenFlag == 1 and overlapFlag == 0: - sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[NEGATED]') + sentenceTokens[i] = sentenceTokens[i].replace( + '[PHRASE]', '[NEGATED]') sentencePortion = sentencePortion + ' ' + sentenceTokens[i] - + sb.append(sentenceTokens[i]) - + if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) - + sentencePortion = '' sb.reverse() sentenceTokens = sb @@ -109,26 +113,27 @@ def __init__(self, sentence = '', phrases = None, rules = None, if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[PREN]', '[PREP]', '[POSP]']: overlapFlag = 1 - + if i+1 < len(sentenceTokens): if sentenceTokens[i+1][:6] == '[POST]': overlapFlag = 1 if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) sentencePortion = '' - + if postFlag == 1 and overlapFlag == 0: - sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[NEGATED]') + sentenceTokens[i] = sentenceTokens[i].replace( + '[PHRASE]', '[NEGATED]') sentencePortion = sentenceTokens[i] + ' ' + sentencePortion - + sb2.insert(0, sentenceTokens[i]) - + if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) - + sentencePortion = '' self.__negTaggedSentence = ' '.join(sb2) - + if negP: sentenceTokens = sb2 sb3 = [] @@ -140,26 +145,28 @@ def __init__(self, sentence = '', phrases = None, rules = None, if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[POST]', '[PREN]', '[POSP]']: overlapFlag = 1 - + if i+1 < len(sentenceTokens): if sentenceTokens[i+1][:6] == '[PREP]': overlapFlag = 1 if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) sentencePortion = '' - + if prePossibleFlag == 1 and overlapFlag == 0: - sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[POSSIBLE]') + sentenceTokens[i] = sentenceTokens[i].replace( + '[PHRASE]', '[POSSIBLE]') sentencePortion = sentencePortion + ' ' + sentenceTokens[i] - - sb3 = sb3 + ' ' + sentenceTokens[i] - + + # sb3 = sb3 + ' ' + sentenceTokens[i] + sb3.append(sentenceTokens[i]) + if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) - + sentencePortion = '' sb3.reverse() - sentenceTokens = sb3 + sentenceTokens = sb3 sb4 = [] # Check for [POSP] for i in range(len(sentenceTokens)): @@ -169,34 +176,36 @@ def __init__(self, sentence = '', phrases = None, rules = None, if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[PREN]', '[PREP]', '[POST]']: overlapFlag = 1 - + if i+1 < len(sentenceTokens): if sentenceTokens[i+1][:6] == '[POSP]': overlapFlag = 1 if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) sentencePortion = '' - + if postPossibleFlag == 1 and overlapFlag == 0: - sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[POSSIBLE]') + sentenceTokens[i] = sentenceTokens[i].replace( + '[PHRASE]', '[POSSIBLE]') sentencePortion = sentenceTokens[i] + ' ' + sentencePortion - + sb4.insert(0, sentenceTokens[i]) - + if sentencePortion.strip(): aScopes.append(sentencePortion.strip()) - + self.__negTaggedSentence = ' '.join(sb4) - + if '[NEGATED]' in self.__negTaggedSentence: self.__negationFlag = 'negated' elif '[POSSIBLE]' in self.__negTaggedSentence: self.__negationFlag = 'possible' else: self.__negationFlag = 'affirmed' - - self.__negTaggedSentence = self.__negTaggedSentence.replace(filler, ' ') - + + self.__negTaggedSentence = self.__negTaggedSentence.replace( + filler, ' ') + for line in aScopes: tokensToReturn = [] thisLineTokens = line.split() @@ -207,11 +216,13 @@ def __init__(self, sentence = '', phrases = None, rules = None, def getNegTaggedSentence(self): return self.__negTaggedSentence + def getNegationFlag(self): return self.__negationFlag + def getScopes(self): return self.__scopesToReturn - + def __str__(self): text = self.__negTaggedSentence text += '\t' + self.__negationFlag