Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# jupyter notebooks meta files
.ipynb_checkpoints/

# OS irrelevant files
.DS_Store
113 changes: 62 additions & 51 deletions negex.python/negex.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import re

def sortRules (ruleList):

def sortRules(ruleList):
"""Return sorted list of rules.

Rules should be in a tab-delimited format: 'rule\t\t[four letter negation tag]'
Sorts list of rules descending based on length of the rule,
splits each rule into components, converts pattern to regular expression,
and appends it to the end of the rule. """
ruleList.sort(key = len, reverse = True)
ruleList.sort(key=len, reverse=True)
sortedList = []
for rule in ruleList:
s = rule.strip().split('\t')
Expand All @@ -18,85 +19,88 @@ def sortRules (ruleList):
sortedList.append(s)
return sortedList


class negTagger(object):
'''Take a sentence and tag negation terms and negated phrases.

Keyword arguments:
sentence -- string to be tagged
phrases -- list of phrases to check for negation
rules -- list of negation trigger terms from the sortRules function
negP -- tag 'possible' terms as well (default = True) '''
def __init__(self, sentence = '', phrases = None, rules = None,
negP = True):

def __init__(self, sentence='', phrases=None, rules=None,
negP=True):
self.__sentence = sentence
self.__phrases = phrases
self.__rules = rules
self.__negTaggedSentence = ''
self.__scopesToReturn = []
self.__negationFlag = None

filler = '_'

for rule in self.__rules:
reformatRule = re.sub(r'\s+', filler, rule[0].strip())
self.__sentence = rule[3].sub (' ' + rule[2].strip()
+ reformatRule
+ rule[2].strip() + ' ', self.__sentence)
self.__sentence = rule[3].sub(' ' + rule[2].strip()
+ reformatRule
+ rule[2].strip() + ' ', self.__sentence)
for phrase in self.__phrases:
phrase = re.sub(r'([.^$*+?{\\|()[\]])', r'\\\1', phrase)
splitPhrase = phrase.split()
joiner = r'\W+'
joinedPattern = r'\b' + joiner.join(splitPhrase) + r'\b'
joinedPattern = r'\b' + joiner.join(splitPhrase) + r'\b'
reP = re.compile(joinedPattern, re.IGNORECASE)
m = reP.search(self.__sentence)
if m:
self.__sentence = self.__sentence.replace(m.group(0), '[PHRASE]'
+ re.sub(r'\s+', filler, m.group(0).strip())
+ '[PHRASE]')
# Exchanges the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
# based on PREN, POST rules and if negP is set to True then based on

# Exchanges the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
# based on PREN, POST rules and if negP is set to True then based on
# PREP and POSP, as well.
# Because PRENEGATION [PREN] is checked first it takes precedence over
# POSTNEGATION [POST]. Similarly POSTNEGATION [POST] takes precedence over
# POSSIBLE PRENEGATION [PREP] and [PREP] takes precedence over POSSIBLE
# POSSIBLE PRENEGATION [PREP] and [PREP] takes precedence over POSSIBLE
# POSTNEGATION [POSP].

overlapFlag = 0
prenFlag = 0
postFlag = 0
prePossibleFlag = 0
postPossibleFlag = 0

sentenceTokens = self.__sentence.split()
sentencePortion = ''
aScopes = []
sb = []
#check for [PREN]
# check for [PREN]
for i in range(len(sentenceTokens)):
if sentenceTokens[i][:6] == '[PREN]':
prenFlag = 1
overlapFlag = 0

if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[POST]', '[PREP]', '[POSP]']:
overlapFlag = 1

if i+1 < len(sentenceTokens):
if sentenceTokens[i+1][:6] == '[PREN]':
overlapFlag = 1
if sentencePortion.strip():
aScopes.append(sentencePortion.strip())
sentencePortion = ''

if prenFlag == 1 and overlapFlag == 0:
sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[NEGATED]')
sentenceTokens[i] = sentenceTokens[i].replace(
'[PHRASE]', '[NEGATED]')
sentencePortion = sentencePortion + ' ' + sentenceTokens[i]

sb.append(sentenceTokens[i])

if sentencePortion.strip():
aScopes.append(sentencePortion.strip())

sentencePortion = ''
sb.reverse()
sentenceTokens = sb
Expand All @@ -109,26 +113,27 @@ def __init__(self, sentence = '', phrases = None, rules = None,

if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[PREN]', '[PREP]', '[POSP]']:
overlapFlag = 1

if i+1 < len(sentenceTokens):
if sentenceTokens[i+1][:6] == '[POST]':
overlapFlag = 1
if sentencePortion.strip():
aScopes.append(sentencePortion.strip())
sentencePortion = ''

if postFlag == 1 and overlapFlag == 0:
sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[NEGATED]')
sentenceTokens[i] = sentenceTokens[i].replace(
'[PHRASE]', '[NEGATED]')
sentencePortion = sentenceTokens[i] + ' ' + sentencePortion

sb2.insert(0, sentenceTokens[i])

if sentencePortion.strip():
aScopes.append(sentencePortion.strip())

sentencePortion = ''
self.__negTaggedSentence = ' '.join(sb2)

if negP:
sentenceTokens = sb2
sb3 = []
Expand All @@ -140,26 +145,28 @@ def __init__(self, sentence = '', phrases = None, rules = None,

if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[POST]', '[PREN]', '[POSP]']:
overlapFlag = 1

if i+1 < len(sentenceTokens):
if sentenceTokens[i+1][:6] == '[PREP]':
overlapFlag = 1
if sentencePortion.strip():
aScopes.append(sentencePortion.strip())
sentencePortion = ''

if prePossibleFlag == 1 and overlapFlag == 0:
sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[POSSIBLE]')
sentenceTokens[i] = sentenceTokens[i].replace(
'[PHRASE]', '[POSSIBLE]')
sentencePortion = sentencePortion + ' ' + sentenceTokens[i]

sb3 = sb3 + ' ' + sentenceTokens[i]


# sb3 = sb3 + ' ' + sentenceTokens[i]
sb3.append(sentenceTokens[i])

if sentencePortion.strip():
aScopes.append(sentencePortion.strip())

sentencePortion = ''
sb3.reverse()
sentenceTokens = sb3
sentenceTokens = sb3
sb4 = []
# Check for [POSP]
for i in range(len(sentenceTokens)):
Expand All @@ -169,34 +176,36 @@ def __init__(self, sentence = '', phrases = None, rules = None,

if sentenceTokens[i][:6] in ['[CONJ]', '[PSEU]', '[PREN]', '[PREP]', '[POST]']:
overlapFlag = 1

if i+1 < len(sentenceTokens):
if sentenceTokens[i+1][:6] == '[POSP]':
overlapFlag = 1
if sentencePortion.strip():
aScopes.append(sentencePortion.strip())
sentencePortion = ''

if postPossibleFlag == 1 and overlapFlag == 0:
sentenceTokens[i] = sentenceTokens[i].replace('[PHRASE]', '[POSSIBLE]')
sentenceTokens[i] = sentenceTokens[i].replace(
'[PHRASE]', '[POSSIBLE]')
sentencePortion = sentenceTokens[i] + ' ' + sentencePortion

sb4.insert(0, sentenceTokens[i])

if sentencePortion.strip():
aScopes.append(sentencePortion.strip())

self.__negTaggedSentence = ' '.join(sb4)

if '[NEGATED]' in self.__negTaggedSentence:
self.__negationFlag = 'negated'
elif '[POSSIBLE]' in self.__negTaggedSentence:
self.__negationFlag = 'possible'
else:
self.__negationFlag = 'affirmed'

self.__negTaggedSentence = self.__negTaggedSentence.replace(filler, ' ')


self.__negTaggedSentence = self.__negTaggedSentence.replace(
filler, ' ')

for line in aScopes:
tokensToReturn = []
thisLineTokens = line.split()
Expand All @@ -207,11 +216,13 @@ def __init__(self, sentence = '', phrases = None, rules = None,

def getNegTaggedSentence(self):
return self.__negTaggedSentence

def getNegationFlag(self):
return self.__negationFlag

def getScopes(self):
return self.__scopesToReturn

def __str__(self):
text = self.__negTaggedSentence
text += '\t' + self.__negationFlag
Expand Down
6 changes: 3 additions & 3 deletions negex.python/negex_triggers.txt
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,10 @@ can rule her out [PREN]
can rule the patient out [PREN]
can rule him out for [PREN]
can rule her out for [PREN]
can rule the patinet out for [PREN]
can rule the patient out for [PREN]
can rule him out against [PREN]
can rule her out against [PREN]
can rule the patinet out against [PREN]
can rule the patient out against [PREN]
adequate to rule out [PREN]
adequate to rule him out [PREN]
adequate to rule her out [PREN]
Expand Down Expand Up @@ -148,7 +148,7 @@ rule the patient out [PREP]
rule out for [PREP]
rule him out for [PREP]
rule her out for [PREP]
rule the patinet out for [PREP]
rule the patient out for [PREP]
be ruled out for [PREP]
should be ruled out for [PREP]
ought to be ruled out for [PREP]
Expand Down
16 changes: 10 additions & 6 deletions negex.python/wrapper.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,30 @@
def main():
rfile = open(r'negex_triggers.txt')
irules = sortRules(rfile.readlines())
reports = csv.reader(open(r'Annotations-1-120.txt','rb'), delimiter = '\t')
reports = csv.reader(open(r'Annotations-1-120.txt', 'rb'), delimiter='\t')
reports.next()
reportNum = 0
correctNum = 0
ofile = open(r'negex_output.txt', 'w')
output = []
outputfile = csv.writer(ofile, delimiter = '\t')
outputfile = csv.writer(ofile, delimiter='\t')
for report in reports:
tagger = negTagger(sentence = report[2], phrases = [report[1]], rules = irules, negP=False)
tagger = negTagger(sentence=report[2], phrases=[
report[1]], rules=irules, negP=True)
report.append(tagger.getNegTaggedSentence())
report.append(tagger.getNegationFlag())
report = report + tagger.getScopes()
reportNum += 1
if report[3].lower() == report[5]:
correctNum +=1
correctNum += 1
output.append(report)
outputfile.writerow(['Percentage correct:', float(correctNum)/float(reportNum)])
outputfile.writerow(
['Percentage correct:', float(correctNum)/float(reportNum)])
for row in output:
if row:
outputfile.writerow(row)
ofile.close()

if __name__ == '__main__': main()

if __name__ == '__main__':
main()