Commit 54d3f347 authored by Joel Oksanen
Browse files

Noun phrase parsing now returns tree instead of string. TODO: Use the tree in...

Noun phrase parsing now returns tree instead of string. TODO: Use the tree in opinion target replacement
parent 6c348770
......@@ -4,7 +4,7 @@ import string
from nltk.tree import Tree
included_labels = ['NN', 'NNS', 'NNP', 'NNPS', 'DT', 'CD']
included_labels = ['NN', 'NNS', 'NNP', 'NNPS', 'DT', 'CD', 'FW']
# Marks all subtrees with descriptive label with 'DESC'
def get_np_words(np):
......@@ -19,6 +19,22 @@ def get_np_words(np):
return [np_sub]
return w
def get_np_tree(np):
    """Build a pruned copy of a noun-phrase tree, scanning right to left.

    The children of ``np`` are visited from last to first. Leaves (anything
    that is not a ``Tree``) are kept unchanged; subtrees whose label is in
    ``included_labels`` are pruned recursively and kept. The scan stops at
    the first subtree whose label is not in ``included_labels``, so the
    result holds only the trailing run of accepted children.

    Args:
        np: An ``nltk.tree.Tree`` node to prune.

    Returns:
        A ``(tree, cont)`` tuple. ``tree`` is a new ``Tree`` carrying
        ``np``'s label and the kept children in their original order, or
        ``None`` when an excluded subtree was hit before anything was kept.
        ``cont`` is ``True`` only when the scan reached the first child
        without stopping, here and in every nested call.

    Raises:
        AssertionError: If a nested subtree with an included label prunes
            down to nothing.
    """
    # Children are collected right-to-left and reversed once when the result
    # is built, instead of re-allocating the list on every prepend.
    kept = []

    def build():
        return Tree(np.label(), kept[::-1])

    for np_sub in reversed(np):
        # isinstance (rather than an exact type check) so that Tree
        # subclasses are treated as subtrees instead of as leaf tokens.
        if not isinstance(np_sub, Tree):
            kept.append(np_sub)
            continue

        if np_sub.label() not in included_labels:
            # Excluded label: everything to the left of it is dropped.
            return (build() if kept else None, False)

        subtree, cont = get_np_tree(np_sub)
        # Presumably included labels are POS tags whose children are plain
        # tokens, so the nested result should never be empty - TODO confirm.
        assert subtree is not None
        kept.append(subtree)
        if not cont:
            # The nested scan was cut short, so stop at this level as well.
            return (build(), False)

    return (build(), True)
def filtered_np(np):
w = get_np_words(np)
i = (len(w) - w[::-1].index('DESC')) if 'DESC' in w else 0
......@@ -31,8 +47,10 @@ def extract_extended_nouns(tree_str):
for tree in trees:
for subtree in tree.subtrees():
if subtree.label() == 'NP':
# np = ' '.join(np.leaves())
phrases.append(filtered_np(subtree))
np, _ = get_np_tree(subtree)
if np:
np = ' '.join(np.leaves())
phrases.append(np)
return phrases
tree = ET.parse('ABSA16_Laptops_Train_SB1_v2_with_parse_trees.xml')
......@@ -92,16 +110,49 @@ def replace_feature_nps(feature, text, nps):
if len(detected_nps) == 0:
return None
print(nps)
print(detected_nps)
unique_nps = list(filter(lambda np: not any(other_np in np for other_np in detected_nps.difference({np})), detected_nps))
print(unique_nps)
for unique_np in unique_nps:
if text == text.replace(unique_np, '$T$'):
print('***')
print(text)
print(nps)
print(detected_nps)
print(unique_nps)
text = text.replace(unique_np, '$T$')
return text
# tree_str = '''(ROOT
# (S
# (S
# (NP (PRP I))
# (VP (MD would)
# (VP (VB recommend)
# (NP (PRP it)))))
# (, ,)
# (PP (IN for)
# (NP
# (NP (NN anybody))
# (VP (VBG needing)
# (NP (DT a) (JJ reliable) (JJ simple) (NN laptop)))))
# (. .)))'''
# trees = Tree.fromstring(tree_str)
# for tree in trees:
# for subtree in tree.subtrees():
# if subtree.label() == 'NP':
# np, t = get_np_tree(subtree)
# print(np)
# print(t)
# # print(' '.join(np.leaves()))
#
# ns = filtered_np(subtree)
# print(ns)
#
# print('---')
for review in reviews:
sentences = review[0]
assert sentences.tag == 'sentences'
......@@ -118,11 +169,11 @@ for review in reviews:
replaced_text = replace_feature_nps(opinion[0], text, nps)
if replaced_text:
prepped_opinions += 1
print('---')
print(text)
print(replaced_text)
print(opinion)
print('---')
# print('---')
# print(text)
# print(replaced_text)
# print(opinion)
# print('---')
else:
pass
# print('---')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment