Skip to content
Snippets Groups Projects
Commit 54d3f347 authored by Joel Oksanen
Browse files

Noun phrase parsing now returns tree instead of string. TODO: Use the tree in...

Noun phrase parsing now returns a tree instead of a string. TODO: Use the tree in opinion target replacement.
parent 6c348770
No related branches found
No related tags found
No related merge requests found
...@@ -4,7 +4,7 @@ import string ...@@ -4,7 +4,7 @@ import string
from nltk.tree import Tree from nltk.tree import Tree
included_labels = ['NN', 'NNS', 'NNP', 'NNPS', 'DT', 'CD'] included_labels = ['NN', 'NNS', 'NNP', 'NNPS', 'DT', 'CD', 'FW']
# Marks all subtrees with descriptive label with 'DESC' # Marks all subtrees with descriptive label with 'DESC'
def get_np_words(np): def get_np_words(np):
...@@ -19,6 +19,22 @@ def get_np_words(np): ...@@ -19,6 +19,22 @@ def get_np_words(np):
return [np_sub] return [np_sub]
return w return w
def get_np_tree(np):
    """Build a filtered copy of a noun-phrase tree, scanning right to left.

    The children of ``np`` are visited from last to first. Leaf tokens are
    always kept. A subtree is kept (after being filtered recursively) only if
    its label is in ``included_labels``; the scan stops at the first subtree
    whose label is not included, so only a trailing run of children survives.

    Args:
        np: An ``nltk.tree.Tree`` (or subclass) node to filter.

    Returns:
        A ``(tree, cont)`` tuple. ``tree`` is a new ``Tree`` carrying
        ``np``'s label and the kept children in their original order, or
        ``None`` when the scan stopped before keeping anything. ``cont`` is
        ``True`` when every child was consumed and ``False`` when the scan
        stopped early (here or in a nested subtree).
    """
    children = []
    for np_sub in reversed(np):
        # isinstance (rather than an exact type check) also accepts Tree
        # subclasses instead of silently treating them as leaf tokens.
        if not isinstance(np_sub, Tree):
            # Leaf token: prepend so the original left-to-right order is kept.
            children.insert(0, np_sub)
            continue

        if np_sub.label() not in included_labels:
            # Excluded label: stop here and return what was gathered so far.
            return (Tree(np.label(), children) if children else None, False)

        subtree, cont = get_np_tree(np_sub)
        # Invariant relied on by the caller: an included subtree is expected
        # to yield a tree, never None.
        assert subtree is not None
        children.insert(0, subtree)
        if not cont:
            # The nested scan stopped early, so nothing further left may be
            # attached at this level either.
            return (Tree(np.label(), children), False)

    return (Tree(np.label(), children), True)
def filtered_np(np): def filtered_np(np):
w = get_np_words(np) w = get_np_words(np)
i = (len(w) - w[::-1].index('DESC')) if 'DESC' in w else 0 i = (len(w) - w[::-1].index('DESC')) if 'DESC' in w else 0
...@@ -31,8 +47,10 @@ def extract_extended_nouns(tree_str): ...@@ -31,8 +47,10 @@ def extract_extended_nouns(tree_str):
for tree in trees: for tree in trees:
for subtree in tree.subtrees(): for subtree in tree.subtrees():
if subtree.label() == 'NP': if subtree.label() == 'NP':
# np = ' '.join(np.leaves()) np, _ = get_np_tree(subtree)
phrases.append(filtered_np(subtree)) if np:
np = ' '.join(np.leaves())
phrases.append(np)
return phrases return phrases
tree = ET.parse('ABSA16_Laptops_Train_SB1_v2_with_parse_trees.xml') tree = ET.parse('ABSA16_Laptops_Train_SB1_v2_with_parse_trees.xml')
...@@ -92,16 +110,49 @@ def replace_feature_nps(feature, text, nps): ...@@ -92,16 +110,49 @@ def replace_feature_nps(feature, text, nps):
if len(detected_nps) == 0: if len(detected_nps) == 0:
return None return None
print(nps)
print(detected_nps)
unique_nps = list(filter(lambda np: not any(other_np in np for other_np in detected_nps.difference({np})), detected_nps)) unique_nps = list(filter(lambda np: not any(other_np in np for other_np in detected_nps.difference({np})), detected_nps))
print(unique_nps)
for unique_np in unique_nps: for unique_np in unique_nps:
if text == text.replace(unique_np, '$T$'):
print('***')
print(text)
print(nps)
print(detected_nps)
print(unique_nps)
text = text.replace(unique_np, '$T$') text = text.replace(unique_np, '$T$')
return text return text
# tree_str = '''(ROOT
# (S
# (S
# (NP (PRP I))
# (VP (MD would)
# (VP (VB recommend)
# (NP (PRP it)))))
# (, ,)
# (PP (IN for)
# (NP
# (NP (NN anybody))
# (VP (VBG needing)
# (NP (DT a) (JJ reliable) (JJ simple) (NN laptop)))))
# (. .)))'''
# trees = Tree.fromstring(tree_str)
# for tree in trees:
# for subtree in tree.subtrees():
# if subtree.label() == 'NP':
# np, t = get_np_tree(subtree)
# print(np)
# print(t)
# # print(' '.join(np.leaves()))
#
# ns = filtered_np(subtree)
# print(ns)
#
# print('---')
for review in reviews: for review in reviews:
sentences = review[0] sentences = review[0]
assert sentences.tag == 'sentences' assert sentences.tag == 'sentences'
...@@ -118,11 +169,11 @@ for review in reviews: ...@@ -118,11 +169,11 @@ for review in reviews:
replaced_text = replace_feature_nps(opinion[0], text, nps) replaced_text = replace_feature_nps(opinion[0], text, nps)
if replaced_text: if replaced_text:
prepped_opinions += 1 prepped_opinions += 1
print('---') # print('---')
print(text) # print(text)
print(replaced_text) # print(replaced_text)
print(opinion) # print(opinion)
print('---') # print('---')
else: else:
pass pass
# print('---') # print('---')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment