Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Joel Oksanen
individual_project
Commits
43892e17
Commit
43892e17
authored
Apr 23, 2020
by
Joel Oksanen
Browse files
Started implementing bert in server
parent
605365c2
Changes
7
Hide whitespace changes
Inline
Side-by-side
ADA/SA/bert_analyzer.py
View file @
43892e17
...
...
@@ -12,7 +12,7 @@ import shap
semeval_2014_train_path
=
'data/SemEval-2014/Laptop_Train_v2.xml'
semeval_2014_test_path
=
'data/SemEval-2014/Laptops_Test_Gold.xml'
amazon_test_path
=
'data/Amazon/a
mazon_camera_test
.xml'
amazon_test_path
=
'data/Amazon/a
nnotated_amazon_laptop_reviews
.xml'
trained_model_path
=
'semeval_2014_2.pt'
BATCH_SIZE
=
32
...
...
@@ -26,6 +26,12 @@ def loss(outputs, labels):
class
BertAnalyzer
:
@
staticmethod
def
default
():
sa
=
BertAnalyzer
()
sa
.
load_saved
(
'semeval_2014.pt'
)
return
sa
def
load_saved
(
self
,
path
):
self
.
net
=
TDBertNet
(
len
(
polarity_indices
))
self
.
net
.
load_state_dict
(
torch
.
load
(
path
))
...
...
@@ -93,7 +99,7 @@ class BertAnalyzer:
f1
=
metrics
.
f1_score
(
truths
,
predicted
,
labels
=
range
(
len
(
polarity_indices
)),
average
=
'macro'
)
print
(
'macro F1:'
,
f1
)
def
analyze_sentence
(
self
,
text
,
char_from
,
char_to
):
def
get_sentiment_polarity
(
self
,
text
,
char_from
,
char_to
):
instance
=
Instance
(
text
,
char_from
,
char_to
)
tokens
,
tg_from
,
tg_to
=
instance
.
get
()
text
,
target_indices
=
instance
.
to_tensor
()
...
...
@@ -116,10 +122,18 @@ class BertAnalyzer:
# ax.set_xticklabels(tokens, rotation=45, rotation_mode='anchor', ha='right')
# plt.show()
_
,
pred
=
torch
.
max
(
outputs
.
data
,
1
)
return
pred
sentiment_analyzer
=
BertAnalyzer
()
sentiment_analyzer
.
load_saved
(
'semeval_2014.pt'
)
print
(
sentiment_analyzer
.
analyze_sentence
(
"Well built laptop with win7."
,
11
,
17
))
\ No newline at end of file
val
,
pred
=
torch
.
max
(
outputs
.
data
,
1
)
if
pred
==
0
:
# positive
return
val
elif
pred
==
1
:
# negative
return
-
val
else
:
# neutral or conflicted
return
0
sentiment_analyzer
=
BertAnalyzer
.
default
()
sentiment_analyzer
.
evaluate
(
semeval_2014_test_path
)
sentiment_analyzer
.
evaluate
(
amazon_test_path
)
ADA/SA/sentiment_analyzer.py
View file @
43892e17
...
...
@@ -11,6 +11,7 @@ from sklearn.feature_extraction.text import CountVectorizer
import
os
import
math
def
resample_data
(
instances
,
labels
):
label_instances
=
{
label
:
[
instance
for
instance
in
instances
if
instance
.
opinion
==
label
]
for
label
in
labels
}
max_n_instances
=
max
([
len
(
v
)
for
v
in
label_instances
.
values
()])
...
...
@@ -22,6 +23,7 @@ def resample_data(instances, labels):
print
(
len
(
resampled_data
))
return
resampled_data
class
SentimentAnalyzer
:
expr_clf
=
svm
.
SVC
()
# determines if sentence expresses sentiment towards ARG
...
...
@@ -39,10 +41,11 @@ class SentimentAnalyzer:
def
expresses_sentiment
(
self
,
instances
):
return
self
.
expr_clf
.
predict
([
instance
.
vector
for
instance
in
instances
])
semeval_2014_train_path
=
'data/SemEval-2014/SemEval_2014_Laptop_Train_with_labelled_parse_trees.xml'
semeval_2014_test_path
=
'data/SemEval-2014/SemEval_2014_Laptop_Test_with_labelled_parse_trees.xml'
amazon_train_path
=
'data/Amazon/amazon_camera_train.xml'
amazon_test_path
=
'data/Amazon/amazon_camera_test2.xml'
# 'data/Amazon/prepared_amazon_camera_reviews.xml'
amazon_test_path
=
'data/Amazon/amazon_camera_test2.xml'
# 'data/Amazon/prepared_amazon_camera_reviews.xml'
semeval_train_path
=
'data/SemEval-2016/ABSA16_Laptops_Train_SB1_v2_with_labelled_parse_trees.xml'
semeval_test_path
=
'data/SemEval-2016/ABSA16_Laptops_Test_with_labelled_parse_trees.xml'
#
tweet_train_path
=
'data/acl-14-short-data/tweet_train_with_labelled_parse_trees.xml'
...
...
@@ -56,7 +59,7 @@ sa = SentimentAnalyzer()
train_tree
=
ET
.
parse
(
train_path
)
train_instances
=
[
Instance
(
instance
)
for
instance
in
train_tree
.
getroot
()]
train_instances
=
resample_data
(
train_instances
,
labels
)
#
train_instances = resample_data(train_instances, labels)
# create and train vectorizer model
vec
=
Vectorizer
(
train_instances
)
...
...
ADA/SA/vectorizer.py
View file @
43892e17
...
...
@@ -17,7 +17,7 @@ class Vectorizer:
self
.
transformer
=
TfidfTransformer
()
# indep features:
self
.
bow_vectorizer
=
CountVectorizer
(
stop_words
=
'english'
,
ngram_range
=
(
1
,
2
))
self
.
bow_vectorizer
=
CountVectorizer
(
stop_words
=
'english'
,
ngram_range
=
(
1
,
5
))
texts
=
[
instance
.
text
for
instance
in
train_instances
]
train_bow_vectors
=
self
.
bow_vectorizer
.
fit_transform
(
texts
).
toarray
()
train_sent_vectors
=
[
self
.
sentiment_scores
(
instance
)
for
instance
in
train_instances
]
...
...
ADA/agent.py
View file @
43892e17
...
...
@@ -5,11 +5,12 @@ from anytree import PostOrderIter
import
pickle
from
argument
import
*
from
functools
import
reduce
from
SA.bert_analyzer
import
BertAnalyzer
class
Agent
:
sentiment_threshold
=
0.95
review_tokenizer
=
ReviewTokenizer
()
bert_analyzer
=
BertAnalyzer
.
default
()
def
__init__
(
self
):
# load classifier
...
...
@@ -27,7 +28,7 @@ class Agent:
return
phrases
# analyze sentiment
def
get_sentiment
(
self
,
phrase
):
def
get_
bayes_
sentiment
(
self
,
phrase
):
# get classification
tokens
=
self
.
review_tokenizer
.
tokenize_review
(
phrase
)
prob_classification
=
self
.
classifier
.
prob_classify
(
dict
([
token
,
True
]
for
token
in
tokens
))
...
...
@@ -35,6 +36,9 @@ class Agent:
strength
=
(
prob_classification
.
prob
(
classification
)
-
0.5
)
*
2
return
strength
if
classification
==
'+'
else
-
strength
def
get_bert_sentiment
(
self
,
text
,
char_from
,
char_to
):
return
self
.
bert_analyzer
.
get_sentiment_polarity
(
text
,
char_from
,
char_to
)
# remove all ancestors of node in list l
def
remove_ancestors
(
self
,
node
,
l
):
if
node
.
parent
!=
None
:
...
...
@@ -51,8 +55,9 @@ class Agent:
while
len
(
arguments
)
>
0
:
f
=
arguments
.
pop
(
0
)
for
word
in
glossary
[
f
]:
if
word
in
phrase
:
argument_matches
.
append
(
f
)
matches
=
[(
f
,
m
.
start
(),
m
.
end
())
for
m
in
re
.
finditer
(
word
,
phrase
)]
if
matches
:
argument_matches
+=
matches
self
.
remove_ancestors
(
f
,
arguments
)
break
return
argument_matches
...
...
@@ -61,17 +66,16 @@ class Agent:
votes
=
{}
vote_phrases
=
{}
for
phrase
in
phrases
:
arguments
=
self
.
get_arguments
(
phrase
)
sentiment
=
self
.
get_sentiment
(
phrase
)
if
abs
(
sentiment
)
>
self
.
sentiment_threshold
:
for
argument
in
arguments
:
for
argument
,
start
,
end
in
self
.
get_arguments
(
phrase
):
sentiment
=
self
.
get_bayes_sentiment
(
phrase
)
# self.get_bert_sentiment(phrase, start, end)
if
abs
(
sentiment
)
>
self
.
sentiment_threshold
:
if
(
argument
not
in
votes
)
or
(
abs
(
votes
[
argument
])
<
abs
(
sentiment
)):
votes
[
argument
]
=
sentiment
# what if there's two phrases with same argument?
vote_phrases
[
argument
]
=
{
'phrase'
:
phrase
,
'sentiment'
:
sentiment
}
# normalize votes to 1 (+) or -1 (-)
for
argument
in
votes
:
votes
[
argument
]
=
1
if
votes
[
argument
]
>
0
else
-
1
return
(
votes
,
vote_phrases
)
return
votes
,
vote_phrases
# augment votes (Definition 4.3) obtained for a single critic
def
augment_votes
(
self
,
votes
):
...
...
ADA/
D
ata
L
oader.py
→
ADA/
d
ata
l
oader.py
View file @
43892e17
import
pandas
as
pd
class
DataLoader
:
class
DataLoader
:
data_location
=
'camera_prepared_data.tsv'
reviews
=
pd
.
read_csv
(
data_location
,
sep
=
'
\t
'
,
error_bad_lines
=
False
)
...
...
ADA/server/ios_server/views.py
View file @
43892e17
...
...
@@ -4,7 +4,7 @@ import jsonpickle
from
django.views.decorators.csrf
import
csrf_exempt
import
sys
sys
.
path
.
append
(
'/
Users/joeloksanen
/individual_project/ADA'
)
sys
.
path
.
append
(
'/
home/joel
/individual_project/ADA'
)
from
dataloader
import
DataLoader
from
communicator
import
Communicator
...
...
ADA/server/server/settings.py
View file @
43892e17
...
...
@@ -25,7 +25,7 @@ SECRET_KEY = 'z)tj_b=**v@b5-l6s!$*+_0=nzmor8dc#y$-%4%45kt8e8q@-f'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG
=
True
ALLOWED_HOSTS
=
[
'192.168.
0.13'
,
'146.169.222.109'
,
'146.169.218.37
'
]
ALLOWED_HOSTS
=
[
'192.168.
1.104
'
]
# Application definition
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment