Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Joel Oksanen
individual_project
Commits
59076bb3
Commit
59076bb3
authored
Apr 12, 2020
by
Joel Oksanen
Browse files
Annotated some Amazon reviews
parent
16cb2dcc
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
ADA/SA/data/Amazon/amazon_camera_test.xml
View file @
59076bb3
This diff is collapsed.
Click to expand it.
ADA/SA/data/Amazon/amazon_camera_test2.xml
deleted
100644 → 0
View file @
16cb2dcc
This diff is collapsed.
Click to expand it.
ADA/SA/data/Amazon/amazon_camera_train.xml
deleted
100644 → 0
View file @
16cb2dcc
This diff is collapsed.
Click to expand it.
ADA/SA/data/Amazon/prepared_amazon_camera_reviews.xml
deleted
100644 → 0
View file @
16cb2dcc
This diff is collapsed.
Click to expand it.
ADA/review_annotation.py
View file @
59076bb3
...
...
@@ -18,7 +18,7 @@ min_characters = 0
max_characters
=
200
n
=
500
sentiment_mappings
=
{
'+'
:
'positive'
,
'0'
:
'neutral'
,
'-'
:
'negative'
,
'c'
:
'conflict'
}
ann_bgs
=
{
'positive'
:
bg
.
green
,
'neutral'
:
bg
.
li_black
,
'negative'
:
bg
.
red
,
'conflict'
:
bg
.
yellow
}
ann_bgs
=
{
'positive'
:
bg
.
green
,
'neutral'
:
bg
.
blue
,
'negative'
:
bg
.
red
,
'conflict'
:
bg
.
yellow
}
annotated_reviews_location
=
'annotated_camera_reviews.xml'
included_labels
=
[
'NN'
,
'NNS'
,
'NP'
,
'NNP'
,
'NNPS'
,
'DT'
,
'CD'
,
'FW'
,
'PRP$'
]
nouns
=
[
'NN'
,
'NNS'
,
'NP'
,
'NNP'
,
'NNPS'
]
...
...
@@ -81,7 +81,7 @@ def prepare_reviews():
reviews
=
reviews
[
~
reviews
[
'review_body'
].
isnull
()]
# try to filter out reviews for camera accessories
filter_words
=
[
'accessor'
,
'batter
y
'
,
'charger'
,
'tripod'
,
'strap'
,
'case'
,
'bag'
,
filter_words
=
[
'accessor'
,
'batter'
,
'charger'
,
'tripod'
,
'strap'
,
'case'
,
'bag'
,
'filter'
,
'backpack'
,
'kit'
,
'printer'
,
'adapter'
,
'album'
,
'surveillance'
,
'security'
]
filter_pat
=
''
for
word
in
filter_words
:
...
...
@@ -170,6 +170,7 @@ def annotate_reviews():
print
(
bcolors
.
OKBLUE
+
'next:
\'
n
\'
'
+
bcolors
.
ENDC
)
print
(
bcolors
.
OKBLUE
+
'skip:
\'
s
\'
'
+
bcolors
.
ENDC
)
print
(
bcolors
.
OKBLUE
+
'undo:
\'
u
\'
'
+
bcolors
.
ENDC
)
print
(
bcolors
.
OKBLUE
+
'quit:
\'
q
\'
'
+
bcolors
.
ENDC
)
print
(
''
)
...
...
@@ -180,11 +181,16 @@ def annotate_reviews():
text_row
=
''
for
t
in
range
(
len
(
text
)):
char
=
text
[
t
]
if
t
==
cursor_pos
:
char
=
bg
.
blue
+
char
+
bg
.
rs
if
start
!=
None
and
cursor_pos
>=
start
and
t
in
range
(
start
,
cursor_pos
+
1
):
char
=
bg
.
li_black
+
char
+
bg
.
rs
elif
t
==
cursor_pos
:
char
=
bg
.
li_black
+
char
+
bg
.
rs
for
ann
in
annotations
:
if
t
in
range
(
ann
[
0
][
0
],
ann
[
0
][
1
]):
char
=
ann_bgs
[
ann
[
1
]]
+
char
+
bg
.
rs
text_row
+=
char
if
(
t
+
1
)
%
row_character_count
==
0
:
...
...
@@ -219,8 +225,11 @@ def annotate_reviews():
cursor_pos
=
min
(
cursor_pos
+
1
,
len
(
text
)
-
1
)
break
if
task
==
'u'
and
annotations
:
del
annotations
[
-
1
]
if
task
in
[
'n'
,
's'
,
'q'
]:
if
task
in
[
'n'
]:
if
task
in
[
'n'
]
and
annotations
:
# save annotations to tree
annotations_node
=
SubElement
(
sentence
,
'annotations'
)
for
annotation
in
annotations
:
...
...
@@ -230,15 +239,20 @@ def annotate_reviews():
sent_node
=
SubElement
(
annotation_node
,
'sentiment'
)
sent_node
.
text
=
annotation
[
1
]
break
if
task
==
'q'
:
if
task
==
'q'
or
task
==
's'
:
break
if
task
==
'q'
:
os
.
system
(
'clear'
)
break
elif
task
==
's'
:
root
.
remove
(
review
)
elif
task
==
'n'
:
n_annotated
+=
1
review
.
set
(
'annotated'
,
'true'
)
# save tree to file
n_annotated
+=
1
review
.
set
(
'annotated'
,
'true'
)
xmlstr
=
minidom
.
parseString
(
tostring
(
root
)).
toprettyxml
(
indent
=
' '
)
xmlstr
=
os
.
linesep
.
join
([
s
for
s
in
xmlstr
.
splitlines
()
if
s
.
strip
()])
with
open
(
selected_reviews_location
,
'w'
)
as
f
:
...
...
@@ -304,38 +318,17 @@ def prepare_annotated_reviews():
for
annotation
in
sentence
.
find
(
'annotations'
):
start
,
end
=
annotation
.
find
(
'range'
).
text
.
split
(
','
)
aspect_term_node
=
SubElement
(
aspect_terms_node
,
'aspectTerm'
)
aspect_term_node
.
set
(
'term'
,
text
[
start
:
end
])
aspect_term_node
.
set
(
'term'
,
text
[
int
(
start
):
int
(
end
)
])
aspect_term_node
.
set
(
'polarity'
,
annotation
.
find
(
'sentiment'
).
text
)
aspect_term_node
.
set
(
'from'
,
start
)
aspect_term_node
.
set
(
'to'
,
end
)
train_count
=
1000
train_root
=
Element
(
'data'
)
test_root
=
Element
(
'data'
)
counts
=
{
'positive'
:
0
,
'neutral'
:
0
,
'negative'
:
0
,
'conflict'
:
0
}
for
instance
in
prepared_root
:
if
counts
[
instance
.
find
(
'opinion'
).
text
]
<
train_count
:
train_root
.
append
(
instance
)
else
:
test_root
.
append
(
instance
)
counts
[
instance
.
find
(
'opinion'
).
text
]
+=
1
print
(
counts
)
print
(
len
(
train_root
))
print
(
len
(
test_root
))
xmlstr
=
minidom
.
parseString
(
tostring
(
train_root
)).
toprettyxml
(
indent
=
' '
)
xmlstr
=
os
.
linesep
.
join
([
s
for
s
in
xmlstr
.
splitlines
()
if
s
.
strip
()])
with
open
(
'amazon_camera_train.xml'
,
'w'
)
as
f
:
f
.
write
(
xmlstr
)
xmlstr
=
minidom
.
parseString
(
tostring
(
test_root
)).
toprettyxml
(
indent
=
' '
)
xmlstr
=
minidom
.
parseString
(
tostring
(
prepared_root
)).
toprettyxml
(
indent
=
' '
)
xmlstr
=
os
.
linesep
.
join
([
s
for
s
in
xmlstr
.
splitlines
()
if
s
.
strip
()])
with
open
(
'amazon_camera_test.xml'
,
'w'
)
as
f
:
f
.
write
(
xmlstr
)
# prepare_reviews()
annotate_reviews
()
#
prepare_annotated_reviews()
#
annotate_reviews()
prepare_annotated_reviews
()
ADA/reviews_to_be_annotated.xml
View file @
59076bb3
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment