Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Joel Oksanen
individual_project
Commits
a56456f5
Commit
a56456f5
authored
Apr 27, 2020
by
Joel Oksanen
Browse files
Created script for preparing metadata
parent
af560174
Changes
4
Hide whitespace changes
Inline
Side-by-side
ADA/.gitignore
View file @
a56456f5
...
...
@@ -2,4 +2,5 @@
*.pt
__pycache__/
server/agent/amazon_data/
server/agent/target_extraction/data/
.DS_Store
ADA/server/agent/SA/bert_analyzer.py
View file @
a56456f5
...
...
@@ -8,10 +8,10 @@ import time
import
numpy
as
np
from
sklearn
import
metrics
semeval_2014_train_path
=
'data/SemEval-2014/Laptop_Train_v2.xml'
semeval_2014_test_path
=
'data/SemEval-2014/Laptops_Test_Gold.xml'
semeval_2014_train_path
=
'
agent/SA/
data/SemEval-2014/Laptop_Train_v2.xml'
semeval_2014_test_path
=
'
agent/SA/
data/SemEval-2014/Laptops_Test_Gold.xml'
amazon_test_path
=
'agent/SA/data/Amazon/annotated_amazon_laptop_reviews.xml'
trained_model_path
=
'semeval_2014_2.pt'
trained_model_path
=
'
agent/SA/
semeval_2014_2.pt'
BATCH_SIZE
=
32
MAX_EPOCHS
=
6
...
...
ADA/server/agent/concept_net.py
View file @
a56456f5
...
...
@@ -19,10 +19,10 @@ class ConceptNet:
obj
=
requests
.
get
(
self
.
url
+
uri
).
json
()
return
obj
[
'value'
]
def
append_
synonyms
(
self
,
feature
,
rel
,
synonyms
,
lock
):
def
append_
result
(
self
,
feature
,
rel
,
result_set
,
lock
):
rels
=
self
.
find_rels
(
feature
,
rel
)
lock
.
acquire
()
synonyms
.
update
(
rels
)
result_set
.
update
(
rels
)
lock
.
release
()
def
parent_check
(
self
,
node
,
parent
,
synonyms
):
...
...
@@ -40,14 +40,14 @@ class ConceptNet:
self
.
parent_check
(
node
,
parent
.
parent
,
synonyms
)
def
sem_synonyms_for_node
(
self
,
node
):
rels
=
[
'DefinedAs'
,
'Synonym'
,
'IsA'
,
'RelatedTo'
]
rels
=
[
'DefinedAs'
,
'Synonym'
,
'IsA'
,
'RelatedTo'
]
# SimilarTo? FormOf?
synonyms
=
set
()
lock
=
threading
.
Lock
()
threads
=
[]
for
rel
in
rels
:
t
=
threading
.
Thread
(
target
=
self
.
append_
synonyms
,
args
=
(
node
.
name
,
rel
,
synonyms
,
lock
))
t
=
threading
.
Thread
(
target
=
self
.
append_
result
,
args
=
(
node
.
name
,
rel
,
synonyms
,
lock
))
t
.
start
()
threads
.
append
(
t
)
for
t
in
threads
:
...
...
@@ -57,8 +57,26 @@ class ConceptNet:
return
synonyms
def sub_features_for_node(self, node):
    """Run one relation lookup per relation type for ``node.name`` and merge the results.

    A worker thread is started for each relation type; every worker calls
    ``self.append_result`` with the node name, the relation, a shared result
    set and a shared lock. The set is returned once all workers have joined.
    """
    relation_types = (
        'UsedFor',
        'HasA',
        'CapableOf',
        'Causes',
        'HasSubevent',
        'HasProperty',
        'MadeOf',
    )
    collected = set()
    guard = threading.Lock()

    workers = [
        threading.Thread(
            target=self.append_result,
            args=(node.name, relation, collected, guard),
        )
        for relation in relation_types
    ]
    for worker in workers:
        worker.start()
    # Wait for every lookup to finish before handing the set back.
    for worker in workers:
        worker.join()

    return collected
net
=
ConceptNet
()
parent
=
Node
(
str
(
sys
.
argv
[
1
]))
child
=
Node
(
str
(
sys
.
argv
[
2
]),
parent
=
parent
)
syns
=
net
.
sem_synonyms_for_node
(
child
)
print
(
syns
)
# parent = Node(str(sys.argv[1]))
# child = Node(str(sys.argv[2]), parent=parent)
# syns = net.sem_synonyms_for_node(child)
# print(syns)
node
=
Node
(
'camera'
)
print
(
net
.
sub_features_for_node
(
node
))
ADA/server/agent/prep_metadata.py
0 → 100644
View file @
a56456f5
import
pandas
as
pd
import
gzip
import
json
import
re
# Relative path of the tab-separated file the filtered camera metadata is written to.
output_location = 'target_extraction/data/camera_metadata.tsv'
def parse(path):
    """Yield one decoded JSON object per line of a gzip-compressed file.

    Args:
        path: Location of the gzip file; each line must hold one JSON document.

    Yields:
        The object produced by ``json.loads`` for each line, in file order.

    The file is opened inside a ``with`` block so the handle is closed when
    the generator is exhausted, closed early, or garbage-collected, instead
    of being left open as before.
    """
    with gzip.open(path, 'rb') as g:
        for line in g:
            yield json.loads(line)
def get_df(path):
    """Build a DataFrame with one row per record yielded by ``parse(path)``.

    Records are keyed by their zero-based position in the file, and that
    position becomes the DataFrame index (``orient='index'``).
    """
    records_by_position = dict(enumerate(parse(path)))
    return pd.DataFrame.from_dict(records_by_position, orient='index')
# Load every record of the Electronics metadata dump and list its columns.
metadata = get_df('amazon_data/meta_Electronics.json.gz')
for col in metadata.columns:
    print(col)

# get metadata for camera products
metadata = metadata[metadata['main_cat'] == 'Camera & Photo']

# try to filter out camera accessories
filter_words = ['accessor', 'battery', 'charger', 'tripod', 'strap', 'case', 'bag', 'book', 'filter', 'light',
                'drive', 'backpack', 'kit', 'printer', 'adapter', 'album', 'surveillance', 'security', 'cctv',
                'cassette']
# Each alternative accepts either case of the word's first letter only,
# e.g. 'battery' becomes '[Bb]attery'; alternatives are joined with '|'.
filter_pat = '|'.join(
    '[' + word[0].upper() + word[0].lower() + ']' + word[1:]
    for word in filter_words
)
r = re.compile(filter_pat)


def _has_filtered_category(cats):
    """Return True when any entry of ``cats`` matches the accessory pattern.

    A value that is not a list or tuple (for example the NaN a record
    without a 'category' key would carry) counts as "no match", so such
    rows are kept. This mirrors ``na=False`` in the title filter and avoids
    the TypeError raised by iterating a float.
    """
    if not isinstance(cats, (list, tuple)):
        return False
    return any(r.search(cat) for cat in cats)


# Drop rows whose title or any category entry mentions a filter word.
metadata = metadata[~metadata['title'].str.contains(pat=filter_pat, na=False, regex=True)]
metadata = metadata[~metadata['category'].apply(_has_filtered_category)]

# Print a sample of the remaining rows for manual inspection.
for _, row in metadata.head(20).iterrows():
    print('features:', row['feature'])
    print('description:', row['description'])
    print('tech1:', row['tech1'])
    print('tech2:', row['tech2'])

metadata.to_csv(output_location, sep='\t', index=False)
print('Successfully prepared data for', len(metadata.index), 'products')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment