Skip to content
Snippets Groups Projects
Commit 64448925 authored by Joel Oksanen
Browse files

Target extraction integrated to server

parent ef388c59
No related branches found
No related tags found
No related merge requests found
import re
from agent.review_tokenizer import ReviewTokenizer
from anytree import PostOrderIter
import pickle
from agent.argument import *
from functools import reduce
from agent.SA.bert_analyzer import BertAnalyzer
from agent.review import Review
......@@ -10,23 +6,13 @@ from agent.review import Review
class Agent:
review_tokenizer = ReviewTokenizer()
bert_analyzer = BertAnalyzer.default()
def __init__(self):
# load classifier
f = open('agent/camera_review_classifier.pickle', 'rb')
self.classifier = pickle.load(f)
f.close()
# analyze sentiment
def get_bayes_sentiment(self, phrase):
# get classification
tokens = self.review_tokenizer.tokenize_review(phrase)
prob_classification = self.classifier.prob_classify(dict([token, True] for token in tokens))
classification = prob_classification.max()
strength = (prob_classification.prob(classification) - 0.5) * 2
return strength if classification == '+' else -strength
def __init__(self, product):
self.product = product
self.product_node = product.root
self.arguments = product.argument_nodes
self.features = product.feature_nodes
def get_bert_sentiments(self, data):
return list(self.bert_analyzer.get_batch_sentiment_polarity(data))
......@@ -39,20 +25,14 @@ class Agent:
for review in reviews:
for phrase in review.phrases:
bayes_sentiment = self.get_bayes_sentiment(phrase.text)
for arg in phrase.args:
sentiment = sentiments.pop(0)
print(phrase.text)
print('arg:', arg.start, '-', arg.end)
print('bert:', sentiment)
print('bayes:', bayes_sentiment)
arg.set_sentiment(sentiment)
@staticmethod
def get_aggregates(reviews):
def get_aggregates(self, reviews):
ra = []
vote_sum = {arg: 0 for arg in arguments}
vote_phrases = {arg: [] for arg in arguments}
vote_sum = {arg: 0 for arg in self.arguments}
vote_phrases = {arg: [] for arg in self.arguments}
for review in reviews:
for phrase in review.phrases:
for arg, sentiment in phrase.get_votes().items():
......@@ -62,20 +42,19 @@ class Agent:
vote_sum[arg] += sentiment
return ra, vote_sum, vote_phrases
@staticmethod
def get_qbaf(ra, review_count):
def get_qbaf(self, ra, review_count):
# sums of all positive and negative votes for arguments
argument_sums = {}
for argument in arguments:
for argument in self.arguments:
argument_sums[argument] = 0
for r in ra:
if r['argument'] == argument:
argument_sums[argument] += r['vote']
# calculate attack/support relations for camera
supporters = {r: [] for r in arguments}
attackers = {r: [] for r in arguments}
for r in arguments:
supporters = {r: [] for r in self.arguments}
attackers = {r: [] for r in self.arguments}
for r in self.arguments:
for subf in r.children:
if argument_sums[subf] > 0:
supporters[r].append(subf)
......@@ -83,22 +62,23 @@ class Agent:
attackers[r].append(subf)
# calculate base scores for arguments
base_scores = {}
base_scores[camera] = 0.5 + 0.5 * argument_sums[camera] / review_count
for feature in features:
base_scores = {self.product_node: 0.5 + 0.5 * argument_sums[self.product_node] / review_count}
for feature in self.features:
base_scores[feature] = abs(argument_sums[feature]) / review_count
qbaf = {'supporters': supporters, 'attackers': attackers, 'base_scores': base_scores}
return qbaf
@staticmethod
def combined_strength(args):
    """Aggregate several strengths into a single combined strength.

    The result is ``1 - (1 - v1) * (1 - v2) * ...`` over every value in
    ``args``.

    Args:
        args: sized collection of numeric strengths.

    Returns:
        0 when ``args`` is empty, otherwise the aggregated strength.
    """
    # len() rather than truthiness so any sized container behaves the same.
    if len(args) == 0:
        return 0
    remainder = 1
    for strength in args:
        remainder *= 1 - strength
    return 1 - remainder
def argument_strength(self, base_score, attacker_strengths, supporter_strengths):
attack = self.combined_strength(attacker_strengths)
support = self.combined_strength(supporter_strengths)
@staticmethod
def argument_strength(base_score, attacker_strengths, supporter_strengths):
attack = Agent.combined_strength(attacker_strengths)
support = Agent.combined_strength(supporter_strengths)
if attack > support:
return base_score - (base_score * abs(attack - support))
elif attack < support:
......@@ -108,7 +88,7 @@ class Agent:
# apply DF-QUAD gradual semantics to qbaf
def get_strengths(self, qbaf):
strengths = {}
arguments = [node for node in PostOrderIter(camera)]
arguments = [node for node in PostOrderIter(self.product_node)]
for argument in arguments:
attacker_strengths = []
supporter_strengths = []
......@@ -117,19 +97,19 @@ class Agent:
attacker_strengths.append(strengths[child])
elif child in qbaf['supporters'][argument]:
supporter_strengths.append(strengths[child])
strengths[argument] = self.argument_strength(qbaf['base_scores'][argument], attacker_strengths,
supporter_strengths)
strengths[argument] = Agent.argument_strength(qbaf['base_scores'][argument], attacker_strengths,
supporter_strengths)
return strengths
def analyze_reviews(self, csv):
reviews = [Review(row) for _, row in csv.iterrows()]
reviews = [Review(row, self.product) for _, row in csv.iterrows()]
# extract augmented votes
self.extract_votes(reviews)
voting_reviews = list(filter(lambda r: r.is_voting(), reviews))
if len(voting_reviews) / len(reviews) < 0.33:
print('warning: only a small fraction of reviews generated votes')
# get aggregates
ra, self.vote_sum, self.vote_phrases = Agent.get_aggregates(reviews)
ra, self.vote_sum, self.vote_phrases = self.get_aggregates(reviews)
# get qbaf from ra
self.qbaf = self.get_qbaf(ra, len(reviews))
# apply gradual semantics
......@@ -140,7 +120,7 @@ class Agent:
print('strengths:')
print(self.strengths)
print('votes:')
for argument in arguments:
for argument in self.arguments:
print(argument, 'direct: {} positive, {} negative'.format(len(self.supporting_phrases(argument)),
len(self.attacking_phrases(argument))))
print(argument, 'augmented sum: {}'.format(self.vote_sum[argument]))
......@@ -164,11 +144,11 @@ class Agent:
argument] >= 0 # len(self.supporting_phrases(argument)) >= len(self.attacking_phrases(argument))
def supported_argument(self, argument):
    """Tell whether `argument` has a supporting subfeature of positive strength.

    Args:
        argument: the argument node to inspect.

    Returns:
        True when get_strongest_supporting_subfeature(argument) yields a
        node whose entry in self.strengths is greater than 0, else False.
    """
    # Look the subfeature up once instead of once per use.
    strongest = self.get_strongest_supporting_subfeature(argument)
    return strongest is not None and self.strengths[strongest] > 0
def attacked_argument(self, argument):
    """Tell whether `argument` has an attacking subfeature of positive strength.

    Args:
        argument: the argument node to inspect.

    Returns:
        True when get_strongest_attacking_subfeature(argument) yields a
        node whose entry in self.strengths is greater than 0, else False.
    """
    # Look the subfeature up once instead of once per use.
    strongest = self.get_strongest_attacking_subfeature(argument)
    return strongest is not None and self.strengths[strongest] > 0
def best_supporting_phrase(self, argument):
......
from anytree import Node
# Hand-written argument tree for the camera product: `camera` is the root and
# every other node is attached through its `parent` argument.
camera = Node('camera')
images = Node('images', parent=camera)
video = Node('video', parent=camera)
battery = Node('battery', parent=camera)
flash = Node('flash', parent=camera)
audio = Node('audio', parent=camera)
price = Node('price', parent=camera)
shipping = Node('shipping', parent=camera)
lens = Node('lens', parent=camera)
# zoom and autofocus hang off `lens`, not off the root.
zoom = Node('zoom', parent=lens)
af = Node('autofocus', parent=lens)
# `arguments` is every node including the root; `features` is the same list
# without the root.
arguments = [camera, images, video, battery, flash, audio, price, shipping, lens, zoom, af]
features = [images, video, battery, flash, audio, price, shipping, lens, zoom, af]
# Maps each node to the terms that identify it in review text.
# Phrase.get_args feeds each term to re.finditer, so a term matches as a
# substring/regex of the phrase (e.g. 'ship' also hits "shipping").
# NOTE(review): ' pic ' is padded with spaces, presumably to avoid matching
# inside longer words - confirm.
glossary = {
camera: ['camera', 'device', 'product'],
images: ['image', 'picture', ' pic '],
video: ['video'],
battery: ['battery'],
flash: ['flash'],
audio: ['audio', 'sound'],
price: ['price', 'value', 'cost'],
shipping: ['ship'],
lens: ['lens'],
zoom: ['zoom'],
af: ['autofocus', 'auto-focus']
}
class Argument:
    """An argument (the product or one of its features) offered to the user.

    Attributes are plain data: ``id`` and ``name`` identify the argument and
    ``queries`` holds the queries attached to it (empty by default).
    """

    def __init__(self, id, name):
        self.id = id
        self.name = name
        self.queries = []

    def withQueries(self, queries):
        """Return a new Argument with the same id/name carrying `queries`.

        The receiver is left unchanged. The list is copied so later changes
        to the caller's list do not leak into the returned argument.
        """
        arg = Argument(self.id, self.name)
        arg.queries = list(queries)
        return arg

    # snake_case spelling used by the newer call sites; same behaviour.
    with_queries = withQueries
......@@ -4,7 +4,7 @@ class ArgumentQuery:
self.queryID = queryID
self.text = text
def withArgument(self, argument):
def with_argument(self, argument):
query = ArgumentQuery(self.queryID, self.text)
query.argumentID = argument.id
query.text = query.text.format(arg=argument.name)
......
from agent.argumentquery import ArgumentQuery
from agent.argument import *
from agent.agent import Agent
from agent.target_extraction.product import Product
import inflect
from nltk.stem.snowball import SnowballStemmer
from nltk.stem import WordNetLemmatizer
from threading import Thread
class ADAMessage:
......@@ -10,6 +12,7 @@ class ADAMessage:
self.text = text
self.arguments = arguments
class Communicator:
queries = [
......@@ -21,43 +24,51 @@ class Communicator:
ArgumentQuery(5, 'What did users say about the {arg} being poor?'),
]
agent = Agent()
inflect = inflect.engine()
stemmer = SnowballStemmer("english")
wnl = WordNetLemmatizer()
def __init__(self, dl):
self.dl = dl
self.product_id = None
self.product = None
self.agent = None
self.loading = False
def has_loaded_product(self, product_id):
return self.product_id == product_id
return self.product is not None and self.product.id == product_id and not self.loading
def load_product(self, product_id):
self.product_id = product_id
self.arguments = {arguments[i] : Argument(i, arguments[i].name) for i in range(len(arguments))}
self.argument_nodes = arguments
self.agent.analyze_reviews(self.dl.get_reviews(self.product_id))
def load_product(self, product_id, product_type): # product_type e.g. 'camera'
if self.product is None or product_id != self.product.id:
self.loading = True
self.product = Product.get_product(product_type)
self.product.id = product_id
Thread(target=self.load_product_bg).start()
def load_product_bg(self):
    """Build the agent for self.product and analyse its reviews.

    Started on a worker thread by load_product. The ``loading`` flag is
    cleared on every exit path. Without that, a failed analysis would leave
    the communicator reporting "still loading" forever, because
    load_product does not restart a product whose id is already set.
    """
    try:
        self.agent = Agent(self.product)
        self.agent.analyze_reviews(self.dl.get_reviews(self.product.id))
    except Exception:
        # Drop the half-loaded state so has_loaded_product() stays False
        # and the next load_product() call retries from scratch.
        self.product = None
        self.agent = None
        raise
    finally:
        self.loading = False
def get_init_message(self):
prod_node = self.argument_nodes[0]
prod = self.arguments[prod_node]
prod_node = self.product.root
prod = self.product.argument_for_node(prod_node)
text = 'What would you like to know about the {}?'.format(prod.name)
queries = self.get_queries(prod_node)
args = [prod.withQueries(queries)]
args = [prod.with_queries(queries)]
return ADAMessage(text, args)
def get_response(self, query_id, arg_id):
q_arg_node = self.argument_nodes[arg_id]
q_arg = self.arguments[q_arg_node]
q_arg_node = self.product.argument_node_for_id(arg_id)
q_arg = self.product.argument_for_id(arg_id)
if query_id == 0:
supp_node = self.agent.get_strongest_supporting_subfeature(q_arg_node)
att_node = self.agent.get_strongest_attacking_subfeature(q_arg_node)
text = 'The {} was highly rated because the {} {} good'.format(
q_arg.name, self.arguments[supp_node].name, self.was_were(self.arguments[supp_node]))
supp_name = self.product.argument_for_node(supp_node).name
text = 'The {} was highly rated because the {} {} good'.format(q_arg.name, supp_name,
self.was_were(supp_name))
if att_node:
text += ', although the {} {} poor.'.format(
self.arguments[att_node].name, self.was_were(self.arguments[att_node]))
att_name = self.product.argument_for_node(att_node).name
text += ', although the {} {} poor.'.format(att_name, self.was_were(att_name))
args = [q_arg_node, supp_node, att_node]
else:
text += '.'
......@@ -66,47 +77,48 @@ class Communicator:
if query_id == 2:
supp_node = self.agent.get_strongest_supporting_subfeature(q_arg_node)
att_node = self.agent.get_strongest_attacking_subfeature(q_arg_node)
text = 'The {} was considered to be good because the {} {} good'.format(
q_arg.name, self.arguments[supp_node].name, self.was_were(self.arguments[supp_node]))
supp_name = self.product.argument_for_node(supp_node).name
text = 'The {} was considered to be good because the {} {} good'.format(q_arg.name, supp_name,
self.was_were(supp_name))
if att_node:
text += ', although the {} {} poor.'.format(
self.arguments[att_node].name, self.was_were(self.arguments[att_node]))
att_name = self.product.argument_for_node(att_node).name
text += ', although the {} {} poor.'.format(att_name, self.was_were(att_name))
args = [q_arg_node, supp_node, att_node]
else:
text += '.'
args = [q_arg_node, supp_node]
if query_id == 4 or query_id == 5:
phrase = self.agent.best_supporting_phrase(q_arg_node) if query_id == 4 else self.agent.best_attacking_phrase(q_arg_node)
phrase = (self.agent.best_supporting_phrase(q_arg_node) if query_id == 4
else self.agent.best_attacking_phrase(q_arg_node))
while phrase[-1] == '.':
phrase = phrase[:-1]
text = '\"...{}...\"'.format(phrase)
args = [q_arg_node]
args = [self.arguments[arg].withQueries(self.get_queries(arg)) for arg in args]
args = [self.product.argument_for_node(arg).with_queries(self.get_queries(arg)) for arg in args]
return ADAMessage(text, args)
def get_queries(self, arg_node):
arg = self.arguments[arg_node]
arg = self.product.argument_for_node(arg_node)
queries = []
base = 0 if arg.id == 0 else 2
if self.agent.liked_argument(arg_node):
if self.agent.supported_argument(arg_node):
queries.append(self.queries[base].withArgument(arg))
queries.append(self.queries[base].with_argument(arg))
supp_phrase = self.agent.best_supporting_phrase(arg_node)
if supp_phrase:
queries.append(self.queries[4].withArgument(arg))
queries.append(self.queries[4].with_argument(arg))
else:
if self.agent.attacked_argument(arg_node):
queries.append(self.queries[base + 1].withArgument(arg))
queries.append(self.queries[base + 1].with_argument(arg))
att_phrase = self.agent.best_attacking_phrase(arg_node)
if att_phrase:
queries.append(self.queries[5].withArgument(arg))
queries.append(self.queries[5].with_argument(arg))
return queries
def was_were(self, arg):
return 'was' if self.stemmer.stem(arg.name) == arg.name else 'were'
def was_were(self, term):
return 'was' if self.wnl.lemmatize(term) == term else 'were'
from anytree import Node
# Hand-written feature tree for the camera product: `camera` is the root and
# every other node is attached through its `parent` argument.
camera = Node('camera')
image = Node('image', parent=camera)
video = Node('video', parent=camera)
battery = Node('battery', parent=camera)
flash = Node('flash', parent=camera)
audio = Node('audio', parent=camera)
price = Node('price', parent=camera)
shipping = Node('shipping', parent=camera)
lens = Node('lens', parent=camera)
# zoom and af hang off `lens`, not off the root.
zoom = Node('zoom', parent=lens)
af = Node('af', parent=lens)
# `reviewables` is every node including the root; `features` is the same list
# without the root.
reviewables = [camera, image, video, battery, flash, audio, price, shipping, lens, zoom, af]
features = [image, video, battery, flash, audio, price, shipping, lens, zoom, af]
# Maps nodes to the terms that identify them in text.
# NOTE(review): lens, zoom and af have no entry here, so glossary[lens] etc.
# raises KeyError - confirm whether that omission is intended.
glossary = {
camera: ['camera', 'device', 'product'],
image: ['image', 'picture', ' pic '],
video: ['video'],
battery: ['battery'],
flash: ['flash'],
audio: ['audio', 'sound'],
price: ['price', 'value', 'cost'],
shipping: ['ship']
}
......@@ -24,8 +24,8 @@ def get_df(path):
pd.set_option('display.max_colwidth', None)
category = 'Laptops'
metadata = pd.read_json('amazon_data/meta_Electronics.json', lines=True)# get_df('amazon_data/meta_Electronics.json.gz')
category = 'Backpacks'
metadata = pd.read_json('amazon_data/meta_Clothing_Shoes_and_Jewelry.json', lines=True)
for col in metadata.columns:
print(col)
......@@ -34,12 +34,12 @@ metadata = metadata[metadata['category'].apply(lambda cats: category in cats)]
print(metadata['category'][:5])
print(len(metadata.index))
review_iter = pd.read_json('amazon_data/Electronics.json', lines=True, chunksize=1000)
review_iter = pd.read_json('amazon_data/Clothing_Shoes_and_Jewelry.json', lines=True, chunksize=1000)
reviews = pd.concat([reviews[reviews['asin'].isin(metadata['asin'])] for reviews in review_iter])
print(len(reviews.index))
reviews.to_csv('target_extraction/data/verified_laptop_reviews.tsv', sep='\t', index=False)
reviews.to_csv('target_extraction/data/verified_backpack_reviews.tsv', sep='\t', index=False)
# child_product = 'speaker'
# reviews = pd.read_csv('amazon_data/amazon_reviews_us_Electronics_v1_00.tsv.gz', sep='\t', error_bad_lines=False,
......
......@@ -2,29 +2,29 @@ import re
from nltk.tokenize import sent_tokenize
from agent.SA.bert_dataset import MAX_SEQ_LEN
from anytree import PostOrderIter
from agent.argument import *
class Review:
SENTIMENT_THRESHOLD = 0.95
PHRASE_MAX_WORDS = MAX_SEQ_LEN * 0.3
def __init__(self, data):
def __init__(self, data, product):
self.product = product
self.id = data['review_id']
self.body = data['review_body']
self.phrases = Review.extract_phrases(self.body)
self.phrases = Review.extract_phrases(self.body, product)
self.votes = {}
# extract phrases
@staticmethod
def extract_phrases(review_body):
def extract_phrases(review_body, product):
sentences = sent_tokenize(review_body)
texts = []
for sentence in sentences:
texts += re.split(' but | although | though | otherwise | however | unless | whereas | despite |<br />',
sentence)
texts = filter(lambda t: len(t.split()) < Review.PHRASE_MAX_WORDS, texts)
phrases = [Phrase(text) for text in texts]
phrases = [Phrase(text, product) for text in texts]
return phrases
def get_votes(self):
......@@ -39,7 +39,7 @@ class Review:
# augment votes (Definition 4.3) obtained for a single critic
def augment_votes(self):
arguments = [node for node in PostOrderIter(camera)]
arguments = [node for node in PostOrderIter(self.product.root)]
for argument in arguments:
if argument not in self.votes:
polar_sum = 0
......@@ -55,7 +55,8 @@ class Review:
class Phrase:
def __init__(self, text):
def __init__(self, text, product):
self.product = product
self.text = text
self.args = self.get_args(text)
self.votes = {}
......@@ -63,10 +64,10 @@ class Phrase:
# get argument(s) that match phrase
def get_args(self, phrase):
argument_matches = []
arguments = [node for node in PostOrderIter(camera)]
arguments = [node for node in PostOrderIter(self.product.root)]
while len(arguments) > 0:
f = arguments.pop(0)
for word in glossary[f]:
for word in self.product.glossary[f]:
matches = [Arg(f, m.start(), m.end()) for m in re.finditer(word, phrase)]
if matches:
argument_matches += matches
......@@ -76,7 +77,7 @@ class Phrase:
# remove all ancestors of node in list l
def remove_ancestors(self, node, l):
if node.parent != None:
if node.parent is not None:
try:
l.remove(node.parent)
except ValueError:
......
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from agent.item import glossary
import string
class ReviewTokenizer:
def __init__(self, product):
self.flat_glossary = [val for sublist in list(product.glossary.values()) for val in sublist]
tokenizer = TweetTokenizer()
stop_words = stopwords.words('english')
flat_glossary = [val for sublist in list(glossary.values()) for val in sublist]
def tokenize_review(self, review):
return self.reduce_noise(self.tokenizer.tokenize(review))
......
class Argument:
    """An argument (the product or one of its features) offered to the user.

    Attributes are plain data: ``id`` and ``name`` identify the argument and
    ``queries`` holds the queries attached to it (empty by default).
    """

    def __init__(self, id, name):
        self.id = id
        self.name = name
        self.queries = []

    def with_queries(self, queries):
        """Return a new Argument with the same id/name carrying `queries`.

        The receiver is left unchanged. The list is copied so later changes
        to the caller's list do not leak into the returned argument.
        """
        arg = Argument(self.id, self.name)
        arg.queries = list(queries)
        return arg
from anytree import Node
import pickle
from os.path import isfile
from agent.target_extraction.argument import Argument
class Product:
    """Pickle-able representation of a product: its feature tree and glossary.

    Built from a tree root and a synonym dictionary, and stored under
    FILE_DIR as ``<name>.pickle``.
    """

    FILE_DIR = 'agent/target_extraction/extracted_products/'
    FILE_EXTENSION = '.pickle'

    def __init__(self, root: Node, syn_dict):
        """Index the tree under `root`.

        Args:
            root: root node of the product tree.
            syn_dict: mapping from node name to that node's synonym terms.
        """
        self.root = root
        self.feature_nodes = list(root.descendants)
        self.argument_nodes = [root] + self.feature_nodes
        # Re-key the synonyms by node object; names without a matching node
        # are dropped, nodes without a matching name get no entry.
        self.glossary = {
            a_node: syns
            for a, syns in syn_dict.items()
            for a_node in self.argument_nodes
            if a_node.name == a
        }
        # An argument's id is its position in argument_nodes (root is 0).
        self.arguments = {
            a_node: Argument(a_idx, a_node.name)
            for a_idx, a_node in enumerate(self.argument_nodes)
        }

    def argument_node_for_id(self, id):
        """Return the tree node whose argument id is `id`."""
        return self.argument_nodes[id]

    def argument_for_id(self, id):
        """Return the Argument whose id is `id`."""
        return self.argument_for_node(self.argument_node_for_id(id))

    def argument_for_node(self, n):
        """Return the Argument built for tree node `n`."""
        return self.arguments[n]

    @staticmethod
    def get_product(name):
        """Load the pickled Product stored for `name`.

        Raises:
            FileNotFoundError: no file exists at FILE_DIR + name + FILE_EXTENSION.
        """
        path = Product.FILE_DIR + name + Product.FILE_EXTENSION
        if not isfile(path):
            raise FileNotFoundError('No representation found for product {} at {}'.format(name, path))
        # NOTE(review): unpickling runs arbitrary code from the file, and
        # `name` is spliced into the path unchecked - only call this with
        # trusted names and trusted files.
        with open(path, 'rb') as f:
            product: Product = pickle.load(f)
        return product
# camera = Node('camera')
# image = Node('image', parent=camera)
# video = Node('video', parent=camera)
# battery = Node('battery', parent=camera)
# flash = Node('flash', parent=camera)
# audio = Node('audio', parent=camera)
# price = Node('price', parent=camera)
# shipping = Node('shipping', parent=camera)
# lens = Node('lens', parent=camera)
# zoom = Node('zoom', parent=lens)
# af = Node('af', parent=lens)
#
# reviewables = [camera, image, video, battery, flash, audio, price, shipping, lens, zoom, af]
# features = [image, video, battery, flash, audio, price, shipping, lens, zoom, af]
#
# glossary = {
# camera: ['camera', 'device', 'product'],
# image: ['image', 'picture', ' pic '],
# video: ['video'],
# battery: ['battery'],
# flash: ['flash'],
# audio: ['audio', 'sound'],
# price: ['price', 'value', 'cost'],
# shipping: ['ship']
# }
import pandas as pd
import ast
from collections import Counter
from nltk import pos_tag
from nltk.tokenize import word_tokenize, sent_tokenize
......@@ -11,14 +10,16 @@ from concept_net import ConceptNet
from anytree import Node, RenderTree
import numpy as np
import re
from gensim.models import Word2Vec, KeyedVectors
from gensim.models import Word2Vec
import pickle
import math
from agent.target_extraction.product import Product
stop_words = stopwords.words('english')
wnl = WordNetLemmatizer()
cnet = ConceptNet()
def obtain_texts(path, col):
    """Read column `col` of the tab-separated file at `path`.

    Malformed rows are skipped with a warning instead of aborting the read.

    Args:
        path: location of the TSV file.
        col: name of the column to extract.

    Returns:
        List of the column's values with null entries removed.
    """
    try:
        # Current spelling; equivalent to the old error_bad_lines=False with
        # its default warn_bad_lines=True.
        frame = pd.read_csv(path, sep='\t', on_bad_lines='warn')
    except TypeError:
        # pandas older than 1.3 does not know on_bad_lines.
        frame = pd.read_csv(path, sep='\t', error_bad_lines=False)
    return [text for text in frame[col] if not pd.isnull(text)]
......@@ -41,6 +42,7 @@ class TargetExtractor:
OUTLIER_COEFFICIENT = 5
N_DIRECT_FEATURES = 3 # top N_DIRECT_FEATURES features will be direct children of the product (not subfeatures)
PARENT_COUNT_FRAC = 0.5 # feature f1 will only be considered as a subfeature of f2 if c(f1) / c(f2) > this value
WV_SIZE = 100
# word2vec
MIN_TERM_COUNT = 100
......@@ -68,6 +70,12 @@ class TargetExtractor:
self.counter = self.count_nouns()
self.total_count = sum(self.counter.values())
def save_product_representation(self):
    """Pickle a Product built from self.tree and self.syn_dict.

    The file is written to Product.FILE_DIR + self.product +
    Product.FILE_EXTENSION.
    """
    # Build the object before opening the file, so a failure here does not
    # leave a truncated pickle behind.
    representation = Product(self.tree, self.syn_dict)
    path = Product.FILE_DIR + self.product + Product.FILE_EXTENSION
    with open(path, 'wb') as f:
        pickle.dump(representation, f)
def get_tree_and_synonyms(self):
print('training word2vec model...')
# train word2vec model
......@@ -231,7 +239,7 @@ class TargetExtractor:
return self.counter[term] / self.total_count
def get_word2vec_model(self):
model = Word2Vec(self.phraser[self.phrases], min_count=TargetExtractor.MIN_TERM_COUNT).wv
model = Word2Vec(self.phraser[self.phrases], size=TargetExtractor.WV_SIZE, min_count=TargetExtractor.MIN_TERM_COUNT).wv
return model
def save(self):
......@@ -492,17 +500,39 @@ class Synset:
return None
electronics_texts = obtain_texts('data/electronics_reviews.tsv', 'review_body')[:300000]
electronics_extractor = TargetExtractor('device', electronics_texts)
texts = obtain_texts('data/verified_laptop_reviews.tsv', 'reviewText')
extractor = TargetExtractor('laptop', texts, parent=electronics_extractor)
tree, syns = extractor.get_tree_and_synonyms()
print(RenderTree(tree))
extractor.save()
# electronics_texts = obtain_texts('data/electronics_reviews.tsv', 'review_body')[:300000]
# electronics_extractor = TargetExtractor('device', electronics_texts)
# texts = obtain_texts('data/verified_laptop_reviews.tsv', 'reviewText')
# extractor = TargetExtractor('laptop', texts, parent=electronics_extractor)
# tree, syns = extractor.get_tree_and_synonyms()
# print(RenderTree(tree))
# extractor.save()
extractor: TargetExtractor = TargetExtractor.load_saved('camera')
extractor.save_product_representation()
# tree, syns = extractor.get_tree_and_synonyms()
# print(RenderTree(tree))
# print(extractor.syn_dict)
# extractor.save()
# print(extractor.wv.similarity('keyboard', 'backlit_keyboard'))
# print(extractor.wv.similarity('touchpad', 'backlit_keyboard'))
# for t in {'mouse_pad', 'mouse', 'track_pad', 'touch_pad', 'touchscreen', 'touchpad', 'backlit_keyboard', 'keys_are', 'trackpad'}:
# print(t, extractor.wv.similarity('keyboard', t))
# wv = KeyedVectors.load_word2vec_format('data/knowledge-vectors-skipgram1000-en.bin', binary=True)
# print(wv.relative_cosine_similarity()
# extractor: TargetExtractor = TargetExtractor.load_saved('laptop')
# print(extractor.counts)
# print(extractor.wv.most_similar(positive=['asus', 'lenovo', 'acer', 'hp', 'toshiba', 'dell'], negative=[], topn=20))
# print(extractor.wv.doesnt_match(['asus', 'lenovo', 'acer', 'hp', 'company', 'vizio', 'toshiba', 'dell']))
# for a in ['touchpad', 'mouse']:
# print(a)
# extractor.print_relations_from(a)
# np.set_printoptions(precision=4, suppress=True, threshold=np.inf)
# extractor: TargetExtractor = TargetExtractor.load_saved()
# extractor.relatedness_matrix = extractor.get_scaled_relations()
# tree, _ = extractor.get_tree_and_synonyms()
# print(RenderTree(tree))
......@@ -512,9 +542,7 @@ extractor.save()
# print(extractor.aspects)
# print(extractor.relatedness_matrix)
# extractor.save()
# for a in ['lcd_screen', 'viewfinder', 'lens', 'image_stabilization']:
# print(a)
# extractor.print_relations_from(a)
# print(extractor.counts['lcd_screen'], extractor.counts['viewfinder'])
# print(RenderTree(extractor.get_product_tree2()))
......@@ -6,7 +6,7 @@ from agent.dataloader import DataLoader
from agent.communicator import Communicator
dl = DataLoader()
communicator = Communicator(dl)
communicators = [] # change into dict with cookie key to support several connections
def index(request):
......@@ -15,23 +15,29 @@ def index(request):
def product(request):
id = request.GET.get('id', '')
product_type = request.GET.get('type', '')
name = dl.get_product_name(id)
star_rating = dl.get_avg_star_rating(id)
image_url = 'https://ws-na.amazon-adsystem.com/widgets/q?_encoding=UTF8&MarketPlace=US&ASIN=' + id + '&ServiceVersion=20070822&ID=AsinImage&WS=1&Format=SL250'
if not communicators:
print(1)
communicators.append(Communicator(dl))
communicator = communicators[0]
if not communicator.has_loaded_product(id):
communicator.load_product(id)
communicator.load_product(id, product_type)
return HttpResponse("OK")
init_message = communicator.get_init_message()
product_title = dl.get_product_name(id)
star_rating = dl.get_avg_star_rating(id)
image_url = 'https://ws-na.amazon-adsystem.com/widgets/q?_encoding=UTF8&MarketPlace=US&ASIN=' + id + '&ServiceVersion=20070822&ID=AsinImage&WS=1&Format=SL250'
class Empty:
pass
product_info = Empty()
product_info.id = id
product_info.name = name
product_info.name = product_title
product_info.starRating = star_rating
product_info.imageURL = image_url
init_response = Empty()
......@@ -46,5 +52,5 @@ def message(request):
parsed = json.loads(request.body)
query_id = parsed['queryID']
arg_id = parsed['argumentID']
response = communicator.get_response(query_id, arg_id)
response = communicators[0].get_response(query_id, arg_id)
return HttpResponse(jsonpickle.encode(response, unpicklable=False), content_type="application/json")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment