text_analyzer.py 1.92 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from stanfordnlp.server import CoreNLPClient
from nltk.tree import *

# Human-readable descriptions for part-of-speech tags, keyed by tag string.
# Most keys are Penn Treebank-style tags; several descriptions end with an
# example word or phrase (e.g. 'took' for VBD). The last four entries (QF, VM,
# PSP, DEM) are described in their own text as Hindi / Indian-language tags.
pos_dict = {
    'CC': 'coordinating conjunction',
    'CD': 'cardinal digit',
    'DT': 'determiner',
    'EX': 'existential there',
    'FW': 'foreign word',
    'IN': 'preposition/subordinating conjunction',
    'JJ': 'adjective',
    'JJR': 'adjective, comparative',
    'JJS': 'adjective, superlative',
    'LS': 'list marker',
    'MD': 'modal',
    'NN': 'noun, singular',
    'NNS': 'noun plural',
    'NNP': 'proper noun, singular',
    'NNPS': 'proper noun, plural',
    'PDT': 'predeterminer',
    'POS': 'possessive ending',
    'PRP': 'personal pronoun',
    'PRP$': 'possessive pronoun',
    'RB': 'adverb',
    'RBR': 'adverb, comparative',
    'RBS': 'adverb, superlative',
    'RP': 'particle give up',
    'TO': 'to go \'to\' the store.',
    'UH': 'interjection errrrrrrrm',
    'VB': 'verb, base form take',
    'VBD': 'verb, past tense took',
    'VBG': 'verb, gerund/present participle taking',
    'VBN': 'verb, past participle taken',
    'VBP': 'verb, sing. present, non-3d take',
    'VBZ': 'verb, 3rd person sing. present takes',
    'WDT': 'wh-determiner which',
    'WP': 'wh-pronoun who, what',
    'WP$': 'possessive wh-pronoun whose',
    # Fixed typo: the description previously read 'wh-abverb'.
    'WRB': 'wh-adverb where, when',
    'QF': 'quantifier, bahut, thoda, kam (Hindi)',
    'VM': 'main verb',
    'PSP': 'postposition, common in indian langs',
    'DEM': 'demonstrative, common in indian langs',
}

# Fixed sample sentence that is sent to the CoreNLP client below.
sentence = (
    'Also the battery life on this camera is dismal even if you are not '
    'using the GPS function or autofocus mode.'
)

# Use the client as a context manager so it is released when the block exits.
with CoreNLPClient(
    annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse', 'coref'],
    timeout=60000,
    memory='16G',
) as client:
    print('annotating...')
    ann = client.annotate(sentence)

    # Disabled debug output, kept for reference.
    # NOTE(review): the 'dependencies' snippet refers to `doc`, which this
    # script never defines -- confirm the intended object before re-enabling.
    # print('tags:')
    # print([[(w.text, pos_dict[w.pos] if w.pos in pos_dict.keys() else '-') for w in sent.words] for sent in ann.sentence])
    # print('dependencies:')
    # [sent.print_dependencies() for sent in doc.sentences]

    # Print the parse tree of the first sentence only.
    print('tree:')
    parse_tree = ann.sentence[0].parseTree
    print(parse_tree)