# Sentiment analysis of Chinese text based on the BOSON sentiment lexicon (https://bosonnlp.com/dev/resource).
import re
import jieba
class DictBasedSentAnal:
    """Dictionary-based sentiment scorer for Chinese text.

    A word -> score lexicon is loaded from a text file at construction
    time.  A sentence is scored as the sum of the lexicon scores of the
    tokens produced by ``jieba.cut``; tokens missing from the lexicon
    contribute 0.
    """

    # Lexicon file that is loaded when no explicit path is supplied.
    DEFAULT_DICT_PATH = 'BosonNLP_sentiment_score.txt'

    def __init__(self, dict_path=DEFAULT_DICT_PATH, encoding='utf-8'):
        """Load the sentiment lexicon.

        Args:
            dict_path: Path of the lexicon file.  Defaults to
                ``BosonNLP_sentiment_score.txt`` in the working directory.
            encoding: Text encoding used to read the file.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a two-field line carries a non-numeric score.
        """
        # The attribute name is kept unchanged so that existing code reading
        # ``instance.__sent_dict__`` keeps working.
        self.__sent_dict__ = self.__read_dict(dict_path, encoding)

    def analyse(self, sentence):
        """Return the sentiment score of ``sentence`` as a float.

        Args:
            sentence: Text to score.  An empty (or ``None``) value scores
                0.0 without invoking the tokenizer.

        Returns:
            The sum of the lexicon scores of all tokens in ``sentence``.
        """
        if not sentence:
            return 0.0
        lexicon = self.__sent_dict__
        # Start from 0.0 so the result is a float even when no token matches.
        return sum(
            (lexicon.get(token, 0.0) for token in jieba.cut(sentence)),
            0.0,
        )

    @staticmethod
    def __read_dict(path, encoding='utf-8'):
        """Parse a lexicon file into a ``{word: score}`` dictionary.

        Only lines consisting of exactly two whitespace-separated fields
        are used; every other line (blank, one field, three or more
        fields) is skipped.  If a word occurs more than once, the last
        occurrence wins.

        Args:
            path: Path of the lexicon file.
            encoding: Text encoding used to read the file.

        Returns:
            A dict mapping each word to its score as a float.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If the second field of a two-field line is not a
                valid float literal.
        """
        sent_dict = {}
        with open(path, encoding=encoding) as input_file:
            for line in input_file:
                # str.split() with no argument trims the line and splits on
                # runs of whitespace, so no regular expression is needed.
                fields = line.split()
                if len(fields) == 2:
                    word, score = fields
                    sent_dict[word] = float(score)
        return sent_dict
if __name__ == '__main__':
    # Demo: score one positive and one negative sample review and print
    # each score with two decimal places.
    analyser = DictBasedSentAnal()
    samples = (
        '书很棒,是正版,发货速度很快,下次还来买。',
        '真的是谁买谁后悔,这个版本太旧了。',
    )
    for sample in samples:
        formatted_score = format(analyser.analyse(sample), '.2f')
        print('情感得分\t' + formatted_score)