Django 中使用 xlwt 导出生成的 Excel 关键词推荐结果-白红宇

Django 中使用 xlwt 导出生成的 Excel 关键词推荐结果

阅读量：4230 次

发布时间：2019-05-26

本文共 3810 字，大约阅读时间需要 12 分钟。

好多博客都是直接上代码，既不说明使用的是 Python 2 还是 Python 3，也不给出代码中所用模块的 import 方式。我虽受其利，也深受其害，在此引以为戒。本文代码在 Python 3.6 的运行环境下测试通过（其实只要是 Python 3 应该都可以），所需的 import 语句见下方。

import datetime
import time
from io import BytesIO  # On Python 2 use: from StringIO import StringIO

import gensim
import numpy as np
import xlwt
from django.http import HttpResponse

def compute_ngrams(word, min_n, max_n):
    """Return the distinct character n-grams of ``word``.

    Args:
        word: String to slice into n-grams.
        min_n: Shortest n-gram length to produce.
        max_n: Longest n-gram length to produce; it is capped at
            ``len(word)``.

    Returns:
        A sorted list of the unique substrings of ``word`` whose length lies
        between ``min_n`` and ``min(len(word), max_n)`` inclusive. The list is
        empty when that range is empty (for example when ``word`` is ``""``
        and ``min_n`` is at least 1).
    """
    longest = min(len(word), max_n)
    ngrams = set()
    for size in range(min_n, longest + 1):
        for start in range(len(word) - size + 1):
            ngrams.add(word[start:start + size])
    # Sort so the result is reproducible: iterating a set of strings yields a
    # different order from one interpreter run to the next (hash
    # randomization), which made the previous list(set(...)) unstable.
    return sorted(ngrams)

def word_vector(word, wv_from_text, min_n=1, max_n=3):
    """Look up the vector of ``word``, falling back to averaged n-gram vectors.

    Args:
        word: The word to embed.
        wv_from_text: Loaded embedding model. It must support
            ``key in model`` and ``model[key]`` and expose ``vector_size``
            (e.g. a gensim ``KeyedVectors`` instance).
        min_n: Shortest n-gram length used for the fallback.
        max_n: Longest n-gram length used for the fallback.

    Returns:
        The stored vector when ``word`` is in the vocabulary. Otherwise the
        arithmetic mean of the vectors of those n-grams of ``word`` that are
        in the vocabulary: n-grams longer than one character are tried first,
        and single characters are used only when none of the longer ones
        matched. When nothing usable is found, a message is printed and the
        integer ``0`` is returned instead of raising, so callers can skip the
        word and carry on.
    """
    # ``in model`` is a hashed vocabulary lookup; the previous
    # ``in model.index2word`` scanned the whole vocabulary list on every test.
    if word in wv_from_text:
        return wv_from_text[word]

    # Out-of-vocabulary word: approximate it from its n-grams.
    ngrams = compute_ngrams(word, min_n=min_n, max_n=max_n)
    multi_char = [ng for ng in ngrams if len(ng) > 1]
    single_char = [ng for ng in ngrams if len(ng) == 1]

    word_vec = np.zeros(wv_from_text.vector_size, dtype=np.float32)
    ngrams_found = 0
    # Prefer n-grams of two or more characters; single characters are a last
    # resort because the resulting vector is of much lower quality.
    for candidates in (multi_char, single_char):
        if ngrams_found:
            break
        for ngram in candidates:
            if ngram in wv_from_text:
                word_vec += wv_from_text[ngram]
                ngrams_found += 1

    if word_vec.any():  # at least one non-zero component: take the mean
        return word_vec / max(1, ngrams_found)

    # Deliberately no exception here: the caller skips words without a vector.
    print('all ngrams for word %s absent from model' % word)
    return 0

def get_keyword_recommend_result(result_list):
    """Build an Excel sheet of keyword recommendations and return it as a download.

    For every keyword the 15 most similar words are looked up in the embedding
    model stored in ``ChineseEmbedding.bin`` and written to one row: column 0
    holds the keyword (blue when it is in the model's vocabulary, red when it
    is not), the following columns hold the recommended words. Keywords for
    which ``word_vector`` finds no vector are skipped and take up no row.

    Args:
        result_list: Iterable of keyword strings.

    Returns:
        A Django ``HttpResponse`` carrying the generated ``.xls`` file as an
        attachment named ``keyword_recommend_result.xls``; returning it from a
        view makes the browser download the workbook built in memory.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('sheet1')
    blue_style = xlwt.easyxf("font:colour_index blue;")  # in-vocabulary keyword
    red_style = xlwt.easyxf("font:colour_index red;")  # out-of-vocabulary keyword

    print("开始载入文件...", flush=True)
    print("Now：", datetime.datetime.now(), flush=True)
    t1 = time.time()
    # ChineseEmbedding.bin is the binary form of Tencent's
    # Tencent_AILab_ChineseEmbedding.txt, which loads faster than the text
    # file. It can be produced once with:
    #   wv = gensim.models.KeyedVectors.load_word2vec_format(
    #       'ChineseEmbedding.txt', binary=False)
    #   wv.init_sims(replace=True)
    #   wv.save("ChineseEmbedding.bin")
    wv_from_text = gensim.models.KeyedVectors.load('ChineseEmbedding.bin', mmap='r')
    print("文件载入完毕", flush=True)

    row = 0
    for keyword in result_list:
        vec = word_vector(keyword, wv_from_text, min_n=1, max_n=3)
        # word_vector returns the int 0 when no vector could be built.
        # ``vec is 0`` only worked through CPython's small-int caching and is
        # a SyntaxWarning on Python 3.8+, so test the type instead.
        if isinstance(vec, int):
            continue
        similar_word = wv_from_text.most_similar(positive=[vec], topn=15)

        # Column 0: the original keyword, red when it is not in the vocabulary.
        # ``in model`` is a hashed lookup, unlike scanning ``index2word``.
        style = blue_style if keyword in wv_from_text else red_style
        sheet.write(row, 0, keyword, style)
        # Columns 1..: recommended words; the similarity score is not used yet.
        for col, (candidate, _similarity) in enumerate(similar_word, start=1):
            sheet.write(row, col, candidate)
        row += 1

    # Serialize the workbook in memory and hand the bytes to the response.
    output = BytesIO()
    workbook.save(output)
    response = HttpResponse(output.getvalue(), content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment;filename=keyword_recommend_result.xls'
    print("推荐结果耗费时间：", (time.time() - t1) / 60, "minutes", flush=True)
    return response

转载地址：http://jsjqi.baihongyu.com/

你可能感兴趣的文章

Beginning Visual Web Developer 2005 Express: From Novice to Professional

查看>>

Beginning Programming

查看>>

Windows .NET Server 2003 Domains & Active Directory

查看>>

Information Systems : Achieving Success by Avoiding Failure

查看>>

Web Systems Design and Online Consumer Behavior

查看>>