import jieba

# Build the stop word list from stop_words.txt (one word per line)
def stopwordslist():
    stopwords = [line.strip() for line in open('stop_words.txt', encoding='UTF-8').readlines()]
    return stopwords

# Segment one line of Chinese text and remove stop words
def seg_depart(sentence):
    print("Segmenting...")
    sentence_depart = jieba.cut(sentence.strip())
    # Load the stop word list
    stopwords = stopwordslist()
    # Accumulate the result in outstr
    outstr = ''
    # Drop stop words and stray spaces
    for word in sentence_depart:
        if word not in stopwords:
            if word != ' ':
                outstr += word
                outstr += " "
    return outstr

# Input and output file paths
filename = "Init.txt"
outfilename = "out.txt"
inputs = open(filename, encoding='UTF-8')
outputs = open(outfilename, 'w', encoding='UTF-8')

# Write the segmented result to out.txt, one line per input line
for line in inputs:
    line_seg = seg_depart(line)
    outputs.write(line_seg + '\n')
    print("------------------- segmenting and removing stop words -----------")
outputs.close()
inputs.close()
print("Segmentation and stop word removal finished!")
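For a quick sanity check without preparing Init.txt and stop_words.txt, the same segment-then-filter logic can be tried on a single string. This is a minimal sketch: the sample sentence and the inline stop word set are made up for illustration, and a real run would load the stop words from stop_words.txt as above.

import jieba

# Hypothetical sample sentence and a tiny inline stop word set (illustration only)
sample = "我们今天一起学习中文分词和停用词过滤"
stopwords = {"我们", "今天", "和"}

# Segment with jieba and drop stop words, mirroring seg_depart above
words = [w for w in jieba.lcut(sample) if w not in stopwords and w.strip()]
print(" ".join(words))

Using a set for the stop words makes the `word not in stopwords` check a constant-time lookup; with the list returned by stopwordslist(), each check scans the whole list, which is noticeably slower on large documents.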