-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
205 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
# encoding=utf-8 | ||
import docx | ||
import collections | ||
import jieba | ||
import re | ||
import string | ||
import xlwt | ||
import os | ||
|
||
def CountWordsNum(file):
    """Return the most frequent "public / consumer" term(s) in a document.

    Args:
        file: An object exposing ``paragraphs``, each item of which has a
            ``text`` string (the script passes a ``docx.Document``).

    Returns:
        A string with every term whose adjusted count equals the maximum,
        in the fixed order below, each followed by a single space. When no
        term occurs at all, every count is 0 and all eight terms are
        returned.
    """
    terms = ('相关公众', '公众', '相关消费者', '普通消费者',
             '消费者', '普通购买者', '购买者', '他人')
    totals = {term: 0 for term in terms}

    # None of the terms can overlap itself, so a plain substring count finds
    # every occurrence exactly once.
    for paragraph in file.paragraphs:
        text = paragraph.text
        for term in terms:
            totals[term] += text.count(term)

    # Shorter terms are substrings of longer ones; remove the occurrences
    # that were already attributed to the longer term.
    totals['公众'] -= totals['相关公众']
    totals['消费者'] -= totals['普通消费者'] + totals['相关消费者']
    totals['购买者'] -= totals['普通购买者']

    highest = max(totals.values())
    return "".join(term + " " for term in terms if totals[term] == highest)
|
||
def JudgeCorporation(file):
    """Classify plaintiff and defendant as a company or a natural person.

    The first non-empty paragraph is taken as the case title. The text
    before the first "诉" is the plaintiff side and everything after it is
    the defendant side; a side counts as a legal person ("法人") when it
    contains "公司", otherwise as a natural person ("自然人").

    Args:
        file: An object exposing ``paragraphs``, each item of which has a
            ``text`` string (the script passes a ``docx.Document``).

    Returns:
        A two-item list: the plaintiff label followed by the defendant
        label. Each label is also printed.

    Note:
        A document with no non-empty paragraph is treated as having an
        empty title, and a title without "诉" as having an empty defendant
        side; an empty side is labelled "自然人" rather than raising
        ``IndexError``.
    """
    title = next((p.text for p in file.paragraphs if p.text), "")

    # Split only at the first "诉" so that a later "诉" (e.g. inside a
    # party's name) does not cut the defendant side short.
    plaintiff_side, _, defendant_side = title.partition("诉")

    labels = []
    for role, side in (("原告", plaintiff_side), ("被告", defendant_side)):
        label = role + ("法人" if "公司" in side else "自然人")
        print(label)
        labels.append(label)
    return labels
|
||
|
||
def CharacterofCase(file):
    """Classify which standard the judgment's reasoning section relies on.

    The reasoning section is the run of paragraphs strictly between the
    paragraph whose first whitespace-separated token is "【裁判理由】" and
    the first paragraph whose first token is "本案法律依据".

    Args:
        file: An object exposing ``paragraphs``, each item of which has a
            ``text`` string (the script passes a ``docx.Document``).

    Returns:
        One of the following strings (the result is also printed):
        "混淆标准"     - only "混淆" appears in the section;
        "近似标准"     - only "近似" appears in the section;
        "无关案件"     - neither appears;
        "相似导致混淆" - both appear in one sentence and "近似" precedes
                         the first "混淆" in it;
        "混淆导致相似" - both appear in one sentence and "近似" does not
                         precede the first "混淆";
        "无法判断"     - both appear, but never in the same sentence.

    Note:
        If the end marker is missing, or precedes the start marker, the
        section is empty and the result is "无关案件".
    """
    # Read the paragraph list once instead of re-fetching it per index.
    paragraphs = file.paragraphs

    start = 0
    end = 0
    for index, paragraph in enumerate(paragraphs):
        tokens = paragraph.text.split()  # also discards surrounding whitespace
        if not tokens:
            continue
        if tokens[0] == "【裁判理由】":
            start = index
        if tokens[0] == "本案法律依据":
            end = index
            break

    section = [p.text for p in paragraphs[start + 1:end]]
    has_confusion = any("混淆" in text for text in section)
    has_similarity = any("近似" in text for text in section)

    if has_confusion and not has_similarity:
        print("混淆标准")
        return "混淆标准"
    if has_similarity and not has_confusion:
        print("近似标准")
        return "近似标准"
    if not has_confusion and not has_similarity:
        print("无关案件")
        return "无关案件"

    # Both keywords occur: decide from the first sentence that holds both,
    # based on whether "近似" shows up before the first "混淆".
    for text in section:
        for sentence in text.split("。"):
            if "混淆" in sentence and "近似" in sentence:
                before_confusion = sentence.partition("混淆")[0]
                if "近似" in before_confusion:
                    verdict = "相似导致混淆"
                else:
                    verdict = "混淆导致相似"
                print(verdict)
                return verdict

    print("无法判断")
    return "无法判断"
|
||
def set_style(name, height, bold=False):
    """Build an xlwt cell style with the requested font.

    Args:
        name: Font family name.
        height: Font height, passed straight to ``xlwt.Font.height``
            (xlwt documents this in 1/20 of a point - TODO confirm).
        bold: Whether the font is bold.

    Returns:
        An ``xlwt.XFStyle`` whose font uses the given name, height and
        weight, with colour palette index 4.
    """
    font = xlwt.Font()
    font.name = name
    font.bold = bold
    # xlwt spells this attribute "colour_index"; assigning "color_index"
    # only created an unused attribute, so the colour was never applied.
    font.colour_index = 4
    font.height = height

    style = xlwt.XFStyle()
    style.font = font
    return style
|
||
|
||
def write_excel(path, rows):
    """Write the analysis results to an .xls workbook.

    Row 0 holds the four column captions in a bold header style; each item
    of ``rows`` is written to the following rows, one cell per element.

    Args:
        path: Destination file path handed to ``Workbook.save``.
        rows: Iterable of rows, each an iterable of cell values.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('sheet1')

    captions = ["原告性质", "被告性质", "最高频词汇", "案件性质"]
    # Build the header style once and share it across all header cells.
    header_style = set_style('黑体', 220, True)
    for col, caption in enumerate(captions):
        sheet.write(0, col, caption, header_style)

    # Data rows start right below the header row.
    for row_index, row in enumerate(rows, start=1):
        for col, value in enumerate(row):
            sheet.write(row_index, col, value)

    workbook.save(path)
|
||
def ScanFile(filepath):
    """Return the names of all entries in a directory, sorted by name.

    ``os.listdir`` yields entries in arbitrary order; sorting makes the
    processing order (and therefore the spreadsheet row order) repeatable.

    Args:
        filepath: Path of the directory to list.

    Returns:
        A sorted list of entry names (not full paths).
    """
    return sorted(os.listdir(filepath))
|
||
if __name__ == '__main__':
    # Interactive entry point: ask for the folder of Word case files and
    # for the output workbook's name and folder, analyse every .docx file,
    # then write one spreadsheet row per document.
    print("请输入您word案例所在的路径,例如:")
    print("Mac用户: /Users/eagleying/Downloads/Cases/")
    print("Windows用户: D:\\Case\\")
    filepath = input()
    print("请输入Excel文件名,如result.xls")
    filename = input()
    print("请输入生成Excel文件的路径:")
    excelpath = input()

    rows = []
    for f in ScanFile(filepath):
        # Only Word documents can be opened; skip other entries
        # (e.g. .DS_Store) and Word's "~$" lock files.
        if f.startswith("~$") or not f.lower().endswith(".docx"):
            continue
        # os.path.join works whether or not the folder ends with a separator.
        file = docx.Document(os.path.join(filepath, f))
        row = list(JudgeCorporation(file))
        row.append(CountWordsNum(file))
        row.append(CharacterofCase(file))
        rows.append(row)

    path = os.path.join(excelpath, filename)
    write_excel(path, rows)
    print("生成成功^o^")