Skip to content

Commit

Permalink
handle word and excel simply
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleYing authored Apr 15, 2019
1 parent 246767b commit 4b69504
Showing 1 changed file with 205 additions and 0 deletions.
205 changes: 205 additions & 0 deletions docop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
# encoding=utf-8
import docx
import collections
import jieba
import re
import string
import xlwt
import os

def CountWordsNum(file):
    """Return the most frequent audience keyword(s) found in a document.

    Every paragraph of *file* is searched for eight fixed keywords. Some
    keywords are substrings of longer ones (e.g. "公众" inside "相关公众"),
    so the occurrences of the longer phrases are subtracted from the shorter
    keyword's raw count to obtain stand-alone counts.

    Args:
        file: An object exposing ``paragraphs``, an iterable of objects with
            a ``text`` string attribute.

    Returns:
        The keyword(s) sharing the highest count, in the fixed keyword order,
        each followed by a single space (e.g. ``"公众 他人 "``). An empty
        string is returned when none of the keywords occurs at all.
    """
    keywords = ['相关公众', '公众', '相关消费者', '普通消费者',
                '消费者', '普通购买者', '购买者', '他人']
    # Short keyword -> longer keywords that contain it as a substring.
    contained_in = {
        '公众': ('相关公众',),
        '消费者': ('相关消费者', '普通消费者'),
        '购买者': ('普通购买者',),
    }

    totals = {word: 0 for word in keywords}
    for paragraph in file.paragraphs:
        text = paragraph.text
        for word in keywords:
            # A plain non-overlapping substring count; none of the keywords
            # can overlap itself, so no regular expression is needed.
            totals[word] += text.count(word)

    # The longer phrases are never adjusted themselves, so the order of
    # these subtractions does not matter.
    for word, longer_words in contained_in.items():
        totals[word] -= sum(totals[longer] for longer in longer_words)

    highest = max(totals.values())
    if highest == 0:
        # Nothing was found: do not report every keyword as "most frequent".
        return ""
    return "".join(word + " " for word in keywords if totals[word] == highest)

def JudgeCorporation(file):
    """Classify plaintiff and defendant as company or natural person.

    The first paragraph containing non-whitespace text is taken as the case
    title. The title is split on "诉"; the part before the first "诉" is
    treated as the plaintiff and the part between the first and second "诉"
    as the defendant. A party whose text contains "公司" is labelled a legal
    person ("法人"), otherwise a natural person ("自然人").

    Args:
        file: An object exposing ``paragraphs``, an iterable of objects with
            a ``text`` string attribute.

    Returns:
        A two-element list ``[plaintiff_label, defendant_label]``. When the
        document has no title or the title contains no "诉", both labels are
        "无法判断" instead of raising ``IndexError``.

    Side effects:
        Prints each label to standard output.
    """
    title = next((p.text for p in file.paragraphs if p.text.strip()), "")
    parties = title.split("诉")

    if len(parties) < 2:
        # No "plaintiff 诉 defendant" structure to split on.
        labels = ["无法判断", "无法判断"]
    else:
        labels = [
            "原告法人" if "公司" in parties[0] else "原告自然人",
            "被告法人" if "公司" in parties[1] else "被告自然人",
        ]

    for label in labels:
        print(label)
    return labels


def _confusion_similarity_order(texts):
    """Decide the causal order from the first sentence naming both keywords.

    Each text is split into sentences on "。". For the first sentence that
    contains both "混淆" and "近似", the part before the first "混淆" is
    inspected: if "近似" appears there the result is "相似导致混淆",
    otherwise "混淆导致相似". Returns "无法判断" when no single sentence
    contains both keywords.
    """
    for text in texts:
        for sentence in text.split("。"):
            if "混淆" in sentence and "近似" in sentence:
                before_confusion = sentence.split("混淆")[0]
                if "近似" in before_confusion:
                    return "相似导致混淆"
                return "混淆导致相似"
    return "无法判断"


def CharacterofCase(file):
    """Classify a case by the keywords used in its reasoning section.

    The reasoning section is the run of paragraphs strictly between the
    paragraph whose first whitespace-separated token is "【裁判理由】" and
    the first paragraph whose first token is "本案法律依据". If the start
    marker is absent the section begins at the second paragraph; if the end
    marker is absent the section runs to the end of the document (previously
    nothing was scanned in that case and the case was always reported as
    "无关案件").

    Args:
        file: An object exposing ``paragraphs``, a sequence of objects with
            a ``text`` string attribute.

    Returns:
        One of "混淆标准" (only "混淆" appears), "近似标准" (only "近似"
        appears), "无关案件" (neither appears), or, when both appear,
        "相似导致混淆" / "混淆导致相似" / "无法判断" depending on the first
        sentence that contains both keywords.

    Side effects:
        Prints the returned label to standard output.
    """
    paragraphs = file.paragraphs  # read once and reuse below
    start = 0
    end = len(paragraphs)
    for index, paragraph in enumerate(paragraphs):
        tokens = paragraph.text.split()
        if not tokens:
            continue
        if tokens[0] == "【裁判理由】":
            start = index
        if tokens[0] == "本案法律依据":
            end = index
            break

    section = [paragraph.text for paragraph in paragraphs[start + 1:end]]
    has_confusion = any("混淆" in text for text in section)
    has_similarity = any("近似" in text for text in section)

    if has_confusion and not has_similarity:
        verdict = "混淆标准"
    elif has_similarity and not has_confusion:
        verdict = "近似标准"
    elif not has_confusion and not has_similarity:
        verdict = "无关案件"
    else:
        verdict = _confusion_similarity_order(section)

    print(verdict)
    return verdict

def set_style(name, height, bold=False):
    """Build an xlwt cell style whose font uses the given settings.

    Args:
        name: Font name assigned to ``xlwt.Font.name``.
        height: Value assigned to ``xlwt.Font.height``.
        bold: Whether the font is bold (defaults to False).

    Returns:
        An ``xlwt.XFStyle`` carrying the configured font; the font's
        ``color_index`` is always set to 4.
    """
    cell_font = xlwt.Font()
    cell_font.name = name
    cell_font.height = height
    cell_font.bold = bold
    cell_font.color_index = 4

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    return cell_style


def write_excel(path, rows):
    """Write the analysis results to an .xls workbook.

    A single sheet named 'sheet1' is created. Row 0 holds the four fixed
    column captions in a bold header style; each entry of *rows* is written
    to the following rows, one value per column.

    Args:
        path: Destination path handed to ``Workbook.save``.
        rows: Iterable of rows, each an iterable of cell values.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('sheet1')

    captions = ["原告性质", "被告性质", "最高频词汇", "案件性质"]
    # The header style is identical for every caption, so build it once.
    header_style = set_style('黑体', 220, True)
    for col, caption in enumerate(captions):
        sheet.write(0, col, caption, header_style)

    # Data rows start right below the header row.
    for rownum, row in enumerate(rows, start=1):
        for col, value in enumerate(row):
            sheet.write(rownum, col, value)

    workbook.save(path)

def ScanFile(filepath):
    """Return the names of all entries in the directory *filepath*, sorted.

    ``os.listdir`` yields entries in arbitrary order; sorting makes the
    processing order (and therefore the row order of the generated
    spreadsheet) reproducible between runs.

    Args:
        filepath: Path of the directory to list.

    Returns:
        A list of entry names (not full paths) in ascending order.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. the directory does
            not exist).
    """
    return sorted(os.listdir(filepath))

if __name__ == '__main__':
    # Interactive entry point: ask for the folder holding the Word cases,
    # the name of the Excel file to create and the folder to create it in,
    # then analyse every .docx file and write one spreadsheet row per file.
    print("请输入您word案例所在的路径,例如:")
    print("Mac用户: /Users/eagleying/Downloads/Cases/")
    print("Windows用户: D:\\Case\\")
    filepath = input()
    print("请输入Excel文件名,如result.xls")
    filename = input()
    print("请输入生成Excel文件的路径:")
    excelpath = input()

    rows = []
    for f in ScanFile(filepath):
        # Only .docx files are handed to docx.Document; other directory
        # entries (e.g. .DS_Store, sub-folders) and Word's "~$" temporary
        # files are skipped instead of aborting the whole run.
        if f.startswith("~$") or not f.lower().endswith(".docx"):
            continue
        # os.path.join works whether or not the user typed a trailing
        # path separator.
        file = docx.Document(os.path.join(filepath, f))
        row = list(JudgeCorporation(file))
        row.append(CountWordsNum(file))
        row.append(CharacterofCase(file))
        rows.append(row)

    path = os.path.join(excelpath, filename)
    write_excel(path, rows)
    print("生成成功^o^")

0 comments on commit 4b69504

Please sign in to comment.