最近需要开发一个 sql 自动化跑脚本工具,由于比较少开发这种工具
所以将功能慢慢拆解开发,第一个我选择的是对比功能
本次用的库主要是 hashlib(difflib 虽然也导入了,但下面的对比实际用的是集合的差集运算)
需要解决的问题:
1.如何区分新增文件
新增文件:用文件名区分,A 文件夹的所有文件放入 setA,B 文件夹的所有文件放入 setB
onlyFiles = setA - setB#求差集
复制代码
onlyFiles 就是新增文件
2.如何区分修改文件
区别文件有没有修改,用的是 md5 值
因为考虑到每个文件都要读取内容再对比效率有点低,所以用 md5 值判断文件是否一样
不一样再进行对比
获取 md5 值的方法如下:
def getFileMd5(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in binary mode in fixed-size chunks, so large files are
    never loaded into memory all at once.

    Args:
        filename: Path of the file to hash.

    Returns:
        The MD5 hex digest as a string, or ``None`` (after printing a
        message) when ``filename`` is not an existing regular file.
    """
    if not os.path.isfile(filename):
        print('file not exist: ' + filename)
        return None
    myhash = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read fails.
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(8096), b''):
            myhash.update(chunk)
    return myhash.hexdigest()
复制代码
3.如何获取文件差异
1)读取 sql 文件,编码要设置为 encoding='UTF-8-sig'(读取时会自动去掉文件开头的 BOM)
af = open(apath + '\\' + f, 'r', encoding='UTF-8-sig')#读取sql文件
复制代码
2)去掉文件中的干扰项空格 &换行
for line in af.readlines():
line=line.strip()
file1_contents.append(str(line))
while '' in file1_contents:
file1_contents.remove('')
复制代码
3)利用差集获取差异脚本内容
diff_set=set(file1_contents).difference(set(file2_contents))
复制代码
4)将差异脚本内容有序化
diff_list = list(set(diff_set))
diff_list.sort(key=file1_contents.index)
复制代码
源代码:
import osimport timeimport difflibimport hashlib
def getFileMd5(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in binary mode in fixed-size chunks, so large files are
    never loaded into memory all at once.

    Args:
        filename: Path of the file to hash.

    Returns:
        The MD5 hex digest as a string, or ``None`` (after printing a
        message) when ``filename`` is not an existing regular file.
    """
    if not os.path.isfile(filename):
        print('file not exist: ' + filename)
        return None
    myhash = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read fails.
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(8096), b''):
            myhash.update(chunk)
    return myhash.hexdigest()
def getAllFiles(path):
    """Collect every file below ``path`` as a path relative to ``path``.

    Args:
        path: Root directory to walk recursively.

    Returns:
        A list of relative file paths (no leading separator), in
        ``os.walk`` order. Empty when nothing is found under ``path``.
    """
    flist = []
    for root, _dirs, fs in os.walk(path):
        for f in fs:
            # relpath yields the same result whether or not ``path`` ends
            # with a separator; slicing by len(path) did not.
            flist.append(os.path.relpath(os.path.join(root, f), path))
    return flist
def dirCompare(apath, bpath):afiles = getAllFiles(apath)#路径 1 的文件信息 bfiles = getAllFiles(bpath)#路径 2 的文件信息 setA = set(afiles)setB = set(bfiles)commonfiles = setA & setB # 处理共有文件
import os
import time
import difflib
import hashlib
def getFileMd5(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in binary mode in fixed-size chunks, so large files are
    never loaded into memory all at once.

    Args:
        filename: Path of the file to hash.

    Returns:
        The MD5 hex digest as a string, or ``None`` (after printing a
        message) when ``filename`` is not an existing regular file.
    """
    if not os.path.isfile(filename):
        print('file not exist: ' + filename)
        return None
    myhash = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read fails.
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(8096), b''):
            myhash.update(chunk)
    return myhash.hexdigest()
def getAllFiles(path):
    """Collect every file below ``path`` as a path relative to ``path``.

    Args:
        path: Root directory to walk recursively.

    Returns:
        A list of relative file paths (no leading separator), in
        ``os.walk`` order. Empty when nothing is found under ``path``.
    """
    flist = []
    for root, _dirs, fs in os.walk(path):
        for f in fs:
            # relpath yields the same result whether or not ``path`` ends
            # with a separator; slicing by len(path) did not.
            flist.append(os.path.relpath(os.path.join(root, f), path))
    return flist
def dirCompare(apath, bpath):
    """Compare two directory trees of SQL scripts and print the differences.

    For every file present (by relative name) in both trees, the MD5 digests
    are compared first; only when they differ are the files read and the
    non-blank, whitespace-stripped lines that occur in the ``apath`` copy
    but not in the ``bpath`` copy printed, in their order of first
    appearance. Afterwards, files that exist only under ``apath`` are
    listed as new files. Files or lines that exist only on the ``bpath``
    side are not reported.

    Args:
        apath: Root directory whose additions/changes are reported.
        bpath: Root directory used as the comparison baseline.

    Returns:
        None. All results are written to standard output.
    """
    setA = set(getAllFiles(apath))
    setB = set(getAllFiles(bpath))

    # Relative names may or may not carry a leading separator depending on
    # how they were produced; strip it so os.path.join never treats the
    # name as an absolute path.
    seps = os.sep + (os.altsep or '')

    for f in sorted(setA & setB):
        rel = f.lstrip(seps)
        a_file = os.path.join(apath, rel)
        b_file = os.path.join(bpath, rel)

        # Cheap digest check first; only read line by line when it differs.
        if getFileMd5(a_file) == getFileMd5(b_file):
            continue

        print("差异文件: %s" % (f))
        file1_contents = _readSqlLines(a_file)
        file2_lines = set(_readSqlLines(b_file))

        # Unique lines of file 1 missing from file 2, kept in file-1 order.
        diff_list = []
        seen = set()
        for line in file1_contents:
            if line not in file2_lines and line not in seen:
                seen.add(line)
                diff_list.append(line)
        print('差异脚本:', '\n'.join(diff_list))

    onlyFiles = setA - setB
    if onlyFiles:
        print('-' * 20, "新增文件 ", apath, '-' * 20)
        for of in sorted(onlyFiles):
            print(of)


def _readSqlLines(path):
    """Return the stripped, non-empty lines of a UTF-8 (optional BOM) file."""
    with open(path, 'r', encoding='UTF-8-sig') as fh:
        stripped = (line.strip() for line in fh)
        return [line for line in stripped if line]
def run_sql():
    """Placeholder for the SQL execution step; currently does nothing."""
    pass
if __name__ == '__main__':
    # Hard-coded sample directories: files found only under aPath are
    # reported as new, and changed lines are reported relative to bPath.
    aPath = "E:\\test"
    bPath = "E:\\test1"
    dirCompare(aPath, bPath)
    print("\n完成对比")
复制代码
评论