Python 自动化办公之 Excel 对比工具
作者 | 周萝卜
来源丨萝卜大杂烩
需求

需求解析
代码实现
old = pd.read_excel('sample-address-1.xlsx', 'Sheet1', na_values=['NA'])
new = pd.read_excel('sample-address-2.xlsx', 'Sheet1', na_values=['NA'])
old['version'] = "old"
new['version'] = "new"

哪些是新增的 account
哪些是被删除的 account
哪些是被修改的 account
old_accts_all = set(old['account number'])
new_accts_all = set(new['account number'])
dropped_accts = old_accts_all - new_accts_all
added_accts = new_accts_all - old_accts_all

all_data = pd.concat([old,new],ignore_index=True)
changes = all_data.drop_duplicates(subset=["account number",
"name", "street",
"city","state",
"postal code"], keep='last')

dupe_accts = changes[changes['account number'].duplicated() == True]['account number'].tolist()
dupes = changes[changes["account number"].isin(dupe_accts)]dupe_accts = changes[changes['account number'].duplicated() == True]['account number'].tolist()dupes = changes[changes["account number"].isin(dupe_accts)]

change_new = dupes[(dupes["version"] == "new")]
change_old = dupes[(dupes["version"] == "old")]
change_new = change_new.drop(['version'], axis=1)
change_old = change_old.drop(['version'], axis=1)
change_new.set_index('account number', inplace=True)
change_old.set_index('account number', inplace=True)
df_all_changes = pd.concat([change_old, change_new],
axis='columns',
keys=['old', 'new'],
join='outer')
df_all_changes

def report_diff(x):
return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x)def report_diff(x): return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x)

df_changed = df_all_changes.groupby(level=0, axis=1).apply(lambda frame: frame.apply(report_diff, axis=1))
df_changed = df_changed.reset_index()df_changed = df_all_changes.groupby(level=0, axis=1).apply(lambda frame: frame.apply(report_diff, axis=1))df_changed = df_changed.reset_index()

df_removed = changes[changes["account number"].isin(dropped_accts)]
df_added = changes[changes["account number"].isin(added_accts)]df_removed = changes[changes["account number"].isin(dropped_accts)]df_added = changes[changes["account number"].isin(added_accts)]
output_columns = ["account number", "name", "street", "city", "state", "postal code"]
writer = pd.ExcelWriter("my-diff.xlsx")
df_changed.to_excel(writer,"changed", index=False, columns=output_columns)
df_removed.to_excel(writer,"removed",index=False, columns=output_columns)
df_added.to_excel(writer,"added",index=False, columns=output_columns)
writer.save()
import tkinter
from tkinter import *
from tkinter import Label, Button, Entry, messagebox
from tkinter import filedialog
from deal import deal_excel
window = tkinter.Tk()
path_file1 = StringVar()
path_file2 = StringVar()
path_path = StringVar()
window.geometry('380x150')
label1 = Label(window, text="文件1:").grid(column=0, row=0)
txt1 = Entry(window, width="30", textvariable=path_file1).grid(column=1, row=0)
button1 = Button(window, text="文件选择1", command=selectFile1).grid(column=2, row=0)
label2 = Label(window, text="文件2:").grid(column=0, row=1)
txt2 = Entry(window, width="30", textvariable=path_file2).grid(column=1, row=1)
button2 = Button(window, text="文件选择2", command=selectFile2).grid(row=1, column=2)
label3 = Label(window, text="新文件路径:").grid(column=0, row=2)
txt3 = Entry(window, width="30", textvariable=path_path)
txt3.grid(column=1, row=2)
button3 = Button(window, text="新文件路径", command=selectPath).grid(row=2, column=2)
button4 = Button(window, text="开始处理", command=save_path).grid(row=3, column=1)

def selectFile1():
path_ = filedialog.askopenfilename()
path_file1.set(path_)
def save_path():
path = txt3.get()
deal_excel(path)
res = "对比处理完成!"
messagebox.showinfo('萝卜大杂烩', res)


分享

点收藏

点点赞

点在看
关注公众号:拾黑(shiheibook)了解更多
[广告]赞助链接:
四季很好,只要有你,文娱排行榜:https://www.yaopaiming.com/
让资讯触达的更精准有趣:https://www.0xu.cn/

随时掌握互联网精彩
赞助链接
排名
热点
搜索指数
- 1 让祖国大地更加绿意盎然生机勃发 7987570
- 2 美股全线暴跌 7937859
- 3 中方反制!对美进口商品加征34%关税 7869412
- 4 中国经济的暖意新意诚意 7728509
- 5 杜邦中国被立案调查 7692457
- 6 中方对美一系列反制措施来了 7582683
- 7 中概股暴跌 京东、阿里巴巴跌超10% 7487668
- 8 关税重压下苹果手机或成奢侈品 7372197
- 9 18岁禁毒战士的葬礼全场被马赛克 7208391
- 10 国资央企全力护航春耕好“丰”景 7146373