Python 自动化办公之 Excel 对比工具
作者 | 周萝卜
来源丨萝卜大杂烩
需求
需求解析
代码实现
old = pd.read_excel('sample-address-1.xlsx', 'Sheet1', na_values=['NA'])
new = pd.read_excel('sample-address-2.xlsx', 'Sheet1', na_values=['NA'])
old['version'] = "old"
new['version'] = "new"
哪些是新增的 account
哪些是被删除的 account
哪些是被修改的 account
old_accts_all = set(old['account number'])
new_accts_all = set(new['account number'])
dropped_accts = old_accts_all - new_accts_all
added_accts = new_accts_all - old_accts_all
all_data = pd.concat([old,new],ignore_index=True)
changes = all_data.drop_duplicates(subset=["account number",
"name", "street",
"city","state",
"postal code"], keep='last')
dupe_accts = changes[changes['account number'].duplicated() == True]['account number'].tolist()
dupes = changes[changes["account number"].isin(dupe_accts)]dupe_accts = changes[changes['account number'].duplicated() == True]['account number'].tolist()dupes = changes[changes["account number"].isin(dupe_accts)]
change_new = dupes[(dupes["version"] == "new")]
change_old = dupes[(dupes["version"] == "old")]
change_new = change_new.drop(['version'], axis=1)
change_old = change_old.drop(['version'], axis=1)
change_new.set_index('account number', inplace=True)
change_old.set_index('account number', inplace=True)
df_all_changes = pd.concat([change_old, change_new],
axis='columns',
keys=['old', 'new'],
join='outer')
df_all_changes
def report_diff(x):
return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x)def report_diff(x): return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x)
df_changed = df_all_changes.groupby(level=0, axis=1).apply(lambda frame: frame.apply(report_diff, axis=1))
df_changed = df_changed.reset_index()df_changed = df_all_changes.groupby(level=0, axis=1).apply(lambda frame: frame.apply(report_diff, axis=1))df_changed = df_changed.reset_index()
df_removed = changes[changes["account number"].isin(dropped_accts)]
df_added = changes[changes["account number"].isin(added_accts)]df_removed = changes[changes["account number"].isin(dropped_accts)]df_added = changes[changes["account number"].isin(added_accts)]
output_columns = ["account number", "name", "street", "city", "state", "postal code"]
writer = pd.ExcelWriter("my-diff.xlsx")
df_changed.to_excel(writer,"changed", index=False, columns=output_columns)
df_removed.to_excel(writer,"removed",index=False, columns=output_columns)
df_added.to_excel(writer,"added",index=False, columns=output_columns)
writer.save()
import tkinter
from tkinter import *
from tkinter import Label, Button, Entry, messagebox
from tkinter import filedialog
from deal import deal_excel
window = tkinter.Tk()
path_file1 = StringVar()
path_file2 = StringVar()
path_path = StringVar()
window.geometry('380x150')
label1 = Label(window, text="文件1:").grid(column=0, row=0)
txt1 = Entry(window, width="30", textvariable=path_file1).grid(column=1, row=0)
button1 = Button(window, text="文件选择1", command=selectFile1).grid(column=2, row=0)
label2 = Label(window, text="文件2:").grid(column=0, row=1)
txt2 = Entry(window, width="30", textvariable=path_file2).grid(column=1, row=1)
button2 = Button(window, text="文件选择2", command=selectFile2).grid(row=1, column=2)
label3 = Label(window, text="新文件路径:").grid(column=0, row=2)
txt3 = Entry(window, width="30", textvariable=path_path)
txt3.grid(column=1, row=2)
button3 = Button(window, text="新文件路径", command=selectPath).grid(row=2, column=2)
button4 = Button(window, text="开始处理", command=save_path).grid(row=3, column=1)
def selectFile1():
path_ = filedialog.askopenfilename()
path_file1.set(path_)
def save_path():
path = txt3.get()
deal_excel(path)
res = "对比处理完成!"
messagebox.showinfo('萝卜大杂烩', res)
分享
点收藏
点点赞
点在看
关注公众号:拾黑(shiheibook)了解更多
[广告]赞助链接:
四季很好,只要有你,文娱排行榜:https://www.yaopaiming.com/
让资讯触达的更精准有趣:https://www.0xu.cn/
关注网络尖刀微信公众号
随时掌握互联网精彩
随时掌握互联网精彩
赞助链接
排名
热点
搜索指数
- 1 总书记这样指导旅游强国建设 4968658
- 2 警方通报胖猫事件:女方不构成诈骗 4938710
- 3 专家称要支持60-70岁老年人就业需求 4820304
- 4 科技赋能 共享美好 4759688
- 5 央媒评“指豚为鱼”:何其讽刺 4642335
- 6 为什么520这么多学生请病假? 4536819
- 7 中国选手一眨眼的功夫就夺冠了 4435469
- 8 英国年度最性感男人 4302016
- 9 河南网友山中发现日寇坟墓? 4236950
- 10 今年荔枝有的吃就不错了 4174335