|
本帖最后由 sheeboard 于 2018-1-23 09:55 编辑
- e_columns=['name','base_salary','fl_salary','suyside','count']
- jan=pd.read_excel('1.xlsx','Sheet1',names=e_columns)
- feb=pd.read_excel('2.xlsx','Sheet1',names=e_columns)
- removed_data=jan[~jan.name.isin(feb.name)]
- removed_data['status']='removed'
- newadd_data=feb[~feb.name.isin(jan.name)]
- newadd_data['status']='newadd'
- jan['version']='old'
- feb['version']='new'
- full=pd.concat([jan,feb],ignore_index=True)
- nochange_data=full[full.duplicationed(subset=e_columns)]
- nochange_data['status']='nochane'
- changes=full.drop_duplicates(subset=e_columns,keep='last')
- dupe_accts=changes.set_index('name').index.get_duplicates()
- dupes=changes[changes['name'].isin(dupe_accts)]
- change_new=dupes[(dupes['version']=='new')]
- change_old=dupes[(dupes['version']=='old')]
- change_new=change_new.drop(['version'],axis=1)
- change_old=change_old.drop(['version'],axis=1)
- nochange_data=nochange_data.drop(['version'],axis=1)
- change_new.set_index('name',inplace=True)
- change_old.set_index('name',inplace=True)
- def report_diff(x):
- return x[0] if x[0]==x[1] else '{}->{}'.format(*x)
- diff_panel=pd.Panel(dict(df1=change_old,df2=change_new))
- diff_output=diff_panel.apply(report_diff,axis=0)
- diff_output=diff_output.reset_index()
- diff_output['status']='changed'
- outputfil=np.concat([removed_data,newadd_data,nochange_data,diff_output],ignore_index=True)
- outputfile.to_excel('result.xlsx',index=False)
复制代码 |
-
sheet1对比结果
|