123456789101112131415161718192021222324252627282930313233 |
- import pandas as pd
- # 读取Excel文件
- df1 = pd.read_excel(r'D:\guozhong\19、定远县\19、定远县\定远县表层数据统计20241222.xlsx', converters={'原样品编号': str})
- df2 = pd.read_excel(r'D:\guozhong\19、定远县\19、定远县\定远县土壤容重.xlsx')
- print(df1)
- # 确保 '原样品编号' 列是字符串类型
- df1['原样品编号'] = df1['原样品编号'].astype(str)
- df2['样品编号'] = df2['样品编号'].astype(str)
- # 去除第一张表和第二张表中样品编号的后两位
- df1['Processed ID'] = df1['原样品编号'].str[:-2]
- df2['Processed ID'] = df2['样品编号'].str[:-2]
- print(df1)
- print(df2)
- # 创建一个空的DataFrame用于存储结果
- result_df = pd.DataFrame(columns=df2.columns)
- # 遍历处理后的第一张表的样品编号
- for id in df1['Processed ID']:
- # 在第二张表中查找匹配的样品编号
- matched_row = df2[df2['Processed ID'] == id]
- # 如果找到匹配项,则添加到结果DataFrame中;否则添加一个空值行
- if not matched_row.empty:
- result_df = pd.concat([result_df, matched_row], ignore_index=True)
- else:
- # ai
- empty_row = pd.Series([''] * len(df2.columns), index=df2.columns)
- result_df = pd.concat([result_df, empty_row.to_frame().T], ignore_index=True)
- result_df.to_excel('定远表层容重.xlsx', index=False)
|