1 mēnesi atpakaļ · 21ab6f875b
--- a/剖面容重.py
+++ b/剖面容重.py
@@ -0,0 +1,30 @@
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+df_1 = pd.read_excel(r"D:\guozhong\泾县数据集20241118\泾县剖面数据统计20241118.xlsx")
			
 
				+df_1
			
 
				+df_2 = pd.read_excel(r"D:\guozhong\泾县数据集20241118\泾县容重汇总20241118.xlsx")
			
 
				+df_2
			
 
				+# 确保列为字符串类型
			
 
				+df_1["原样品编号"] = df_1["原样品编号"].astype(str)
			
 
				+df_2["样品编号"] = df_2["样品编号"].astype(str)
			
 
				+
			
 
				+# 提取前16位和最后1位并组合
			
 
				+df_1["原样品编号前16位"] = df_1["原样品编号"].str[:16] + df_1["原样品编号"].str[-1]
			
 
				+df_2["样品编号前16位"] = df_2["样品编号"].str[:16] + df_2["样品编号"].str[-1]
			
 
				+
			
 
				+
			
 
				+# 合并表格（左连接，保留df_1中的所有数据）
			
 
				+merged_df = pd.merge(
			
 
				+    df_1,
			
 
				+    df_2[["样品编号前16位", "土壤容重1（g/cm³）", "土壤容重2（g/cm³）", "土壤容重3（g/cm³）", "土壤容重4（g/cm³）", "土壤容重平均值（g/cm³）"]],
			
 
				+    left_on="原样品编号前16位",
			
 
				+    right_on="样品编号前16位",
			
 
				+    how="left"
			
 
				+)
			
 
				+
			
 
				+# 删除辅助列
			
 
				+merged_df = merged_df.drop(columns=["原样品编号前16位", "样品编号前16位"])
			
 
				+
			
 
				+# 输出结果
			
 
				+print(merged_df)
			
 
				+merged_df.to_excel(r"泾县剖面容重.xlsx", index=False, engine="openpyxl")
			
--- a/剖面水稳.py
+++ b/剖面水稳.py
@@ -0,0 +1,36 @@
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+
			
 
				+df_1 = pd.read_excel(r"D:\guozhong\泾县数据集20241118\泾县剖面数据统计20241118.xlsx",converters={'容重样品编号': str})
			
 
				+df_1
			
 
				+df_2 = pd.read_excel(r"D:\guozhong\泾县数据集20241118\泾县水稳汇总20241110.xlsx")
			
 
				+df_2
			
 
				+# 确保列为字符串类型
			
 
				+df_1["原样品编号"] = df_1["原样品编号"].astype(str)
			
 
				+df_2["样品编号"] = df_2["原样品编号"].astype(str)
			
 
				+
			
 
				+# 创建一个布尔掩码，标记尾号为1的行
			
 
				+mask = df_1["原样品编号"].str.endswith("1")
			
 
				+
			
 
				+# 将未满足条件的行清空（填充为NaN），保留满足条件的行原位置不变
			
 
				+df_1.loc[~mask, :] = np.nan
			
 
				+
			
 
				+# 提取前16位进行匹配
			
 
				+df_1["原样品编号前16位"] = df_1["原样品编号"].str[:16]
			
 
				+df_2["样品编号前16位"] = df_2["样品编号"].str[:16]
			
 
				+
			
 
				+# 合并表格（左连接，保留df_1中的所有数据）
			
 
				+merged_df = pd.merge(
			
 
				+    df_1,
			
 
				+    df_2[["样品编号前16位", "水稳>5mm（%）", "水稳3mm~5mm（%）", "水稳2mm~3mm（%）", "水稳1mm~2mm（%）", "水稳0.5mm~1mm（%）", "水稳0.25mm~0.5mm（%）", "水稳性大团聚体总和（%）"]],
			
 
				+    left_on="原样品编号前16位",
			
 
				+    right_on="样品编号前16位",
			
 
				+    how="left"
			
 
				+)
			
 
				+
			
 
				+# 删除辅助列
			
 
				+merged_df = merged_df.drop(columns=["原样品编号前16位", "样品编号前16位"])
			
 
				+
			
 
				+# 输出结果
			
 
				+print(merged_df)
			
 
				+merged_df.to_excel(r"泾县剖面水稳.xlsx", index=False, engine="openpyxl")
			
--- a/地理信息.py
+++ b/地理信息.py
@@ -0,0 +1,39 @@
 
				+import pandas as pd
			
 
				+
			
 
				+# 读取两个表
			
 
				+table1 = pd.read_excel(r"D:\guozhong\庐江县\8、庐江县\剖面\庐江县剖面.xlsx")  # 表1
			
 
				+table2 = pd.read_excel(r"D:\guozhong\庐江县\8、庐江县\庐江县剖面数据统计20241124.xlsx")  # 表2
			
 
				+
			
 
				+# 获取表2的样品编号列表
			
 
				+sample_ids_table2 = table2["样品编号"].tolist()
			
 
				+sample_count = len(sample_ids_table2)
			
 
				+
			
 
				+# 用于存储结果的列表
			
 
				+result = []
			
 
				+
			
 
				+print("样品编号总数:", sample_count)
			
 
				+
			
 
				+# 遍历表2的样品编号列表
			
 
				+for sample_id in sample_ids_table2:
			
 
				+    # 在表1中查找匹配的行
			
 
				+    matched_rows = table1[table1["样品编号"] == sample_id]
			
 
				+
			
 
				+    # 如果有匹配的行
			
 
				+    if not matched_rows.empty:
			
 
				+        # 只取匹配到的第一行
			
 
				+        first_matched_row = matched_rows.iloc[0]
			
 
				+        result.append((sample_id, first_matched_row.to_dict()))
			
 
				+    else:
			
 
				+        # 如果没有匹配的内容，仅添加样品编号，其他字段为空
			
 
				+        result.append((sample_id, {}))
			
 
				+
			
 
				+# 构建 DataFrame
			
 
				+output_data = []
			
 
				+for item in result:
			
 
				+    row_dict = {"样品编号": item[0], **item[1]}  # 样品编号 + 匹配行内容（如果有）
			
 
				+    output_data.append(row_dict)
			
 
				+
			
 
				+output_df = pd.DataFrame(output_data)
			
 
				+
			
 
				+# 保存结果到 Excel
			
 
				+output_df.to_excel("庐江剖面地理信息.xlsx", index=False)
			
--- a/容重匹配.py
+++ b/容重匹配.py
@@ -0,0 +1,33 @@
 
				+import pandas as pd
			
 
				+
			
 
				+# 读取Excel文件
			
 
				+df1 = pd.read_excel(r'D:\guozhong\来安县\来安县\9、来安县\来安县表层数据统计.xlsx', converters={'原样品编号': str})
			
 
				+df2 = pd.read_excel(r'D:\guozhong\来安县\来安县\9、来安县\来安县土壤容重.xlsx')
			
 
				+
			
 
				+print(df1)
			
 
				+# 确保 '原样品编号' 列是字符串类型
			
 
				+df1['原样品编号'] = df1['原样品编号'].astype(str)
			
 
				+df2['样品编号'] = df2['样品编号'].astype(str)
			
 
				+
			
 
				+# 去除第一张表和第二张表中样品编号的后两位
			
 
				+df1['Processed ID'] = df1['原样品编号'].str[:-2]
			
 
				+df2['Processed ID'] = df2['样品编号'].str[:-2]
			
 
				+print(df1)
			
 
				+print(df2)
			
 
				+
			
 
				+# 创建一个空的DataFrame用于存储结果
			
 
				+result_df = pd.DataFrame(columns=df2.columns)
			
 
				+
			
 
				+# 遍历处理后的第一张表的样品编号
			
 
				+for id in df1['Processed ID']:
			
 
				+    # 在第二张表中查找匹配的样品编号
			
 
				+    matched_row = df2[df2['Processed ID'] == id]
			
 
				+    # 如果找到匹配项，则添加到结果DataFrame中；否则添加一个空值行
			
 
				+    if not matched_row.empty:
			
 
				+        result_df = pd.concat([result_df, matched_row], ignore_index=True)
			
 
				+    else:
			
 
				+        # ai
			
 
				+        empty_row = pd.Series([''] * len(df2.columns), index=df2.columns)
			
 
				+        result_df = pd.concat([result_df, empty_row.to_frame().T], ignore_index=True)
			
 
				+
			
 
				+result_df.to_excel('来安容重.xlsx', index=False)
			
--- a/水稳匹配.py
+++ b/水稳匹配.py
@@ -0,0 +1,29 @@
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+df_1 = pd.read_excel(r"D:\guozhong\来安县\来安县\9、来安县\来安县表层数据统计.xlsx")
			
 
				+df_1
			
 
				+df_2 = pd.read_excel(r"D:\guozhong\来安县\来安县\9、来安县\来安县水稳数据.xlsx")
			
 
				+df_2
			
 
				+# 确保列为字符串类型
			
 
				+df_1["原样品编号"] = df_1["原样品编号"].astype(str)
			
 
				+df_2["样品编号"] = df_2["样品编号"].astype(str)
			
 
				+
			
 
				+# 提取前16位进行匹配
			
 
				+df_1["原样品编号前16位"] = df_1["原样品编号"].str[:16]
			
 
				+df_2["样品编号前16位"] = df_2["样品编号"].str[:16]
			
 
				+
			
 
				+# 合并表格（左连接，保留df_1中的所有数据）
			
 
				+merged_df = pd.merge(
			
 
				+    df_1,
			
 
				+    df_2[["样品编号前16位", "样品编号","水稳>5mm（%）", "水稳3mm~5mm（%）", "水稳2mm~3mm（%）", "水稳1mm~2mm（%）", "水稳0.5mm~1mm（%）","水稳0.25mm~0.5mm（%）","水稳性大团聚体总和（%）"]],
			
 
				+    left_on="原样品编号前16位",
			
 
				+    right_on="样品编号前16位",
			
 
				+    how="left"
			
 
				+)
			
 
				+
			
 
				+# 删除辅助列
			
 
				+merged_df = merged_df.drop(columns=["原样品编号前16位", "样品编号前16位"])
			
 
				+
			
 
				+# 输出结果
			
 
				+print(merged_df)
			
 
				+merged_df.to_excel(r"来安水稳.xlsx", index=False, engine="openpyxl")
			
--- a/转码.py
+++ b/转码.py
@@ -0,0 +1,30 @@
 
				+import pandas as pd
			
 
				+
			
 
				+# 读取两个表
			
 
				+table1 = pd.read_excel(r"D:\guozhong\泾县数据集20241118\泾县转码表.xlsx")  # 表1
			
 
				+table2 = pd.read_excel(r"D:\guozhong\泾县数据集20241118\泾县水稳汇总20241110.xlsx")  # 表2
			
 
				+
			
 
				+
			
 
				+# 确保两表列数据类型一致，转换为字符串
			
 
				+table1["转码后样品编号"] = table1["转码后样品编号"].astype(str)
			
 
				+table1["样品编号"] = table1["样品编号"].astype(str)
			
 
				+table2["样品编号"] = table2["样品编号"].astype(str)
			
 
				+
			
 
				+# 提取表2的样品编号列
			
 
				+sample_ids_table2 = table2["样品编号"].tolist()
			
 
				+
			
 
				+# 创建一个列表，用于存储匹配结果
			
 
				+matched_results = []
			
 
				+
			
 
				+# 遍历 table2 的样品编号，与 table1 的转码后样品编号匹配
			
 
				+for sample_id in sample_ids_table2:
			
 
				+    matches = table1[table1["转码后样品编号"] == sample_id]
			
 
				+    for _, row in matches.iterrows():
			
 
				+        matched_results.append((sample_id, row["样品编号"]))
			
 
				+
			
 
				+# 将结果转为 DataFrame 并保存到 Excel 文件
			
 
				+output_df = pd.DataFrame(matched_results, columns=["表2样品编号", "表1样品编号"])
			
 
				+output_path = r"D:\guozhong\泾县剖面水稳转码.xlsx"
			
 
				+output_df.to_excel(output_path, index=False)
			
 
				+
			
 
				+print(f"匹配完成！结果已保存到 {output_path}")