1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348 |
- import pandas as pd
- import numpy as np
- import time
- import public as pb
- import openpyxl
- import matplotlib.pyplot as plt
- from scipy.stats import spearmanr
- # import tkinter as tk
- # from tkinter import ttk
- from tkinter import filedialog
- from tkinter import messagebox
- from openpyxl import load_workbook
- from openpyxl.drawing.image import Image as OImage
- import os
- import re
- import ttkbootstrap as ttk
- from ttkbootstrap.constants import *
- from PIL import Image, ImageTk
- from ttkbootstrap.dialogs import Messagebox
- import plotly.graph_objects as go
- import plotly.io as pio
- from sklearn.linear_model import LinearRegression
- from openpyxl.worksheet.hyperlink import Hyperlink
- from scipy.ndimage import gaussian_filter1d
- from scipy.stats import norm
- from plotly.subplots import make_subplots
- import plotly
- import docx
- from docx import Document
- from docx.shared import Inches
- from docx.oxml import OxmlElement, ns
- from docx.shared import Pt, RGBColor
- from docx.oxml.ns import nsdecls, nsmap
- from docx.oxml import parse_xml
- from docx.enum.dml import MSO_THEME_COLOR_INDEX
- from docx import Document
- from docx.opc.constants import RELATIONSHIP_TYPE as RT
- from docx.enum.table import WD_TABLE_ALIGNMENT, WD_CELL_VERTICAL_ALIGNMENT
- from docx.oxml.ns import qn
- from docx.enum.text import WD_ALIGN_PARAGRAPH
- # 显示所有数据
- pd.set_option('display.width', 10000) # 设置字符显示宽度
- pd.set_option('display.max_rows', None) # 设置显示最大行
- pd.set_option('display.max_columns', None) # 设置显示最大列,None为显示所有列
- # 设置字体
- # 设置字体 微软雅黑 新罗马 加粗
- plt.rcParams['font.family'] = ['Times New Roman', 'Microsoft YaHei']
- # 设置字体加粗
- font = {'weight': 'bold'}
- plt.rc('font', **font) # 应用字体设置
- indexClassificationList = {
- '物理指标': ['pH', '土壤质地', '土壤容重1(g/cm³)', '土壤容重2(g/cm³)', '土壤容重3(g/cm³)', '土壤容重4(g/cm³)', '土壤容重平均值(g/cm³)',
- '2~0.2mm颗粒含量', '0.2~0.02mm颗粒含量', '0.02~0.002mm颗粒含量', '0.002mm以下颗粒含量', '水稳>5mm(%)', '水稳3mm~5mm(%)',
- '水稳2mm~3mm(%)', '水稳1mm~2mm(%)', '水稳0.5mm~1mm(%)', '水稳0.25mm~0.5mm(%)', '水稳性大团聚体总和(%)', '洗失量(吸管法需填)', '风干试样含水量(分析基)'],
- '常规养分指标': ['有机质', '全氮', '全磷', '全钾', '有效磷', '速效钾', '有效硫', '有效硼', '有效铁', '有效锰', '有效铜', '有效锌', '有效钼', '有效硅', '缓效钾'],
- '一般化学性指标': ['阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾', '全盐量', '电导率',
- '水溶性Na⁺含量', '水溶性K⁺含量', '水溶性Ca²⁺含量', '水溶性Mg²⁺含量', '水溶性Cl⁻含量', '水溶性CO₃²⁻含量','水溶性HCO₃⁻含量',
- '水溶性SO₄²⁻含量', '离子总量', '碳酸钙','游离铁', '全硫', '全锰', '全锌', '全铜', '全钼', '全硼', '全硒', '全铝',
- '交换性酸总量', '交换性H⁺', '交换性Al³⁺','水解性总酸度', '全硅', '全铁', '全钙', '全镁'],
- '重金属指标': ['总汞', '总砷', '总铅', '总镉', '总铬', '总镍']
- }
- # 可交互绘图函数
- def getInteractiveImg(x,y,label,x1,y1,label1,x2,y2,label2,url,name,xLabel,YLabel,numArr):
- # coef, p_value = spearmanr(x, y)
- # 绘制数据散点图
- fig = go.Figure(data=go.Scatter(
- x=x,
- y=y,
- text=numArr.to_numpy(),
- mode='markers', name=label,
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- # 设置图表布局
- fig.update_layout(
- title={
- 'text': f"{name}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title=xLabel,
- yaxis_title=YLabel)
- if label == 'pH':
- ph_y_f = [7.5 for _ in x]
- ph_y_s = [8.0 for _ in x]
- fig.add_trace(go.Scatter(x=x, y=ph_y_f, mode='lines', name='pH = 7.5', line=dict(
- width=1.5,
- color='#339933',
- dash='dash' # 设置虚线样式
- )))
- fig.add_trace(go.Scatter(x=x, y=ph_y_s, mode='lines', name='pH = 8.0', line=dict(
- width=2.5,
- color='#339933',
- dash='dash' # 设置虚线样式
- )))
- if len(x1) > 0 and len(y1) > 0:
- fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name=label1, text=numArr.to_numpy(),
- marker=dict(
- size=4, # 点的大小
- color='red', # 点的颜色
- symbol='hourglass'
- )))
- if len(x2) > 0 and len(y2) > 0:
- fig.add_trace(go.Scatter(x=x2, y=y2, mode='markers', name=label2, text=numArr.to_numpy(),
- marker=dict(
- size=4, # 点的大小
- color='green', # 点的颜色
- symbol='triangle-up' # 点的形状,这里设置为正方形
- )))
- # model = LinearRegression()
- # model.fit(x.to_numpy().reshape(-1, 1), y) # 用x的平方作为特征值
- # y_pred = model.predict(x.to_numpy().reshape(-1, 1))
- # fig.add_trace(go.Scatter(x=x, y=y_pred, mode='lines', name='拟合直线'))
- html_file_path = f"{url}/{name}.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- try:
- print(pio.kaleido.scope.plotlyjs)
- print(f"图片将保存至 " + f"{url}/{name}.png")
- fig.write_image(f"{url}/{name}.png",scale=3, width=800, height=600)
- #pio.write_image(fig, f"{url}/{name}.png")
- print(f"图片已成功保存至 "+f"{url}/{name}.png")
- except Exception as e:
- print(f"保存图片时出现错误: {str(e)}")
- #pio.write_image(fig, f"{url}/{name}.png")
- # 在文档中中插入html
- # workbook = load_workbook(filename=fileUrl)
- # # 选择一个工作表
- # ws = workbook[sheetName]
- # # 将 HTML 内容作为富文本写入单元格
- # ws[loc] = '=HYPERLINK("file:///{0}","点击查看统计图")'.format(html_file_path)
- # workbook.save(fileUrl)
- # 循环确定数据属于哪一个类型
- def getDataType(data, list):
- for item in list:
- if str(str(data['原样品编号'])[6:10]) in list[item]:
- data['土地类型归类'] = item
- return data
- # 1. 统计样品数量 分类统计:耕地:0101 0102 0103 园地:0201 0202 0203 0204 林地:0301 0302 0303 0304 0305 0306 0307 草地:0401 0402 0403 0404
- def getSimpleNum(data):
- """
- :param simpleData: 样品数据
- :return:
- """
- typeList = {
- '耕地园地': ['0101', '0102', '0103', '0201', '0202', '0203', '0204'],
- '林地草地': ['0301', '0302', '0303', '0304', '0305', '0306', '0307', '0401', '0402', '0403', '0404'],
- '商服用地': ['0501', '0502', '0503', '0504', '0505', '0506', '0507'],
- '工矿仓储用地': ['0601', '0602', '0603', '0604'],
- '住宅用地': ['0701', '0702'],
- '公共管理与公共服务用地': ['0801', '0802', '0803', '0804', '0805', '0806', '0807', '0808', '0809', '0810'],
- '特殊用地': ['0901', '0902', '0903', '0904', '0905', '0906'],
- '交通运输用地': ['1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009'],
- '水域及水利设施用地': ['1101', '1102', '1103', '1104', '1105', '1106', '1107', '1108', '1109', '1110'],
- '其他土地': ['1201', '1202', '1203', '1204', '1205', '1206', '1207']
- }
- # 根据耕地园地、林地草地分类数据,统计不同类型数据数量,生成表格
- newData = pd.DataFrame({})
- for index, row in data.iterrows():
- newRow = getDataType(row, typeList)
- newData =newData._append(newRow)
- # 按照土地类型归类 分类计数
- grouped_df = newData.groupby('土地类型归类')
- counts = grouped_df.size()
- resData = pd.DataFrame({})
- for group_name, group_data in grouped_df:
- # 附表:数量总体统计数据
- res = {
- '类型': group_name,
- '样品编号': group_data['样品编号'].to_list(),
- '数量': counts[group_name],
- '合计': counts.sum()
- }
- newRes = pd.DataFrame(res)
- resData = resData._append(newRes)
- return {
- 'sData': counts, # 统计数量
- 'allData': resData # 附表总数
- }
- # 2.小数修改:进行中
- def is_one_decimal_place(num): # 判断是否保留一位小数
- return str(num).count('.') == 1 and len(str(num).split('.')[1]) == 1
- def has_two_decimals(num): # 判断是否保留了两位小数
- return str(num).count(".") == 1 and len(str(num).split(".")[1]) == 2
- def three_decimal(num):
- # 不超过三位有效数字
- num = str(num)
- num = num.replace('.', '')
- numLen = 0
- stripped_s = num.lstrip('0')
- if stripped_s and stripped_s[0].isdigit():
- index = num.index(stripped_s[0]) # 加上去除的0的个数
- numLen = len(num[index:])
- # 字符串全为0或不包含数字时
- return numLen
- def is_less_than_three_decimals(number): # 不超过3位小数
- # 转换为字符串
- number_str = str(number)
- # 分离整数部分和小数部分
- integer_part, decimal_part = number_str.split('.') if '.' in number_str else (number_str, '')
- # 判断小数部分是否不超过三位
- return len(decimal_part) <= 3
- def is_three_decimal(number):
- number_str = str(number)
- # 分离整数部分和小数部分
- integer_part, decimal_part = number_str.split('.') if '.' in number_str else (number_str, '')
- # 判断小数部分是否不超过三位
- return len(decimal_part) == 3
- def highlight_condition(s):
- if s['数据审核结果'] != '' and not pd.isna(s['数据审核结果']):
- return ['background-color: #99CC99']*len(s)
- else:
- return ['']*len(s)
- def filter_number(arr):
- """
- :param arr:
- :return:
- """
- return pd.to_numeric(arr, errors='coerce')
- def getNum(data, url):
- # 读取数据 处理每一项小数保留问题
- oneData = ['2~0.2mm颗粒含量','0.2~0.02mm颗粒含量','0.02~0.002mm颗粒含量','0.002mm以下颗粒含量', '洗失量(吸管法需填)',
- '水稳>5mm','水稳3mm~5mm','水稳2mm~3mm','水稳1mm~2mm','水稳0.5mm~1mm','水稳0.25mm~0.5mm','水稳性大团聚体总和'
- ]
- twoData = ['土壤容重1(g/cm³)','土壤容重2(g/cm³)','土壤容重3','土壤容重4','土壤容重平均值','pH']
- threeData = ['电导率','水溶性钠离子','水溶性钾离子','水溶性钙离子','水溶性镁离子','水溶性碳酸根','水溶性碳酸氢根','水溶性硫酸根','水溶性氯根','离子总量'] # 3位有效数字
- twoDataToThree = ['有机质','全氮','全磷','全钾','全硫','全硼','全硒','全铁','水解性总酸度', '全钼', '全铝','全硅', '全钙', '全镁','有效磷', '有效硫','游离铁','总砷'] #保留2位小数,最多不超过3位有效数字
- threeDataToThree = ['有效钼', '总汞']
- changeDataList = ['阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾'] # <10 2位小数 >=10 3位有效数字
- cationDataList = ['全锰', '全铜', '全锌', '速效钾', '缓效钾', '有效硅', '总铅', '总镉', '总铬', '总镍'] # <1000 2位小数 不超过3位有效数字 >=1000 保留整数
- threeD = ['全盐量'] # 保留3位小数
- needData = data.iloc[:, 7:]
- if '土壤质地' in needData.columns:
- del needData['土壤质地']
- # needData = needData.apply(pd.to_numeric, errors='coerce')
- checkDataRes = []
- for index, row in needData.iterrows():
- str = ''
- for item in row.index:
- if item in oneData and (not pd.isna(row[item])) and not is_one_decimal_place(row[item]):
- str += f"{item}应保留一位小数。"
- if item in twoData and (not pd.isna(row[item])) and not has_two_decimals(row[item]):
- str += f"{item}应保留两位小数。"
- if item in threeData and (not pd.isna(row[item])) and three_decimal(row[item]) != 3:
- str += f"{item}应保留三位有效数字。"
- if item in twoDataToThree and (not pd.isna(row[item])) and (not has_two_decimals(row[item]) or three_decimal(row[item]) > 3):
- str += f"{item}应保留两位小数且不超过三位有效数字。"
- if item in threeDataToThree and (not pd.isna(row[item])) and (not is_less_than_three_decimals(row[item]) or three_decimal(row[item]) != 3 ):
- str += f"{item}应保留三位有效数字且不超过三位小数。"
- if item in changeDataList and (not pd.isna(row[item])) and (filter_number(row[item]) < 10) and not has_two_decimals(row[item]):
- str += f"{item}应保留两位小数。"
- if item in changeDataList and (not pd.isna(row[item])) and (filter_number(row[item]) >= 10) and three_decimal(row[item]) != 3:
- str += f"{item}应保留三位有效数字。"
- if item == '碳酸钙' and (not pd.isna(row[item])) and not isinstance(row[item], int):
- str += f"{item}应为整数。"
- if item == '全盐量' and (not pd.isna(row[item])) and not is_three_decimal(row[item]):
- str += f"{item}应保留3位小数。"
- if item in cationDataList and (not pd.isna(row[item])) and (filter_number(row[item]) < 1000) and (not has_two_decimals(row[item]) or three_decimal(row[item]) > 3):
- str += f"{item}保留2位小数且不超过三位有效数字。"
- if item in cationDataList and (not pd.isna(row[item])) and (filter_number(row[item]) >= 1000) and not isinstance(row[item], int):
- str += f"{item}应为整数。"
- if item == '可交换酸度' and (not pd.isna(row[item])) and (filter_number(row[item]) < 1) and not has_two_decimals(row[item]):
- str += f"{item}<1,应保留2位小数。"
- if item == '可交换酸度' and (not pd.isna(row[item])) and (filter_number(row[item]) >= 1) and three_decimal(row[item]) != 3:
- str += f"{item}>=1,应保留3位有效数字。"
- checkDataRes.append(str)
- data['数据审核结果'] = checkDataRes
- # 对审核结果有问题的数据进行标绿
- resData = data.style.apply(highlight_condition, axis=1)
- # 数据写入表格
- with pd.ExcelWriter( f'{url}/数值修约审核.xlsx', engine='openpyxl') as writer:
- resData.to_excel(writer, index=False, sheet_name='数值修约')
- # getNum('')
- # 3.计算所有指标的频度信息 已完成
- def getFrequencyInformation(data, url):
- if '数据审核结果' in data.columns:
- del data['数据审核结果']
- if '水稳编号' in data.columns:
- del data['水稳编号']
- #统计样品数量 计算最大值 最小值 中位数 平均值 标准差
- needData = data.iloc[:, 7:]
- if '土壤质地' in needData.columns:
- del needData['土壤质地']
- needData = needData.apply(pd.to_numeric, errors="coerce")
- resData = pd.DataFrame({})
- for item in needData.columns:
- min_value = needData[item].min() # 最大值
- max_value = needData[item].max() # 最小值
- median_value = needData[item].median() # 中位数
- mean_value = needData[item].mean() # 平均数
- std_value = needData[item].std() # 标准差
- resData[item] = [min_value, max_value, median_value, mean_value, std_value]
- index_value = ['最小值', '最大值', '中位数', '平均数', '标准差']
- # 汇总数据
- resData.index = index_value
- data_res = round(resData, 2)
- data_res = data_res.rename_axis('频度分析')
- # data_res = data_res.transpose()
- # data_res = data_res.reset_index()
- # data_res.columns = ['指标名称', '最小值', '最大值', '中位数', '平均数', '标准差']
- return data_res
- # 数据写入表格
- # with pd.ExcelWriter(f'{url}/频度信息.xlsx', engine='openpyxl') as writer:
- # data_res.to_excel(writer, sheet_name='频度信息')
- # getFrequencyInformation('')
- # 4. 绘制每个指标的累积频率图
- def getMap(data, title, unit, url):
- data = data.dropna()
- if len(data) != 0: # 指标无数据 不进行绘图
- min = data.min()
- max = data.max()
- # 计算直方图
- bins = np.linspace(min - 1, max + 1, 30, endpoint=True)
- hist, bin_edges = np.histogram(data, bins=bins)
- cumulative_hist = np.cumsum(hist)
- # 计算拟合曲线
- p1 = np.polyfit(bin_edges[1:], hist, 2)
- p2 = np.poly1d(p1)
- # 计算累计频率
- cumulative_freq = cumulative_hist / cumulative_hist.max()
- # fig, ax1 = plt.subplots()
- fig = make_subplots(specs=[[{"secondary_y": True}]])
- if title == 'pH':
- fig.add_trace(go.Histogram(x=data, name="频数", nbinsx = 17, xbins = dict(start=0, end=8.5, size=0.5)), secondary_y=True)
- else:
- fig.add_trace(go.Histogram(x=data, name="频数"), secondary_y=True)
- # 绘制直方图 color="#ffc798" #a4464b #b1a9ab
- # fig = go.Figure(data=[go.Histogram(x=data, name="频数")])
- # 设置标题和其他格式
- fig.update_layout(
- title={
- 'text': f"{title}统计图",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title=f"{title}{unit}",
- yaxis_title='频次',
- bargap=0.2, # 相邻位置坐标的钢筋之间的间隙
- bargroupgap=0.1
- )
- # ax1.hist(data, bins=bins, rwidth=0.8, zorder=1, label="频数")
- # ax2 = ax1.twinx()
- # ax2.plot(bin_edges[1:], cumulative_freq, zorder=10, color="#a4464b", label="累积频率 ")
- fig.add_trace(go.Scatter(x=bin_edges[1:], y=cumulative_freq *100, mode='lines', name='累积频率',line=dict(width=2), yaxis='y2',
- marker=dict(
- color='red', # 点的颜色
- symbol='hourglass'
- )), secondary_y=False)
- # 绘制正态分布曲线
- # 估计正态分布参数
- mu = data.mean()
- sigma = data.std()
- # 创建正态分布对象
- dist = norm(mu, sigma)
- # 计算要绘制的x值
- x = np.linspace(bin_edges.min(), bin_edges.max(), 100)
- p = norm.pdf(x, mu, sigma)
- # ax3 = ax1.twinx()
- # # ax3.plot(bin_edges[1:], y_smoothed, label='拟合曲线', color="#333")
- # #label = 'N({:.2f}, {:.2f}^2)'.format(mu, sigma)
- # ax3.plot(x, dist.pdf(x), color="#333", label='拟合曲线')
- fig.add_trace(go.Scatter(x=x, y=p*100, mode='lines', yaxis='y2', name='拟合曲线', line=dict(width=2)), secondary_y=False)
- fig.update_layout(
- title={
- 'text': '指标累积频率分析图',
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- yaxis_title='累积频率(%)', # 设置Y1轴标签
- yaxis2_title='频数'
- )
- maxData = 110
- if p.max()*100 >100:
- maxData = p.max()*100
- fig.update_yaxes(range=[0, maxData], row=1, col=1, secondary_y=False)
- html_file_path = f"{url}/{title}分析图.html"
- html_file_path = html_file_path.replace('(g/cm³)','')
- html_file_path = html_file_path.replace('(%)', '')
- html_file_path = html_file_path.replace('>', '')
- pio.write_html(fig, file=html_file_path, auto_open=False)
- title = title.replace('(g/cm³)', '')
- title = title.replace('(%)', '')
- title = title.replace('>', '')
- pio.write_image(fig, f"{url}/{title}分析图.png")
- def getFrequencyImage(data, url):
- unitList = {
- 'g/cm3': ['土壤容重1(g/cm³)','土壤容重2(g/cm³)', '土壤容重3(g/cm³)', '土壤容重4(g/cm³)','土壤容重平均值(g/cm³)'],
- '%': ['土壤机械组成','全铝', '全硅', '全铁', '全钙', '全镁', '水稳>5mm(%)','水稳3mm~5mm(%)', '水稳2mm~3mm(%)', '水稳1mm~2mm(%)', '水稳0.5mm~1mm(%)', '水稳0.25mm~0.5mm(%)', '水稳性大团聚体总和(%)'],
- 'g/kg': ['有机质', '全氮', '全磷', '全钾', '水溶性盐总量', '离子总量', '碳酸钙', '游离铁', '全硫'],
- 'mg/kg': ['有效磷', '速效钾', '有效硫', '有效硼', '有效铁', '有效锰', '有效铜', '有效锌', '有效钼', '有效硅', '缓效钾', '全锰',
- '全锌', '全铜', '全钼', '全硼', '全硒', '总汞', '总砷', '总铅', '总镉', '总铬', '总镍'],
- 'cmol/kg': ['阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾']
- }
- # 绘制图形
- needData = data.iloc[:, 7:] #这里有可能需要修改
- if '土壤质地' in needData.columns:
- del needData['土壤质地']
- needData = needData.apply(pd.to_numeric, errors="coerce")
- for item in needData.columns:
- # newData = needData[item].dropna()
- if len(needData) > 0:
- label = ''
- for i in unitList:
- if item in unitList[i]:
- label = i
- getMap(needData[item], item, label, url)
- # 公共函数 判断指标完整性
- # def checkAllNames(data, list):
- # for item in list:
- # if data[item] != '未检测' and data[item] != '/': # 未检测的值可以忽略 空值
- # if pd.isna(data[item]): # 表示值缺失
- # 5. 检测指标完整性
- def getDataComplete(data):
- # 根据类型:耕地园地表层土壤样品,耕地园地剖面土壤样品,林地草地表层土壤样品,林地草地剖面土壤样品,水稳定性大团聚体样品
- # 耕地园地样品列表 有效硅(水田才有)
- cultivatedLandandfieldList = ['风干试样含水量(分析基)', '洗失量(吸管法需填)', '土壤质地', '土壤容重1(g/cm³)','土壤容重2(g/cm³)','土壤容重3(g/cm³)','土壤容重4(g/cm³)','土壤容重平均值(g/cm³)'
- , '2~0.2mm颗粒含量', '0.2~0.02mm颗粒含量', '0.02~0.002mm颗粒含量', '0.002mm以下颗粒含量'
- , '水稳>5mm(%)', '水稳3mm~5mm(%)','水稳2mm~3mm(%)', '水稳1mm~2mm(%)','水稳0.5mm~1mm(%)','水稳0.25mm~0.5mm(%)', '水稳性大团聚体总和(%)'
- , 'pH', '阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾',
- '全盐量', '电导率', '水溶性Na⁺含量', '水溶性K⁺含量', '水溶性Ca²⁺含量', '水溶性Mg²⁺含量', '水溶性Cl⁻含量', '水溶性CO₃²⁻含量',
- '水溶性HCO₃⁻含量', '水溶性SO₄²⁻含量', '离子总量', '有机质', '全氮', '全磷', '全钾', '有效磷', '速效钾', '缓效钾', '有效硫', '有效铁',
- '有效锰', '有效铜', '有效锌', '有效硼', '有效钼', '总汞', '总砷', '总铅', '总镉', '总铬', '总镍', '交换性酸总量', '碳酸钙', '全硫', '全硼', '全铁',
- '全锰', '全铜', '全锌', '全钼', '全铝', '全硅', '全钙', '全镁', '游离铁','交换性H⁺', '交换性Al³⁺','母岩']
- # 林地草地样品列表
- woodlandandGrassList = ['风干试样含水量(分析基)', '洗失量(吸管法需填)', '土壤质地', '土壤容重1(g/cm³)','土壤容重2(g/cm³)', '土壤容重3(g/cm³)','土壤容重4(g/cm³)','土壤容重平均值(g/cm³)', '2~0.2mm颗粒含量','0.2~0.02mm颗粒含量','0.02~0.002mm颗粒含量', '0.002mm以下颗粒含量', 'pH', '阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾', '有机质',
- '全氮', '全磷', '全钾', '有效磷', '速效钾', '交换性酸总量', '水解性总酸度', '碳酸钙', '全硫', '全铁','游离铁', '交换性H⁺', '交换性Al³⁺', '母岩']
- # 其他样品列表 剖面
- # '耕地': ['0101', '0102', '0103'],
- # '园地': ['0201', '0202', '0203', '0204'],
- # '林地': ['0301', '0302', '0303', '0304', '0305', '0306', '0307'],
- # '草地': ['0401', '0402', '0403', '0404'],
- # 根据土地利用类型 判断指标是否完整 不完整的数据提取出来 这里规则无法确定 先放着吧
- gYType = ['0101', '0102', '0103', '0201', '0202', '0203', '0204']
- lCType = ['0301', '0302', '0303', '0304', '0305', '0306', '0307', '0401', '0402', '0403', '0404']
- # 根据ph值筛选数据数量
- # 可交换酸度 ph<6, 水解性酸度 ph<6 林地草地,碳酸钙 ph>7, 全铁 耕地园地 + 林地草地ph<6,
- # 全部数据 ph《6 ph>7数量 林地草地ph《6的数量
- phLowSix = data['pH'].lt(6).sum() # ph<6的数据数量 可交换酸数量
- phHighSeven = data['pH'].gt(7).sum() # ph>7的数据数量 碳酸钙数量
- # 筛选所有林地草地数据
- gAndYData = data['原样品编号'].apply(lambda x: any(item == str(x)[6:10] for item in gYType))
- treeAndGrassData = data['原样品编号'].apply(lambda x: any(item in str(x)[6:10] for item in lCType))
- tAndGLowSix = data[treeAndGrassData]['pH'].lt(6).sum() # 水解性酸
- allFe = phLowSix + tAndGLowSix # 全铁数量
- # 统计所有数据中 各指标的数量
- resData = getSimpleNum(data)
- yxg = 0
- for index, row in data.iterrows():
- if str(str(row['原样品编号'])[6:10]) == '0101':
- yxg += 1
- data = data.replace('未检测', np.nan)
- if '序号' in data.columns:
- del data['序号']
- if '原样品编号' in data.columns:
- del data['原样品编号']
- if '样品编号' in data.columns:
- del data['样品编号']
- if '地理位置' in data.columns:
- del data['地理位置']
- if '母质' in data.columns:
- del data['母质']
- if '土壤类型' in data.columns:
- del data['土壤类型']
- if '土地利用类型' in data.columns:
- del data['土地利用类型']
- if '审核结果' in data.columns:
- del data['审核结果']
- # 根据指标排序
- counts = data.count()
- # 根据土地类型统计样本数
- countData = {
- '耕地园地': resData['sData']['耕地园地'] if '耕地园地' in resData['sData'].index else 0,
- '林地草地': resData['sData']['林地草地'] if '林地草地' in resData['sData'].index else 0,
- '其他': 0,
- '有效硅': yxg
- }
- needNumList = []
- for item in data.columns:
- # 判断指标是否属于耕地园地、林地草地数组
- if item == '有效硅':
- needNumList.append(countData['有效硅'])
- elif (item in cultivatedLandandfieldList) and (item in woodlandandGrassList):
- needNumList.append(countData['耕地园地'] + countData['林地草地'])
- elif (item in cultivatedLandandfieldList) and (item not in woodlandandGrassList):
- needNumList.append(countData['耕地园地'])
- elif (item not in cultivatedLandandfieldList) and (item in woodlandandGrassList):
- needNumList.append(countData['林地草地'])
- elif (item not in cultivatedLandandfieldList) and (item not in woodlandandGrassList):
- needNumList.append(0)
- else:
- needNumList.append(0)
- counts = counts.to_frame()
- counts['应测数量'] = needNumList
- counts.loc['全铁', '应测数量'] = allFe
- counts.loc['水解性总酸度', '应测数量'] = tAndGLowSix
- counts.loc['交换性酸总量', '应测数量'] = phLowSix
- counts.loc['交换性H⁺', '应测数量'] = phLowSix
- counts.loc['交换性Al³⁺', '应测数量'] = phLowSix
- counts.loc['碳酸钙', '应测数量'] = phHighSeven
- return counts
- # 6.指标名称与实际检测样品数量统计表,统计样品数量和指标检测数量
- def getCheckNum():
- # 统计每个指标的有值的数量,水溶性盐分总量大于1g/kg的 统计八大离子的检测量
- resData = ''
- return resData
- # 过滤特殊字符
- def filter_special_characters(s):
- s = str(s)
- return re.sub(r'[^a-zA-Z\u4e00-\u9fa5\d]', '', s)
- # 7.检测方法
- def checkMethod(data, url):
- # 各指标的检测标准方法
- checkData = {
- '土壤容重' : '《土壤检测 第4部分:土壤容重的测定》(NY/T 1121.4—2006) 环刀法',
- '机械组成' : '《土壤分析技术规范》(第二版), 5.1 吸管法',
- '土壤水稳性大团聚体': '《土壤检测 第19部分:土壤水稳定大团聚体组成的测定》(NY/T 1121.19—2008)筛分法',
- 'pH检测方法': '《土壤检测 第2部分:土壤pH的测定》(NY/T 1121.2—2006) 电位法',
- '水溶性盐类(水溶性盐总量、电导率、水溶性钠离子、钾离子、钙离子、镁离子、碳酸根、碳酸氢根、硫酸根、氯根)': '《森林土壤水溶性盐分分析》(LY/T 1251—1999)(浸提液中钙、镁、钾、钠离子的测定采用等离子体发射光谱法,硫酸根和碳酸根的测定增加离子色谱法) 质量法',
- '全氮': '《土壤检测 第24部分:土壤全氮的测定 自动定氮仪法》(NY/T 1121.24—2012) 自动定氮仪法',
- '全磷': '《森林土壤磷的测定》(LY/T 1232—2015)(详见本规范培训教材) 酸消解—电感耦合等离子体发射光谱法',
- '全钾': '《森林土壤钾的测定》(LY/T 1234—2015) 酸消解一电感耦合等离子体发射光谱法',
- '全铁': '《固体废物22种金属元素的测定》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '全锰': '《固体废物22种金属元素的测定》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '全钼': '《固体废物 金属元素的测定 电感耦合等离子体质谱法》(HJ 766—2015) 酸消解—电感耦合等离子体质谱法',
- '全铝': '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解-电感耦合等离子体发射光谱法',
- '全硅': '《土壤和沉积物 11种元素的测定 碱熔—电感耦合等离子体发射光谱法》(HJ 974—2018) 碱熔一电感耦合等离子体发射光谱法 ',
- '全钙': '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '全镁': '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '速效钾': '《土壤 速效钾和缓效钾含量的测定》(NY/T 889—2004) 乙酸铵浸提—火焰光度法',
- '缓效钾': '《土壤 速效钾和缓效钾含量的测定》(NY/T 889—2004) 热硝酸浸提—火焰光度法',
- '有效硅': '《土壤检测第15部分:土壤有效硅的测定》(NY/T 1121.15-2006) 柠檬酸浸提-硅钼蓝比色法',
- '有效硼': '土壤样品制备与检测技术规范培训教材 沸水提取-电感耦合等离子体发射光谱法',
- '有效钼': '《土壤检测 第9部分:土壤有效钼的测定》(NY/T 1121.9-2023) 草酸-草酸铵浸提-电感耦合等离子体质谱法',
- '游离铁': '《土壤分析技术规范》(第二版),19.1游离铁(Fed)的测定(DCB法) 连二亚硫酸钠-柠檬酸钠-重 碳酸提-邻菲罗啉比色法',
- '总砷': '《土壤质量 总汞、总砷、总铅的测定 原子荧光法第2部分:土壤中总砷的测定》(GB/T22105.2-2008) 原子荧光法',
- '总铅': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '总镉': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '总铬': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '总镍': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法'
- }
- checkDBData = {
- '有机质': ['《土壤检测 第6部分:土壤有机质的测定》(NY/T 1121.6—2006) 重铬酸钾氧化—容量法',
- '土壤中总碳和有机质的测定元素分析仪法(农业行业标准报批稿) 元素分析仪法'], # 两种方法都可以
- '全硫': ['《土壤检测 第2部分:土壤全硫的测定》(NY/T 1104—2006) 硝酸镁氧化-硫酸钡比浊法',
- '燃烧红外光谱法(本规范培训教材)'],
- '全硼': ['《土壤分析技术规范》(第二版),18.1土壤全硼的测定 碱熔-姜黄素-比色法',
- '《土壤分析技术规范》(第二版),18.1土壤全硼的测定 碱熔-等离子体发射光谱法'],
- '全铜': ['《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '《固体废物 22种金属元素的测定电感耦合等离子体发射光谱法》(HJ781-2016) 酸消解-电感耦合等离子体发射光谱法'],
- '全锌': ['《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解一电感耦合等离子体质谱法',
- '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法'],
- '可交换酸度': '《土壤分析技术规范》(第二版), 11.2 土壤交换性酸的测定 氯化钾交换—中和滴定法', # ph<6
- '碳酸钙': '《土壤分析技术规范》(第二版), 15.1 土壤碳酸盐的测定 气量法', # ph>7
- '总汞': ['《土壤质量 总汞、总砷、总铅的测定原子荧光法 第1部分:土壤中总汞的测定》(GB/T22105.1-2008) 原子荧光法',
- '《土壤和沉积物总汞的测定 催化热解/冷原子吸收分光光度法》(HJ923-2017) 催化热解-冷原子吸收分光光度法'],
- '有效铁': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- '有效锰': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA浸提-电感耦合等离子体发射光谱法'],
- '有效铜': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- '有效锌': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T8902004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- }
- checkPHData = {
- '阳离子交换量': ['《土壤分析技术规范》(第二版), 12.2 乙酸铵交换法',
- '《土壤分析技术规范》(第二版), 12.1 EDTA—乙酸铵盐交换法'], # ph<=7.5 ph>7.5
- '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁': [
- '《土壤分析技术规范》(第二版), 13.1 酸性和中性土壤交换性盐基组分的测定(乙酸铵交换法)(交换性钙、镁、钾、钠离子的测定增加等离子体发射光谱法) 乙酸铵交换法等',
- '《石灰性土壤交换性盐基及盐基总量的测定》(NY/T 1615-2008)(交换液中钾、钠、钙、镁离子的测定增加等离子体发射光谱法) 氯化铵-乙醇交换法等'],
- # ph<=7.5 ph>7.5
- '有效磷': ['《土壤检测 第7部分:土壤有效磷的测定》(NY/T 1121.7—2014) 氟化铵-盐酸溶液浸提一钼锑抗比色法',
- '《土壤检测 第7部分:土壤有效磷的测定》(NY/T 1121.7—2014) 碳酸氢钠溶液—钼锑抗比色法'], # ph<6.5 ph>=6.5
- '有效硫': ['《土壤检测第14部分:土壤有效的测定》NY/T 1121.14-2023) 磷酸盐-乙酸溶液浸提-电感耦合等离子体发射光谱法',
- '《土壤检测第14部分:土壤有效的测定》(NY/T 1121.14-2023) 氯化钙浸提一电感耦合等离子体发射光谱法'],
- # ph<7.5 ph>=7.5
- }
- checkDataKey = [key for key in checkData]
- checkDBDataKey = ['有机质', '全硫', '全硼', '全铜', '全锌', '可交换酸度', '碳酸钙', '总汞', '有效铁', '有效锰', '有效铜', '有效锌']
- # [key for key in checkDBData]
- checkPHDataKey = ['阳离子交换量', '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁', '有效磷', '有效硫']
- # [key for key in checkPHData]
- # print('list',checkDBDataKey)
- # print('list', checkPHDataKey)
- checkDataRes = []
- for index, row in data.iterrows():
- str = ''
- for item in row.index:
- if row[item] == '未检测':
- str = ''
- elif row[item] == '未填写' or pd.isna(row[item]):
- str += f"{item}未填写。"
- elif not pd.isna(row[item]) and (item in checkDataKey) and (not pd.isna(checkData[item])): # 指标不为空 且 在通用指标列表中
- if filter_special_characters(row[item]) != filter_special_characters(checkData[item]):
- str += f"{item}检测方法填报有误。"
- elif (not pd.isna(row[item]) and (item in checkDBDataKey)):
- if filter_special_characters(row[item]) != filter_special_characters(checkDBData[item][0]) and filter_special_characters(row[item]) != filter_special_characters(checkDBData[item][1]):
- str += f"{item}检测方法填报有误。"
- # 指标在两种方法指标列表中
- elif (not pd.isna(row[item]) and (item in checkPHDataKey)):
- # 指标在区分ph值列表中
- if item == '阳离子交换量' and row['pH'] <= 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '阳离子交换量' and row['pH'] > 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][1]):
- str += f"{item}检测方法填写有误。"
- if item == '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁' and row['pH'] <= 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁' and row['pH'] > 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '有效磷' and row['pH'] < 6.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '有效磷' and row['pH'] >= 6.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '有效硫' and row['pH'] < 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '有效硫' and row['pH'] >= 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- checkDataRes.append(str)
- data['数据审核结果'] = checkDataRes
- resData = data.style.apply(highlight_condition, axis=1)
- # 数据写入表格
- with pd.ExcelWriter(f'{url}/检测方法审核结果.xlsx', engine='openpyxl') as writer:
- resData.to_excel(writer, index=False, sheet_name='检测方法审核')
- return data[data['检测方法审核结果'] !='']
- # 8.数据填报审核
- def dataReportResult(data, url):
- # 未检出项填报,空值填报,错误值处理
- # 检测值有*号、/、未检测、空值、数值 为合理情况,
- resData = []
- for index, row in data.iterrows():
- str = ''
- for item in row.index:
- if row[item] == '-' or row[item] == 0:
- str = f"{item}数据填报错误。"
- resData.append(str)
- data['数据审核结果'] = resData
- finData = data.style.apply(highlight_condition, axis=1)
- # 数据写入表格
- with pd.ExcelWriter(f'{url}/数据填报项审核结果.xlsx', engine='openpyxl') as writer:
- finData.to_excel(writer, index=False, sheet_name='数据填报审核')
- # 9.土壤质地类型判断 这个之前已有,将土壤质地类型不一致的样本数据提取出来
- # 使用resData数据,判断土壤类型和土壤类型判断是否一致,不一致提取出不一致数据写入表格
- # 10.土壤检测数据超阈值样品统计表,将之前统计的超阈值的数据提取出来,显示原因
- # def getOverLineData(data, url): # 所有阈值判断
- # 提取数据项,异常原因,增加一列外业保持空值
- # 提取每个表格审核结果不为空的数据,最后将所有数据合并
- # resData =
- # 数据写入表格
- # with pd.ExcelWriter(f'{url}/超阈值样品统计表.xlsx', engine='openpyxl') as writer:
- # resData.to_excel(writer, index=False, sheet_name='超阈值数据')
- # 11.ph值统计 频度计算、历年数据统计、ph异常数据提取
- def getPHData(data, url):
- resData = pd.DataFrame({})
- # 计算频度
- min_value = data['pH'].min() # 最大值
- max_value = data['pH'].max() # 最小值
- median_value = data['pH'].median() # 中位数
- mean_value = round(data['pH'].mean(),2) # 平均数
- std_value = round(data['pH'].std(), 2) # 标准差
- resData['PH'] = [min_value, max_value, median_value, mean_value, std_value]
- index_value = ['最小值', '最大值', '中位数', '平均数', '标准差']
- resData.index = index_value
- # 绘制分布图
- x = np.arange(0, len(data['pH']), 1) # 横坐标数据
- y = data['pH'].sort_values() # 纵坐标数据
- getInteractiveImg(x, y, 'pH', [], [], '', [], [], '', url,
- 'pH值分布图', '样品序号', 'pH', data['原样品编号'])
- # 提取异常数据
- abnormalData = pd.DataFrame({})
- for index, row in data.iterrows():
- if not pd.isna(row['pH']) and (row['pH'] < mean_value-3*std_value or row['pH'] > mean_value+3*std_value):
- newRow = row[['原样品编号', '样品编号', '土地利用类型', 'pH']]
- soilType = ''
- if isinstance(row['土壤类型'], str):
- if len(row['土壤类型'].split('_')) > 1:
- soilType = row['土壤类型'].split('_')[1]
- newRow['土壤类型'] = soilType
- newRow['外业情况'] = ''
- abnormalData = abnormalData._append(newRow)
- return {
- '异常数据': abnormalData,
- '频度分析': resData
- }
- # resData写进文档表格
- # 异常数据写入表格
- # 生成也可以写入文档表格
- # with pd.ExcelWriter( './img/ph异常数据.xlsx', engine='openpyxl') as writer:
- # abnormalData.to_excel(writer, index=False, sheet_name='ph异常数据')
- # resData.to_excel(writer, sheet_name='频度分析')
- # 12.有机质和全氮 计算比值、绘制相关性图、提取异常数据形成表格
- def getNAndC(data, url):
- # 去掉nan的值
- data = data.dropna(subset=['有机质', '全氮'])
- # 绘制散点图 拟合直线 计算方差
- if data.empty:
- return pd.DataFrame({})
- x = data['有机质']
- y = data['全氮']
- plt.scatter(x, y)
- fig = go.Figure(data=go.Scatter(
- x=x,
- y=y,
- text=data['原样品编号'].to_numpy(),
- mode='markers', name='有机质与全氮',
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- # 使用sklearn的LinearRegression进行最小二乘法拟合
- model = LinearRegression()
- model.fit(x.to_numpy().reshape(-1, 1), y)
- # 计算拟合直线的斜率和截距
- slope = model.coef_[0]
- intercept = model.intercept_
- r, _ = np.corrcoef(y, slope * x + intercept)
- # 绘制拟合直线
- fig.add_trace(go.Scatter(x=x, y=slope * x + intercept,mode='lines', name='拟合直线'))
- # plt.plot(x, slope * x + intercept, color='red', label='拟合直线', linewidth=2)
- # 设置图表布局
- fig.update_layout(
- title={
- 'text': f"有机质与全氮相关关系图,y={round(slope,2)}x + {round(intercept,2)},R²={round(r[1], 2) ** 2},R={round(r[1],2)}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title='有机质(g/kg)',
- yaxis_title='全氮(g/kg)')
- html_file_path = f"{url}/有机质与全氮相关性分析图.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/有机质与全氮相关性分析图.png")
- # plt.savefig('./img/审核报告图形/' + '有机质与全氮相关性分析图.png', dpi=500, bbox_inches='tight')
- # plt.show()
- abnormalData = pd.DataFrame({})
- for index, row in data.iterrows():
- if not pd.isna(row['有机质']) and not pd.isna(row['全氮']):
- if row['有机质']/row['全氮'] <13 or row['有机质']/row['全氮'] > 20:
- newRow = row[['原样品编号', '样品编号', '土地利用类型', '有机质', '全氮']]
- soilType = ''
- resStr = ''
- if row['有机质']/row['全氮'] < 13:
- resStr = '偏低'
- if row['有机质']/row['全氮'] > 20:
- resStr = '偏高'
- if isinstance(row['土壤类型'], str):
- if len(row['土壤类型'].split('_')) > 1:
- soilType = row['土壤类型'].split('_')[1]
- newRow['土壤类型'] = soilType
- newRow['碳氮比'] = round(row['有机质']/row['全氮'], 2)
- newRow['审核结果'] = resStr
- newRow['外业情况'] = ''
- abnormalData = abnormalData._append(newRow)
- abnormalData = abnormalData.replace(np.nan, '')
- return abnormalData
- # with pd.ExcelWriter('./img/碳氮比异常数据.xlsx', engine='openpyxl') as writer:
- # abnormalData.to_excel(writer, index=False, sheet_name='碳氮比异常数据')
- def getImg(x,y,label):
- plt.scatter(x, y)
- plt.xlabel('样品数量')
- plt.ylabel(label)
- # plt.savefig('./img/' + label + '数据分布图.png', dpi=500, bbox_inches='tight')
- # plt.show()
- # 13.全磷和有效磷,绘图,统计异常值,绘图绘制在同一个图中
- def getPData(data, url):
- # 提取异常数
- abnormalData = pd.DataFrame({})
- # abnormalData.columns = ['原样品编号', '样品编号', '土地利用类型', '全磷', '有效磷', '土壤类型', '有效磷比', '外业情况']
- for index, row in data.iterrows():
- if not pd.isna(row['有效磷']) and not pd.isna(row['全磷']):
- if row['有效磷'] /(1000 * row['全磷']) >= 0.15:
- newRow = row[['原样品编号', '样品编号', '土地利用类型', '全磷', '有效磷']]
- soilType = ''
- if isinstance(row['土壤类型'], str):
- if len(row['土壤类型'].split('_')) > 1:
- soilType = row['土壤类型'].split('_')[1]
- newRow['土壤类型'] = soilType
- newRow['有效磷比'] = round(row['有效磷'] / (row['全磷']*10), 2)
- newRow['外业情况'] = ''
- abnormalData = abnormalData._append(newRow)
- with pd.ExcelWriter(f'{url}/有效磷占全磷比异常数据.xlsx', engine='openpyxl') as writer:
- abnormalData.to_excel(writer, index=False, sheet_name='有效磷占全磷比比异常数据')
- if not data['全磷'].empty and not data['有效磷'].empty:
- x1 = np.arange(0, len(data['全磷']), 1)
- x2 = np.arange(0, len(data['有效磷']), 1)
- x3 = np.arange(0, len(data['有效磷']/data['全磷']), 1)
- y1 = data['全磷'].sort_values()
- y2 = data['有效磷'].sort_values()
- y3 = data['有效磷']/10*data['全磷'].sort_values()
- # getImg(x1, y1,'全磷(g/kg)')
- # getImg(x2, y2, '有效磷(mg/kg)')
- # getImg(x3, y3, '有效磷占全磷比(%)')
- getInteractiveImg(x1, y1,'全磷(g/kg)',[], [], '', [], [], '', url,
- '全磷分布图', '样品序号', '全磷(g/kg)', data['原样品编号'])
- getInteractiveImg(x2, y2, '有效磷(mg/kg)', [], [], '', [], [], '', url,
- '有效磷分布图', '样品序号', '有效磷(mg/kg)', data['原样品编号'])
- getInteractiveImg(x3, y3, '有效磷占全磷比(%)', [], [], '', [], [], '', url,
- '有效磷占全磷比分布图', '样品序号', '有效磷占全磷比(%)', data['原样品编号'])
- return abnormalData
- # 14. 全钾、速效钾和缓效钾,绘图
- def getKData(data, url):
- x1 = np.arange(0, len(data['全钾']), 1)
- y1 = data['全钾']
- x2 = np.arange(0, len(data['速效钾'] + data['缓效钾']), 1)
- y2 = data['速效钾']/1000 + data['缓效钾']/1000
- getInteractiveImg(x1, y1, '全钾', x2, y2,'速效钾与缓效钾之和', [],[],'', url,'全钾与速效钾缓效钾之和关系统计图','样品序号','g/kg', data['原样品编号'])
- x = np.arange(0, len(data['速效钾']), 1)
- y = data['速效钾']
- x_1 = np.arange(0, len(data['缓效钾']), 1)
- y_1 = data['缓效钾']
- getInteractiveImg(x, y, '速效钾', x_1, y_1, '缓效钾', [], [], '', url,
- '速效钾与缓效钾关系统计图', '样品序号', 'mg/kg', data['原样品编号'])
- # 15.重金属 已有 提取重金属异常数据即可
- def getMetal(simpleData):
- # resData_14 数据中提取重金属超标的数据,提取相应的指标形成表格
- resData = ''
- return resData
- # 16.阳离子交换量与交换性盐基总量
- def cationExchangeCapacity(data, url):
- # 绘图
- x1 = data['阳离子交换量']
- y1 = data['交换性盐基总量']
- fig = go.Figure(data=go.Scatter(
- x=x1,
- y=y1,
- text=data['原样品编号'].to_numpy(),
- mode='markers', name='阳离子交换量与交换性盐基总量相关性散点图',
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- print(3.43)
- # 使用sklearn的LinearRegression进行最小二乘法拟合
- model = LinearRegression()
- model.fit(x1.to_numpy().reshape(-1, 1), y1)
- # 计算拟合直线的斜率和截距
- slope = model.coef_[0]
- intercept = model.intercept_
- r, _ = np.corrcoef(y1, slope * x1 + intercept)
- # 绘制拟合直线
- fig.add_trace(go.Scatter(x=x1, y=slope * x1 + intercept, mode='lines', name='拟合直线'))
- # plt.plot(x, slope * x + intercept, color='red', label='拟合直线', linewidth=2)
- # 设置图表布局
- fig.update_layout(
- title={
- 'text': f"阳离子交换量与交换性盐基总量相关性散点图,y={round(slope, 2)}x + {round(intercept, 2)},R²={round(r[1], 2) ** 2},R={round(r[1], 1)}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title='阳离子交换量(g/kg)',
- yaxis_title='交换性盐基总量(mS/cm)')
- html_file_path = f"{url}/阳离子交换量与交换性盐基总量相关性散点图.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/阳离子交换量与交换性盐基总量相关性散点图.png")
- # cationExchangeCapacity('')
- # 17.交换性盐基:二者之差 交换性盐基总量cmol(+)/kg 交换性钙镁钠钾之和 区分ph>7.5 和ph值<7.5
- def changeCation(data,url):
- hightData = data[data['pH'] > 7.5]
- lowData = data[data['pH'] <= 7.5]
- hightData = hightData.apply(pd.to_numeric, errors="coerce")
- lowData = lowData.apply(pd.to_numeric, errors="coerce")
- x_h = np.arange(0, len(hightData['交换性盐基总量']), 1)
- y_h = hightData['交换性盐基总量'].sort_values()
- y1_h = (hightData['交换性钙'] + hightData['交换性镁'] + hightData['交换性钾'] + hightData['交换性钠']).sort_values()
- y2_h = (y_h-y1_h).sort_values()
- # 绘图
- getInteractiveImg(x_h, y_h, '交换性盐基总量', x_h, y1_h, '钙镁钾钠之和', x_h, y2_h, '交换性盐基总量与钙镁钾钠和之差', url,
- '交换性盐基总量与交换性盐相关关系(pH大于7.5)', '样品序号', 'cmol/kg', data['原样品编号'])
- x_l = np.arange(0, len(lowData['交换性盐基总量']), 1)
- y_l = lowData['交换性盐基总量'].sort_values()
- y1_l = (lowData['交换性钙'] + lowData['交换性镁'] + lowData['交换性钾'] + lowData['交换性钠']).sort_values()
- y2_l = (y_l - y1_l).sort_values()
- getInteractiveImg(x_l, y_l, '交换性盐基总量', x_l, y1_l, '钙镁钾钠之和', x_l, y2_l,
- '交换性盐基总量与钙镁钾钠和之差', url,
- '交换性盐基总量与交换性盐相关关系(pH小于等于7.5)', '样品序号', 'cmol/kg', data['原样品编号'])
- # 18.水溶性盐总量、电导率、离子总量全盐量分布图,全盐量和电导率相关性分析,水溶性盐与离子总量关系
- def manyTypes(data,url):
- data = data.dropna(subset=['全盐量', '电导率'])
- print(3.41)
- # 全盐量分布图
- x = np.arange(0, len(data['全盐量']), 1)
- y = data['全盐量'].sort_values()
- getInteractiveImg(x, y, '全盐量量', [], [], '', [], [],
- '', url,
- '全盐量分布图', '样品序号', '全盐量(g/kg)', data['原样品编号'])
- # 电导率分布图
- # x1 = np.arange(0, len(data['电导率']), 1)
- # y1 = data['电导率'].sort_values()
- # getInteractiveImg(x1, y1, '电导率', [], [], '', [], [],
- # '', './img/审核报告图形',
- # '电导率分布图', '样品序号', '电导率(mS/cm)', data['原样品编号'], 'fileUrl',
- # 'loc')
- print(3.42)
- x1 = data['全盐量'].dropna()
- y1 = data['电导率'].dropna()
- # plt.scatter(x, y)
- fig = go.Figure(data=go.Scatter(
- x=x1,
- y=y1,
- text=data['原样品编号'].to_numpy(),
- mode='markers', name='全盐量与电导率相关关系',
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- print(3.43)
- # 使用sklearn的LinearRegression进行最小二乘法拟合
- model = LinearRegression()
- model.fit(x1.to_numpy().reshape(-1, 1), y1)
- # 计算拟合直线的斜率和截距
- slope = model.coef_[0]
- intercept = model.intercept_
- r, _ = np.corrcoef(y1, slope * x1 + intercept)
- # 绘制拟合直线
- fig.add_trace(go.Scatter(x=x1, y=slope * x1 + intercept, mode='lines', name='拟合直线'))
- # plt.plot(x, slope * x + intercept, color='red', label='拟合直线', linewidth=2)
- # 设置图表布局
- fig.update_layout(
- title={
- 'text':f"全盐量与电导率相关关系图,y={round(slope, 2)}x + {round(intercept, 2)},R²={round(r[1], 2) ** 2},R={round(r[1], 2)}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title='全盐量(g/kg)',
- yaxis_title='电导率(mS/cm)')
- html_file_path = f"{url}/全盐量与电导率相关性散点图.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/全盐量与电导率相关性散点图.png")
- # 离子总量 全盐量及差值
- filterData = data.dropna(subset=['全盐量', '离子总量'])
- x2 = np.arange(0, len(filterData['离子总量']), 1)
- y2 = filterData['离子总量'].sort_values()
- y3 = filterData['全盐量'].sort_values()
- y4 = (y2-y3).sort_values()
- getInteractiveImg(x2, y2, '离子总量', x2, y3, '全盐量', x2, y4,
- '离子总量与全盐量之差', url,
- '离子总量与全盐量关系图', '样品序号', '离子总量/全盐量(g/kg)', data['原样品编号'])
- # 绘制水稳数据折线图
- def makeWaterImg(data,url,name):
- x = ['水稳>5mm(%)', '水稳3mm~5mm(%)','水稳2mm~3mm(%)','水稳1mm~2mm(%)','水稳0.5mm~1mm(%)','水稳0.25mm~0.5mm(%)']
- fig = go.Figure()
- for item, row in data.iterrows():
- fig.add_trace(go.Scatter(x=x, y=[row['水稳>5mm(%)'],row['水稳3mm~5mm(%)'], row['水稳2mm~3mm(%)'], row['水稳1mm~2mm(%)'],row['水稳0.5mm~1mm(%)'], row['水稳0.25mm~0.5mm(%)']],
- name='水稳性大团聚体'))
- # 设置图表布局
- fig.update_layout(title=f"{row['原样品编号']}水稳性大团聚体指标折线图", xaxis_title='样品数量', yaxis_title='指标数值')
- # 保存图表到本地HTML文件
- html_file_path = f"{url}/{name}-{row['原样品编号']}.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/{name}--{row['原样品编号']}.png")
- def makePlotImg(nameArr,data,url,name):
- print('nameArr--', nameArr)
- print('errData--', data)
- x = np.arange(1,len(data), 1)
- fig = go.Figure()
- for item in nameArr:
- # 添加第一条折线
- fig.add_trace(go.Scatter(x=x, y=data[item], name=item, text=data['原样品编号'].to_numpy()))
- # 设置图表布局
- fig.update_layout(title='有效态元素异常指标折线图', xaxis_title='样品数量', yaxis_title='指标数值')
- # 在线显示图表
- # fig.show()
- # 保存图表到本地HTML文件
- html_file_path = f"{url}/{name}.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/{name}.png")
- # 判断母岩类型
- def getRockType(value):
- print('value---', value)
- type = ''
- if value.bool():
- # 花岗岩、闪长岩、玄武岩、片麻岩
- if ('黄岗岩' in value) or ('闪长岩' in value) or ('玄武岩' in value) or ('片麻岩' in value):
- type = 'first'
- # 页岩、泥岩和石灰岩
- if ('黄岗岩' in value) or ('闪长岩' in value) or ('玄武岩' in value):
- type = 'sec'
- return type
- # 处理数据 将同一层数据归类放置
- def orderData(data):
- # 取原样品编号 按照去掉最后一位的编号 搜索编号相同的数据放在一起 筛选出的数据按照最后一位升序排序
- data['原样品编号分割_1'] = data['原样品编号'].astype(str).str[:-1]
- data['剖面层数'] = data['原样品编号'].astype(str).str[-1]
- # 按照 原样品编号分割_1 列分组
- grouped = data.groupby('原样品编号分割_1')
- newData = pd.DataFrame({})
- effectiveStateIonsData = pd.DataFrame({})
- print('o1')
- errName = []
- for name, group in grouped:
- # 按照 剖面层数排序
- group = group.sort_values(by='剖面层数')
- # 按照规则验证
- phErr = ''
- kErr = ''
- pErr = ''
- allGongErr = ''
- allShenErr = ''
- allPbErr = ''
- allGeErr = ''
- allGe2Err = ''
- allNieErr = ''
- errList_C = ''
- errList_S = '' # 全硫
- errList_P = ''
- errList_K = ''
- errList_Cu = ''
- errList_Zn = ''
- errList_Fe = ''
- errList_Mn = ''
- errList_Mu = ''
- errList_Pe = ''
- errList_Cation = ''
- errList_all_cation = ''
- errList_all_Fe = ''
- allGui = '' # 全硅 递增
- changeK = '' # 交换性钾
- allCa = '' # 全钙
- allMg = '' # 全镁
- # pH、全钾、全磷(沉积岩)自上而下递增,违背则存疑
- is_increasing_ph = group['pH'].is_monotonic_increasing
- is_increasing_k = group['全钾'].is_monotonic_increasing
- is_increasing_p = group['全磷'].is_monotonic_increasing
- is_increasing_Gui = group['全硅'].is_monotonic_increasing
- is_increasing_Ca = group['全钙'].is_monotonic_increasing
- is_increasing_Mg = group['全镁'].is_monotonic_increasing
- is_increasing_changeK = group['交换性钾'].is_monotonic_increasing
- print('o2')
- # 交换性钾(母岩为花岗岩) 全钙、全镁(母岩为石灰岩) 全硅
- if not is_increasing_ph:
- phErr = 'pH:非自上而下递增。'
- if not is_increasing_k:
- kErr = '全钾:非自上而下递增。'
- if not is_increasing_p:
- pErr = '全磷:非自上而下递增。'
- if not is_increasing_changeK and '花岗岩' in group['母岩']:
- changeK = '交换性钾:母岩为花岗岩,非自上而下递增。'
- if not is_increasing_Gui:
- allGui = '全硅:非自上而下递增。'
- if not is_increasing_Ca and '石灰岩' in group['母岩']:
- allCa = '全钙:母岩为石灰岩,非自上而下递增。'
- if not is_increasing_Mg and '石灰岩' in group['母岩']:
- allMg = '全镁:母岩为石灰岩,非自上而下递增。'
- # 自上而下递减:有机质,全氮,有效磷、有效钾、有效铜、有效锌、有效铁、有效锰、有效钼、有效硼、阳离子交换量,违背则存疑。
- is_descending_C = group['有机质'].sort_values(ascending=False).equals(group['有机质'])
- is_descending_cation = group['离子总量'].sort_values(ascending=False).equals(group['离子总量'])
- is_descending_P = group['有效磷'].sort_values(ascending=False).equals(group['有效磷'])
- is_descending_K = group['速效钾'].sort_values(ascending=False).equals(group['速效钾'])
- is_descending_Cu = group['有效铜'].sort_values(ascending=False).equals(group['有效铜'])
- is_descending_Zn = group['有效锌'].sort_values(ascending=False).equals(group['有效锌'])
- is_descending_Fe = group['有效铁'].sort_values(ascending=False).equals(group['有效铁'])
- is_descending_Mn = group['有效锰'].sort_values(ascending=False).equals(group['有效锰'])
- is_descending_Mu = group['有效钼'].sort_values(ascending=False).equals(group['有效钼'])
- is_descending_Pe = group['有效硼'].sort_values(ascending=False).equals(group['有效硼'])
- is_descending_S = group['全硫'].sort_values(ascending=False).equals(group['全硫'])
- is_descending_Cation = group['阳离子交换量'].sort_values(ascending=False).equals(group['阳离子交换量'])
- is_descending_all_Fe = group['全铁'].sort_values(ascending=False).equals(group['全铁'])
- print('o3')
- if not is_descending_C:
- errList_C = '有机质:非自上而下递减。'
- if not is_descending_cation:
- errList_all_cation = '离子总量: 非自上而下递减。'
- if not is_descending_S:
- errList_S = '全硫:非自上而下递减。'
- errName.append('全硫')
- if not is_descending_P:
- errList_P = '有效磷:非自上而下递减。'
- errName.append('有效磷')
- if not is_descending_K:
- errList_K = '速效钾:非自上而下递减。'
- errName.append('速效钾')
- if not is_descending_Cu:
- errList_Cu = '有效铜:非自上而下递减。'
- errName.append('有效铜')
- if not is_descending_Zn:
- errList_Zn = '有效锌:非自上而下递减。'
- errName.append('有效锌')
- if not is_descending_Fe:
- errList_Fe = '有效铁:非自上而下递减。'
- errName.append('有效铁')
- if not is_descending_Mn:
- errList_Mn = '有效锰:非自上而下递减。'
- errName.append('有效锰')
- if not is_descending_Mu:
- errList_Mu = '有效钼:非自上而下递减。'
- errName.append('有效钼')
- if not is_descending_Pe:
- errList_Pe = '有效硼:非自上而下递减。'
- errName.append('有效硼')
- if not is_descending_Cation:
- errList_Cation = '阳离子交换量:非自上而下递减。'
- if not is_descending_all_Fe:
- errList_all_Fe = '全铁:非自上而下递减。'
- print('o4')
- # 判断此组数据有效态元素是否存疑 存疑则放入异常数据组中,进行折线图绘制
- errStr = f"{errList_S}{errList_P}{errList_K}{errList_Cu}{errList_Zn}{errList_Fe}{errList_Mn}{errList_Mu}{errList_Pe}{errList_Cation}{errList_all_cation}{errList_all_Fe}"
- if errStr != '':
- effectiveStateIonsData = effectiveStateIonsData._append(group)
- # 重金属六项指标总汞、总砷、总铅、总镉、总铬、总镍每个样点从上到下总体上呈现递增的趋势,但表层(即剖面第一层)可能受环境影响而表现为偏高,软件仍然根据规则提出存疑,交给专家判断合理性。
- is_increasing_Gong = group['总汞'].is_monotonic_increasing
- is_increasing_Shen = group['总砷'].is_monotonic_increasing
- is_increasing_pb = group['总铅'].is_monotonic_increasing
- is_increasing_Ge = group['总镉'].is_monotonic_increasing
- is_increasing_Ge2 = group['总铬'].is_monotonic_increasing
- is_increasing_Nie = group['总镍'].is_monotonic_increasing
- if not is_increasing_Gong:
- allGongErr = '总汞:非自上而下递增。'
- if not is_increasing_Shen:
- allShenErr = '总砷:非自上而下递增。'
- if not is_increasing_pb:
- allPbErr = '总铅:非自上而下递增。'
- if not is_increasing_Ge:
- allGeErr = '总镉:非自上而下递增。'
- if not is_increasing_Ge2:
- allGe2Err = '总铬:非自上而下递增。'
- if not is_increasing_Nie:
- allNieErr = '总镍:非自上而下递增。'
- print('o5')
- # 把规则验证结果拼接到审核结果一列
- res = (f"{phErr}{kErr}{pErr}{allGongErr}{allShenErr}{allPbErr}{allGeErr}{allGe2Err}{allNieErr}{errList_C}{errList_S}{errList_P}{errList_K}{errList_Cu}{errList_Zn}{errList_Fe}{errList_Mn}{errList_Mu}"
- f"{errList_Pe}{errList_Cation}{errList_Cation}{errList_all_cation}{errList_all_Fe}{allGui}{changeK}{allCa}{allMg}")
- finalRes = [res for _ in range(len(group))]
- group['审核结果'] = finalRes
- # 审核输出结果中应给出存疑采样点的存疑有效态元素指标的折线图,并在审核报告中体现
- # 所有全量指标(前面有全字的指标),均应大于相同元素的有效态指标,例如全硫应大于有效硫,诸如此类。
- allErr_S = []
- allErr_Gui = []
- allErr_Ca = []
- allErr_Mg = []
- allErr_Al = []
- allErr_Fe = []
- allErr_Meng = []
- allErr_Cu = []
- allErr_Zn = []
- allErr_Peng = []
- allErr_Mu = []
- allErr_P = []
- lastFloorErr = []
- for index, row in group.iterrows():
- print('o51')
- if row['全硫'] > row['有效硫']:
- allErr_S.append('')
- else:
- allErr_S.append('存疑:全硫不大于有效硫。')
- if row['全硅'] > row['有效硅']:
- allErr_Gui.append('')
- else:
- allErr_Gui.append('存疑:全硅不大于有效硅。')
- if row['全铁'] > row['有效铁']:
- allErr_Fe.append('')
- else:
- allErr_Fe.append('存疑:全铁不大于有效铁。')
- if row['全锰'] > row['有效锰']:
- allErr_Meng.append('')
- else:
- allErr_Meng.append('存疑:全锰不大于有效锰。')
- if row['全铜'] > row['有效铜']:
- allErr_Cu.append('')
- else:
- allErr_Cu.append('存疑:全铜不大于有效铜。')
- if row['全锌'] > row['有效锌']:
- allErr_Zn.append('')
- else:
- allErr_Zn.append('存疑:全锌不大于有效锌。')
- print('o52')
- if row['全硼'] > row['有效硼']:
- allErr_Peng.append('')
- else:
- allErr_Peng.append('存疑:全硼不大于有效硼。')
- if row['全钼'] > row['有效钼']:
- allErr_Mu.append('')
- else:
- allErr_Mu.append('存疑:全钼不大于有效钼。')
- if row['全磷'] > row['有效磷']:
- allErr_P.append('')
- else:
- allErr_P.append('存疑:全磷不大于有效磷。')
- print('o53')
- # 对每一组最后一层数据进行审核 审核结果写入审核结果一列 无误写入空,有误写入错误原因
- if index+1 < len(group):
- lastFloorErr.append('')
- if index+1 == len(group):
- errStr = ''
- if not pd.isna(row['母岩']):
- if (getRockType(row['母岩']) == 'first' and (row['全硫'] < 0.3 or row['全硫'] > 10)) or (getRockType(row['母岩']) == 'sec' and (row['全硫'] < 0.5 or row['全硫'] > 15)):
- errStr += '全硫:底层全硫超阈值。'
- if (getRockType(row['母岩']) == 'sec' and (row['全硅'] < 15 or row['全硅'] > 30)) or (getRockType(row['母岩']) == 'first' and (row['全硅'] < 18 or row['全硅'] > 30)):
- errStr += '全硅:底层全硅超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全钙'] < 1.5 or row['全钙'] > 2.5)) or (getRockType(row['母岩']) == 'sec' and (row['全钙'] < 3 or row['全钙'] > 6)):
- errStr += '全钙:底层全钙超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全镁'] < 0.5 or row['全镁'] > 1.5)) or (getRockType(row['母岩']) == 'sec' and (row['全镁'] < 0.5 or row['全镁'] > 2.0)):
- errStr += '全镁:底层全镁超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全铝'] < 8.5 or row['全铝'] > 12)) or (getRockType(row['母岩']) == 'sec' and (row['全铝'] < 8 or row['全铝'] > 10)):
- errStr += '全铝:底层全铝超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全锰'] < 100 or row['全锰'] > 3200)) or (getRockType(row['母岩']) == 'sec' and (row['全锰'] < 500 or row['全锰'] >1500)):
- errStr += '全锰:底层全锰超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全铜'] < 30 or row['全铜'] > 70)) or (getRockType(row['母岩']) == 'sec' and (row['全铜'] < 30 or row['全铜'] >50)):
- errStr += '全铜:底层全铜超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全锌'] < 10 or row['全锌'] > 150)) or (getRockType(row['母岩']) == 'sec' and (row['全锌'] < 110 or row['全锌'] > 120)):
- errStr += '全锌:底层全锌超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全硼'] < 50 or row['全硼'] > 60)) or (getRockType(row['母岩']) == 'sec' and (row['全硼'] < 80 or row['全硼'] > 120)):
- errStr += '全硼:底层全硼超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['碳酸钙'] < 0.1 or row['碳酸钙'] > 10)) or (getRockType(row['母岩']) == 'sec' and (row['碳酸钙'] < 1 or row['碳酸钙'] > 35)):
- errStr += '碳酸钙:底层碳酸钙超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['pH'] < 3.5 or row['pH'] > 5)) or (getRockType(row['母岩']) == 'sec' and (row['pH'] < 5.5 or row['pH'] > 9)):
- errStr += 'pH:底层pH超阈值。'
- print('o54')
- if (getRockType(row['母岩']) == 'first' and (row['全磷'] < 0.12 or row['全磷'] > 0.52)):
- errStr += '全磷:底层全磷超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['全钾'] < 27 or row['全钾'] > 35)) or (getRockType(row['母岩']) == 'sec' and (row['全钾'] < 13 or row['全钾'] > 25)):
- errStr += '全钾:底层全钾超阈值。'
- if row['全铁']> 4 or row['全铁']< 3:
- errStr += '全铁:底层全铁超阈值。'
- if row['游离铁']>35 or row['游离铁']< 1:
- errStr += '游离铁:底层游离铁超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['总汞'] > 0.1 or row['总汞'] < 0.16)) or (getRockType(row['母岩']) == 'sec' and (row['总汞'] > 0.11 or row['总汞'] < 0.3)):
- errStr += '总汞:底层总汞超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['总镉'] < 0.12 or row['总镉'] > 0.15)) or (
- getRockType(row['母岩']) == 'sec' and (row['总镉'] < 0.1 or row['总镉'] > 0.2)):
- errStr += '总镉:底层总镉超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['总砷'] < 10 or row['总砷'] > 25)) or (getRockType(row['母岩']) == 'sec' and (row['总砷'] < 20 or row['总砷'] > 30)):
- errStr += '总砷:底层总砷超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['总铅'] < 30 or row['总铅'] > 55)) or (getRockType(row['母岩']) == 'sec' and (row['总铅'] < 20 or row['总铅'] > 70)):
- errStr += '总铅:底层总铅超阈值。'
- if (getRockType(row['母岩']) == 'first' and (row['总镍'] < 30 or row['总镍'] > 120)) or (getRockType(row['母岩']) == 'sec' and (row['总镍'] < 40 or row['总镍'] > 80)):
- errStr += '总镍:底层总镍超阈值。'
- lastFloorErr.append(errStr)
- checkRes = pd.Series(allErr_S) + pd.Series(allErr_Gui) + pd.Series(allErr_Fe) + pd.Series(
- allErr_Meng) + pd.Series(allErr_Cu) + pd.Series(allErr_Zn) + pd.Series(allErr_Peng) + pd.Series(allErr_Mu) + pd.Series(lastFloorErr)
- group['审核结果'] = group['审核结果'] + checkRes
- print('o6')
- # 合并成新的数据
- newData = newData._append(group)
- del newData['原样品编号分割_1']
- del newData['剖面层数']
- return {
- 'allData': newData,
- 'errName': np.unique(errName),
- 'errData': effectiveStateIonsData
- }
|