- import pandas as pd
- import numpy as np
- import time
- import public as pb
- import openpyxl
- import matplotlib.pyplot as plt
- from scipy.stats import spearmanr
- # import tkinter as tk
- # from tkinter import ttk
- from tkinter import filedialog
- from tkinter import messagebox
- from openpyxl import load_workbook
- from openpyxl.drawing.image import Image as OImage
- import os
- import re
- import ttkbootstrap as ttk
- from ttkbootstrap.constants import *
- from PIL import Image, ImageTk
- from ttkbootstrap.dialogs import Messagebox
- import plotly.graph_objects as go
- import plotly.io as pio
- from sklearn.linear_model import LinearRegression
- from openpyxl.worksheet.hyperlink import Hyperlink
- from scipy.ndimage import gaussian_filter1d
- from scipy.stats import norm
- from plotly.subplots import make_subplots
- import docx
- from docx import Document
- from docx.shared import Inches
- from docx.oxml import OxmlElement, ns
- from docx.shared import Pt, RGBColor
- from docx.oxml.ns import nsdecls, nsmap
- from docx.oxml import parse_xml
- from docx.enum.dml import MSO_THEME_COLOR_INDEX
- from docx.opc.constants import RELATIONSHIP_TYPE as RT
- from docx.enum.table import WD_TABLE_ALIGNMENT, WD_CELL_VERTICAL_ALIGNMENT
- from docx.oxml.ns import qn
- from docx.enum.text import WD_ALIGN_PARAGRAPH
- # 显示所有数据
- pd.set_option('display.width', 10000) # 设置字符显示宽度
- pd.set_option('display.max_rows', None) # 设置显示最大行
- pd.set_option('display.max_columns', None) # 设置显示最大列,None为显示所有列
- # 设置字体:Times New Roman / 微软雅黑,加粗
- plt.rcParams['font.family'] = ['Times New Roman', 'Microsoft YaHei']
- # 设置字体加粗
- font = {'weight': 'bold'}
- plt.rc('font', **font) # 应用字体设置
- indexClassificationList = {
- '物理指标': ['pH', '土壤质地', '土壤容重1(g/cm³)', '土壤容重2(g/cm³)', '土壤容重3(g/cm³)', '土壤容重4(g/cm³)', '土壤容重平均值(g/cm³)',
- '2~0.2mm颗粒含量', '0.2~0.02mm颗粒含量', '0.02~0.002mm颗粒含量', '0.002mm以下颗粒含量', '水稳>5mm(%)', '水稳3mm~5mm(%)',
- '水稳2mm~3mm(%)', '水稳1mm~2mm(%)', '水稳0.5mm~1mm(%)', '水稳0.25mm~0.5mm(%)', '水稳性大团聚体总和(%)', '洗失量(吸管法需填)', '风干试样含水量(分析基)'],
- '常规养分指标': ['有机质', '全氮', '全磷', '全钾', '有效磷', '速效钾', '有效硫', '有效硼', '有效铁', '有效锰', '有效铜', '有效锌', '有效钼', '有效硅', '缓效钾'],
- '一般化学性指标': ['阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾', '全盐量', '电导率',
- '水溶性Na⁺含量', '水溶性K⁺含量', '水溶性Ca²⁺含量', '水溶性Mg²⁺含量', '水溶性Cl⁻含量', '水溶性CO₃²⁻含量','水溶性HCO₃⁻含量',
- '水溶性SO₄²⁻含量', '离子总量', '碳酸钙',
- '游离铁', '全硫', '全锰', '全锌', '全铜', '全钼', '全硼', '全硒', '全铝', '全硅', '全铁', '全钙', '全镁'],
- '重金属指标': ['总汞', '总砷', '总铅', '总镉', '总铬', '总镍']
- }
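- # 说明:上表将各指标列名按"物理/常规养分/一般化学性/重金属"分组;
- # 列名需与检测数据表头完全一致(含单位后缀),否则后续按名称匹配的逻辑不会命中。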
- # 可交互绘图函数
- def getInteractiveImg(x,y,label,x1,y1,label1,x2,y2,label2,url,name,xLabel,YLabel,numArr):
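- # 参数说明(按下方用法整理):x/y 为主序列,label 为其图例名;
- # x1/y1/label1、x2/y2/label2 为可选的第二、三组散点(传空列表与空字符串则跳过);
- # url 为输出目录,name 为图表标题兼文件名,xLabel/YLabel 为坐标轴标题,
- # numArr 为悬停文本用的样品编号 Series(内部调用 .to_numpy())。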
- # coef, p_value = spearmanr(x, y)
- # 绘制数据散点图
- fig = go.Figure(data=go.Scatter(
- x=x,
- y=y,
- text=numArr.to_numpy(),
- mode='markers', name=label,
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- # 设置图表布局
- fig.update_layout(
- title={
- 'text': f"{name}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title=xLabel,
- yaxis_title=YLabel)
- if label == 'pH':
- ph_y_f = [7.5 for _ in x]
- ph_y_s = [8.0 for _ in x]
- fig.add_trace(go.Scatter(x=x, y=ph_y_f, mode='lines', name='pH = 7.5', line=dict(
- width=1.5,
- color='#339933',
- dash='dash' # 设置虚线样式
- )))
- fig.add_trace(go.Scatter(x=x, y=ph_y_s, mode='lines', name='pH = 8.0', line=dict(
- width=2.5,
- color='#339933',
- dash='dash' # 设置虚线样式
- )))
- if len(x1) > 0 and len(y1) > 0:
- fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name=label1, text=numArr.to_numpy(),
- marker=dict(
- size=4, # 点的大小
- color='red', # 点的颜色
- symbol='hourglass'
- )))
- if len(x2) > 0 and len(y2) > 0:
- fig.add_trace(go.Scatter(x=x2, y=y2, mode='markers', name=label2, text=numArr.to_numpy(),
- marker=dict(
- size=4, # 点的大小
- color='green', # 点的颜色
- symbol='triangle-up' # 点的形状,这里设置为上三角
- )))
- # model = LinearRegression()
- # model.fit(x.to_numpy().reshape(-1, 1), y) # 用x的平方作为特征值
- # y_pred = model.predict(x.to_numpy().reshape(-1, 1))
- # fig.add_trace(go.Scatter(x=x, y=y_pred, mode='lines', name='拟合直线'))
- html_file_path = f"{url}/{name}.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- try:
- print(pio.kaleido.scope.plotlyjs)
- print(f"图片将保存至 " + f"{url}/{name}.png")
- fig.write_image(f"{url}/{name}.png",scale=3, width=800, height=600)
- print(f"图片已成功保存至 "+f"{url}/{name}.png")
- except Exception as e:
- print(f"保存图片时出现错误: {str(e)}")
- # 在文档中中插入html
- # workbook = load_workbook(filename=fileUrl)
- # # 选择一个工作表
- # ws = workbook[sheetName]
- # # 将 HTML 内容作为富文本写入单元格
- # ws[loc] = '=HYPERLINK("file:///{0}","点击查看统计图")'.format(html_file_path)
- # workbook.save(fileUrl)
- # 循环确定数据属于哪一个类型
- def getDataType(data, typeDict):
- # 原样品编号的第7~10位为地类编码(如0101),据此归入 typeDict 中对应的土地类型
- for item in typeDict:
- if str(str(data['原样品编号'])[6:10]) in typeDict[item]:
- data['土地类型归类'] = item
- return data
- # 1. 统计样品数量 分类统计:耕地:0101 0102 0103 园地:0201 0202 0203 0204 林地:0301 0302 0303 0304 0305 0306 0307 草地:0401 0402 0403 0404
- def getSimpleNum(data):
- """
- :param data: 样品数据,需包含"原样品编号"、"样品编号"等列
- :return: dict,'sData' 为各土地类型的样品计数,'allData' 为附表明细
- """
- typeList = {
- '耕地园地': ['0101', '0102', '0103', '0201', '0202', '0203', '0204'],
- '林地草地': ['0301', '0302', '0303', '0304', '0305', '0306', '0307', '0401', '0402', '0403', '0404'],
- '商服用地': ['0501', '0502', '0503', '0504', '0505', '0506', '0507'],
- '工矿仓储用地': ['0601', '0602', '0603', '0604'],
- '住宅用地': ['0701', '0702'],
- '公共管理与公共服务用地': ['0801', '0802', '0803', '0804', '0805', '0806', '0807', '0808', '0809', '0810'],
- '特殊用地': ['0901', '0902', '0903', '0904', '0905', '0906'],
- '交通运输用地': ['1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009'],
- '水域及水利设施用地': ['1101', '1102', '1103', '1104', '1105', '1106', '1107', '1108', '1109', '1110'],
- '其他土地': ['1201', '1202', '1203', '1204', '1205', '1206', '1207']
- }
- # 根据耕地园地、林地草地分类数据,统计不同类型数据数量,生成表格
- newData = pd.DataFrame({})
- for index, row in data.iterrows():
- newRow = getDataType(row, typeList)
- newData =newData._append(newRow)
- # 按照土地类型归类 分类计数
- grouped_df = newData.groupby('土地类型归类')
- counts = grouped_df.size()
- resData = pd.DataFrame({})
- for group_name, group_data in grouped_df:
- # 附表:数量总体统计数据
- res = {
- '类型': group_name,
- '样品编号': group_data['样品编号'].to_list(),
- '数量': counts[group_name],
- '合计': counts.sum()
- }
- newRes = pd.DataFrame(res)
- resData = resData._append(newRes)
- return {
- 'sData': counts, # 统计数量
- 'allData': resData # 附表总数
- }
- # 2.小数修改:进行中
- def is_one_decimal_place(num): # 判断是否保留一位小数
- print('检测小数1', num, str(num).count('.') == 1 and len(str(num).split('.')[1]) == 1)
- return (str(num).count('.') == 1 and len(str(num).split('.')[1]) == 1)
- def has_two_decimals(num): # 判断是否保留了两位小数
- print('检测小数2', num, str(num).count('.') == 1 and len(str(num).split('.')[1]) == 2)
- return (str(num).count(".") == 1 and len(str(num).split(".")[1]) == 2)
- def three_decimal(num):
- # 不超过三位有效数字
- num = str(num)
- num = num.replace('.', '')
- numLen = 0
- stripped_s = num.lstrip('0')
- if stripped_s and stripped_s[0].isdigit():
- index = num.index(stripped_s[0]) # 加上去除的0的个数
- numLen = len(num[index:])
- # 字符串全为0或不包含数字时
- return numLen
- def is_less_than_three_decimals(number): # 不超过3位小数
- # 转换为字符串
- number_str = str(number)
- # 分离整数部分和小数部分
- integer_part, decimal_part = number_str.split('.') if '.' in number_str else (number_str, '')
- # 判断小数部分是否不超过三位
- return len(decimal_part) <= 3
- # 保留3位小数
- def is_three_decimal(number):
- number_str = str(number)
- # 分离整数部分和小数部分
- integer_part, decimal_part = number_str.split('.') if '.' in number_str else (number_str, '')
- # 判断小数部分是否不超过三位
- return len(decimal_part) == 3
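- # 行为示例(示意,基于上述字符串判断逻辑):
- # is_one_decimal_place('5.2')  -> True;  is_one_decimal_place(5.20) -> True(float 会丢掉末尾 0)
- # has_two_decimals('7.50')     -> True;  has_two_decimals(7.50)     -> False(str(7.50) == '7.5')
- # three_decimal('0.00123')     -> 3(忽略小数点与前导 0 后的位数,近似有效数字个数)
- # 因此若需校验末尾补零的修约位数,读取 Excel 时应以字符串形式保留原文。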
- def highlight_condition(s):
- if s['数据审核结果'] != '' and not pd.isna(s['数据审核结果']):
- return ['background-color: #99CC99']*len(s)
- else:
- return ['']*len(s)
- def filter_number(arr):
- """
- :param arr: 待转换的值或序列
- :return: 数值化结果,无法解析的值记为 NaN
- """
- return pd.to_numeric(arr, errors='coerce')
- def getNum(data, url):
- # 读取数据 处理每一项小数保留问题
- oneData = ['2~0.2mm颗粒含量','0.2~0.02mm颗粒含量','0.02~0.002mm颗粒含量','0.002mm以下颗粒含量', '洗失量(吸管法需填)',
- '水稳>5mm','水稳3mm~5mm','水稳2mm~3mm','水稳1mm~2mm','水稳0.5mm~1mm','水稳0.25mm~0.5mm','水稳性大团聚体总和'
- ]
- twoData = ['土壤容重1(g/cm³)','土壤容重2(g/cm³)','土壤容重3','土壤容重4','土壤容重平均值','pH']
- threeData = ['电导率','水溶性钠离子','水溶性钾离子','水溶性钙离子','水溶性镁离子','水溶性碳酸根','水溶性碳酸氢根','水溶性硫酸根','水溶性氯根','离子总量'] # 3位有效数字
- twoDataToThree = ['有机质','全氮','全磷','全钾','全硫','全硼','全硒','全铁'] #保留2位小数,最多不超过3位有效数字
- threeDataToThree = ['有效钼', '总汞']
- changeDataList = ['阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾'] # <10 2位小数 >=10 3位有效数字
- cationDataList = ['全锰', '全铜', '全锌', '速效钾', '缓效钾', '有效硅', '总铅', '总镉', '总铬', '总镍'] # <1000 2位小数 不超过3位有效数字 >=1000 保留整数
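- # 注意(补充说明):以上各列表按列名精确匹配表头;其中部分条目未带单位后缀
- # (如 '水稳>5mm' 与前文 '水稳>5mm(%)'、'土壤容重3' 与 '土壤容重3(g/cm³)'),
- # 若实际表头带单位,对应校验将不会触发,需按实际表头核对调整。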
- needData = data.iloc[:, 7:]
- if '土壤质地' in needData.columns:
- del needData['土壤质地']
- # needData = needData.apply(pd.to_numeric, errors='coerce')
- checkDataRes = []
- for index, row in needData.iterrows():
- str = ''
- for item in row.index:
- if item in oneData and (not pd.isna(row[item])) and not is_one_decimal_place(row[item]):
- str += f"{item}应保留一位小数。"
- if (item in twoData) and (not pd.isna(row[item])) and not has_two_decimals(row[item]):
- str += f"{item}应保留两位小数。"
- if item in threeData and (not pd.isna(row[item])) and three_decimal(row[item]) != 3:
- str += f"{item}应保留三位有效数字。"
- if item in twoDataToThree and (not pd.isna(row[item])) and (not has_two_decimals(row[item]) or three_decimal(row[item]) > 3):
- str += f"{item}应保留两位小数且不超过三位有效数字。"
- if item in threeDataToThree and (not pd.isna(row[item])) and (not is_less_than_three_decimals(row[item]) or three_decimal(row[item]) != 3 ):
- str += f"{item}应保留三位有效数字且不超过三位小数。"
- if item in changeDataList and (not pd.isna(row[item])) and (filter_number(row[item]) < 10) and not has_two_decimals(row[item]):
- str += f"{item}应保留两位小数。"
- if item in changeDataList and (not pd.isna(row[item])) and (filter_number(row[item]) >= 10) and three_decimal(row[item]) != 3:
- str += f"{item}应保留三位有效数字。"
- if item == '碳酸钙' and (not pd.isna(row[item])) and not isinstance(row[item], int):
- str += f"{item}应为整数。"
- if item == '全盐量' and (not pd.isna(row[item])) and not is_three_decimal(row[item]):
- str += f"{item}应保留3位小数。"
- if item in cationDataList and (not pd.isna(row[item])) and (filter_number(row[item]) < 1000) and (not has_two_decimals(row[item]) or three_decimal(row[item]) > 3):
- str += f"{item}保留2位小数且不超过三位有效数字。"
- if item in cationDataList and (not pd.isna(row[item])) and (filter_number(row[item]) >= 1000) and not isinstance(row[item], int):
- str += f"{item}应为整数。"
- checkDataRes.append(str)
- data['数据审核结果'] = checkDataRes
- # 对审核结果有问题的数据进行标绿
- resData = data.style.apply(highlight_condition, axis=1)
- # 数据写入表格
- with pd.ExcelWriter( f'{url}/数值修约审核.xlsx', engine='openpyxl') as writer:
- resData.to_excel(writer, index=False, sheet_name='数值修约')
- # getNum('')
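- # 调用示例(示意,文件路径为假设):
- # df = pd.read_excel('检测数据.xlsx')  # 假设的输入表,前7列为样品信息,其后为各指标列
- # getNum(df, './审核输出')            # 在该目录下生成 数值修约审核.xlsx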
- # 3.计算所有指标的频度信息 已完成
- def getFrequencyInformation(data, url):
- if '数据审核结果' in data.columns:
- del data['数据审核结果']
- if '母岩' in data.columns:
- del data['母岩']
- #统计样品数量 计算最大值 最小值 中位数 平均值 标准差
- needData = data.iloc[:, 7:]
- if '土壤质地' in needData.columns:
- del needData['土壤质地']
- needData = needData.apply(pd.to_numeric, errors="coerce")
- resData = pd.DataFrame({})
- for item in needData.columns:
- if item == '总砷':
- print(needData[item])
- min_value = needData[item].min() # 最小值
- max_value = needData[item].max() # 最大值
- median_value = needData[item].median() # 中位数
- mean_value = needData[item].mean() # 平均数
- std_value = needData[item].std() # 标准差
- resData[item] = [min_value, max_value, median_value, mean_value, std_value]
- index_value = ['最小值', '最大值', '中位数', '平均数', '标准差']
- # 汇总数据
- resData.index = index_value
- data_res = round(resData, 2)
- data_res = data_res.rename_axis('频度分析')
- # data_res = data_res.transpose()
- # data_res = data_res.reset_index()
- # data_res.columns = ['指标名称', '最小值', '最大值', '中位数', '平均数', '标准差']
- print('频度分析---', data_res)
- return data_res
- # 数据写入表格
- # with pd.ExcelWriter(f'{url}/频度信息.xlsx', engine='openpyxl') as writer:
- # data_res.to_excel(writer, sheet_name='频度信息')
- # getFrequencyInformation('')
- # 4. 绘制每个指标的累积频率图
- def getMap(data, title, unit, url):
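- # 参数说明:data 为单个指标的数值 Series;title 为指标名;unit 为单位字符串(用于横轴标题);
- # url 为输出目录。函数生成"{title}频度统计图"的 html 与 png 文件。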
- data = data.dropna()
- if len(data) != 0: # 指标无数据 不进行绘图
- min_val = data.min()
- max_val = data.max()
- # 计算直方图
- bins = np.linspace(min_val - 1, max_val + 1, 30, endpoint=True)
- hist, bin_edges = np.histogram(data, bins=bins)
- cumulative_hist = np.cumsum(hist)
- # 计算拟合曲线
- p1 = np.polyfit(bin_edges[1:], hist, 2)
- p2 = np.poly1d(p1)
- # 计算累计频率
- cumulative_freq = cumulative_hist / cumulative_hist.max()
- # fig, ax1 = plt.subplots()
- fig = make_subplots(specs=[[{"secondary_y": True}]])
- if title == 'pH':
- fig.add_trace(go.Histogram(x=data, name="频数", nbinsx = 17, xbins = dict(start=0, end=8.5, size=0.5)), secondary_y=True)
- else:
- fig.add_trace(go.Histogram(x=data, name="频数"), secondary_y=True)
- # 绘制直方图 color="#ffc798" #a4464b #b1a9ab
- # fig = go.Figure(data=[go.Histogram(x=data, name="频数")])
- # 设置标题和其他格式
- fig.update_layout(
- title={
- 'text': f"{title}统计图",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title=f"{title}{unit}",
- yaxis_title='频次',
- bargap=0.2, # 相邻柱形之间的间隙
- bargroupgap=0.1
- )
- # ax1.hist(data, bins=bins, rwidth=0.8, zorder=1, label="频数")
- # ax2 = ax1.twinx()
- # ax2.plot(bin_edges[1:], cumulative_freq, zorder=10, color="#a4464b", label="累积频率 ")
- fig.add_trace(go.Scatter(x=bin_edges[1:], y=cumulative_freq *100, mode='lines', name='累积频率',line=dict(width=2), yaxis='y2',
- marker=dict(
- color='red', # 点的颜色
- symbol='hourglass'
- )), secondary_y=False)
- # 绘制正态分布曲线
- # 估计正态分布参数
- mu = data.mean()
- sigma = data.std()
- # 创建正态分布对象
- dist = norm(mu, sigma)
- # 计算要绘制的x值
- x = np.linspace(bin_edges.min(), bin_edges.max(), 100)
- p = norm.pdf(x, mu, sigma)
- # ax3 = ax1.twinx()
- # # ax3.plot(bin_edges[1:], y_smoothed, label='拟合曲线', color="#333")
- # #label = 'N({:.2f}, {:.2f}^2)'.format(mu, sigma)
- # ax3.plot(x, dist.pdf(x), color="#333", label='拟合曲线')
- fig.add_trace(go.Scatter(x=x, y=p*100, mode='lines', yaxis='y2', name='拟合曲线', line=dict(width=2)), secondary_y=False)
- fig.update_layout(
- title={
- 'text': '指标频度统计图',
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- yaxis_title='累积频率(%)', # 设置Y1轴标签
- yaxis2_title='频数'
- )
- maxData = 110
- if p.max()*100 >100:
- maxData = p.max()*100
- fig.update_yaxes(range=[0, maxData], row=1, col=1, secondary_y=False)
- html_file_path = f"{url}/{title}频度统计图.html"
- html_file_path = html_file_path.replace('(g/cm³)','')
- html_file_path = html_file_path.replace('(%)', '')
- html_file_path = html_file_path.replace('>', '')
- pio.write_html(fig, file=html_file_path, auto_open=False)
- title = title.replace('(g/cm³)', '')
- title = title.replace('(%)', '')
- title = title.replace('>', '')
- pio.write_image(fig, f"{url}/{title}频度统计图.png")
- def getFrequencyImage(data, url):
- unitList = {
- 'g/cm3': ['土壤容重1(g/cm³)','土壤容重2(g/cm³)', '土壤容重3(g/cm³)', '土壤容重4(g/cm³)','土壤容重平均值(g/cm³)'],
- '%': ['土壤机械组成','全铝', '全硅', '全铁', '全钙', '全镁', '水稳>5mm(%)','水稳3mm~5mm(%)', '水稳2mm~3mm(%)', '水稳1mm~2mm(%)', '水稳0.5mm~1mm(%)', '水稳0.25mm~0.5mm(%)', '水稳性大团聚体总和(%)'],
- 'g/kg': ['有机质', '全氮', '全磷', '全钾', '水溶性盐总量', '离子总量', '碳酸钙', '游离铁', '全硫'],
- 'mg/kg': ['有效磷', '速效钾', '有效硫', '有效硼', '有效铁', '有效锰', '有效铜', '有效锌', '有效钼', '有效硅', '缓效钾', '全锰',
- '全锌', '全铜', '全钼', '全硼', '全硒', '总汞', '总砷', '总铅', '总镉', '总铬', '总镍'],
- 'cmol/kg': ['阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾']
- }
- # 绘制图形
- needData = data.iloc[:, 7:] #这里有可能需要修改
- if '土壤质地' in needData.columns:
- del needData['土壤质地']
- needData = needData.apply(pd.to_numeric, errors="coerce")
- for item in needData.columns:
- # newData = needData[item].dropna()
- if len(needData) > 0:
- label = ''
- for i in unitList:
- if item in unitList[i]:
- label = i
- getMap(needData[item], item, label, url)
- # 公共函数 判断指标完整性
- # def checkAllNames(data, list):
- # for item in list:
- # if data[item] != '未检测' and data[item] != '/': # 未检测的值可以忽略 空值
- # if pd.isna(data[item]): # 表示值缺失
- # 5. 检测指标完整性
- def getDataComplete(data):
- # 根据类型:耕地园地表层土壤样品,耕地园地剖面土壤样品,林地草地表层土壤样品,林地草地剖面土壤样品,水稳定性大团聚体样品
- # 耕地园地样品列表 有效硅(水田才有)
- cultivatedLandandfieldList = ['风干试样含水量(分析基)', '洗失量(吸管法需填)', '土壤质地', '土壤容重1(g/cm³)', '土壤容重2(g/cm³)', '土壤容重3(g/cm³)', '土壤容重4(g/cm³)', '土壤容重平均值(g/cm³)'
- , '2~0.2mm颗粒含量', '0.2~0.02mm颗粒含量', '0.02~0.002mm颗粒含量', '0.002mm以下颗粒含量'
- , '水稳>5mm(%)', '水稳3mm~5mm(%)','水稳2mm~3mm(%)', '水稳1mm~2mm(%)','水稳0.5mm~1mm(%)','水稳0.25mm~0.5mm(%)', '水稳性大团聚体总和(%)'
- , 'pH', '阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾',
- '全盐量', '电导率', '水溶性Na⁺含量', '水溶性K⁺含量', '水溶性Ca²⁺含量', '水溶性Mg²⁺含量', '水溶性Cl⁻含量', '水溶性CO₃²⁻含量',
- '水溶性HCO₃⁻含量', '水溶性SO₄²⁻含量', '离子总量', '有机质', '全氮', '全磷', '全钾', '有效磷', '速效钾', '缓效钾', '有效硫', '有效铁',
- '有效锰', '有效铜', '有效锌', '有效硼', '有效钼', '总汞', '总砷', '总铅', '总镉', '总铬', '总镍']
- # 林地草地样品列表
- woodlandandGrassList = ['风干试样含水量(分析基)', '洗失量(吸管法需填)', '土壤质地', '土壤容重1(g/cm³)', '土壤容重2(g/cm³)', '土壤容重3(g/cm³)', '土壤容重4(g/cm³)', '土壤容重平均值(g/cm³)', '2~0.2mm颗粒含量','0.2~0.02mm颗粒含量', '0.02~0.002mm颗粒含量', '0.002mm以下颗粒含量', 'pH', '阳离子交换量', '交换性盐基总量', '交换性钙', '交换性镁', '交换性钠', '交换性钾', '有机质',
- '全氮', '全磷', '全钾', '有效磷', '速效钾',]
- # 其他样品列表 剖面
- # '耕地': ['0101', '0102', '0103'],
- # '园地': ['0201', '0202', '0203', '0204'],
- # '林地': ['0301', '0302', '0303', '0304', '0305', '0306', '0307'],
- # '草地': ['0401', '0402', '0403', '0404'],
- # 根据土地利用类型 判断指标是否完整 不完整的数据提取出来 这里规则无法确定 先放着吧
- # 统计所有数据中 各指标的数量
- resData = getSimpleNum(data)
- yxg = 0
- for index, row in data.iterrows():
- if str(str(row['原样品编号'])[6:10]) == '0101':
- yxg += 1
- data = data.replace('未检测', np.nan)
- if '序号' in data.columns:
- del data['序号']
- if '原样品编号' in data.columns:
- del data['原样品编号']
- if '样品编号' in data.columns:
- del data['样品编号']
- if '地理位置' in data.columns:
- del data['地理位置']
- if '母质' in data.columns:
- del data['母质']
- if '土壤类型' in data.columns:
- del data['土壤类型']
- if '土地利用类型' in data.columns:
- del data['土地利用类型']
- # 根据指标排序
- counts = data.count()
- # 根据土地类型统计样本数
- countData = {
- '耕地园地': resData['sData']['耕地园地'] if '耕地园地' in resData['sData'].index else 0,
- '林地草地': resData['sData']['林地草地'] if '林地草地' in resData['sData'].index else 0,
- '其他': 0,
- '有效硅': yxg
- }
- needNumList = []
- for item in data.columns:
- # 判断指标是否属于耕地园地、林地草地数组
- if item == '有效硅':
- needNumList.append(countData['有效硅'])
- elif (item in cultivatedLandandfieldList) and (item in woodlandandGrassList):
- needNumList.append(countData['耕地园地'] + countData['林地草地'])
- elif (item in cultivatedLandandfieldList) and (item not in woodlandandGrassList):
- needNumList.append(countData['耕地园地'])
- elif (item not in cultivatedLandandfieldList) and (item in woodlandandGrassList):
- needNumList.append(countData['林地草地'])
- else:
- needNumList.append(0)
- counts = counts.to_frame()
- counts['应测数量'] = needNumList
- return counts
- # 6.指标名称与实际检测样品数量统计表,统计样品数量和指标检测数量
- def getCheckNum():
- # 统计每个指标的有值的数量,水溶性盐分总量大于1g/kg的 统计八大离子的检测量
- resData = ''
- return resData
- # 过滤特殊字符
- def filter_special_characters(s):
- s = str(s)
- return re.sub(r'[^a-zA-Z\u4e00-\u9fa5\d]', '', s)
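- # 示例:仅保留字母、汉字与数字,忽略空格、标点与破折号,
- # 如 filter_special_characters('NY/T 1121.2—2006 电位法') -> 'NYT112122006电位法',
- # 因此检测方法比对不受标点、空格等书写差异影响。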
- # 7.检测方法
- def checkMethod(data, url):
- # 各指标的检测标准方法
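- # 处理流程(概述):将每行填写的检测方法与下方标准方法字典经 filter_special_characters
- # 规整后比对;checkDBData 中的指标允许两种方法之一,checkPHData 中的指标按 pH 阈值选择方法;
- # 比对结果写入"数据审核结果"列并导出 检测方法审核结果.xlsx,最后返回存在问题的行。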
- checkData = {
- '土壤容重' : '《土壤检测 第4部分:土壤容重的测定》(NY/T 1121.4—2006) 环刀法',
- '机械组成' : '《土壤分析技术规范》(第二版), 5.1 吸管法',
- '土壤水稳性大团聚体': '《土壤检测 第19部分:土壤水稳定大团聚体组成的测定》(NY/T 1121.19—2008)筛分法',
- 'pH检测方法': '《土壤检测 第2部分:土壤pH的测定》(NY/T 1121.2—2006) 电位法',
- '水溶性盐类(水溶性盐总量、电导率、水溶性钠离子、钾离子、钙离子、镁离子、碳酸根、碳酸氢根、硫酸根、氯根)': '《森林土壤水溶性盐分分析》(LY/T 1251—1999)(浸提液中钙、镁、钾、钠离子的测定采用等离子体发射光谱法,硫酸根和碳酸根的测定增加离子色谱法) 质量法',
- '全氮': '《土壤检测 第24部分:土壤全氮的测定 自动定氮仪法》(NY/T 1121.24—2012) 自动定氮仪法',
- '全磷': '《森林土壤磷的测定》(LY/T 1232—2015)(详见本规范培训教材) 酸消解—电感耦合等离子体发射光谱法',
- '全钾': '《森林土壤钾的测定》(LY/T 1234—2015) 酸消解—电感耦合等离子体发射光谱法',
- '全铁': '《固体废物22种金属元素的测定》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '全锰': '《固体废物22种金属元素的测定》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '全钼': '《固体废物 金属元素的测定 电感耦合等离子体质谱法》(HJ 766—2015) 酸消解—电感耦合等离子体质谱法',
- '全铝': '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解-电感耦合等离子体发射光谱法',
- '全硅': '《土壤和沉积物 11种元素的测定 碱熔—电感耦合等离子体发射光谱法》(HJ 974—2018) 碱熔—电感耦合等离子体发射光谱法',
- '全钙': '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '全镁': '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法',
- '速效钾': '《土壤 速效钾和缓效钾含量的测定》(NY/T 889—2004) 乙酸铵浸提—火焰光度法',
- '缓效钾': '《土壤 速效钾和缓效钾含量的测定》(NY/T 889—2004) 热硝酸浸提—火焰光度法',
- '有效硅': '《土壤检测第15部分:土壤有效硅的测定》(NY/T 1121.15-2006) 柠檬酸浸提-硅钼蓝比色法',
- '有效硼': '土壤样品制备与检测技术规范培训教材 沸水提取-电感耦合等离子体发射光谱法',
- '有效钼': '《土壤检测 第9部分:土壤有效钼的测定》(NY/T 1121.9-2023) 草酸-草酸铵浸提-电感耦合等离子体质谱法',
- '游离铁': '《土壤分析技术规范》(第二版),19.1游离铁(Fed)的测定(DCB法) 连二亚硫酸钠-柠檬酸钠-重 碳酸提-邻菲罗啉比色法',
- '总砷': '《土壤质量 总汞、总砷、总铅的测定 原子荧光法第2部分:土壤中总砷的测定》(GB/T22105.2-2008) 原子荧光法',
- '总铅': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '总镉': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '总铬': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '总镍': '《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法'
- }
- checkDBData = {
- '有机质': ['《土壤检测 第6部分:土壤有机质的测定》(NY/T 1121.6—2006) 重铬酸钾氧化—容量法',
- '土壤中总碳和有机质的测定元素分析仪法(农业行业标准报批稿) 元素分析仪法'], # 两种方法都可以
- '全硫': ['《土壤检测 第2部分:土壤全硫的测定》(NY/T 1104—2006) 硝酸镁氧化-硫酸钡比浊法',
- '燃烧红外光谱法(本规范培训教材)'],
- '全硼': ['《土壤分析技术规范》(第二版),18.1土壤全硼的测定 碱熔-姜黄素-比色法',
- '《土壤分析技术规范》(第二版),18.1土壤全硼的测定 碱熔-等离子体发射光谱法'],
- '全铜': ['《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解-电感耦合等离子体质谱法',
- '《固体废物 22种金属元素的测定电感耦合等离子体发射光谱法》(HJ781-2016) 酸消解-电感耦合等离子体发射光谱法'],
- '全锌': ['《固体废物 金属元素的测定电感耦合等离子体质谱法》(HJ766-2015) 酸消解—电感耦合等离子体质谱法',
- '《固体废物 22种金属元素的测定 电感耦合等离子体发射光谱法》(HJ 781—2016) 酸消解—电感耦合等离子体发射光谱法'],
- '可交换酸度': ['《土壤分析技术规范》(第二版), 11.2 土壤交换性酸的测定 氯化钾交换—中和滴定法'] * 2, # ph<6;重复一次以兼容下方按 [0]/[1] 取值的比对逻辑
- '碳酸钙': ['《土壤分析技术规范》(第二版), 15.1 土壤碳酸盐的测定 气量法'] * 2, # ph>7;同上
- '总汞': ['《土壤质量 总汞、总砷、总铅的测定原子荧光法 第1部分:土壤中总汞的测定》(GB/T22105.1-2008) 原子荧光法',
- '《土壤和沉积物总汞的测定 催化热解/冷原子吸收分光光度法》(HJ923-2017) 催化热解-冷原子吸收分光光度法'],
- '有效铁': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- '有效锰': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- '有效铜': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- '有效锌': [
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-原子吸收分光光度法',
- '《土壤有效态锌、锰、铁、铜含量的测定 二乙三胺五乙酸(DTPA)浸提法》(NY/T 890—2004) DTPA 浸提-电感耦合等离子体发射光谱法'],
- }
- checkPHData = {
- '阳离子交换量': ['《土壤分析技术规范》(第二版), 12.2 乙酸铵交换法',
- '《土壤分析技术规范》(第二版), 12.1 EDTA—乙酸铵盐交换法'], # ph<=7.5 ph>7.5
- '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁': [
- '《土壤分析技术规范》(第二版), 13.1 酸性和中性土壤交换性盐基组分的测定(乙酸铵交换法)(交换性钙、镁、钾、钠离子的测定增加等离子体发射光谱法) 乙酸铵交换法等',
- '《石灰性土壤交换性盐基及盐基总量的测定》(NY/T 1615-2008)(交换液中钾、钠、钙、镁离子的测定增加等离子体发射光谱法) 氯化铵-乙醇交换法等'],
- # ph<=7.5 ph>7.5
- '有效磷': ['《土壤检测 第7部分:土壤有效磷的测定》(NY/T 1121.7—2014) 氟化铵-盐酸溶液浸提—钼锑抗比色法',
- '《土壤检测 第7部分:土壤有效磷的测定》(NY/T 1121.7—2014) 碳酸氢钠溶液—钼锑抗比色法'], # ph<6.5 ph>=6.5
- '有效硫': ['《土壤检测 第14部分:土壤有效硫的测定》(NY/T 1121.14—2023) 磷酸盐-乙酸溶液浸提-电感耦合等离子体发射光谱法',
- '《土壤检测 第14部分:土壤有效硫的测定》(NY/T 1121.14—2023) 氯化钙浸提—电感耦合等离子体发射光谱法'],
- # ph<7.5 ph>=7.5
- }
- checkDataKey = [key for key in checkData]
- checkDBDataKey = ['有机质', '全硫', '全硼', '全铜', '全锌', '可交换酸度', '碳酸钙', '总汞', '有效铁', '有效锰', '有效铜', '有效锌']
- # [key for key in checkDBData]
- checkPHDataKey = ['阳离子交换量', '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁', '有效磷', '有效硫']
- # [key for key in checkPHData]
- # print('list',checkDBDataKey)
- # print('list', checkPHDataKey)
- checkDataRes = []
- for index, row in data.iterrows():
- str = ''
- for item in row.index:
- if row[item] == '未检测':
- pass # 未检测项视为合理,跳过且不清空该行已累计的审核结果
- elif row[item] == '未填写' or pd.isna(row[item]):
- str += f"{item}未填写。"
- elif not pd.isna(row[item]) and (item in checkDataKey) and (not pd.isna(checkData[item])): # 指标不为空 且 在通用指标列表中
- if filter_special_characters(row[item]) != filter_special_characters(checkData[item]):
- str += f"{item}检测方法填报有误。"
- elif (not pd.isna(row[item]) and (item in checkDBDataKey)):
- if filter_special_characters(row[item]) != filter_special_characters(checkDBData[item][0]) and filter_special_characters(row[item]) != filter_special_characters(checkDBData[item][1]):
- str += f"{item}检测方法填报有误。"
- # 指标在两种方法指标列表中
- elif (not pd.isna(row[item]) and (item in checkPHDataKey)):
- # 指标在区分ph值列表中
- if item == '阳离子交换量' and row['pH'] <= 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '阳离子交换量' and row['pH'] > 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][1]):
- str += f"{item}检测方法填写有误。"
- if item == '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁' and row['pH'] <= 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '交换性盐基总量、交换性钾、交换性钠、交换性钙、交换性镁' and row['pH'] > 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][1]):
- str += f"{item}检测方法填写有误。"
- if item == '有效磷' and row['pH'] < 6.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '有效磷' and row['pH'] >= 6.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][1]):
- str += f"{item}检测方法填写有误。"
- if item == '有效硫' and row['pH'] < 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][0]):
- str += f"{item}检测方法填写有误。"
- if item == '有效硫' and row['pH'] >= 7.5:
- if filter_special_characters(row[item]) != filter_special_characters(checkPHData[item][1]):
- str += f"{item}检测方法填写有误。"
- checkDataRes.append(str)
- data['数据审核结果'] = checkDataRes
- resData = data.style.apply(highlight_condition, axis=1)
- # 数据写入表格
- with pd.ExcelWriter(f'{url}/检测方法审核结果.xlsx', engine='openpyxl') as writer:
- resData.to_excel(writer, index=False, sheet_name='检测方法审核')
- return data[data['数据审核结果'] != '']
- # 8.数据填报审核
- def dataReportResult(data, url):
- # 未检出项填报,空值填报,错误值处理
- # 检测值有*号、/、未检测、空值、数值 为合理情况,
- resData = []
- for index, row in data.iterrows():
- msg = ''
- for item in row.index:
- if row[item] == '-' or row[item] == 0:
- msg += f"{item}数据填报错误。" # 累计该行所有填报错误项
- resData.append(msg)
- data['数据审核结果'] = resData
- finData = data.style.apply(highlight_condition, axis=1)
- # 数据写入表格
- with pd.ExcelWriter(f'{url}/数据填报项审核结果.xlsx', engine='openpyxl') as writer:
- finData.to_excel(writer, index=False, sheet_name='数据填报审核')
- # 9.土壤质地类型判断 这个之前已有,将土壤质地类型不一致的样本数据提取出来
- # 使用resData数据,判断土壤类型和土壤类型判断是否一致,不一致提取出不一致数据写入表格
- # 10.土壤检测数据超阈值样品统计表,将之前统计的超阈值的数据提取出来,显示原因
- # def getOverLineData(data, url): # 所有阈值判断
- # 提取数据项,异常原因,增加一列外业保持空值
- # 提取每个表格审核结果不为空的数据,最后将所有数据合并
- # resData =
- # 数据写入表格
- # with pd.ExcelWriter(f'{url}/超阈值样品统计表.xlsx', engine='openpyxl') as writer:
- # resData.to_excel(writer, index=False, sheet_name='超阈值数据')
- # 11.ph值统计 频度计算、历年数据统计、ph异常数据提取
- def getPHData(data, url):
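- # 功能说明:统计 pH 的最小值/最大值/中位数/平均数/标准差,绘制 pH 分布图,
- # 并将超出 平均值±3倍标准差 的样品提取为异常数据,返回 {'异常数据', '频度分析'}。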
- resData = pd.DataFrame({})
- # 计算频度
- min_value = data['pH'].min() # 最小值
- max_value = data['pH'].max() # 最大值
- median_value = data['pH'].median() # 中位数
- mean_value = round(data['pH'].mean(),2) # 平均数
- std_value = round(data['pH'].std(), 2) # 标准差
- resData['PH'] = [min_value, max_value, median_value, mean_value, std_value]
- index_value = ['最小值', '最大值', '中位数', '平均数', '标准差']
- resData.index = index_value
- # 绘制分布图
- x = np.arange(0, len(data['pH']), 1) # 横坐标数据
- data = data.sort_values(by='pH', ascending=True)
- y = data['pH']
- getInteractiveImg(x, y, 'pH', [], [], '', [], [], '', url,
- 'pH值分布图', '样品序号', 'pH', data['原样品编号'])
- # 提取异常数据
- abnormalData = pd.DataFrame({})
- for index, row in data.iterrows():
- if not pd.isna(row['pH']) and (row['pH'] < mean_value-3*std_value or row['pH'] > mean_value+3*std_value):
- newRow = row[['原样品编号', '样品编号', '土地利用类型', 'pH']]
- soilType = ''
- if isinstance(row['土壤类型'], str):
- if len(row['土壤类型'].split('_')) > 1:
- soilType = row['土壤类型'].split('_')[1]
- newRow['土壤类型'] = soilType
- newRow['外业情况'] = ''
- abnormalData = abnormalData._append(newRow)
- return {
- '异常数据': abnormalData,
- '频度分析': resData
- }
- # resData写进文档表格
- # 异常数据写入表格
- # 生成也可以写入文档表格
- # with pd.ExcelWriter( './img/ph异常数据.xlsx', engine='openpyxl') as writer:
- # abnormalData.to_excel(writer, index=False, sheet_name='ph异常数据')
- # resData.to_excel(writer, sheet_name='频度分析')
- # 12.有机质和全氮 计算比值、绘制相关性图、提取异常数据形成表格
- def getNAndC(data, url):
- # 去掉nan的值
- data = data.dropna(subset=['有机质', '全氮'])
- # 绘制散点图 拟合直线 计算方差
- x = data['有机质']
- y = data['全氮']
- plt.scatter(x, y)
- fig = go.Figure(data=go.Scatter(
- x=x,
- y=y,
- text=data['原样品编号'].to_numpy(),
- mode='markers', name='有机质与全氮',
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- # 使用sklearn的LinearRegression进行最小二乘法拟合
- model = LinearRegression()
- model.fit(x.to_numpy().reshape(-1, 1), y)
- # 计算拟合直线的斜率和截距
- slope = model.coef_[0]
- intercept = model.intercept_
- r, _ = np.corrcoef(y, slope * x + intercept)
- # 绘制拟合直线
- fig.add_trace(go.Scatter(x=x, y=slope * x + intercept,mode='lines', name='拟合直线'))
- # plt.plot(x, slope * x + intercept, color='red', label='拟合直线', linewidth=2)
- # 设置图表布局
- fig.update_layout(
- title={
- 'text': f"有机质与全氮相关性散点图,y={round(slope,2)}x + {round(intercept,2)},R²={round(r[1], 2) ** 2},R={round(r[1],1)}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title='有机质(g/kg)',
- yaxis_title='全氮(g/kg)')
- html_file_path = f"{url}/有机质与全氮相关性散点图.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/有机质与全氮相关性散点图.png")
- # plt.savefig('./img/审核报告图形/' + '有机质与全氮相关性散点图.png', dpi=500, bbox_inches='tight')
- # plt.show()
- abnormalData = pd.DataFrame({})
- for index, row in data.iterrows():
- if not pd.isna(row['有机质']) and not pd.isna(row['全氮']):
- if row['有机质']/row['全氮'] <13 or row['有机质']/row['全氮'] > 20:
- newRow = row[['原样品编号', '样品编号', '土地利用类型', '有机质', '全氮']]
- soilType = ''
- resStr = ''
- if row['有机质']/row['全氮'] < 13:
- resStr = '偏低'
- if row['有机质']/row['全氮'] > 20:
- resStr = '偏高'
- if isinstance(row['土壤类型'], str):
- if len(row['土壤类型'].split('_')) > 1:
- soilType = row['土壤类型'].split('_')[1]
- newRow['土壤类型'] = soilType
- newRow['碳氮比'] = round(row['有机质']/row['全氮'], 2)
- newRow['审核结果'] = resStr
- newRow['外业情况'] = ''
- abnormalData = abnormalData._append(newRow)
- return abnormalData
- # with pd.ExcelWriter('./img/碳氮比异常数据.xlsx', engine='openpyxl') as writer:
- # abnormalData.to_excel(writer, index=False, sheet_name='碳氮比异常数据')
- def getImg(x,y,label):
- plt.scatter(x, y)
- plt.xlabel('样品数量')
- plt.ylabel(label)
- # plt.savefig('./img/' + label + '数据分布图.png', dpi=500, bbox_inches='tight')
- plt.show()
- # 13.全磷和有效磷,绘图,统计异常值,绘图绘制在同一个图中
- def getPData(data, url):
- # 提取异常数
- abnormalData = pd.DataFrame({})
- # abnormalData.columns = ['原样品编号', '样品编号', '土地利用类型', '全磷', '有效磷', '土壤类型', '有效磷比', '外业情况']
- for index, row in data.iterrows():
- if not pd.isna(row['有效磷']) and not pd.isna(row['全磷']):
- if row['有效磷'] /(1000 * row['全磷']) >= 0.15:
- newRow = row[['原样品编号', '样品编号', '土地利用类型', '全磷', '有效磷']]
- soilType = ''
- if isinstance(row['土壤类型'], str):
- if len(row['土壤类型'].split('_')) > 1:
- soilType = row['土壤类型'].split('_')[1]
- newRow['土壤类型'] = soilType
- newRow['有效磷比'] = round(row['有效磷'] / (row['全磷']*10), 2)
- newRow['外业情况'] = ''
- abnormalData = abnormalData._append(newRow)
- with pd.ExcelWriter(f'{url}/有效磷占全磷比异常数据.xlsx', engine='openpyxl') as writer:
- abnormalData.to_excel(writer, index=False, sheet_name='有效磷占全磷比异常数据')
- if not data['全磷'].empty and not data['有效磷'].empty:
- x1 = np.arange(0, len(data['全磷']), 1)
- x2 = np.arange(0, len(data['有效磷']), 1)
- x3 = np.arange(0, len(data['有效磷']/data['全磷']), 1)
- #data1 = data.sort_values(by='全磷', ascending=True)
- #data2 = data.sort_values(by='有效磷', ascending=True)
- data['有效磷占比'] = data['有效磷'] / (10 * data['全磷']) # 有效磷(mg/kg)占全磷(g/kg)的百分比
- #data3 = data.sort_values(by='有效磷占比', ascending=True)
- y1 = data['全磷']
- y2 = data['有效磷']
- y3 = data['有效磷占比']
- # getImg(x1, y1,'全磷(g/kg)')
- # getImg(x2, y2, '有效磷(mg/kg)')
- # getImg(x3, y3, '有效磷占全磷比(%)')
- getInteractiveImg(x1, y1,'全磷(g/kg)',[], [], '', [], [], '', url,
- '全磷分布图', '样品序号', '全磷(g/kg)', data['原样品编号'])
- getInteractiveImg(x2, y2, '有效磷(mg/kg)', [], [], '', [], [], '', url,
- '有效磷分布图', '样品序号', '有效磷(mg/kg)', data['原样品编号'])
- getInteractiveImg(x3, y3, '有效磷占全磷比(%)', [], [], '', [], [], '', url,
- '有效磷占全磷比分布图', '样品序号', '有效磷占全磷比(%)', data['原样品编号'])
- del data['有效磷占比']
- return abnormalData
- # 14. 全钾、速效钾和缓效钾,绘图
- def getKData(data, url):
- data = data.replace(np.nan,0)
- x1 = np.arange(0, len(data['全钾']), 1)
- y1 = data['全钾']
- x2 = np.arange(0, len(data['速效钾'] + data['缓效钾']), 1)
- y2 = data['速效钾']/1000 + data['缓效钾']/1000
- getInteractiveImg(x1, y1, '全钾', x2, y2,'速效钾与缓效钾之和', [],[],'', url,'全钾与速效钾缓效钾之和关系统计图','样品序号','g/kg', data['原样品编号'])
- x = np.arange(0, len(data['速效钾']), 1)
- y = data['速效钾']
- x_1 = np.arange(0, len(data['缓效钾']), 1)
- y_1 = data['缓效钾']
- getInteractiveImg(x, y, '速效钾', x_1, y_1, '缓效钾', [], [], '', url,
- '速效钾与缓效钾散点图', '样品序号', 'mg/kg', data['原样品编号'])
- # 15.重金属 已有 提取重金属异常数据即可
- def getMetal(simpleData):
- # resData_14 数据中提取重金属超标的数据,提取相应的指标形成表格
- resData = ''
- return resData
- # 16.阳离子交换量与交换性盐基总量
- def cationExchangeCapacity(data, url):
- # 绘图
- x1 = data['阳离子交换量']
- y1 = data['交换性盐基总量']
- fig = go.Figure(data=go.Scatter(
- x=x1,
- y=y1,
- text=data['原样品编号'].to_numpy(),
- mode='markers', name='阳离子交换量与交换性盐基总量相关性散点图',
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- print(3.43)
- # 使用sklearn的LinearRegression进行最小二乘法拟合
- model = LinearRegression()
- model.fit(x1.to_numpy().reshape(-1, 1), y1)
- # 计算拟合直线的斜率和截距
- slope = model.coef_[0]
- intercept = model.intercept_
- r, _ = np.corrcoef(y1, slope * x1 + intercept)
- # 绘制拟合直线
- fig.add_trace(go.Scatter(x=x1, y=slope * x1 + intercept, mode='lines', name='拟合直线'))
- # plt.plot(x, slope * x + intercept, color='red', label='拟合直线', linewidth=2)
- # 设置图表布局
- fig.update_layout(
- title={
- 'text': f"阳离子交换量与交换性盐基总量相关性散点图,y={round(slope, 2)}x + {round(intercept, 2)},R²={round(r[1], 2) ** 2},R={round(r[1],1)}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title='阳离子交换量(cmol/kg)',
- yaxis_title='交换性盐基总量(cmol/kg)')
- html_file_path = f"{url}/阳离子交换量与交换性盐基总量相关性散点图.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/阳离子交换量与交换性盐基总量相关性散点图.png")
- # 17.交换性盐基:二者之差 交换性盐基总量cmol(+)/kg 交换性钙镁钠钾之和 区分ph>7.5 和ph值<7.5
- def changeCation(data,url):
- hightData = data[data['pH'] > 7.5]
- lowData = data[data['pH'] <= 7.5]
- hightData = hightData.apply(pd.to_numeric, errors="coerce")
- lowData = lowData.apply(pd.to_numeric, errors="coerce")
- x_h = np.arange(0, len(hightData['交换性盐基总量']), 1)
- y_h = hightData['交换性盐基总量']
- y1_h = (hightData['交换性钙'] + hightData['交换性镁'] + hightData['交换性钾'] + hightData['交换性钠'])
- y2_h = (y_h-y1_h)
- # 绘图
- getInteractiveImg(x_h, y_h, '交换性盐基总量', x_h, y1_h, '钙镁钾钠之和', x_h, y2_h, '交换性盐基总量与钙镁钾钠和之差', url,
- '交换性盐基总量与交换性盐相关关系(pH大于7.5)', '样品序号', 'cmol/kg', hightData['原样品编号'])
- x_l = np.arange(0, len(lowData['交换性盐基总量']), 1)
- y_l = lowData['交换性盐基总量']
- y1_l = (lowData['交换性钙'] + lowData['交换性镁'] + lowData['交换性钾'] + lowData['交换性钠'])
- y2_l = (y_l - y1_l)
- getInteractiveImg(x_l, y_l, '交换性盐基总量', x_l, y1_l, '钙镁钾钠之和', x_l, y2_l,
- '交换性盐基总量与钙镁钾钠和之差', url,
- '交换性盐基总量与交换性盐相关关系(pH小于等于7.5)', '样品序号', 'cmol/kg', lowData['原样品编号'])
- # 18.水溶性盐总量、电导率、离子总量全盐量分布图,全盐量和电导率相关性分析,水溶性盐与离子总量关系
- def manyTypes(data,url):
- # data = data.replace('未检测', np.nan)
- data = data.dropna(subset=['全盐量', '电导率'])
- print(3.41)
- # 全盐量分布图
- x = np.arange(0, len(data['全盐量']), 1)
- data_1 = data.sort_values(by='全盐量', ascending=True)
- y = data_1['全盐量']
- getInteractiveImg(x, y, '全盐量', [], [], '', [], [],
- '', url,
- '全盐量分布图', '样品序号', '全盐量(g/kg)', data_1['原样品编号'])
- # 电导率分布图
- # x1 = np.arange(0, len(data['电导率']), 1)
- # y1 = data['电导率'].sort_values()
- # getInteractiveImg(x1, y1, '电导率', [], [], '', [], [],
- # '', './img/审核报告图形',
- # '电导率分布图', '样品序号', '电导率(mS/cm)', data['原样品编号'], 'fileUrl',
- # 'loc')
- print(3.42)
- x1 = data['全盐量'].dropna()
- y1 = data['电导率'].dropna()
- print('电导率', y1)
- # plt.scatter(x, y)
- fig = go.Figure(data=go.Scatter(
- x=x1,
- y=y1,
- text=data['原样品编号'].to_numpy(),
- mode='markers', name='全盐量与电导率相关关系',
- marker=dict(
- size=4, # 点的大小
- color='blue', # 点的颜色
- ))
- )
- print(3.43)
- # 使用sklearn的LinearRegression进行最小二乘法拟合
- model = LinearRegression()
- model.fit(x1.to_numpy().reshape(-1, 1), y1)
- # 计算拟合直线的斜率和截距
- slope = model.coef_[0]
- intercept = model.intercept_
- r, _ = np.corrcoef(y1, slope * x1 + intercept)
- # 绘制拟合直线
- fig.add_trace(go.Scatter(x=x1, y=slope * x1 + intercept, mode='lines', name='拟合直线'))
- # plt.plot(x, slope * x + intercept, color='red', label='拟合直线', linewidth=2)
- # 设置图表布局
- fig.update_layout(
- title={
- 'text':f"全盐量与电导率相关性散点图,y={round(slope, 2)}x + {round(intercept, 2)},R²={round(r[1], 2) ** 2},R={round(r[1], 2)}",
- 'xanchor': 'center', # 控制水平对齐,可选'left', 'center', 'right'
- 'yanchor': 'bottom', # 控制垂直对齐,可选'top', 'middle', 'bottom'
- 'x': 0.5, # 控制标题的水平位置,0.5代表中心,可以是小数(相对位置)或整数(像素位置)
- 'y': 0.9 # 控制标题的垂直位置,0.9代表底部,可以是小数或整数
- },
- xaxis_title='全盐量(g/kg)',
- yaxis_title='电导率(mS/cm)')
- html_file_path = f"{url}/全盐量与电导率相关性散点图.html"
- pio.write_html(fig, file=html_file_path, auto_open=False)
- # 同时保存一份图片
- pio.write_image(fig, f"{url}/全盐量与电导率相关性散点图.png")
- print(3.44)
- # 离子总量 水溶性盐总量及差值
- filterData = data.dropna(subset=['全盐量', '离子总量'])
- x2 = np.arange(0, len(filterData['离子总量']), 1)
- print(x2)
- print(3.441)
- print(filterData['离子总量'])
- y2 = filterData['离子总量']
- print(y2)
- print(3.442)
- y3 = filterData['全盐量']
- print(y3)
- print(3.443)
- y4 = (y2-y3)
- if not filterData.empty:
- #要增加对指标值是否缺失进行判断,都不缺失绘图
- getInteractiveImg(x2, y2, '离子总量', x2, y3, '全盐量', x2, y4,
- '离子总量与全盐量之差', url,
- '全盐量与离子总量相关性散点图', '样品序号', '离子总量/全盐量(g/kg)', filterData['原样品编号'])
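- # 调用示例(示意,输入文件与输出目录均为假设,需按实际情况调整):
- # out_dir = './审核报告图形'
- # os.makedirs(out_dir, exist_ok=True)
- # df = pd.read_excel('检测数据汇总.xlsx')      # 假设的汇总表,需包含上述各指标列
- # getFrequencyInformation(df.copy(), out_dir)  # 频度分析表
- # getFrequencyImage(df.copy(), out_dir)        # 各指标频度统计图
- # getPHData(df.copy(), out_dir)                # pH 频度与异常样品
- # getNAndC(df.copy(), out_dir)                 # 有机质-全氮相关性与碳氮比异常
- # manyTypes(df.copy(), out_dir)                # 全盐量/电导率/离子总量相关图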
|