From 5c6041becb18c827ef1bcf029c024f9fe172bbe9 Mon Sep 17 00:00:00 2001 From: yangyang01000846 <15195822163@163.com> Date: Mon, 17 Nov 2025 20:35:23 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=EF=BC=9AHPC=E5=B8=B8?= =?UTF-8?q?=E7=94=A8=E6=8C=87=E4=BB=A4=E5=B0=81=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../entity/pojo/pbs/hpc/AddJobParam.java | 129 +++++ .../entity/pojo/pbs/hpc/CancelJobParam.java | 28 + .../entity/pojo/pbs/hpc/CloneJobParam.java | 18 + .../entity/pojo/pbs/hpc/FinishJobParam.java | 28 + .../entity/pojo/pbs/hpc/HpcParamFromEnum.java | 86 +++ .../entity/pojo/pbs/hpc/JobModifyParam.java | 185 ++++++ .../entity/pojo/pbs/hpc/JobRequeueParam.java | 19 + .../entity/pojo/pbs/hpc/JobViewParam.java | 28 + .../entity/pojo/pbs/hpc/ListJobParam.java | 53 ++ .../entity/pojo/pbs/hpc/ListTasksParam.java | 38 ++ .../entity/pojo/pbs/hpc/NewJobParam.java | 144 +++++ .../pojo/pbs/hpc/NodeListCoreParam.java | 27 + .../entity/pojo/pbs/hpc/NodeListParam.java | 40 ++ .../entity/pojo/pbs/hpc/NodeViewParam.java | 29 + .../pojo/pbs/hpc/SubmitHpcJobParam.java | 214 +++++++ .../common/entity/req/pbs/hpc/AddJobReq.java | 131 +++++ .../entity/req/pbs/hpc/CancelJobReq.java | 34 ++ .../entity/req/pbs/hpc/CloneJobReq.java | 20 + .../entity/req/pbs/hpc/FinishJobReq.java | 30 + .../entity/req/pbs/hpc/JobModifyReq.java | 221 ++++++++ .../entity/req/pbs/hpc/JobRequeueReq.java | 21 + .../common/entity/req/pbs/hpc/JobViewReq.java | 30 + .../entity/req/pbs/hpc/ListTasksReq.java | 41 ++ .../common/entity/req/pbs/hpc/NewJobReq.java | 91 +++ .../entity/req/pbs/hpc/NodeListCoreReq.java | 30 + .../entity/req/pbs/hpc/NodeListReq.java | 44 ++ .../entity/req/pbs/hpc/NodeViewReq.java | 35 ++ .../entity/req/pbs/hpc/SubmitHpcJobReq.java | 257 +++++++++ .../entity/resp/pbs/hpc/AddJobResp.java | 8 + .../entity/resp/pbs/hpc/JobCancelResp.java | 8 + .../entity/resp/pbs/hpc/JobFinishResp.java | 8 + 
.../entity/resp/pbs/hpc/JobModifyResp.java | 8 + .../entity/resp/pbs/hpc/JobViewResp.java | 22 + .../resp/pbs/hpc/JobViewTaskStatus.java | 13 + .../entity/resp/pbs/hpc/ListJobResp.java | 17 + .../entity/resp/pbs/hpc/ListTasksResp.java | 20 + .../entity/resp/pbs/hpc/NewJobResp.java | 8 + .../entity/resp/pbs/hpc/NodeListCoreResp.java | 12 + .../entity/resp/pbs/hpc/NodeListResp.java | 14 + .../entity/resp/pbs/hpc/NodeViewResp.java | 20 + .../entity/resp/pbs/hpc/SubmitHpcJobResp.java | 12 + .../common/utils/CmdCommandExcuteUtil.java | 101 ++++ .../common/utils/HpcCommandBuilderUtil.java | 171 ++++++ .../common/utils/HpcCommandExcuteUtil.java | 54 ++ .../utils/HpcCommandResulParseUtil.java | 525 ++++++++++++++++++ pbs/pom.xml | 15 +- .../sdm/pbs/controller/TaskController.java | 109 ++++ .../java/com/sdm/pbs/service/TaskService.java | 38 ++ .../sdm/pbs/service/impl/TaskServiceImpl.java | 226 ++++++++ pbs/src/main/resources/application-dev.yml | 15 +- pbs/src/main/resources/logback.xml | 26 +- 51 files changed, 3490 insertions(+), 11 deletions(-) create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/AddJobParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CancelJobParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CloneJobParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/FinishJobParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/HpcParamFromEnum.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobModifyParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobRequeueParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobViewParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListJobParam.java create mode 100644 
common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListTasksParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NewJobParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListCoreParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeViewParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/SubmitHpcJobParam.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/AddJobReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CancelJobReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CloneJobReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/FinishJobReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobModifyReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobRequeueReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobViewReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/ListTasksReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NewJobReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListCoreReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeViewReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/req/pbs/hpc/SubmitHpcJobReq.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/AddJobResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobCancelResp.java create mode 100644 
common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobFinishResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobModifyResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewTaskStatus.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListJobResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListTasksResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/NewJobResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/NodeListCoreResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/NodeListResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/NodeViewResp.java create mode 100644 common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/SubmitHpcJobResp.java create mode 100644 common/src/main/java/com/sdm/common/utils/CmdCommandExcuteUtil.java create mode 100644 common/src/main/java/com/sdm/common/utils/HpcCommandBuilderUtil.java create mode 100644 common/src/main/java/com/sdm/common/utils/HpcCommandExcuteUtil.java create mode 100644 common/src/main/java/com/sdm/common/utils/HpcCommandResulParseUtil.java create mode 100644 pbs/src/main/java/com/sdm/pbs/controller/TaskController.java create mode 100644 pbs/src/main/java/com/sdm/pbs/service/TaskService.java create mode 100644 pbs/src/main/java/com/sdm/pbs/service/impl/TaskServiceImpl.java diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/AddJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/AddJobParam.java new file mode 100644 index 00000000..8051dc11 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/AddJobParam.java @@ -0,0 +1,129 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + 
+@Data +//@Schema(description = "HPC作业添加任务请求参数(对应 job add 命令)") +public class AddJobParam { + + /** + * 要添加任务的作业标识符(必填) + */ +// @Schema(description = "指定要向其添加任务的作业的作业标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "10086") +// private String jobId; + + /** + * 新任务所依赖的任务名称列表,格式:task_name1,task_name2... + */ +// @Schema(description = "新任务依赖的任务名称列表(格式:task_name1,task_name2...)", example = "pre-task-1,pre-task-2") + private String depend; + + /** + * 任务运行时环境变量列表,格式:variable_name1=value1;variable_name2=value2... + */ +// @Schema(description = "任务运行时环境变量(格式:variable_name1=value1;variable_name2=value2...)", example = "LOG_LEVEL=INFO;TEMP_PATH=/tmp") + private String env; + + /** + * 是否独占节点运行(运行时同一节点无其他任务) + */ +// @Schema(description = "是否独占节点运行(True=同一节点无其他任务)", example = "false") + private Boolean exclusive; + + /** + * 任务名称(前端传入,必填,最大长度80个字符) + */ +// @Schema(description = "任务显示名称(最大长度80个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "数据处理子任务-01") + private String name; + + /** + * 任务所需跨集群核心总数(必填,格式:最小[-最大]) + */ +// @Schema(description = "任务所需核心总数(格式:最小[-最大],必填)", requiredMode = Schema.RequiredMode.REQUIRED, example = "2-4") + private String numcores; + + /** + * 任务所需集群节点总数(格式:最小[-最大]) + */ +// @Schema(description = "任务所需节点总数(格式:最小[-最大])", example = "1-2") + private String numnodes; + + /** + * 任务所需跨集群套接字总数(格式:最小[-最大]) + */ +// @Schema(description = "任务所需套接字总数(格式:最小[-最大])", example = "1-2") + private String numsockets; + + /** + * 是否为参数任务(多次运行,替换命令行中星号=的索引值) + */ +// @Schema(description = "是否为参数任务(True=多次运行并替换命令行索引值)", example = "false") + private Boolean parametric; + + /** + * 任务必须运行的节点列表,格式:node1_name,node2_name... 
+ */ +// @Schema(description = "任务必须运行的节点列表(格式:node1_name,node2_name...)", example = "node-001,node-002") + private String requirednodes; + + /** + * 任务失败后是否重试,默认True + */ +// @Schema(description = "任务失败后是否重试", defaultValue = "true", example = "true") + private Boolean rerunnable; + + /** + * 作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间) + */ + private String runtime; + + /** + * 集群头节点的主机名或IP地址 + */ +// @Schema(description = "集群头节点主机名或IP地址", example = "192.168.1.200") + private String scheduler; + + /** + * 标准错误流重定向文件路径(最大长度160个字符) + */ +// @Schema(description = "标准错误流重定向文件路径(最大长度160个字符)", example = "/hpc/logs/task-01/stderr.log") + private String stderr; + + /** + * 标准输入文件路径(最大长度160个字符) + */ +// @Schema(description = "标准输入文件路径(最大长度160个字符)", example = "/hpc/input/task-01/stdin.txt") + private String stdin; + + /** + * 标准输出流重定向文件路径(最大长度160个字符) + */ +// @Schema(description = "标准输出流重定向文件路径(最大长度160个字符)", example = "/hpc/logs/task-01/stdout.log") + private String stdout; + + /** + * 任务类型(定义命令运行方式),默认Basic(mpi) + */ +// @Schema(description = "任务类型(定义命令运行方式)", defaultValue = "Basic(mpi)", example = "Basic(mpi)") + private String type; + + /** + * 任务成功退出的有效退出码,默认0 + */ +// @Schema(description = "任务成功退出的有效退出码", defaultValue = "0", example = "0,2") + private String validexitcodes; + + /** + * 任务运行的工作目录(HPC挂载路径,必填,最大长度160个字符) + */ +// @Schema(description = "任务运行工作目录(HPC挂载路径,最大长度160个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "/hpc/workspace/task-01") + private String workdir; + + /** + * 任务命令行(包含命令、应用程序名称及必需参数,必填) + */ +// @Schema(description = "任务命令行(含命令及参数,必填)", requiredMode = Schema.RequiredMode.REQUIRED, example = "/usr/bin/python3 /hpc/scripts/data_process.py --input data.csv --output result.csv") +// private String command; + +} + diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CancelJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CancelJobParam.java new file mode 100644 index 00000000..70f58a18 --- /dev/null +++ 
b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CancelJobParam.java @@ -0,0 +1,28 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + +@Data +public class CancelJobParam { + + /** + * 要取消的作业标识符(必填) + */ +// private String jobId; + + /** + * 取消作业方式(立即停止,无宽限期,不运行节点发布任务)force 强制、graceful 优雅,默认graceful + */ +// private String cancelWay; + + /** + * 取消作业的原因消息(最大长度128个字符) + */ + private String message; + + /** + * 提交作业的集群头节点主机名或IP地址 + */ + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CloneJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CloneJobParam.java new file mode 100644 index 00000000..78e8a6a4 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/CloneJobParam.java @@ -0,0 +1,18 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema (description = "HPC 作业克隆请求参数(对应 job clone 命令)") +public class CloneJobParam { + /** + 要复制的作业的标识符(必填) + */ +// private String jobId; + /** + 包含要复制作业的集群头节点主机名或 IP 地址 + */ + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/FinishJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/FinishJobParam.java new file mode 100644 index 00000000..d42edaca --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/FinishJobParam.java @@ -0,0 +1,28 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +// HPC 作业完成请求参数(对应 job finish 命令 +public class FinishJobParam { + /** + 要完成的作业的标识符(必填) + */ +// private String jobId; + /** + 是否正常完成作业(等待运行中任务完成),目前只有 graceful + */ +// private String finshWay; + /** + 完成作业的原因消息(最大长度 128 个字符) + */ + @Schema (description = "完成作业的原因消息(最大长度 128 个字符)", example = "作业已按预期完成,手动标记结束") + private 
String message; + /** + 提交作业的集群头节点主机名或 IP 地址 + */ + @Schema (description = "集群头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/HpcParamFromEnum.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/HpcParamFromEnum.java new file mode 100644 index 00000000..2509e8e9 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/HpcParamFromEnum.java @@ -0,0 +1,86 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import java.util.HashMap; +import java.util.Map; + +/** + * HPC参数来源枚举(通过className映射来源) + */ +public enum HpcParamFromEnum { + + // 示例:NewJobParam 来自"作业提交接口" + NODELISTPARAM("NodeListParam", "HPC节点列表"), + NODELISTCOREPARAM("NodeListCoreParam", "HPC节点的核心列表"), + NODEVIEWPARAM("NodeViewParam", "HPC节点详情查询"), + NEWJOBPARAM("NewJobParam", "HPC创建任务"), + ADDJOBPARAM("AddJobParam", "HPC添加任务"), + SUBMITHPCJOBPARAM("SubmitHpcJobParam", "HPC提交任务"), + CANCELJOBPARAM("CancelJobParam", "HPC取消任务"), + CLONEJOBPARAM("CloneJobParam", "HPC克隆任务"), + FINISHJOBPARAM("FinishJobParam", "HPC优雅完成任务"), + LISTOBPARAM("ListJobParam", "HPC有条件查询所有任务"), + LISTTASKSPARAM("ListTasksParam", "HPC查询指定作业的任务信息"), + MODIFYJOBPARAM("JobModifyParam", "HPC修改任务信息"), + REQUEUEJOBPARAM("JobRequeueParam", "HPC任务重新排队"), + VIEWJOBPARAM("JobViewParam", "HPC查看作业视图"), + + // 可根据实际类名和来源继续添加枚举项 + ; + + /** + * 类的简单名称(如 NewJobParam) + */ + private final String className; + + /** + * 参数来源(如"作业提交接口"、"外部系统同步"等) + */ + private final String from; + + // 缓存:className -> HpcParamFromEnum,用于快速查询 + private static final Map<String, HpcParamFromEnum> CLASS_NAME_MAP = new HashMap<>(); + + static { + // 初始化缓存,将所有枚举项按className存入Map + for (HpcParamFromEnum enumItem : values()) { + CLASS_NAME_MAP.put(enumItem.className, enumItem); + } + } + + HpcParamFromEnum(String className, String from) { + this.className = className; + this.from = from; + } + + // getter方法 + public String getClassName() {
+ return className; + } + + public String getFrom() { + return from; + } + + /** + * 根据类的简单名称获取来源(from) + * @param className 类的简单名称(如"NewJobParam") + * @return 对应的来源字符串,若未找到返回字符串"null" + */ + public static String getFromByClassName(String className) { + HpcParamFromEnum enumItem = CLASS_NAME_MAP.get(className); + return enumItem != null ? enumItem.from : "null"; + } + +// /** +// * 根据类对象获取来源(from) +// * @param clazz 类对象(如NewJobParam.class) +// * @return 对应的来源字符串,若未找到返回null +// */ +// public static String getFromByClass(Class clazz) { +// if (clazz == null) { +// return null; +// } +// return getFromByClassName(clazz.getName()); +// } + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobModifyParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobModifyParam.java new file mode 100644 index 00000000..61e6b454 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobModifyParam.java @@ -0,0 +1,185 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + + +@Data +public class JobModifyParam { + + /** + * 要修改属性的作业标识符(必填) + */ +// private String jobId; + + /** + * 新增作业不应运行的节点列表(格式:node1_name,node2_name...) + */ + private String addexcludednodes; + + /** + * 是否清除所有作业不应运行的节点(true=清空列表) + */ +// private Boolean clearexcludednodes; + + /** + * 从作业不应运行的节点列表中移除的节点(格式:node1_name,node2_name...) + */ + private String removeexcludednodes; + + /** + * 节点视为作业候选节点的最小内核数 + */ + private Integer corespernode; + + /** + * 作业的自定义属性(格式:name1=value1;name2=value2...)
+ */ + private String customproperties; + + /** + * 接收作业通知的电子邮件地址 + */ + private String emailaddress; + + /** + * 每个进程消耗的最大内存量(MB) + */ + private Integer estimatedprocessmemory; + + /** + * 是否独占节点运行(其他作业不可用同一节点),默认True + */ + private Boolean exclusive; + + /** + * 任务失败时是否使依赖任务失败,默认false + */ + private Boolean faildependenttasks; + + /** + * 任务失败时是否立即停止整个作业,默认true + */ + private Boolean failontahpailure; + + /** + * 作业运行时环境变量(格式:variable_name1=value1;variable_name2=value2...) + */ + private String jobenv; + + /** + * 作业名称(最大长度80个字符) + */ + private String jobname; + + /** + * 作业所需许可证信息(格式:license_name1:number1;license_name2:number2...,最大160字符) + */ + private String license; + + /** + * 指定要用于作业的作业模板的名称。 + * 作业模板名称的最大长度为 80 个字符 + */ + private String jobtemplate; + + /** + * 节点视为作业候选节点的最小内存量(MB,1-2147483647) + */ + private Integer memorypernode; + + /** + * 作业可运行的节点组(默认"所有节点") + */ + private String nodegroup; + + /** + * 作业结束时是否发送通知(状态:已完成/失败/已取消),默认True + */ + private Boolean notifyoncompletion; + + /** + * 作业启动时是否发送通知,默认True + */ + private Boolean notifyonstart; + + /** + * 作业所需跨集群核心总数(格式:最小[-最大]) + */ + private String numcores; + + /** + * 作业所需集群节点总数(格式:最小[-最大]) + */ + private String numnodes; + + /** + * 作业所需跨集群套接字总数(格式:最小[-最大]) + */ + private String numsockets; + + /** + * 运行作业的帐户密码 + */ + private String password; + + /** + * 作业依赖的父作业ID列表(格式:jobID1,jobID2...) 
+ */ + private String parentjobids; + + /** + * 作业优先级(0-4000),默认2000 + */ + private Integer priority; + + /** + * 作业完成百分比(0-100,需手动维护) + */ + private Integer progress; + + /** + * 作业自定义状态消息 + */ + private String progressmsg; + + /** + * 作业所属项目名称(最大长度80个字符) + */ + private String projectname; + + /** + * 作业最大运行时间(秒),默认2147483647 + */ + private String runtime; + + /** + * 是否运行至取消(忽略运行时限制) + */ + private Boolean rununtilcanceled; + + /** + * 包含要修改作业的集群头节点主机名或IP地址 + */ + private String scheduler; + + /** + * 是否在单个节点上分配所有资源,默认False + */ + private Boolean singlenode ; + + /** + * 任务执行失败后自动重试次数(排除节点准备/发布任务) + */ + private Integer taskexecutionfailureretrylimit; + + /** + * 运行作业的帐户用户名(格式:[域\\]用户名) + */ + private String user; + + /** + * 任务成功退出的有效退出码,默认0 + */ + private String validexitcodes; +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobRequeueParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobRequeueParam.java new file mode 100644 index 00000000..5f0c3aa6 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobRequeueParam.java @@ -0,0 +1,19 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +// HPC 作业重新排队请求参数(对应 job requeue 命令) +public class JobRequeueParam { + /** + 要重新排队的作业标识符(必填) + 说明:仅作业状态为 “正在运行”“已取消” 或 “失败” 时可重新排队 + */ +// private String jobId; + /** + 包含要重新排队作业的集群头节点主机名或 IP 地址 + */ + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobViewParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobViewParam.java new file mode 100644 index 00000000..9ab29f0b --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/JobViewParam.java @@ -0,0 +1,28 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +/** + HPC 
作业信息查看请求参数(对应 job view 命令) + */ +@Data +public class JobViewParam { + /** + 要查看信息的作业标识符(必填) + */ +// private String jobId; + /** + 是否显示作业属性值的详细列表,默认 True + */ + private Boolean detailed; + /** + 是否显示作业的历史记录,默认 True + */ + private Boolean history; + /** + 作业所在集群的头节点主机名或 IP 地址 + */ + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListJobParam.java new file mode 100644 index 00000000..dad39f85 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListJobParam.java @@ -0,0 +1,53 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + +@Data +public class ListJobParam { + + /** + * 是否列出集群中所有作业,查询所有的时候,其他的参数就不要指定 + */ +// private Boolean all; + + /** + * 信息显示格式(可选值:list/table),默认table + */ + private String format; + + /** + * 按作业名称筛选(模糊匹配/精确匹配,取决于命令底层逻辑) + */ + private String jobname; + + /** + * 按项目名称筛选 + */ + private String project; + + /** + * 按池名称筛选 + */ + private String pool; + + /** + * 集群头节点主机名或IP地址 + */ + private String scheduler; + + /** + * 按作业状态筛选(多个状态用逗号分隔) + */ + private String state; + + /** + * 仅显示过去指定天数内提交的作业(正整数) + */ + private Integer submittime; + + /** + * 按作业所有者筛选(格式:[域\\]用户名,*表示所有用户) + */ + private String user; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListTasksParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListTasksParam.java new file mode 100644 index 00000000..fcaa042d --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/ListTasksParam.java @@ -0,0 +1,38 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + + +/** + * HPC作业任务列表查询请求参数(对应 job listtasks 命令 + */ +@Data +public class ListTasksParam { + + /** + * 作业标识符(必填,指定要查询任务信息的作业) + */ +// private String jobId; + + /** + * 是否展开参数任务(列出每个子任务,而非总体参数任务),默认True + */ +// private Boolean 
expand; + + /** + * 信息显示格式(可选值:list/table),默认list + */ + private String format; + + /** + * 集群头节点主机名或IP地址(指定作业所在集群) + */ + private String scheduler; + + /** + * 按任务状态筛选(多个状态用逗号分隔) + */ + private String state; + +} + diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NewJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NewJobParam.java new file mode 100644 index 00000000..055ac9b0 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NewJobParam.java @@ -0,0 +1,144 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Builder; +import lombok.Data; + +@Data +public class NewJobParam { + + /** + * 指定要将节点视为运行作业的候选节点的最小内核数(1-2147483647) + */ + private Integer corespernode; + + /** + * 作业的自定义属性,格式为 name1=value1;name2=value2 + */ + private String customproperties; + + /** + * 接收作业通知的电子邮件地址 + */ + private String emailaddress; + + /** + * 作业中每个进程消耗的最大内存量(MB) + */ + private Integer estimatedprocessmemory; + + /** + * 是否独占节点运行,默认True(True表示其他作业不会在此节点运行) + */ + private Boolean exclusive ; + + /** + * 任务失败时是否使依赖任务失败,默认false + */ + private Boolean faildependenttasks ; + + /** + * 任务失败时是否立即停止整个作业,默认true + */ + private Boolean failontahpailure ; + + /** + * 作业运行时环境变量,格式为 variable_name1=value1;variable_name2=value2 + */ + private String jobenv; + + /** + * 作业名称(必填,最大长度80个字符) + */ + private String jobname; + + /** + * 作业所需许可证信息,格式为 license_name1:number1;license_name2:number2(最大长度160字符) + */ + private String license; + + /** + * 节点视为作业候选节点的最小内存量(MB,1-2147483647) + */ + private Integer memorypernode; + + /** + * 作业可运行的节点组(必填,格式为 node_group1_name;node_group2_name,默认"所有节点") + */ + private String nodegroup; + + /** + * 作业结束时是否发送通知(状态:已完成/失败/已取消),默认True + */ + private Boolean notifyoncompletion ; + + /** + * 作业启动时是否发送电子邮件通知,默认True + */ + private Boolean notifyonstart; + + /** + * 作业所需跨集群核心总数(必填,格式:最小[-最大]) + */ + private String numcores; + + /** + * 作业所需集群节点总数(格式:最小[-最大]) + */ + private String numnodes; + + /** + * 
作业所需跨集群套接字总数(格式:最小[-最大]) + */ + private String numsockets; + + /** + * 作业依赖的父作业ID列表,格式为 jobID1,jobID2... + */ + private String parentjobids; + + /** + * 作业优先级(0-4000),默认2000 + */ + private Integer priority; + + /** + * 作业完成百分比(0-100),需手动维护更新 + */ + private Integer progress; + + /** + * 作业自定义状态消息 + */ + private String progressmsg; + + /** + * 作业所属项目名称(必填,最大长度80个字符) + */ + private String projectname; + + /** + * 作业最大运行时间,天:时:分:秒 , /runtime:1:00:00:00 表示一天, 每个位置最大 2147483647 ,整个字符串默认 Infinite 表示无限时间 + */ + private String runtime; + + /** + * 是否运行至取消(忽略运行时限制),默认不拼接就是false + */ + private Boolean rununtilcanceled; + + /** + * 是否在单个节点上分配所有资源,默认False + */ + private Boolean singlenode; + + /** + * 任务执行失败后自动重试次数(排除节点准备/发布任务) + */ + private Integer taskexecutionfailureretrylimit; + + /** + * 任务成功退出的验证退出码,默认0 + */ + private String validexitcodes; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListCoreParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListCoreParam.java new file mode 100644 index 00000000..ebd24173 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListCoreParam.java @@ -0,0 +1,27 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + +/** + HPC 节点核心列表查询请求参数(对应 node listcores 命令) + */ +@Data +public class NodeListCoreParam { + /** + 信息显示格式(可选值:list/table),默认 table + */ + private String format; + /** + 按作业 ID 筛选(仅显示指定作业使用的核心) + */ + private String jobid; + /** + 集群头节点主机名或 IP 地址 + */ + private String scheduler; + /** + 按核心状态筛选(多个状态用逗号分隔) + */ + private String state; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListParam.java new file mode 100644 index 00000000..b509be53 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeListParam.java @@ -0,0 +1,40 @@ +package 
com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + +/** + 是HPC 节点列表查询请求参数(对应 node list 命令) + */ +@Data +public class NodeListParam { + /** + 是否仅显示活动的头节点 + */ +// private Boolean activeheadnode; + /** + 信息显示格式(可选值:list/table),默认 table + */ + private String format; + /** + 按节点组名称筛选 + */ + private String group; + /** + 是否显示节点历史信息(true = 显示,false = 不显示) + */ + private String history; + /** + 仅显示过去指定天数内有活动的节点(正整数) + */ + private Integer lastdays; + /** + 集群头节点主机名或 IP 地址 + */ + private String scheduler; + /** + 按节点状态筛选(如 Online、Offline 等) + */ + private String state; + + +} diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeViewParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeViewParam.java new file mode 100644 index 00000000..46c82652 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/NodeViewParam.java @@ -0,0 +1,29 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + +//HPC 节点信息查看请求参数(对应 node view 命令) +@Data +public class NodeViewParam { + /** + 要查看信息的节点名称(必填) + */ +// private String nodeName; + /** + 是否显示节点详细属性信息(true = 显示,false = 不显示) + */ + private String detailed; + /** + 是否显示节点历史信息(true = 显示,false = 不显示) + */ + private String history; + /** + 仅显示过去指定天数内的节点历史记录(正整数) + */ + private Integer lastdays; + /** + 节点所在集群的头节点主机名或 IP 地址 + */ + private String scheduler; + +} diff --git a/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/SubmitHpcJobParam.java b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/SubmitHpcJobParam.java new file mode 100644 index 00000000..0ae839e1 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/pojo/pbs/hpc/SubmitHpcJobParam.java @@ -0,0 +1,214 @@ +package com.sdm.common.entity.pojo.pbs.hpc; + +import lombok.Data; + + +@Data +public class SubmitHpcJobParam { + + /** + * 要提交的作业标识符(必填) + */ +// private String id; + + /** + * 节点视为运行作业候选节点的最小内核数(可选) + */ + private Integer corespernode; + + /** + * 
作业的自定义属性,格式:name1=value1;name2=value2... + */ + private String customproperties; + + /** + * 任务运行时环境变量列表,格式:variable_name1=value1;variable_name2=value2... + */ + private String env; + + /** + * 作业中每个进程消耗的最大内存量(MB) + */ + private Integer estimatedprocessmemory; + + /** + * 是否独占节点运行(运行时同一节点无其他作业),默认True + */ + private Boolean exclusive; + + /** + * 任务失败时是否使依赖任务失败,默认True + */ + private Boolean faildependenttasks; + + /** + * 任务失败时是否立即停止整个作业,默认True + */ + private Boolean failontahpailure; + + /** + * 作业运行时环境变量列表,格式:variable_name1=value1;variable_name2=value2... + */ + private String jobenv; + + /** + * 作业名称(必填,最大长度80个字符) + */ + private String jobname; + + /** + * 作业所需许可证信息,格式:license_name1:number1;license_name2:number2...(最大长度160个字符) + */ + private String license; + + /** + * 节点视为作业候选节点的最小内存量(MB) + */ + private Integer memorypernode; + + /** + * 任务显示名称(最大长度80个字符) + */ + private String name; + + /** + * 作业可运行的节点组列表,格式:node_group1_name,node_group2_name... + */ + private String nodegroup; + + /** + * 作业结束时是否发送通知,默认True + */ + private Boolean notifyoncompletion; + + /** + * 作业启动时是否发送通知,默认True + */ + private Boolean notifyonstart; + + /** + * 作业所需跨集群核心总数,格式:最小[-最大] + */ + private String numcores; + + /** + * 作业所需集群节点总数,格式:最小[-最大] + */ + private String numnodes; + + /** + * 任务所需跨集群套接字总数,格式:最小[-最大] + */ + private String numsockets; + + /** + * 是否为参数任务(多次运行并替换命令行索引值) + */ + private Boolean parametric; + + /** + * 作业依赖的父作业ID列表,格式:jobID1,jobID2... + */ + private String parentjobids; + + /** + * 运行作业的帐户密码(必填) + */ + private String password; + + /** + * 作业优先级(0-4000),默认2000 + */ + private Integer priority; + + /** + * 作业完成百分比(0-100) + */ + private Integer progress; + + /** + * 作业自定义状态消息(最大长度80个字符) + */ + private String progressmsg; + + /** + * 作业所属项目名称(必填,最大长度80个字符) + */ + private String projectname; + + /** + * 任务必须运行的节点列表,格式:node1_name,node2_name... 
+ */ + private String requirednodes; + + /** + * 任务失败后是否重试,默认True + */ + private Boolean rerunnable; + + /** + * 作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间),例如 2:08:30:00 + */ + private String runtime; + + /** + * 是否运行至取消(忽略运行时限制) + */ + private Boolean rununtilcanceled; + + /** + * 集群头节点的主机名或IP地址 + */ + private String scheduler; + + /** + * 是否在单个节点上分配所有资源,默认True + */ + private Boolean singlenode; + + /** + * 标准错误流重定向文件路径(最大长度160个字符) + */ + private String stderr; + + /** + * 标准输入文件路径(最大长度160个字符) + */ + private String stdin; + + /** + * 标准输出流重定向文件路径(最大长度160个字符) + */ + private String stdout; + + /** + * 任务执行失败后自动重试次数(排除节点准备/发布任务) + */ + private Integer taskexecutionfailureretrylimit; + + /** + * 任务类型(定义命令运行方式),默认Basic(mpi) + */ + private String type; + + /** + * 任务运行的工作目录(最大长度160个字符) + */ + private String workdir; + + /** + * 运行作业的帐户用户名(含可选域,必填) + */ + private String user; + + /** + * 任务成功退出的有效退出码,默认0 + */ + private String validexitcodes; + + /** + * 任务命令行(包含命令、应用程序名称及必需参数) + */ +// private String command; + +} diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/AddJobReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/AddJobReq.java new file mode 100644 index 00000000..c86655f8 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/AddJobReq.java @@ -0,0 +1,131 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC作业添加任务请求参数(对应 job add 命令)") +public class AddJobReq { + + /** + * 要添加任务的作业标识符(必填) + */ + @Schema(description = "指定要向其添加任务的作业的作业标识符,JOB NEW 返回", requiredMode = Schema.RequiredMode.REQUIRED, example = "10086") + private String jobId; + + /** + * 新任务所依赖的任务名称列表,格式:task_name1,task_name2... + */ + @Schema(description = "新任务依赖的任务名称列表(格式:task_name1,task_name2...)", example = "pre-task-1,pre-task-2") + private String depend; + + /** + * 任务运行时环境变量列表,格式:variable_name1=value1;variable_name2=value2...
+ */ + @Schema(description = "任务运行时环境变量(格式:variable_name1=value1;variable_name2=value2...)", example = "LOG_LEVEL=INFO;TEMP_PATH=/tmp") + private String env; + + /** + * 是否独占节点运行(运行时同一节点无其他任务) + */ + @Schema(description = "是否独占节点运行(True=同一节点无其他任务)", example = "false") + private Boolean exclusive; + + /** + * 任务名称(前端传入,必填,最大长度80个字符) + */ + @Schema(description = "任务显示名称(最大长度80个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "数据处理子任务-01") + private String name; + + /** + * 任务所需跨集群核心总数(必填,格式:最小[-最大]) + */ + @Schema(description = "任务所需核心总数(格式:最小[-最大],必填)", requiredMode = Schema.RequiredMode.REQUIRED, example = "2-4") + private String numcores; + + /** + * 任务所需集群节点总数(格式:最小[-最大]) + */ + @Schema(description = "任务所需节点总数(格式:最小[-最大])", example = "1-2") + private String numnodes; + + /** + * 任务所需跨集群套接字总数(格式:最小[-最大]) + */ + @Schema(description = "任务所需套接字总数(格式:最小[-最大])", example = "1-2") + private String numsockets; + + /** + * 是否为参数任务(多次运行,并将命令行中的星号(*)替换为索引值) + */ + @Schema(description = "是否为参数任务(True=多次运行并替换命令行索引值)", example = "false") + private Boolean parametric; + + /** + * 任务必须运行的节点列表,格式:node1_name,node2_name... 
+ */ + @Schema(description = "任务必须运行的节点列表(格式:node1_name,node2_name...)", example = "node-001,node-002") + private String requirednodes; + + /** + * 任务失败后是否重试,默认True + */ + @Schema(description = "任务失败后是否重试", defaultValue = "true", example = "true") + private Boolean rerunnable; + + /** + * 任务最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间) + */ + @Schema(description = "作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间)", example = "2:08:30:00") + private String runtime; + + /** + * 集群头节点的主机名或IP地址 + */ + @Schema(description = "集群头节点主机名或IP地址", example = "192.168.1.200") + private String scheduler; + + /** + * 标准错误流重定向文件路径(最大长度160个字符) + */ + @Schema(description = "标准错误流重定向文件路径(最大长度160个字符)", example = "/hpc/logs/task-01/stderr.log") + private String stderr; + + /** + * 标准输入文件路径(最大长度160个字符) + */ + @Schema(description = "标准输入文件路径(最大长度160个字符)", example = "/hpc/input/task-01/stdin.txt") + private String stdin; + + /** + * 标准输出流重定向文件路径(最大长度160个字符) + */ + @Schema(description = "标准输出流重定向文件路径(最大长度160个字符)", example = "/hpc/logs/task-01/stdout.log") + private String stdout; + + /** + * 任务类型(定义命令运行方式),默认Basic(mpi) + */ + @Schema(description = "任务类型(定义命令运行方式)", defaultValue = "Basic(mpi)", example = "Basic(mpi)") + private String type; + + /** + * 任务成功退出的有效退出码,默认0 + */ + @Schema(description = "任务成功退出的有效退出码", defaultValue = "0", example = "0,2") + private String validexitcodes; + + /** + * 任务运行的工作目录(HPC挂载路径,必填,最大长度160个字符) + */ + @Schema(description = "任务运行工作目录(HPC挂载路径,最大长度160个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "/hpc/workspace/task-01") + private String workdir; + + /** + * 任务命令行(包含命令、应用程序名称及必需参数,必填) + */ + @Schema(description = "任务命令行(含命令及参数,必填)", requiredMode = Schema.RequiredMode.REQUIRED, example = "/usr/bin/python3 /hpc/scripts/data_process.py --input data.csv --output result.csv") + private String command; + +} + diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CancelJobReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CancelJobReq.java new file mode 
100644 index 00000000..0fc74b6f --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CancelJobReq.java @@ -0,0 +1,34 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC作业取消请求参数(对应 job cancel 命令)") +public class CancelJobReq { + + /** + * 要取消的作业标识符(必填) + */ + @Schema(description = "指定要取消的作业的标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + + /** + * 取消作业方式:force 强制(立即停止,无宽限期,不运行节点发布任务)、graceful 优雅,默认graceful + */ + @Schema(description = "取消作业方式", defaultValue = "graceful", example = "graceful") + private String cancelWay; + + /** + * 取消作业的原因消息(最大长度128个字符) + */ + @Schema(description = "取消作业的原因消息(最大长度128个字符)", example = "业务需求变更,手动取消作业执行") + private String message; + + /** + * 提交作业的集群头节点主机名或IP地址 + */ + @Schema(description = "集群头节点主机名或IP地址", example = "192.168.1.200") + private String scheduler; + +} diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CloneJobReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CloneJobReq.java new file mode 100644 index 00000000..57505e54 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/CloneJobReq.java @@ -0,0 +1,20 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema (description = "HPC 作业克隆请求参数(对应 job clone 命令)") +public class CloneJobReq { + /** + 要复制的作业的标识符(必填) + */ + @Schema (description = "指定要复制的作业的标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + /** + 包含要复制作业的集群头节点主机名或 IP 地址 + */ + @Schema (description = "集群头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/FinishJobReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/FinishJobReq.java new 
file mode 100644 index 00000000..ee38f3e0 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/FinishJobReq.java @@ -0,0 +1,30 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC 作业完成请求参数(对应 job finish 命令)") +public class FinishJobReq { + /** + 要完成的作业的标识符(必填) + */ + @Schema (description = "指定要完成的作业的标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + /** + 是否正常完成作业(等待运行中任务完成),目前只有 graceful + */ + @Schema (description = "正常完成作业(等待运行中任务完成)", defaultValue = "graceful", example = "graceful") + private String finshWay; + /** + 完成作业的原因消息(最大长度 128 个字符) + */ + @Schema (description = "完成作业的原因消息(最大长度 128 个字符)", example = "作业已按预期完成,手动标记结束") + private String message; + /** + 提交作业的集群头节点主机名或 IP 地址 + */ + @Schema (description = "集群头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobModifyReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobModifyReq.java new file mode 100644 index 00000000..26139df2 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobModifyReq.java @@ -0,0 +1,221 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC作业属性修改请求参数(对应 job modify 命令)") +public class JobModifyReq { + + /** + * 要修改属性的作业标识符(必填) + */ + @Schema(description = "指定要修改属性的作业标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + + /** + * 新增作业不应运行的节点列表(格式:node1_name,node2_name...) 
+ */ + @Schema(description = "新增作业不应运行的节点列表(新增)", example = "node-003,node-004") + private String addexcludednodes; + + /** + * 是否清除所有作业不应运行的节点(true=清空列表) + */ + @Schema(description = "清除所有作业不应运行的节点列表", example = "false") + private Boolean clearexcludednodes; + + /** + * 从作业不应运行的节点列表中移除的节点(格式:node1_name,node2_name...) + */ + @Schema(description = "从作业不应运行的节点列表中移除的节点", example = "node-001") + private String removeexcludednodes; + + /** + * 节点视为作业候选节点的最小内核数 + */ + @Schema(description = "节点视为作业候选节点的最小内核数", example = "4") + private Integer corespernode; + + /** + * 作业的自定义属性(格式:name1=value1;name2=value2...) + */ + @Schema(description = "作业的自定义属性", example = "env=test;priority=high") + private String customproperties; + + /** + * 接收作业通知的电子邮件地址 + */ + @Schema(description = "接收作业通知的电子邮件地址", example = "notify@example.com") + private String emailaddress; + + /** + * 每个进程消耗的最大内存量(MB) + */ + @Schema(description = "每个进程消耗的最大内存量(MB)", example = "4096") + private Integer estimatedprocessmemory; + + /** + * 是否独占节点运行(其他作业不可用同一节点),默认True + */ + @Schema(description = "是否独占节点运行", defaultValue = "true", example = "true") + private Boolean exclusive ; + + /** + * 任务失败时是否使依赖任务失败,默认false + */ + @Schema(description = "任务失败时是否使依赖任务失败", defaultValue = "false", example = "false") + private Boolean faildependenttasks ; + + /** + * 任务失败时是否立即停止整个作业,默认true + */ + @Schema(description = "任务失败时是否立即停止整个作业", defaultValue = "true", example = "true") + private Boolean failontahpailure ; + + /** + * 作业运行时环境变量(格式:variable_name1=value1;variable_name2=value2...) 
+ */ + @Schema(description = "作业运行时环境变量", example = "PATH=/usr/local/bin;LOG_LEVEL=DEBUG") + private String jobenv; + + /** + * 作业名称(最大长度80个字符) + */ + @Schema(description = "作业名称(最大长度80个字符)", example = "修改后的作业名称") + private String jobname; + + /** + * 作业所需许可证信息(格式:license_name1:number1;license_name2:number2...,最大160字符) + */ + @Schema(description = "作业所需许可证信息", example = "python:2;spark:1") + private String license; + + /** + * 指定要用于作业的作业模板的名称。 + * 作业模板名称的最大长度为 80 个字符 + */ + @Schema(description = "指定要用于作业的作业模板的名称", example = "jobtemplate") + private String jobtemplate; + + /** + * 节点视为作业候选节点的最小内存量(MB,1-2147483647) + */ + @Schema(description = "节点视为作业候选节点的最小内存量(MB)", example = "16384") + private Integer memorypernode; + + /** + * 作业可运行的节点组(默认"所有节点") + */ + @Schema(description = "作业可运行的节点组", defaultValue = "所有节点", example = "gpu-node-group") + private String nodegroup; + + /** + * 作业结束时是否发送通知(状态:已完成/失败/已取消),默认True + */ + @Schema(description = "作业结束时是否发送通知", defaultValue = "true", example = "true") + private Boolean notifyoncompletion; + + /** + * 作业启动时是否发送通知,默认True + */ + @Schema(description = "作业启动时是否发送通知", defaultValue = "true", example = "true") + private Boolean notifyonstart ; + + /** + * 作业所需跨集群核心总数(格式:最小[-最大]) + */ + @Schema(description = "作业所需跨集群核心总数", example = "8-16") + private String numcores; + + /** + * 作业所需集群节点总数(格式:最小[-最大]) + */ + @Schema(description = "作业所需集群节点总数", example = "2-4") + private String numnodes; + + /** + * 作业所需跨集群套接字总数(格式:最小[-最大]) + */ + @Schema(description = "作业所需跨集群套接字总数", example = "2-4") + private String numsockets; + + /** + * 运行作业的帐户密码 + */ + @Schema(description = "运行作业的帐户密码", example = "******") + private String password; + + /** + * 作业依赖的父作业ID列表(格式:jobID1,jobID2...) 
+ */ + @Schema(description = "作业依赖的父作业ID列表", example = "2001,2002") + private String parentjobids; + + /** + * 作业优先级(0-4000),默认2000 + */ + @Schema(description = "作业优先级(0-4000)", defaultValue = "2000", example = "2500") + private Integer priority ; + + /** + * 作业完成百分比(0-100,需手动维护) + */ + @Schema(description = "作业完成百分比(0-100)", example = "50") + private Integer progress; + + /** + * 作业自定义状态消息 + */ + @Schema(description = "作业自定义状态消息", example = "作业正在调整资源配置") + private String progressmsg; + + /** + * 作业所属项目名称(最大长度80个字符) + */ + @Schema(description = "作业所属项目名称", example = "调整后的项目名称") + private String projectname; + + /** + * 作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间) + */ + @Schema(description = "作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间)", example = "2:08:30:00") + private String runtime; + + /** + * 是否运行至取消(忽略运行时限制) + */ + @Schema(description = "是否运行至取消(忽略运行时限制)", example = "true") + private Boolean rununtilcanceled; + + /** + * 包含要修改作业的集群头节点主机名或IP地址 + */ + @Schema(description = "集群头节点主机名或IP地址", example = "192.168.1.200") + private String scheduler; + + /** + * 是否在单个节点上分配所有资源,默认False + */ + @Schema(description = "是否在单个节点上分配所有资源", defaultValue = "false", example = "false") + private Boolean singlenode; + + /** + * 任务执行失败后自动重试次数(排除节点准备/发布任务) + */ + @Schema(description = "任务执行失败后自动重试次数", example = "2") + private Integer taskexecutionfailureretrylimit; + + /** + * 运行作业的帐户用户名(格式:[域\\]用户名) + */ + @Schema(description = "运行作业的帐户用户名", example = "DOMAIN\\user1") + private String user; + + /** + * 任务成功退出的有效退出码,默认0 + */ + @Schema(description = "任务成功退出的有效退出码", defaultValue = "0", example = "0,3") + private String validexitcodes; +} + diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobRequeueReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobRequeueReq.java new file mode 100644 index 00000000..7e6672b3 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobRequeueReq.java @@ -0,0 +1,21 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import 
io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC 作业重新排队请求参数(对应 job requeue 命令)") +public class JobRequeueReq { + /** + 要重新排队的作业标识符(必填) + 说明:仅作业状态为 “正在运行”“已取消” 或 “失败” 时可重新排队 + */ + @Schema (description = "指定要重新排队的作业标识符(仅支持状态:正在运行、已取消、失败)", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + /** + 包含要重新排队作业的集群头节点主机名或 IP 地址 + */ + @Schema (description = "集群头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobViewReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobViewReq.java new file mode 100644 index 00000000..1d72fabe --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/JobViewReq.java @@ -0,0 +1,30 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC 作业信息查看请求参数(对应 job view 命令)") +public class JobViewReq { + /** + 要查看信息的作业标识符(必填) + */ + @Schema (description = "指定要查看信息的作业标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + /** + 是否显示作业属性值的详细列表,默认 True + */ + @Schema (description = "显示作业属性值的详细列表", defaultValue = "true", example = "true") + private Boolean detailed; + /** + 是否显示作业的历史记录,默认 True + */ + @Schema (description = "显示作业的历史记录", defaultValue = "true", example = "true") + private Boolean history; + /** + 作业所在集群的头节点主机名或 IP 地址 + */ + @Schema (description = "指定作业所在集群的头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/ListTasksReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/ListTasksReq.java new file mode 100644 index 00000000..f560dece --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/ListTasksReq.java @@ -0,0 
+1,41 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC作业任务列表查询请求参数(对应 job listtasks 命令)") +public class ListTasksReq { + + /** + * 作业标识符(必填,指定要查询任务信息的作业) + */ + @Schema(description = "指定要查询任务信息的作业标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String jobId; + + /** + * 是否展开参数任务(列出每个子任务,而非总体参数任务),默认True + */ + @Schema(description = "展开参数任务,显示所有子任务信息", defaultValue = "true", example = "true") + private Boolean expand; + + /** + * 信息显示格式(可选值:list/table),默认list + */ + @Schema(description = "信息显示格式(list=列表格式,table=表格格式)", defaultValue = "list", example = "table", allowableValues = {"list", "table"}) + private String format; + + /** + * 集群头节点主机名或IP地址(指定作业所在集群) + */ + @Schema(description = "指定作业所在集群的头节点主机名或IP地址", example = "192.168.1.200") + private String scheduler; + + /** + * 按任务状态筛选(多个状态用逗号分隔) + */ + @Schema(description = "按任务状态筛选查询(多个状态用逗号分隔)", example = "Running,Completed,Failed") + private String state; + +} + diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NewJobReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NewJobReq.java new file mode 100644 index 00000000..fc430aa6 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NewJobReq.java @@ -0,0 +1,91 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "创建HPC JOB请求参数") +public class NewJobReq { + + @Schema(description = "指定要将节点视为运行作业的候选节点的最小内核数(1-2147483647)", example = "4") + private Integer corespernode; + + @Schema(description = "作业的自定义属性,格式为 name1=value1;name2=value2", example = "env=prod;type=batch") + private String customproperties; + + @Schema(description = "接收作业通知的电子邮件地址", example = "job-notify@example.com") + private String emailaddress; + + @Schema(description = "作业中每个进程消耗的最大内存量(MB)", example = "2048") + 
private Integer estimatedprocessmemory; + + @Schema(description = "是否独占节点运行(True表示其他作业不会在此节点运行),默认True", defaultValue = "true", example = "true") + private Boolean exclusive; + + @Schema(description = "任务失败时是否使依赖任务失败,默认false", defaultValue = "false", example = "false") + private Boolean faildependenttasks; + + @Schema(description = "任务失败时是否立即停止整个作业,默认true", defaultValue = "true", example = "true") + private Boolean failontahpailure; + + @Schema(description = "作业运行时环境变量,格式为 variable_name1=value1;variable_name2=value2", example = "JAVA_HOME=/usr/local/jdk17;PATH=/usr/bin") + private String jobenv; + + @Schema(description = "作业名称(必填,最大长度80个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "HPC-数据处理作业-20251116") + private String jobname; + + @Schema(description = "作业所需许可证信息,格式为 license_name1:number1;license_name2:number2(最大长度160字符)", example = "python:1;spark:2") + private String license; + + @Schema(description = "节点视为作业候选节点的最小内存量(MB,1-2147483647)", example = "8192") + private Integer memorypernode; + + @Schema(description = "作业可运行的节点组(必填,格式为 node_group1_name;node_group2_name,默认\"所有节点\")", requiredMode = Schema.RequiredMode.REQUIRED, defaultValue = "所有节点", example = "all-nodes;data-node-group") + private String nodegroup; + + @Schema(description = "作业结束时是否发送通知(状态:已完成/失败/已取消),默认True", defaultValue = "true", example = "true") + private Boolean notifyoncompletion; + + @Schema(description = "作业启动时是否发送电子邮件通知,默认True", defaultValue = "true", example = "true") + private Boolean notifyonstart; + + @Schema(description = "作业所需跨集群核心总数(必填,格式:最小[-最大])", requiredMode = Schema.RequiredMode.REQUIRED, example = "6-12") + private String numcores; + + @Schema(description = "作业所需集群节点总数(格式:最小[-最大])", example = "2-3") + private String numnodes; + + @Schema(description = "作业所需跨集群套接字总数(格式:最小[-最大])", example = "2-4") + private String numsockets; + + @Schema(description = "作业依赖的父作业ID列表,格式为 jobID1,jobID2...", example = "1005,1006") + private String parentjobids; + + 
@Schema(description = "作业优先级(0-4000),默认2000", defaultValue = "2000", example = "2200") + private Integer priority; + + @Schema(description = "作业完成百分比(0-100),需手动维护更新", example = "0") + private Integer progress; + + @Schema(description = "作业自定义状态消息", example = "作业已提交,等待调度执行") + private String progressmsg; + + @Schema(description = "作业所属项目名称(必填,最大长度80个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "大数据分析平台-用户行为统计") + private String projectname; + + @Schema(description = "作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间)", example = "2:08:30:00") + private String runtime; + + @Schema(description = "是否运行至取消(忽略运行时限制),默认不拼接即为false", example = "false") + private Boolean rununtilcanceled; + + @Schema(description = "是否在单个节点上分配所有资源,默认False", defaultValue = "false", example = "false") + private String singlenode; + + @Schema(description = "任务执行失败后自动重试次数(排除节点准备/发布任务)", example = "3") + private Integer taskexecutionfailureretrylimit; + + @Schema(description = "任务成功退出的验证退出码,默认0", defaultValue = "0", example = "0") + private String validexitcodes; +} + diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListCoreReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListCoreReq.java new file mode 100644 index 00000000..8ef48809 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListCoreReq.java @@ -0,0 +1,30 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC 节点核心列表查询请求参数(对应 node listcores 命令)") +public class NodeListCoreReq { + /** + 信息显示格式(可选值:list/table),默认 list + */ + @Schema (description = "信息显示格式(list = 列表格式,table = 表格格式)", defaultValue = "table", example = "table", allowableValues = {"list", "table"}) + private String format="list"; + /** + 按作业 ID 筛选(仅显示指定作业使用的核心) + */ + @Schema (description = "按作业 ID 筛选核心(仅显示该作业使用的核心)", example = "1001") + private String jobid; + /** + 集群头节点主机名或 IP 地址 + */ + @Schema 
(description = "指定查询的集群头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + /** + 按核心状态筛选(多个状态用逗号分隔) + */ + @Schema (description = "按核心状态筛选(多个状态用逗号分隔)", example = "Allocated,Idle") + private String state; + +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListReq.java new file mode 100644 index 00000000..897ef0ad --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeListReq.java @@ -0,0 +1,44 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC 节点列表查询请求参数(对应 node list 命令)") +public class NodeListReq { + /** + 是否仅显示活动的头节点 + */ + @Schema (description = "仅显示活动的头节点", example = "false") + private Boolean activeheadnode; + /** + 信息显示格式(可选值:list/table),默认 list,table可能有问题 + */ + @Schema (description = "信息显示格式(list = 列表格式,table = 表格格式)", defaultValue = "table", example = "table", allowableValues = {"list", "table"}) + private String format="list"; + /** + 按节点组名称筛选 + */ + @Schema (description = "按节点组名称筛选查询", example = "compute-group") + private String group; + /** + 是否显示节点历史信息(true = 显示,false = 不显示) + */ + @Schema (description = "是否显示节点历史信息", example = "true", allowableValues = {"true", "false"}) + private String history; + /** + 仅显示过去指定天数内有活动的节点(正整数) + */ + @Schema (description = "仅显示过去 N 天内有活动的节点(N 为正整数)", example = "30") + private Integer lastdays; + /** + 集群头节点主机名或 IP 地址 + */ + @Schema (description = "指定查询的集群头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + /** + 按节点状态筛选(如 Online、Offline 等) + */ + @Schema (description = "按节点状态筛选查询(如 Online、Offline 等)", example = "Online") + private String state; +} diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeViewReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeViewReq.java new file mode 100644 
index 00000000..b3fa192f --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/NodeViewReq.java @@ -0,0 +1,35 @@ +package com.sdm.common.entity.req.pbs.hpc; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC 节点信息查看请求参数(对应 node view 命令)") +public class NodeViewReq { + /** + 要查看信息的节点名称(必填) + */ + @Schema (description = "指定要查看信息的节点名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "node-001") + private String nodeName; + /** + 是否显示节点详细属性信息(true = 显示,false = 不显示) + */ + @Schema (description = "是否显示节点详细属性信息", example = "true", allowableValues = {"true", "false"}) + private String detailed; + /** + 是否显示节点历史信息(true = 显示,false = 不显示) + */ + @Schema (description = "是否显示节点历史信息", example = "true", allowableValues = {"true", "false"}) + private String history; + /** + 仅显示过去指定天数内的节点历史记录(正整数) + */ + @Schema (description = "仅显示过去 N 天内的节点历史记录(N 为正整数)", example = "7") + private Integer lastdays; + /** + 节点所在集群的头节点主机名或 IP 地址 + */ + @Schema (description = "指定节点所在集群的头节点主机名或 IP 地址", example = "192.168.1.200") + private String scheduler; + +} diff --git a/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/SubmitHpcJobReq.java b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/SubmitHpcJobReq.java new file mode 100644 index 00000000..eb5fba3d --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/req/pbs/hpc/SubmitHpcJobReq.java @@ -0,0 +1,257 @@ +package com.sdm.common.entity.req.pbs.hpc; + + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; + +@Data +@Schema(description = "HPC作业提交请求参数(对应 job submit 命令)") +public class SubmitHpcJobReq { + + /** + * 要提交的作业标识符(必填) + */ + @Schema(description = "指定要提交的作业的作业标识符", requiredMode = Schema.RequiredMode.REQUIRED, example = "1001") + private String id; + + /** + * 节点视为运行作业候选节点的最小内核数(可选) + */ + @Schema(description = "节点视为运行作业候选节点的最小内核数(可选)", example = "4") + private Integer corespernode; + + /** + * 
作业的自定义属性,格式:name1=value1;name2=value2... + */ + @Schema(description = "作业的自定义属性(格式:name1=value1;name2=value2...)", example = "env=prod;type=batch") + private String customproperties; + + /** + * 任务运行时环境变量列表,格式:variable_name1=value1;variable_name2=value2... + */ + @Schema(description = "任务运行时环境变量(格式:variable_name1=value1;variable_name2=value2...)", example = "LOG_LEVEL=INFO;TEMP_PATH=/tmp") + private String env; + + /** + * 作业中每个进程消耗的最大内存量(MB) + */ + @Schema(description = "作业中每个进程消耗的最大内存量(MB)", example = "2048") + private Integer estimatedprocessmemory; + + /** + * 是否独占节点运行(运行时同一节点无其他作业),默认True + */ + @Schema(description = "是否独占节点运行(True=同一节点无其他作业)", defaultValue = "true", example = "true") + private Boolean exclusive ; + + /** + * 任务失败时是否使依赖任务失败,默认True + */ + @Schema(description = "任务失败时是否使依赖任务失败", defaultValue = "true", example = "true") + private Boolean faildependenttasks ; + + /** + * 任务失败时是否立即停止整个作业,默认True + */ + @Schema(description = "任务失败时是否立即停止整个作业", defaultValue = "true", example = "true") + private Boolean failontahpailure; + + /** + * 作业运行时环境变量列表,格式:variable_name1=value1;variable_name2=value2... 
+ */ + @Schema(description = "作业运行时环境变量(格式:variable_name1=value1;variable_name2=value2...)", example = "JAVA_HOME=/usr/local/jdk17;PATH=/usr/bin") + private String jobenv; + + /** + * 作业名称(必填,最大长度80个字符) + */ + @Schema(description = "作业显示名称(最大长度80个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "HPC-模型训练作业-20251117") + private String jobname; + + /** + * 作业所需许可证信息,格式:license_name1:number1;license_name2:number2...(最大长度160个字符) + */ + @Schema(description = "作业所需许可证信息(格式:license_name1:number1;license_name2:number2...)", example = "tensorflow:2;cuda:1") + private String license; + + /** + * 节点视为作业候选节点的最小内存量(MB) + */ + @Schema(description = "节点视为作业候选节点的最小内存量(MB)", example = "8192") + private Integer memorypernode; + + /** + * 任务显示名称(最大长度80个字符) + */ + @Schema(description = "任务显示名称(最大长度80个字符)", example = "数据预处理子任务-01") + private String name; + + /** + * 作业可运行的节点组列表,格式:node_group1_name,node_group2_name... + */ + @Schema(description = "作业可运行的节点组列表(格式:node_group1_name,node_group2_name...)", example = "compute-group,gpu-group") + private String nodegroup; + + /** + * 作业结束时是否发送通知,默认True + */ + @Schema(description = "作业结束时是否发送通知", defaultValue = "true", example = "true") + private Boolean notifyoncompletion; + + /** + * 作业启动时是否发送通知,默认True + */ + @Schema(description = "作业启动时是否发送通知", defaultValue = "true", example = "true") + private Boolean notifyonstart; + + /** + * 作业所需跨集群核心总数,格式:最小[-最大] + */ + @Schema(description = "作业所需跨集群核心总数(格式:最小[-最大])", example = "8-16") + private String numcores; + + /** + * 作业所需集群节点总数,格式:最小[-最大] + */ + @Schema(description = "作业所需集群节点总数(格式:最小[-最大])", example = "2-4") + private String numnodes; + + /** + * 任务所需跨集群套接字总数,格式:最小[-最大] + */ + @Schema(description = "任务所需跨集群套接字总数(格式:最小[-最大])", example = "2-4") + private String numsockets; + + /** + * 是否为参数任务(多次运行并替换命令行索引值) + */ + @Schema(description = "是否为参数任务(True=多次运行并替换命令行索引值)", example = "false") + private Boolean parametric; + + /** + * 作业依赖的父作业ID列表,格式:jobID1,jobID2... 
+ */ + @Schema(description = "作业依赖的父作业ID列表(格式:jobID1,jobID2...)", example = "2001,2002") + private String parentjobids; + + /** + * 运行作业的帐户密码(必填) + */ + @Schema(description = "运行作业的帐户密码", requiredMode = Schema.RequiredMode.REQUIRED, example = "******") + private String password; + + /** + * 作业优先级(0-4000),默认2000 + */ + @Schema(description = "作业优先级(0-4000)", defaultValue = "2000", example = "2500") + private Integer priority; + + /** + * 作业完成百分比(0-100) + */ + @Schema(description = "作业完成百分比(0-100)", example = "0") + private Integer progress; + + /** + * 作业自定义状态消息(最大长度80个字符) + */ + @Schema(description = "作业自定义状态消息(最大长度80个字符)", example = "作业已提交,等待调度") + private String progressmsg; + + /** + * 作业所属项目名称(必填,最大长度80个字符) + */ + @Schema(description = "作业所属项目名称(最大长度80个字符)", requiredMode = Schema.RequiredMode.REQUIRED, example = "AI模型训练项目-图像识别") + private String projectname; + + /** + * 任务必须运行的节点列表,格式:node1_name,node2_name... + */ + @Schema(description = "任务必须运行的节点列表(格式:node1_name,node2_name...)", example = "node-001,node-002") + private String requirednodes; + + /** + * 任务失败后是否重试,默认True + */ + @Schema(description = "任务失败后是否重试", defaultValue = "true", example = "true") + private Boolean rerunnable; + + /** + * 任务最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间) + */ + @Schema(description = "作业最大运行时间(格式:天:时:分:秒,默认Infinite表示无限时间)", example = "2:08:30:00") + private String runtime; + + /** + * 是否运行至取消(忽略运行时限制) + */ + @Schema(description = "是否运行至取消(忽略运行时限制)", example = "false") + private Boolean rununtilcanceled; + + /** + * 集群头节点的主机名或IP地址 + */ + @Schema(description = "集群头节点主机名或IP地址", example = "192.168.1.200") + private String scheduler; + + /** + * 是否在单个节点上分配所有资源,默认True + */ + @Schema(description = "是否在单个节点上分配所有资源", defaultValue = "true", example = "true") + private Boolean singlenode; + + /** + * 标准错误流重定向文件路径(最大长度160个字符) + */ + @Schema(description = "标准错误流重定向文件路径(最大长度160个字符)", example = "/hpc/logs/job-01/stderr.log") + private String stderr; + + /** + * 标准输入文件路径(最大长度160个字符) + */ + @Schema(description = 
"标准输入文件路径(最大长度160个字符)", example = "/hpc/input/job-01/stdin.txt") + private String stdin; + + /** + * 标准输出流重定向文件路径(最大长度160个字符) + */ + @Schema(description = "标准输出流重定向文件路径(最大长度160个字符)", example = "/hpc/logs/job-01/stdout.log") + private String stdout; + + /** + * 任务执行失败后自动重试次数(排除节点准备/发布任务) + */ + @Schema(description = "任务执行失败后自动重试次数", example = "3") + private Integer taskexecutionfailureretrylimit; + + /** + * 任务类型(定义命令运行方式),默认Basic(mpi) + */ + @Schema(description = "任务类型(定义命令运行方式)", defaultValue = "Basic(mpi)", example = "Basic(mpi)") + private String type; + + /** + * 任务运行的工作目录(最大长度160个字符) + */ + @Schema(description = "任务运行工作目录(最大长度160个字符)", example = "/hpc/workspace/job-01") + private String workdir; + + /** + * 运行作业的帐户用户名(含可选域,必填) + */ + @Schema(description = "运行作业的帐户用户名(格式:[域\\]用户名)", requiredMode = Schema.RequiredMode.REQUIRED, example = "SDM\\admin") + private String user; + + /** + * 任务成功退出的有效退出码,默认0 + */ + @Schema(description = "任务成功退出的有效退出码", defaultValue = "0", example = "0,2") + private String validexitcodes; + + /** + * 任务命令行(包含命令、应用程序名称及必需参数,必填) + */ + @Schema(description = "任务命令行(含命令及参数,必填)", requiredMode = Schema.RequiredMode.REQUIRED, example = "/usr/bin/python3 /hpc/scripts/train.py --model resnet50 --epoch 50") + private String command; + +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/AddJobResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/AddJobResp.java new file mode 100644 index 00000000..5546681b --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/AddJobResp.java @@ -0,0 +1,8 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class AddJobResp { + private String tsakId; +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobCancelResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobCancelResp.java new file mode 100644 index 00000000..2329d257 --- /dev/null +++ 
b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobCancelResp.java @@ -0,0 +1,8 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class JobCancelResp { + private Boolean canceled; +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobFinishResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobFinishResp.java new file mode 100644 index 00000000..b9857057 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobFinishResp.java @@ -0,0 +1,8 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class JobFinishResp { + private Boolean finished; +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobModifyResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobModifyResp.java new file mode 100644 index 00000000..4f8e3c58 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobModifyResp.java @@ -0,0 +1,8 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class JobModifyResp { + private Boolean modified; +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewResp.java new file mode 100644 index 00000000..05a7d7f8 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewResp.java @@ -0,0 +1,22 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class JobViewResp { + String id; + String state; + String name; + String projectName; + String owner; + String template; + String priority; + String resourceRequest; + String type; + String allocatedNodes; + String submitTime; + String startTime; + String endTime; + String progress; + JobViewTaskStatus taskStatus; +} \ No newline at end of file diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewTaskStatus.java 
b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewTaskStatus.java new file mode 100644 index 00000000..151eb3ae --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/JobViewTaskStatus.java @@ -0,0 +1,13 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class JobViewTaskStatus { + int configuring; + int queued; + int running; + int finished; + int failed; + int canceled; +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListJobResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListJobResp.java new file mode 100644 index 00000000..e0a6b50b --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListJobResp.java @@ -0,0 +1,17 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class ListJobResp { + // 任务ID(对应表格Id列) + private String id; + // 所有者(对应表格Owner列,格式:域\用户名) + private String owner; + // 任务名称(对应表格Name列) + private String name; + // 任务状态(对应表格State列,如Configuring、Finished等) + private String state; + // 优先级(对应表格Priority列,如Normal) + private String priority; +} diff --git a/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListTasksResp.java b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListTasksResp.java new file mode 100644 index 00000000..45e6a981 --- /dev/null +++ b/common/src/main/java/com/sdm/common/entity/resp/pbs/hpc/ListTasksResp.java @@ -0,0 +1,20 @@ +package com.sdm.common.entity.resp.pbs.hpc; + +import lombok.Data; + +@Data +public class ListTasksResp { + private String taskId; + private String state; + private String taskName; + private String commandLine; + private String resourceRequest; + private String allocatedNodes; + private String exitCode; + private String errorMessage; + private String output; + private String startTime; + private String endTime; + private String totalKernelTime; + private String totalUserTime; +} diff --git 
/**
 * One entry of the HPC "node listcores" output; accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class NodeListCoreResp {
    private String nodeProcessor; // e.g. CARSAFECLIENT - 0
    private String state; // Idle / Running / Offline
    private String jobId; // may be empty
    private String taskId; // may be empty
    private String commandLine; // may be empty
}
/**
 * Utility for running a command line through the Windows {@code cmd.exe} shell and collecting
 * what it prints.
 *
 * <p>NOTE(review): the command string is handed to {@code cmd.exe /c} verbatim. Callers must not
 * pass unvalidated external input, otherwise arbitrary shell commands can be injected.
 */
public class CmdCommandExcuteUtil {

    /** Maximum time to wait for the command to exit: 120, expressed in {@link #TIMEOUT_UNIT}. */
    private static final long DEFAULT_TIMEOUT = 120;
    private static final TimeUnit TIMEOUT_UNIT = TimeUnit.SECONDS;

    /**
     * Upper bound, in milliseconds, granted to each reader thread to finish draining its stream
     * once the process has exited. It keeps a pipe held open by a grandchild process from
     * blocking the caller forever.
     */
    private static final long STREAM_DRAIN_TIMEOUT_MILLIS = 5_000;

    /**
     * Runs a command with {@code cmd.exe /c} and returns its result as text.
     *
     * <p>Standard output and standard error are consumed on separate threads so the child can
     * never block on a full pipe. Both threads are joined before the collected text is read;
     * without that join the result could be returned while output was still being appended.
     *
     * @param command the command to run (for example {@code "ipconfig /all"})
     * @return the trimmed standard output when the exit code is 0; otherwise a message that
     *         describes the failure (blank command, timeout, non-zero exit code together with the
     *         standard error text, or the exception that occurred)
     */
    public static String executeCommand(String command) {
        // 1. Reject missing or blank commands up front.
        if (command == null || command.trim().isEmpty()) {
            return "命令不能为空";
        }
        Process process = null;
        BufferedReader inputReader = null;
        BufferedReader errorReader = null;
        // StringBuffer (synchronized) because the buffers are written by the reader threads and
        // read here; it stays safe even when a join below runs into its timeout.
        StringBuffer output = new StringBuffer();
        StringBuffer error = new StringBuffer();
        try {
            // 2. Start the process; stderr is kept separate from stdout.
            ProcessBuilder pb = new ProcessBuilder("cmd.exe", "/c", command);
            pb.redirectErrorStream(false);
            process = pb.start();

            // 3. Drain stdout asynchronously (decoded as GBK).
            inputReader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), "GBK"));
            Thread outputPump = readStreamAsync(inputReader, output);

            // 4. Drain stderr asynchronously.
            errorReader = new BufferedReader(
                    new InputStreamReader(process.getErrorStream(), "GBK"));
            Thread errorPump = readStreamAsync(errorReader, error);

            // 5. Wait for the command to finish, bounded by the timeout.
            boolean finished = process.waitFor(DEFAULT_TIMEOUT, TIMEOUT_UNIT);
            if (!finished) {
                process.destroyForcibly();
                return "命令执行超时(" + DEFAULT_TIMEOUT + "秒):" + command;
            }

            // 6. Let both readers reach end-of-stream before the buffers are inspected.
            outputPump.join(STREAM_DRAIN_TIMEOUT_MILLIS);
            errorPump.join(STREAM_DRAIN_TIMEOUT_MILLIS);

            // 7. A non-zero exit code is reported together with the stderr text.
            int exitCode = process.exitValue();
            if (exitCode != 0) {
                return "命令执行失败(状态码:" + exitCode + "),错误信息:" + error.toString().trim();
            }
            return output.toString().trim();
        } catch (IOException e) {
            return "命令执行IO异常:" + e.getMessage();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            return "命令执行中断异常:" + e.getMessage();
        } catch (Exception e) {
            return "命令执行未知异常:" + e.getMessage();
        } finally {
            // 8. Always release the streams and the process handle.
            closeQuietly(inputReader);
            closeQuietly(errorReader);
            if (process != null) {
                process.destroy();
            }
        }
    }

    /**
     * Starts a daemon thread that copies every line of {@code reader} into {@code result}, each
     * followed by {@code "\n"}, and returns that thread so the caller can join it.
     */
    private static Thread readStreamAsync(BufferedReader reader, StringBuffer result) {
        Thread pump = new Thread(() -> {
            String line;
            try {
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            } catch (IOException ignored) {
                // The stream is closed when the process ends or is destroyed; nothing to recover.
            }
        }, "cmd-stream-reader");
        // Daemon, so a reader stuck on a pipe cannot keep the JVM alive.
        pump.setDaemon(true);
        pump.start();
        return pump;
    }

    /** Closes the reader if it was opened; a failure to close is deliberately ignored. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Best-effort cleanup only.
        }
    }
}
LISTJOB_PREFIX = "job list"; + private static final String TASKSJOB_PREFIX = "job listtasks %s"; + private static final String MODIFYJOB_PREFIX = "job modify %s"; + private static final String REQUEUEJOB_PREFIX = "job requeue %s"; + private static final String VIEWJOB_PREFIX = "job view %s"; + + public static String initNodeListPrefixStr(Boolean activeheadnode) { + String nodeListCommand = NODELIST_PREFIX; + if(Objects.equals(activeheadnode, "true")){ + nodeListCommand = nodeListCommand + SPACE + "/activeheadnode"; + } + return nodeListCommand; + } + + public static String initNodeListCorePrefixStr() { + return NODELISTCORE_PREFIX; + } + + public static String initNodeViewPrefixStr(String nodeName) { + return String.format(NODEVIEW_PREFIX, escapeParam("nodeName",nodeName)); + } + + public static String initNewJobPrefixStr() { + return NEWJOB_PREFIX; + } + + public static String initAddJobPrefixStr(String jobId) { + return String.format(ADDJOB_PREFIX, escapeParam("jobId",jobId)); + } + + public static String initSubmitJobPrefixStr(String id) { + return String.format(SUBMITJOB_PREFIX, escapeParam("id",id)); + } + + public static String initCancelJobPrefixStr(String jobId, String cancelWay) { + String cancelCommand = String.format(CANCELJOB_PREFIX, escapeParam("jobId", jobId)); + if(StringUtils.isNotBlank(cancelWay)&& + (Objects.equals(cancelWay, "graceful")|| Objects.equals(cancelWay, "force"))){ + cancelCommand = cancelCommand+SPACE+"/"+cancelWay; + } + return cancelCommand; + } + + public static String initFinishJobPrefixStr(String jobId, String finshWay) { + String finishJobCommand = String.format(FINISHJOB_PREFIX, escapeParam("jobId", jobId)); + if(StringUtils.isNotBlank(finshWay)&&Objects.equals(finshWay, "graceful")){ + finishJobCommand = finishJobCommand+SPACE+"/"+finshWay; + } + return finishJobCommand; + } + + + public static String initCloneJobPrefixStr(String jobId) { + return String.format(CLONEJOB_PREFIX, escapeParam("jobId", jobId)); + } + + public 
static String initListJobPrefixStr(Boolean all) { + String listJobCommand=LISTJOB_PREFIX; + if(Objects.equals(all, true)){ + listJobCommand = listJobCommand + SPACE + "/all"; + } + return listJobCommand; + } + + public static String initTasksJobPrefixStr(String jobId, Boolean expand) { + String tasksJobCommand = String.format(TASKSJOB_PREFIX, escapeParam("jobId", jobId)); + if(Objects.equals(expand, true)){ + tasksJobCommand = tasksJobCommand+SPACE+"/expand"; + } + return tasksJobCommand; + } + + public static String initModifyJobPrefixStr(String jobId, Boolean clearexcludednodes) { + String modifyJobCommand = String.format(MODIFYJOB_PREFIX, escapeParam("jobId", jobId)); + if(Objects.equals(clearexcludednodes, true)){ + modifyJobCommand = modifyJobCommand+SPACE+"/clearexcludednodes"; + } + return modifyJobCommand; + } + + public static String initRequeueJobPrefixStr(String jobId) { + return String.format(REQUEUEJOB_PREFIX, escapeParam("jobId", jobId)); + } + + public static String initViewJobPrefixStr(String jobId) { + return String.format(VIEWJOB_PREFIX, escapeParam("jobId", jobId)); + } + + + /** + * 根据传入的对象生成 HPC 命令参数,只管拼接,特殊格式paramObj已经处理好! + * @param paramObj 需要转换的对象 + * @param prefixCommand 前缀命令 + * @param suffixCommand 后缀命令 + * @return 拼接好的命令参数字符串,如 /name:xxx /priority:100 + */ + public static String buildHpcCommandStr(String prefixCommand, Object paramObj, String suffixCommand) { + String className = paramObj.getClass().getSimpleName(); + // 1. 初始化 StringJoiner:分隔符用空格,避免参数间重复拼接 + StringJoiner joiner = new StringJoiner(SPACE); + // 2. 先添加前缀命令 + if (StringUtils.isBlank(prefixCommand)) { + CoreLogger.warn("from:{},buildHpcCommandStr prefixCommand null",className); + return ""; + } + joiner.add(prefixCommand); + // 3. 
反射拼接参数(/字段名:值 格式,仅拼接非空值) + if (paramObj != null) { + try { + Class clazz = paramObj.getClass(); + Field[] fields = clazz.getDeclaredFields(); + for (Field field : fields) { + field.setAccessible(true); + Object value = field.get(paramObj); + if (value == null) continue; + // 拼接单个参数:/字段名:值(如 /jobname:test) + String param = "/" + field.getName() + ":" + value.toString(); + joiner.add(param); + } + } catch (Exception e) { + CoreLogger.error("from:{},buildHpcCommandStr error:{},paramObj:{}", className,e.getMessage(),JSONObject.toJSONString(paramObj)); + } + } + + // 4. 拼接后缀命令(仅非空时添加) + if (StringUtils.isNotBlank(suffixCommand)) { + joiner.add(suffixCommand); + } + + // 5. 生成最终命令并日志输出 + String command = joiner.toString().trim(); // 去除首尾多余空格 + CoreLogger.info("from:{},hpcCommandStr:{},paramObj:{}", HpcParamFromEnum.getFromByClassName(className), command, JSONObject.toJSONString(paramObj)); + return command; + } + + private static String escapeParam(String filedName,String param) { + if (param == null) { + throw new IllegalArgumentException("参数不能为空:"+filedName); + } + return param; + } + +} diff --git a/common/src/main/java/com/sdm/common/utils/HpcCommandExcuteUtil.java b/common/src/main/java/com/sdm/common/utils/HpcCommandExcuteUtil.java new file mode 100644 index 00000000..17553524 --- /dev/null +++ b/common/src/main/java/com/sdm/common/utils/HpcCommandExcuteUtil.java @@ -0,0 +1,54 @@ +package com.sdm.common.utils; + +import com.sdm.common.log.CoreLogger; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.util.Objects; + +@Slf4j +@Component +public class HpcCommandExcuteUtil { + + @Value("${hpc.remoteCmdUrl:}") + private String remoteCmdUrl; + + @Autowired + private HttpClientUtil httpClientUtil; + + public String excuteCmd(String command,String hpcExcuteWay) { + 
if(Objects.equals(hpcExcuteWay,"remote")){ + return remoteExcuteCmd(command); + } + if(Objects.equals(hpcExcuteWay,"local")){ + String resultString = CmdCommandExcuteUtil.executeCommand(command); + CoreLogger.info("remoteExcuteCmd back:{}",resultString); + return resultString; + } + CoreLogger.error("hpcExcuteWay config error:{}",hpcExcuteWay); + return ""; + } + + + // 发起远程command命令调用,mock 执行cmd命令,后期直接是服务运行hpc机器,直接执行process命令 + private String remoteExcuteCmd(String command) + { + com.alibaba.fastjson2.JSONObject paramJson = new com.alibaba.fastjson2.JSONObject(); + paramJson.put("command",command); + String resultString = ""; + try { + resultString = httpClientUtil.doPostJson(remoteCmdUrl, paramJson.toJSONString()); + CoreLogger.info("remoteExcuteCmd back:{}",resultString); + } + catch (Exception e) + { + CoreLogger.error("remoteExcuteCmd error,command:{},errMsg:{}",command,e.getMessage()); + resultString=e.getMessage(); + } + return resultString; + } + + +} diff --git a/common/src/main/java/com/sdm/common/utils/HpcCommandResulParseUtil.java b/common/src/main/java/com/sdm/common/utils/HpcCommandResulParseUtil.java new file mode 100644 index 00000000..b08661a6 --- /dev/null +++ b/common/src/main/java/com/sdm/common/utils/HpcCommandResulParseUtil.java @@ -0,0 +1,525 @@ +package com.sdm.common.utils; + +import com.sdm.common.entity.resp.pbs.hpc.*; +import com.sdm.common.log.CoreLogger; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +@Slf4j +public class HpcCommandResulParseUtil { + + // 静态编译正则表达式:忽略大小写,匹配任意空格数量的"Job has been submitted" + private static final Pattern JOB_SUBMITTED_PATTERN = Pattern.compile("(?i)Job\\s+has\\s+been\\s+submitted"); + + private static final Pattern JOB_ADD_PATTERN = Pattern.compile("Task\\s+([0-9]+(?:\\.[0-9]+)?)\\s+added\\.?$", Pattern.CASE_INSENSITIVE); + + public static NewJobResp 
parseJobNewResult(String cmdOutput) { + NewJobResp newJobResp = new NewJobResp(); + try { + // 按逗号分割,然后按冒号分割 + String[] parts = cmdOutput.split(","); + for (String part : parts) { + if (part.trim().startsWith("ID:")) { + String idStr = part.split(":")[1].trim(); + newJobResp.setJobId(idStr); + return newJobResp; + } + } + } catch (Exception e) { + CoreLogger.error("parseJobNewResult error:{}",cmdOutput); + } + return newJobResp; + } + + public static AddJobResp parseJoAddResult(String cmdOutput){ + AddJobResp addJobResp = new AddJobResp(); + if (StringUtils.isBlank(cmdOutput)) { + CoreLogger.warn("parseJoAddResult cmdOutput null"); + return addJobResp; + } + // 正则匹配 Task xxx added(末尾可有.号) + // 允许多个空格,数字支持小数点 (job.task 格式) + try { + Matcher matcher = JOB_ADD_PATTERN.matcher(cmdOutput.trim()); + if (matcher.find()) { + String taskId = matcher.group(1); + addJobResp.setTsakId(taskId); + } + } catch (Exception e) { + CoreLogger.warn("parseJoAddResult error, cmdOutput: {}", cmdOutput); + } + CoreLogger.warn("parseJoAddResult format incorrect, cmdOutput: {}", cmdOutput); + return addJobResp; + } + + public static SubmitHpcJobResp parseJobSubmitResult(String cmdOutput){ + SubmitHpcJobResp submitHpcJobResp = new SubmitHpcJobResp(); + try { + if (StringUtils.isEmpty(cmdOutput)) { + CoreLogger.warn("parseJobSubmitResult cmdOutput null"); + submitHpcJobResp.setSubmit(false); + return submitHpcJobResp; + } + // 1. (?i) 表示忽略大小写 + // 2. 
Job\s+has\s+been\s+submitted 表示匹配"Job"、"has"、"been"、"submitted" + // 其中 \s+ 表示1个或多个空格(兼容任意数量的空格) + // 匹配输入字符串 + boolean submit = JOB_SUBMITTED_PATTERN.matcher(cmdOutput).find(); + submitHpcJobResp.setSubmit(submit); + return submitHpcJobResp; + } catch (Exception e) { + CoreLogger.warn("parseJobSubmitResult error,cmdOutput:{},errMsg:{}",cmdOutput,e.getMessage()); + submitHpcJobResp.setSubmit(false); + return submitHpcJobResp; + } + } + + public static List parseJobLists(String cmdOutput) { + List result = new ArrayList<>(); + try { + if (StringUtils.isBlank(cmdOutput)) { + CoreLogger.error("parseJobLists cmdOutput null,cmdOutput:{}",cmdOutput); + return result; + } + + String[] lines = cmdOutput.split("\r?\n"); + ListJobResp current = null; + + for (String line : lines) { + if (line == null) continue; + + String trimmed = line.trim(); + if (StringUtils.isBlank(trimmed)) { + continue; // 空行直接跳过,不影响逻辑 + } + + // 识别 “X jobs listed” —— 中间空格数量不确定 + if (trimmed.matches("\\d+\\s+jobs\\s+listed")) { + continue; + } + + // 解析 key: value 行 + if (trimmed.contains(":")) { + String[] kv = trimmed.split(":", 2); + if (kv.length < 2) { + continue; + } + + String key = kv[0].trim(); + String value = kv[1].trim(); + + // 如果遇到 Id,则开始一个新的 job 块 + if ("Id".equals(key)) { + // 如果之前正在构造 Job,则先保存 + if (current != null && current.getId() != null) { + result.add(current); + } + current = new ListJobResp(); + current.setId(value); + continue; + } + + // 如果 current 为空但遇到非 Id 行,则忽略(防止脏数据) + if (current == null) { + continue; + } + + switch (key) { + case "Owner": + current.setOwner(value); + break; + case "Name": + current.setName(value); + break; + case "State": + current.setState(value); + break; + case "Priority": + current.setPriority(value); + break; + default: + // 其他字段不处理 + break; + } + } + } + + // 把最后一个 job 加进去 + if (current != null && current.getId() != null) { + result.add(current); + } + } catch (Exception e) { + CoreLogger.error("parseJobLists 
error,cmdOutput:{},errMsg:{}",cmdOutput,e.getMessage()); + } + return result; + } + + public static List parseJobTasks(String cmdOutput) { + List result = new ArrayList<>(); + if (cmdOutput == null || cmdOutput.trim().isEmpty()) { + return result; + } + String[] lines = cmdOutput.split("\r?\n"); + ListTasksResp current = null; + String lastKey = null; + for (String line : lines) { + String trimmed = line.trim(); + if (trimmed.isEmpty()) { + if (current != null && current.getTaskId() != null) { + result.add(current); + current = null; + lastKey = null; + } + continue; + } + + // 遇到 Task Id 开头新建对象 + if (trimmed.startsWith("Task Id")) { + current = new ListTasksResp(); + } + if (current == null) continue; + if (trimmed.contains(":")) { + // key:value 行 + String[] kv = trimmed.split(":", 2); + if (kv.length < 2) continue; + String key = kv[0].trim(); + String value = kv[1].trim(); + lastKey = key; + // 通过反射或 switch 赋值 + switch (key) { + case "Task Id": current.setTaskId(value); break; + case "State": current.setState(value); break; + case "Task Name": current.setTaskName(value); break; + case "Command Line": current.setCommandLine(value); break; + case "Resource Request": current.setResourceRequest(value); break; + case "Allocated Nodes": current.setAllocatedNodes(value); break; + case "Exit Code": current.setExitCode(value); break; + case "Error Message": current.setErrorMessage(value); break; + case "Output": current.setOutput(value); break; + case "Start Time": current.setStartTime(value); break; + case "End Time": current.setEndTime(value); break; + case "Total Kernel Time": current.setTotalKernelTime(value); break; + case "Total User Time": current.setTotalUserTime(value); break; + default: + // 未知字段,放到 Output 或 ErrorMessage 里也可以,或者忽略 + break; + } + + } else if (lastKey != null) { + // 多行续行:追加到 lastKey 对应字段 + String appendValue = trimmed; + try { + java.lang.reflect.Method getter = current.getClass().getMethod( + "get" + lastKey.replace(" ", "")); + 
java.lang.reflect.Method setter = current.getClass().getMethod( + "set" + lastKey.replace(" ", ""), String.class); + String prev = (String) getter.invoke(current); + setter.invoke(current, (prev == null ? "" : prev + " ") + appendValue); + } catch (Exception ignored) { + // 如果没有对应字段,忽略 + } + } + } + + if (current != null && current.getTaskId() != null) { + result.add(current); + } + return result; + } + + public static JobViewResp parseJobView(String cmdOutput) { + if (cmdOutput == null || cmdOutput.trim().isEmpty()) { + return null; + } + + JobViewResp job = new JobViewResp(); + JobViewTaskStatus taskStatus = new JobViewTaskStatus(); + boolean inTaskBlock = false; // 标识是否在 Task Count 下的嵌套块 + String lastKey = null; // 记录上一行 key,用于多行值累加 + String[] lines = cmdOutput.split("\r?\n"); + for (String line : lines) { + if (line.trim().isEmpty()) continue; + + // 判断是否缩进,缩进4个空格或1个制表符 + boolean isIndented = line.startsWith(" ") || line.startsWith("\t"); + + if (isIndented && inTaskBlock) { + // 解析 Task 状态统计 + String[] kv = line.trim().split(":", 2); + if (kv.length < 2) continue; + String key = kv[0].trim(); + String value = kv[1].trim(); + try { + switch (key) { + case "Configuring tasks": taskStatus.setConfiguring(Integer.parseInt(value)); break; + case "Queued tasks": taskStatus.setQueued(Integer.parseInt(value)); break; + case "Running tasks": taskStatus.setRunning(Integer.parseInt(value)); break; + case "Finished tasks": taskStatus.setFinished(Integer.parseInt(value)); break; + case "Failed tasks": taskStatus.setFailed(Integer.parseInt(value)); break; + case "Canceled tasks": taskStatus.setCanceled(Integer.parseInt(value)); break; + } + } catch (NumberFormatException ignored) {} + continue; + } + + // 普通字段解析 + if (line.contains(":")) { + String[] kv = line.split(":", 2); + String key = kv[0].trim(); + String value = kv[1].trim(); + + lastKey = key; // 记录key,便于换行累加 + + switch (key) { + case "Id": job.setId(value); break; + case "State": job.setState(value); break; + case 
"Name": job.setName(value); break; + case "Project Name": job.setProjectName(value); break; + case "Owner": job.setOwner(value); break; + case "Template": job.setTemplate(value); break; + case "Priority": job.setPriority(value); break; + case "Resource Request": job.setResourceRequest(value); break; + case "Type": job.setType(value); break; + case "Allocated Nodes": job.setAllocatedNodes(value); break; + case "Submit Time": job.setSubmitTime(value); break; + case "Start Time": job.setStartTime(value); break; + case "End Time": job.setEndTime(value); break; + case "Progress": job.setProgress(value); break; + case "Task Count": + inTaskBlock = true; // 开始解析 Task 状态统计 + break; + } + } else { + // 如果没有冒号,可能是上一行字段值换行了,累加 + if (lastKey != null) { + String prev = ""; + switch (lastKey) { + case "Name": prev = job.getName(); job.setName(prev + " " + line.trim()); break; + case "Allocated Nodes": prev = job.getAllocatedNodes(); job.setAllocatedNodes(prev + " " + line.trim()); break; + case "Project Name": prev = job.getProjectName(); job.setProjectName(prev + " " + line.trim()); break; + } + } + } + } + + job.setTaskStatus(taskStatus); + return job; + } + + public static JobModifyResp parseJobModify(String cmdOutput) { + JobModifyResp jobModifyResp = new JobModifyResp(); + if(StringUtils.isBlank(cmdOutput)){ + jobModifyResp.setModified(true); + return jobModifyResp; + } + jobModifyResp.setModified(false); + return jobModifyResp; + } + + public static JobCancelResp parseJobCancel(String cmdOutput) { + JobCancelResp jobCancelResp = new JobCancelResp(); + if(StringUtils.isBlank(cmdOutput)){ + jobCancelResp.setCanceled(true); + return jobCancelResp; + } + jobCancelResp.setCanceled(false); + return jobCancelResp; + } + + public static JobFinishResp parseJobFinish(String cmdOutput) { + JobFinishResp jobFinishResp = new JobFinishResp(); + if(StringUtils.isBlank(cmdOutput)){ + jobFinishResp.setFinished(true); + return jobFinishResp; + } + jobFinishResp.setFinished(false); + return 
jobFinishResp; + } + + + + public static List parseNodList(String cmdOutput) { + List result = new ArrayList<>(); + if (cmdOutput == null || cmdOutput.trim().isEmpty()) { + return result; + } + String[] lines = cmdOutput.split("\r?\n"); + NodeListResp current = null; + for (String line : lines) { + String trimmed = line.trim(); + // 空行 = 一个节点结束 + if (trimmed.isEmpty()) { + if (current != null && current.getNodeName() != null) { + result.add(current); + current = null; + } + continue; + } + // 新的 Node 记录开始 + if (trimmed.startsWith("Node Name")) { + current = new NodeListResp(); + } + if (current != null && trimmed.contains(":")) { + String[] kv = trimmed.split(":", 2); + if (kv.length < 2) continue; + String key = kv[0].trim(); + String value = kv[1].trim(); + switch (key) { + case "Node Name": current.setNodeName(value); break; + case "State": current.setState(value); break; + case "Max": current.setMax(value); break; + case "Run": current.setRun(value); break; + case "IdleResourceCount": current.setIdleResourceCount(value); break; + case "Availability": current.setAvailability(value); break; + case "Cost Per Hour": current.setCostPerHour(value); break; + } + } + } + // 末尾可能没有空行,也要收集 + if (current != null && current.getNodeName() != null) { + result.add(current); + } + return result; + } + + public static List parseNodeCoreList(String cmdOutput) { + List list = new ArrayList<>(); + // 按空行分割每个 Core 的块 + String[] blocks = cmdOutput.split("\\n\\s*\\n"); + for (String block : blocks) { + NodeListCoreResp resp = new NodeListCoreResp(); + String[] lines = block.split("\\n"); + String lastKey = null; + StringBuilder lastValue = new StringBuilder(); + for (String line : lines) { + if (line.contains(":")) { + // 新字段开始 + if (lastKey != null) { + applyField(resp, lastKey, lastValue.toString().trim()); + } + String[] kv = line.split(":", 2); + lastKey = kv[0].trim(); + lastValue = new StringBuilder(kv.length > 1 ? 
kv[1].trim() : ""); + } else { + // 这是字段换行部分 + if (lastKey != null) { + lastValue.append(" ").append(line.trim()); + } + } + } + // 最后一组字段 + if (lastKey != null) { + applyField(resp, lastKey, lastValue.toString().trim()); + } + list.add(resp); + } + return list; + } + + // 将字段名映射到 Java 对象 + private static void applyField(NodeListCoreResp resp, String key, String value) { + switch (key) { + case "Node Processor": + resp.setNodeProcessor(value); + break; + case "State": + resp.setState(value); + break; + case "Job ID": + resp.setJobId(value); + break; + case "Task ID": + resp.setTaskId(value); + break; + case "CommandLine": + resp.setCommandLine(value); + break; + } + } + + public static NodeViewResp parseNodeView(String cmdOutput) { + NodeViewResp resp = new NodeViewResp(); + + String[] lines = cmdOutput.split("\\n"); + + String lastKey = null; + StringBuilder lastValue = new StringBuilder(); + + for (String line : lines) { + if (line.contains(":")) { + // 保存上一对 key-value + if (lastKey != null) { + applyNodeViewField(resp, lastKey, lastValue.toString().trim()); + } + String[] kv = line.split(":", 2); + lastKey = kv[0].trim(); + lastValue = new StringBuilder(kv.length > 1 ? 
kv[1].trim() : ""); + } else { + // 多行值 + if (lastKey != null) { + if (line.trim().length() > 0) { + lastValue.append(" ").append(line.trim()); + } + } + } + } + + // 最后一个 key-value + if (lastKey != null) { + applyNodeViewField(resp, lastKey, lastValue.toString().trim()); + } + + return resp; + } + + private static void applyNodeViewField(NodeViewResp resp, String key, String value) { + switch (key) { + case "System Id": + resp.setSystemId(value); + break; + case "System GUID": + resp.setSystemGuid(value); + break; + case "Job Types": + resp.setJobTypes(value); + break; + case "State": + resp.setState(value); + break; + case "Number Of Cores": + resp.setNumberOfCores(value); + break; + case "Number Of Sockets": + resp.setNumberOfSockets(value); + break; + case "Offline Time": + resp.setOfflineTime(value); + break; + case "Online Time": + resp.setOnlineTime(value); + break; + case "Security Identifier": + resp.setSecurityIdentifier(value); + break; + case "Memory Size": + resp.setMemorySize(value); + break; + case "CPU Speed": + resp.setCpuSpeed(value); + break; + case "Node Groups": + resp.setNodeGroups(value); + break; + case "Cost Per Hour": + resp.setCostPerHour(value); + break; + } + } + + +} diff --git a/pbs/pom.xml b/pbs/pom.xml index ec1e15e4..f0d02196 100644 --- a/pbs/pom.xml +++ b/pbs/pom.xml @@ -30,6 +30,10 @@ + + org.springframework.boot + spring-boot-starter + org.springframework.boot spring-boot-configuration-processor @@ -77,6 +81,11 @@ springdoc-openapi-starter-webmvc-ui + + + org.springframework.boot + spring-boot-starter-actuator + @@ -93,11 +102,7 @@ - - - org.springframework.boot - spring-boot-starter-actuator - + diff --git a/pbs/src/main/java/com/sdm/pbs/controller/TaskController.java b/pbs/src/main/java/com/sdm/pbs/controller/TaskController.java new file mode 100644 index 00000000..ca82d84f --- /dev/null +++ b/pbs/src/main/java/com/sdm/pbs/controller/TaskController.java @@ -0,0 +1,109 @@ +package com.sdm.pbs.controller; + +import 
com.sdm.common.common.SdmResponse; +import com.sdm.common.entity.req.pbs.hpc.*; +import com.sdm.pbs.service.TaskService; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/pbs") +@Tag(name = "HPC调度", description = "与hpc交互的接口") +public class TaskController { + + @Autowired + private TaskService taskService; + + @PostMapping("/nodeList") + @Operation(summary = "节点列表查询") + public SdmResponse nodeList(@RequestBody NodeListReq req) { + return taskService.nodeList(req); + } + + @PostMapping("/nodeListCore") + @Operation(summary = "节点核心列表查询") + public SdmResponse nodeListCore(@RequestBody NodeListCoreReq req) { + return taskService.nodeListCore(req); + } + + @PostMapping("/nodeView") + @Operation(summary = "节点详情查询") + public SdmResponse nodeView(@RequestBody NodeViewReq req) { + return taskService.nodeView(req); + } + + @PostMapping("/jobNew") + @Operation(summary = "创建job任务") + public SdmResponse jobNew(@RequestBody NewJobReq req) { + return taskService.jobNew(req); + } + + @PostMapping("/jobAdd") + @Operation(summary = "添加job任务") + public SdmResponse jobAdd(@RequestBody AddJobReq req) { + return taskService.jobAdd(req); + } + + @PostMapping("/jobSubmit") + @Operation(summary = "提交job任务") + public SdmResponse jobSubmit(@RequestBody SubmitHpcJobReq req) { + return taskService.jobSubmit(req); + } + + @PostMapping("/jobCancel") + @Operation(summary = "取消job任务") + public SdmResponse jobCancel(@RequestBody CancelJobReq req) { + return taskService.jobCancel(req); + } + + @PostMapping("/jobClone") + @Operation(summary = "克隆job任务") + public SdmResponse jobClone(@RequestBody CloneJobReq 
req) { + return taskService.jobClone(req); + } + + @PostMapping("/jobFinish") + @Operation(summary = "优雅完成job任务") + public SdmResponse jobFinish(@RequestBody FinishJobReq req) { + return taskService.jobFinish(req); + } + + @PostMapping("/jobList") + @Operation(summary = "有条件查询所有job任务") + public SdmResponse jobList(@RequestBody ListJobReq req) { + return taskService.jobList(req); + } + + @PostMapping("/jobTasks") + @Operation(summary = "查询所有作业列表") + public SdmResponse jobTasks(@RequestBody ListTasksReq req) { + return taskService.jobTasks(req); + } + + @PostMapping("/jobModify") + @Operation(summary = "修改作业属性") + public SdmResponse jobModify(@RequestBody JobModifyReq req) { + return taskService.jobModify(req); + } + + @PostMapping("/jobRequeue") + @Operation(summary = "作业重新排队") + public SdmResponse jobRequeue(@RequestBody JobRequeueReq req) { + return taskService.jobRequeue(req); + } + + @PostMapping("/jobView") + @Operation(summary = "查看作业视图") + public SdmResponse jobView(@RequestBody JobViewReq req) { + return taskService.jobView(req); + } + + + + +} diff --git a/pbs/src/main/java/com/sdm/pbs/service/TaskService.java b/pbs/src/main/java/com/sdm/pbs/service/TaskService.java new file mode 100644 index 00000000..bcfd10f2 --- /dev/null +++ b/pbs/src/main/java/com/sdm/pbs/service/TaskService.java @@ -0,0 +1,38 @@ +package com.sdm.pbs.service; + +import com.sdm.common.common.SdmResponse; +import com.sdm.common.entity.req.pbs.hpc.*; +import org.springframework.stereotype.Service; + +@Service +public interface TaskService { + + SdmResponse nodeList(NodeListReq req); + + SdmResponse nodeListCore(NodeListCoreReq req); + + SdmResponse nodeView(NodeViewReq req); + + SdmResponse jobNew(NewJobReq req); + + SdmResponse jobAdd(AddJobReq req); + + SdmResponse jobSubmit(SubmitHpcJobReq req); + + SdmResponse jobCancel(CancelJobReq req); + + SdmResponse jobClone(CloneJobReq req); + + SdmResponse jobFinish(FinishJobReq req); + + SdmResponse jobList(ListJobReq req); + + SdmResponse 
jobTasks(ListTasksReq req); + + SdmResponse jobModify(JobModifyReq req); + + SdmResponse jobRequeue(JobRequeueReq req); + + SdmResponse jobView(JobViewReq req); + +} diff --git a/pbs/src/main/java/com/sdm/pbs/service/impl/TaskServiceImpl.java b/pbs/src/main/java/com/sdm/pbs/service/impl/TaskServiceImpl.java new file mode 100644 index 00000000..8a6d6c39 --- /dev/null +++ b/pbs/src/main/java/com/sdm/pbs/service/impl/TaskServiceImpl.java @@ -0,0 +1,226 @@ +package com.sdm.pbs.service.impl; + +import com.sdm.common.common.SdmResponse; +import com.sdm.common.entity.pojo.pbs.hpc.*; +import com.sdm.common.entity.req.pbs.hpc.*; +import com.sdm.common.entity.resp.pbs.hpc.*; +import com.sdm.common.utils.HpcCommandBuilderUtil; +import com.sdm.common.utils.HpcCommandExcuteUtil; +import com.sdm.common.utils.HpcCommandResulParseUtil; +import com.sdm.pbs.service.TaskService; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Slf4j +@Service +public class TaskServiceImpl implements TaskService { + + @Value("${hpc.excuteWay:}") + private String hpcExcuteWay; + + @Autowired + private HpcCommandExcuteUtil hpcCommandExcuteUtil; + + @Override + public SdmResponse nodeList(NodeListReq req) { + String prefixStr = HpcCommandBuilderUtil.initNodeListPrefixStr(req.getActiveheadnode()); + NodeListParam nodeListParam = new NodeListParam(); + BeanUtils.copyProperties(req, nodeListParam); + String nodeListCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, nodeListParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(nodeListCommand,hpcExcuteWay); + List nodeListResp = HpcCommandResulParseUtil.parseNodList(result); + Map map = new HashMap<>(); + map.put("hpcCommand", nodeListCommand); + 
map.put("result", nodeListResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse nodeListCore(NodeListCoreReq req) { + String prefixStr = HpcCommandBuilderUtil.initNodeListCorePrefixStr(); + NodeListCoreParam nodeListCoreParam = new NodeListCoreParam(); + BeanUtils.copyProperties(req, nodeListCoreParam); + String nodeListCoreCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, nodeListCoreParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(nodeListCoreCommand,hpcExcuteWay); + List nodeListCoreResp = HpcCommandResulParseUtil.parseNodeCoreList(result); + Map map = new HashMap<>(); + map.put("hpcCommand", nodeListCoreCommand); + map.put("result", nodeListCoreResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse nodeView(NodeViewReq req) { + String prefixStr = HpcCommandBuilderUtil.initNodeViewPrefixStr(req.getNodeName()); + NodeViewParam nodeViewParam = new NodeViewParam(); + BeanUtils.copyProperties(req, nodeViewParam); + String nodeViewCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, nodeViewParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(nodeViewCommand,hpcExcuteWay); + NodeViewResp nodeViewResp = HpcCommandResulParseUtil.parseNodeView(result); + Map map = new HashMap<>(); + map.put("hpcCommand", nodeViewCommand); + map.put("result", nodeViewResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobNew(NewJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initNewJobPrefixStr(); + NewJobParam newJobParam = new NewJobParam(); + BeanUtils.copyProperties(req, newJobParam); + String newJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, newJobParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(newJobCommand,hpcExcuteWay); + NewJobResp newJobResp = HpcCommandResulParseUtil.parseJobNewResult(result); + Map map = new HashMap<>(); + map.put("hpcCommand", newJobCommand); + map.put("result", newJobResp); + return 
SdmResponse.success(map); + } + + @Override + public SdmResponse jobAdd(AddJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initAddJobPrefixStr(req.getJobId()); + AddJobParam addJobParam = new AddJobParam(); + BeanUtils.copyProperties(req, addJobParam); + String addJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, addJobParam, req.getCommand()); + String result = hpcCommandExcuteUtil.excuteCmd(addJobCommand,hpcExcuteWay); + AddJobResp addJobResp = HpcCommandResulParseUtil.parseJoAddResult(result); + Map map = new HashMap<>(); + map.put("hpcCommand", addJobCommand); + map.put("result", addJobResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobSubmit(SubmitHpcJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initSubmitJobPrefixStr(req.getId()); + SubmitHpcJobParam submitHpcJobParam = new SubmitHpcJobParam(); + BeanUtils.copyProperties(req, submitHpcJobParam); + String submitJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, submitHpcJobParam, req.getCommand()); + String result = hpcCommandExcuteUtil.excuteCmd(submitJobCommand,hpcExcuteWay); + SubmitHpcJobResp submitHpcJobResp = HpcCommandResulParseUtil.parseJobSubmitResult(result); + submitHpcJobResp.setJobId(req.getId()); + Map map = new HashMap<>(); + map.put("hpcCommand", submitJobCommand); + map.put("result", submitHpcJobResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobCancel(CancelJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initCancelJobPrefixStr(req.getJobId(),req.getCancelWay()); + CancelJobParam cancelJobParam = new CancelJobParam(); + BeanUtils.copyProperties(req, cancelJobParam); + String cancelJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, cancelJobParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(cancelJobCommand,hpcExcuteWay); + JobCancelResp jobCancelResp = HpcCommandResulParseUtil.parseJobCancel(result); + Map map = new HashMap<>(); + 
map.put("hpcCommand", cancelJobCommand); + map.put("result", jobCancelResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobClone(CloneJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initCloneJobPrefixStr(req.getJobId()); + CloneJobParam cloneJobParam = new CloneJobParam(); + BeanUtils.copyProperties(req, cloneJobParam); + String cloneJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, cloneJobParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(cloneJobCommand,hpcExcuteWay); + Map map = new HashMap<>(); + map.put("hpcCommand", cloneJobCommand); + map.put("result", result); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobFinish(FinishJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initFinishJobPrefixStr(req.getJobId(),req.getFinshWay()); + FinishJobParam finishJobParam = new FinishJobParam(); + BeanUtils.copyProperties(req, finishJobParam); + String finishJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, finishJobParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(finishJobCommand,hpcExcuteWay); + JobFinishResp jobFinishResp = HpcCommandResulParseUtil.parseJobFinish(result); + Map map = new HashMap<>(); + map.put("hpcCommand", finishJobCommand); + map.put("result", jobFinishResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobList(ListJobReq req) { + String prefixStr = HpcCommandBuilderUtil.initListJobPrefixStr(req.getAll()); + ListJobParam listJobParam = new ListJobParam(); + BeanUtils.copyProperties(req, listJobParam); + String listJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, listJobParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(listJobCommand,hpcExcuteWay); + List jobLists = HpcCommandResulParseUtil.parseJobLists(result); + Map map = new HashMap<>(); + map.put("hpcCommand", listJobCommand); + map.put("result", jobLists); + return SdmResponse.success(map); + } + + @Override + 
public SdmResponse jobTasks(ListTasksReq req) { + String prefixStr = HpcCommandBuilderUtil.initTasksJobPrefixStr(req.getJobId(),req.getExpand()); + ListTasksParam listTasksParam = new ListTasksParam(); + BeanUtils.copyProperties(req, listTasksParam); + String listTasksJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, listTasksParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(listTasksJobCommand,hpcExcuteWay); + List jobTaskResp = HpcCommandResulParseUtil.parseJobTasks(result); + Map map = new HashMap<>(); + map.put("hpcCommand", listTasksJobCommand); + map.put("result", jobTaskResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobModify(JobModifyReq req) { + String prefixStr = HpcCommandBuilderUtil.initModifyJobPrefixStr(req.getJobId(),req.getClearexcludednodes()); + JobModifyParam jobModifyParam = new JobModifyParam(); + BeanUtils.copyProperties(req, jobModifyParam); + String modifyJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, jobModifyParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(modifyJobCommand,hpcExcuteWay); + JobModifyResp jobModifyResp = HpcCommandResulParseUtil.parseJobModify(result); + Map map = new HashMap<>(); + map.put("hpcCommand", modifyJobCommand); + map.put("result", jobModifyResp); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobRequeue(JobRequeueReq req) { + String prefixStr = HpcCommandBuilderUtil.initRequeueJobPrefixStr(req.getJobId()); + JobRequeueParam jobRequeueParam = new JobRequeueParam(); + BeanUtils.copyProperties(req, jobRequeueParam); + String requeueJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, jobRequeueParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(requeueJobCommand,hpcExcuteWay); + Map map = new HashMap<>(); + map.put("hpcCommand", requeueJobCommand); + map.put("result", result); + return SdmResponse.success(map); + } + + @Override + public SdmResponse jobView(JobViewReq req) { + 
String prefixStr = HpcCommandBuilderUtil.initViewJobPrefixStr(req.getJobId()); + JobViewParam jobViewParam = new JobViewParam(); + BeanUtils.copyProperties(req, jobViewParam); + String viewJobCommand = HpcCommandBuilderUtil.buildHpcCommandStr(prefixStr, jobViewParam, ""); + String result = hpcCommandExcuteUtil.excuteCmd(viewJobCommand,hpcExcuteWay); + JobViewResp jobViewResp = HpcCommandResulParseUtil.parseJobView(result); + Map map = new HashMap<>(); + map.put("hpcCommand", viewJobCommand); + map.put("result", jobViewResp); + return SdmResponse.success(map); + } + +} diff --git a/pbs/src/main/resources/application-dev.yml b/pbs/src/main/resources/application-dev.yml index 47c42d13..6aa6dea0 100644 --- a/pbs/src/main/resources/application-dev.yml +++ b/pbs/src/main/resources/application-dev.yml @@ -7,24 +7,24 @@ spring: datasource: username: root password: mysql - jdbc-url: jdbc:mysql://192.168.2.166:3306/sdm_base_line?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai + jdbc-url: jdbc:mysql://192.168.65.161:3306/spdm_baseline?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai driver-class-name: com.mysql.cj.jdbc.Driver master: username: root password: mysql - jdbc-url: jdbc:mysql://192.168.2.166:3306/sdm_base_line?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai + jdbc-url: jdbc:mysql://192.168.65.161:3306/spdm_baseline?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai driver-class-name: com.mysql.cj.jdbc.Driver slave: username: root password: mysql - jdbc-url: jdbc:mysql://192.168.2.166:3306/sdm_base_line?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai + jdbc-url: jdbc:mysql://192.168.65.161:3306/spdm_baseline?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai driver-class-name: com.mysql.cj.jdbc.Driver enable: true cloud: nacos: discovery: - server-addr: 192.168.2.166:8848 -# 
server-addr: 127.0.0.1:8848 + server-addr: 192.168.65.161:8848 + group: DEV_GROUP enabled: true namespace: 3 # username: nacos @@ -100,6 +100,11 @@ file: hpc: url: http://172.27.3.135/JSONAPI/JSONAPI.ashx + # 这个是spdm mock执行cmd命令 +# remoteCmdUrl: http://127.0.0.1:9097/doProcess + # remote: hpc借助工具http远程调用,local:该服务和hpc部署在同一机器 + excuteWay: remote + remoteCmdUrl: http://192.168.65.55:9097/doProcess #logging: # config: ./config/logback.xml \ No newline at end of file diff --git a/pbs/src/main/resources/logback.xml b/pbs/src/main/resources/logback.xml index b6d247f3..50a7d635 100644 --- a/pbs/src/main/resources/logback.xml +++ b/pbs/src/main/resources/logback.xml @@ -6,7 +6,7 @@ - + @@ -36,6 +36,24 @@ + + + ${LOG_HOME}/core.log + + ${LOG_HOME}/core.log.%d{yyyy-MM-dd}.%i.log + 30 + 500MB + 10MB + + + + [%X{traceId}] %d{yyyy-MM-dd HH:mm:ss.SSS} %5p ${PID:- } [%15.15t] %X{callerInfo} : %m%n${LOG_EXCEPTION_CONVERSION_WORD:-%wEx} + + + INFO + + + @@ -48,6 +66,12 @@ + + + + + +