diff --git a/docs/executor.md b/docs/executor.md
index 959aa220f7..66bff3a17d 100644
--- a/docs/executor.md
+++ b/docs/executor.md
@@ -445,3 +445,44 @@ Nextflow does not provide direct support for SLURM multi-clusters. If you need t
 :::{versionadded} 23.07.0-edge
 Some SLURM clusters require memory allocations to be specified with `--mem-per-cpu` instead of `--mem`. You can specify `executor.perCpuMemAllocation = true` in the Nextflow configuration to enable this behavior. Nextflow will automatically compute the memory per CPU for each task (by default 1 CPU is used).
 :::
+
+(tcs-executor)=
+
+## TCS
+
+The `tcs` executor allows you to run your pipeline script on a cluster managed by the [Fujitsu Technical Computing Suite (TCS)](https://software.fujitsu.com/jp/manual/manualindex/p21000155e.html).
+
+Nextflow manages each process as a separate job that is submitted to the cluster using the `pjsub` command.
+
+The pipeline must be launched from a node where the `pjsub` command is available, which is typically the login node.
+
+To enable the TCS executor, set `process.executor = 'tcs'` in the `nextflow.config` file.
+
+Resource requests and other job characteristics can be controlled via the following process directives:
+
+- {ref}`process-clusterOptions`
+- {ref}`process-time`
+
+:::{note}
+Other options, such as the queue (resource group), CPUs, and nodes, should be specified via `clusterOptions`, because the required options differ between target systems and can be set with `-L` options passed to the `pjsub` command.
+:::
+
+This is an example of `nextflow.config` for the Genkai supercomputer (Kyushu University):
+
+```
+process {
+    executor = 'tcs'
+    time = '00:30:00'
+    clusterOptions = '-L rscgrp=a-batch -L vnode-core=4'
+}
+```
+
+This is an example of `nextflow.config` for the Flow supercomputer (Nagoya University):
+
+```
+process {
+    executor = 'tcs'
+    time = '00:30:00'
+    clusterOptions = '-L rscunit=cx -L rscgrp=cx-share -L gpu=1'
+}
+```
diff --git a/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy
index 0609d0245f..646f1282fe 100644
--- a/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy
+++ b/modules/nextflow/src/main/groovy/nextflow/executor/ExecutorFactory.groovy
@@ -59,7 +59,8 @@ class ExecutorFactory {
             'nqsii': NqsiiExecutor,
             'moab': MoabExecutor,
             'oar': OarExecutor,
-            'hq': HyperQueueExecutor
+            'hq': HyperQueueExecutor,
+            'tcs': TcsExecutor
     ]
 
     @PackageScope Map<String, Class<? extends Executor>> executorsMap
diff --git a/modules/nextflow/src/main/groovy/nextflow/executor/TcsExecutor.groovy b/modules/nextflow/src/main/groovy/nextflow/executor/TcsExecutor.groovy
new file mode 100644
index 0000000000..71fc392bbd
--- /dev/null
+++ b/modules/nextflow/src/main/groovy/nextflow/executor/TcsExecutor.groovy
@@ -0,0 +1,198 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.executor
+
+import java.nio.file.Path
+import java.util.regex.Pattern
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.processor.TaskArrayRun
+import nextflow.processor.TaskRun
+
+/**
+ * Executor for the Fujitsu Technical Computing Suite (TCS) batch scheduler.
+ *
+ * Jobs are submitted with the `pjsub` command, monitored with `pjstat2`
+ * and cancelled with `pjdel`. Task arrays are mapped to TCS "bulk" jobs.
+ *
+ * @author Satoshi Ohshima
+ */
+@Slf4j
+@CompileStatic
+class TcsExecutor extends AbstractGridExecutor implements TaskArrayExecutor {
+
+    /**
+     * Matches the job id in the response printed by `pjsub`, e.g.
+     * `[INFO] PJM 0000 pjsub Job 123456 submitted.`
+     */
+    static private Pattern SUBMIT_REGEX = ~/\[INFO\] PJM 0000 pjsub Job (\d+) submitted./
+
+    /**
+     * Strip parentheses from a job name -- they are not accepted by `pjsub -N`
+     */
+    static String modName(String name) {
+        return name.replaceAll(/[()]/, '')
+    }
+
+    /**
+     * Gets the directives to submit the specified task to the cluster for execution
+     *
+     * @param task A {@link TaskRun} to be submitted
+     * @param result The {@link List} instance to which add the job directives
+     * @return A {@link List} containing all directive tokens and values.
+     */
+    protected List<String> getDirectives( TaskRun task, List<String> result ) {
+        assert result != null
+
+        if( task instanceof TaskArrayRun ) {
+            // task arrays are submitted as TCS bulk jobs
+            final arraySize = task.getArraySize()
+            result << '--bulk --sparam' << "0-${arraySize - 1}".toString()
+        }
+
+        result << '-N' << modName(getJobNameFor(task))
+        // merge the job stderr into the stdout stream
+        result << '-j' << ''
+        // print job statistics at the end of the job
+        result << '-S' << ''
+
+        // note: the `-o` option is deliberately not added -- the task output is
+        // captured into `.command.log` by the job wrapper script itself
+        // (the previous `if( !task.workDir )` guard either never fired or would
+        // have thrown an NPE resolving a path against a null work dir)
+
+        // max task duration
+        if( task.config.getTime() ) {
+            final duration = task.config.getTime()
+            result << '-L' << "elapse=${duration.format('HH:mm:ss')}".toString()
+        }
+
+        // -- at the end append any custom cluster options
+        addClusterOptionsDirective(task.config, result)
+
+        return result
+    }
+
+    /**
+     * The command line to submit this job
+     *
+     * @param task The {@link TaskRun} instance to submit for execution to the cluster
+     * @param scriptFile The file containing the job launcher script
+     * @return A list representing the submit command line
+     */
+    @Override
+    List<String> getSubmitCommandLine(TaskRun task, Path scriptFile ) {
+        // when the launcher script is piped to `pjsub` no file name is needed
+        return pipeLauncherScript()
+                ? List.of('pjsub')
+                : List.of('pjsub', scriptFile.getName())
+    }
+
+    /** The token prefixing each directive in the job script header */
+    @Override
+    protected String getHeaderToken() { '#PJM' }
+
+    /**
+     * Parse the string returned by the {@code pjsub} command and extract the job ID string
+     *
+     * @param text The string returned when submitting the job
+     * @return The actual job ID string
+     */
+    @Override
+    def parseJobId( String text ) {
+        for( String line : text.readLines() ) {
+            def m = SUBMIT_REGEX.matcher(line)
+            if( m.find() ) {
+                return m.group(1).toString()
+            }
+        }
+        throw new IllegalArgumentException("Invalid TCS submit response:\n$text\n\n")
+    }
+
+    /** Jobs are cancelled with `pjdel <job id>` */
+    @Override
+    protected List<String> getKillCommand() { ['pjdel'] }
+
+    /**
+     * @param queue The name of the queue (ignored -- `pjstat2` reports all jobs)
+     * @return The command to list the scheduler queue status
+     */
+    @Override
+    protected List<String> queueStatusCommand(Object queue) {
+        // the `-E` option also lists the sub-jobs of bulk (array) jobs
+        return ['pjstat2', '-E']
+    }
+
+    /** Mapping of TCS job status codes to Nextflow queue states */
+    static private Map<String,QueueStatus> STATUS_MAP = [
+        'ACC': QueueStatus.PENDING, // accepted
+        'QUE': QueueStatus.PENDING, // waiting to run
+        'RNA': QueueStatus.RUNNING, // preparing to run
+        'RUN': QueueStatus.RUNNING, // running
+        'RNO': QueueStatus.RUNNING, // clean-up after run
+        'EXT': QueueStatus.DONE,    // finished
+        'CCL': QueueStatus.DONE,    // cancelled
+        'HLD': QueueStatus.HOLD,    // holding
+        'ERR': QueueStatus.ERROR,   // error
+    ]
+
+    /**
+     * Parse the output of `pjstat2 -E` into a map of job id -> queue status.
+     * Expected columns: JOB_ID JOB_NAME MD STATUS ...
+     */
+    @Override
+    protected Map<String,QueueStatus> parseQueueStatus(String text) {
+        final result = new LinkedHashMap<String,QueueStatus>()
+        text.eachLine { String line ->
+            def cols = line.trim().split(/\s+/)
+            // guard against short lines -- the previous `cols.size() > 1` check
+            // allowed an out-of-bounds access on `cols[3]`
+            if( cols.size() > 3 ) {
+                // unknown status codes map to UNKNOWN instead of a null entry
+                result.put( cols[0], STATUS_MAP.get(cols[3]) ?: QueueStatus.UNKNOWN )
+            }
+            else {
+                log.debug "[TCS] invalid status line: `$line`"
+            }
+        }
+        return result
+    }
+
+    /**
+     * Extract the value following the given prefix in a line, or null when missing
+     */
+    static String fetchValue( String prefix, String line ) {
+        final p = line.indexOf(prefix)
+        return p!=-1 ? line.substring(p+prefix.size()).trim() : null
+    }
+
+    /**
+     * NOTE(review): this reuses SUBMIT_REGEX, which matches the `pjsub` submit
+     * response -- confirm this is the intended pattern here. Returns false
+     * (instead of null) for a null/empty value, since the method declares a
+     * primitive boolean return type.
+     */
+    static protected boolean matchOptions(String value) {
+        return value ? SUBMIT_REGEX.matcher(value).find() : false
+    }
+
+    /** Environment variable holding the bulk (array) sub-job index */
+    @Override
+    String getArrayIndexName() {
+        return 'PJM_BULKNUM'
+    }
+
+    /** TCS bulk job indexes start from zero */
+    @Override
+    int getArrayIndexStart() {
+        return 0
+    }
+
+    /**
+     * Compose the id of a bulk sub-job, e.g. `123456[2]`
+     */
+    @Override
+    String getArrayTaskId(String jobId, int index) {
+        assert jobId, "Missing 'jobId' argument"
+        return "${jobId}[${index}]"
+    }
+
+}
diff --git a/modules/nextflow/src/test/groovy/nextflow/executor/TcsExecutorTest.groovy b/modules/nextflow/src/test/groovy/nextflow/executor/TcsExecutorTest.groovy
new file mode 100644
index 0000000000..b252545b75
--- /dev/null
+++ b/modules/nextflow/src/test/groovy/nextflow/executor/TcsExecutorTest.groovy
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.executor
+
+import java.nio.file.Paths
+
+import nextflow.Session
+import nextflow.processor.TaskArrayRun
+import nextflow.processor.TaskConfig
+import nextflow.processor.TaskProcessor
+import nextflow.processor.TaskRun
+import spock.lang.Specification
+import spock.lang.Unroll
+
+/**
+ * Unit tests for the {@link TcsExecutor} grid executor.
+ *
+ * @author Satoshi Ohshima
+ */
+
+class TcsExecutorTest extends Specification {
+
+    def testParseJob() {  // the job id is extracted from the `pjsub` submit response
+
+        given:
+        def exec = [:] as TcsExecutor
+
+        expect:
+        exec.parseJobId('[INFO] PJM 0000 pjsub Job 123456 submitted.') == '123456'
+        exec.parseJobId('[INFO] PJM 0000 pjsub Job 630114 submitted.') == '630114'
+    }
+
+    def testKill() {  // jobs are cancelled with `pjdel <job id>`
+
+        given:
+        def exec = [:] as TcsExecutor
+        expect:
+        exec.killTaskCommand(123) == ['pjdel','123']
+
+    }
+
+    @Unroll
+    def testGetCommandLine() {  // the script file name is omitted when the launcher script is piped
+        given:
+        def session = Mock(Session) {getConfig()>>[:]}
+        def exec = Spy(TcsExecutor) { getSession()>>session }
+
+        when:
+        def result = exec.getSubmitCommandLine(Mock(TaskRun), Paths.get(PATH))
+        then:
+        exec.pipeLauncherScript() >> PIPE
+        result == EXPECTED
+
+        where:
+        PATH                | PIPE  | EXPECTED
+        '/some/path/job.sh' | false | ['pjsub', 'job.sh']
+        '/some/path/job.sh' | true  | ['pjsub']
+    }
+
+    def 'test job script headers' () {  // note: no `-o` directive is expected in the header
+
+        setup:
+        // TCS executor
+        def executor = [:] as TcsExecutor
+        executor.session = Mock(Session)
+
+        // mock process
+        def proc = Mock(TaskProcessor)
+
+        // task object
+        def task = new TaskRun()
+        task.processor = proc
+        task.workDir = Paths.get('/work/path')
+        task.name = 'the task name'
+
+        when:
+        task.index = 21
+        task.config = new TaskConfig()
+        task.config.time = '00:10:00'
+        then:
+        executor.getHeaders(task) == '''
+                #PJM -N nf-the_task_name
+                #PJM -j
+                #PJM -S
+                #PJM -L elapse=00:10:00
+                '''
+                .stripIndent().leftTrim()
+    }
+/*
+    def testWorkDirWithBlanks() {
+
+        setup:
+        // LSF executor
+        def executor = Spy(TcsExecutor)
+        executor.session = Mock(Session)
+
+        // mock process
+        def proc = Mock(TaskProcessor)
+
+        // task object
+        def task = new TaskRun()
+        task.processor = proc
+        task.workDir = Paths.get('/work/some data/path')
+        task.name = 'the task name'
+
+        when:
+        task.index = 21
+        task.config = new TaskConfig()
+        task.config.time = '00:10:00'
+        then:
+        executor.getHeaders(task) == '''
+                #PJM -N nf-the_task_name
+                #PJM -j
+                #PJM -S
+                #PJM -L elapse=00:10:00
+                #PJM -o "/work/some\\ data/path/.command.log"
+                '''
+                .stripIndent().leftTrim()
+
+    }
+*/
+
+    def testQstatCommand() {  // one line per job, the status code is in the 4th column
+
+        setup:
+        def executor = [:] as TcsExecutor
+        def text =
+                """
+                100001 job1 NM ACC
+                100002 job2 NM QUE
+                100003 job3 NM RNA
+                100004 job4 NM RUN
+                100005 job5 NM RNO
+                100006 job6 NM EXT
+                100007 job7 NM CCL
+                100008 job8 NM HLD
+                100009 job9 NM ERR
+                """.stripIndent().trim()
+
+
+        when:
+        def result = executor.parseQueueStatus(text)
+        then:
+        result.size() == 9
+        result['100001'] == AbstractGridExecutor.QueueStatus.PENDING
+        result['100002'] == AbstractGridExecutor.QueueStatus.PENDING
+        result['100003'] == AbstractGridExecutor.QueueStatus.RUNNING
+        result['100004'] == AbstractGridExecutor.QueueStatus.RUNNING
+        result['100005'] == AbstractGridExecutor.QueueStatus.RUNNING
+        result['100006'] == AbstractGridExecutor.QueueStatus.DONE
+        result['100007'] == AbstractGridExecutor.QueueStatus.DONE
+        result['100008'] == AbstractGridExecutor.QueueStatus.HOLD
+        result['100009'] == AbstractGridExecutor.QueueStatus.ERROR
+
+    }
+
+    def testQueueStatusCommand() {  // the queue argument is ignored by `pjstat2`
+        when:
+        def usr = System.getProperty('user.name')
+        def exec = [:] as TcsExecutor
+        then:
+        usr
+        exec.queueStatusCommand(null) == ['pjstat2', '-E']
+        //exec.queueStatusCommand('long') == ['pjstat2','-E']
+    }
+
+    def 'should get array (bulk) index name and start' () {
+        given:
+        def executor = Spy(TcsExecutor)
+        expect:
+        executor.getArrayIndexName() == 'PJM_BULKNUM'
+        executor.getArrayIndexStart() == 0
+    }
+
+    @Unroll
+    def 'should get array (bulk) task id' () {  // bulk sub-jobs are identified as `jobId[index]`
+        given:
+        def executor = Spy(TcsExecutor)
+        expect:
+        executor.getArrayTaskId(JOB_ID, TASK_INDEX) == EXPECTED
+
+        where:
+
+        JOB_ID   | TASK_INDEX | EXPECTED
+        '1234'   | 1          | '1234[1]'
+        '123456' | 2          | '123456[2]'
+/*
+    JOB_ID JOB_NAME MD STATUS USER RSCGROUP START_DATE ELAPSE NODE CORE GPU POINT
+    3209914 bulk1.sh BU RUN ku40000105 a-batch-low - - - 1 - -
+    3209914[1] bulk1.sh BU RUN ku40000105 a-batch-low 2025/06/13 17:53:43 00:00:01 - 1 - 1
+*/
+    }
+
+/* There is no project option in TCS. (on Genkai and Flow) */
+/*
+    @Unroll
+    def 'should set tcs account' () {
+        given:
+        // task
+        def task = new TaskRun()
+        task.workDir = Paths.get('/work/dir')
+        task.processor = Mock(TaskProcessor)
+        task.processor.getSession() >> Mock(Session)
+        task.config = Mock(TaskConfig)
+        and:
+        def executor = Spy(TcsExecutor)
+        executor.getJobNameFor(_) >> 'foo'
+        executor.getName() >> 'tcs'
+        executor.getSession() >> Mock(Session) { getExecConfigProp('tcs', 'account',null)>>ACCOUNT }
+
+        when:
+        def result = executor.getDirectives(task, [])
+        then:
+        result == EXPECTED
+
+        where:
+        ACCOUNT       | EXPECTED
+        null          | ['-N', 'foo', '-o', '/work/dir/.command.log', '-j']
+        'project-123' | ['-N', 'foo', '-o', '/work/dir/.command.log', '-j']
+    }
+    */
+}