在我的全局 Torque 安装(torque-6.1.1)上,我提交的所有作业都一直停留在 'Q'(排队)状态,必须使用 qrun 才能强制它们运行。
>qstat -f 141
Job Id: 141.localhost
Job_Name = script.pbs
Job_Owner = michael@localhost
job_state = Q
queue = batch
server = localhost
Checkpoint = u
ctime = Wed Aug 23 16:45:25 2017
Error_Path = localhost:/var/spool/torque/script.pbs.e141
Hold_Types = n
Join_Path = n
Keep_Files = n
Mail_Points = bae
mtime = Wed Aug 23 16:45:25 2017
Output_Path = localhost:/var/spool/torque/script.pbs.o141
Priority = 0
qtime = Wed Aug 23 16:45:25 2017
Rerunable = True
Resource_List.walltime = 01:00:00
Resource_List.nodes = 1
Resource_List.nodect = 1
Variable_List = PBS_O_QUEUE=batch,PBS_O_HOME=/home/michael,
PBS_O_LOGNAME=michael,
PBS_O_PATH=/home/michael/bin:/home/michael/.local/bin:/usr/local/bin:
/usr/local/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbi
n:/bin:/usr/games:/usr/local/games:/snap/bin,PBS_O_SHELL=/bin/bash,
PBS_O_LANG=fr_FR.UTF-8,PBS_O_WORKDIR=/var/spool/torque,
PBS_O_HOST=localhost,PBS_O_SERVER=localhost
euser = michael
egroup = michael
queue_type = E
etime = Wed Aug 23 16:45:25 2017
submit_args = /home/michael/cnes-sowt/script.pbs
fault_tolerant = False
job_radix = 0
submit_host = localhost
init_work_dir = /var/spool/torque
request_version = 1
>sudo tracejob 141
/var/spool/torque/mom_logs/20170823: No matching job records located
/var/spool/torque/sched_logs/20170823: No matching job records located
Job: 141.localhost
08/23/2017 16:45:25.323 S enqueuing into batch, state 1 hop 1
08/23/2017 16:45:25 A queue=batch
这会不会是因为我可以用非 root 用户执行 qsub,却必须用 sudo 才能执行 qrun?
非常感谢你的帮助。
答案 0 :(得分:0)
@chuck 这是我的调度器日志(sched_logs):
> sudo more /var/spool/torque/sched_logs/20170824
08/24/2017 10:21:49.610;02; pbs_sched.3505;Svr;Log;Log opened
08/24/2017 10:21:49.617;02; pbs_sched.3505;Svr;TokenAct;Account file
/var/spool/torque/sched_priv/accounting/20170824 opened
08/24/2017 10:21:49.617;02; pbs_sched.3506;Svr;main;pbs_sched startup pid 3506
关于队列的信息
> qmgr -c 'p s'
#
# Create queues and set their attributes.
#
#
# Create and define queue batch
#
create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.walltime = 01:00:00
set queue batch resources_default.nodes = 1
set queue batch enabled = True
set queue batch started = True
#
# Set server attributes.
#
set server scheduling = True
set server acl_hosts = localhost
set server acl_hosts += mbenguig-VirtualBox
set server managers = root@localhost
set server operators = root@localhost
set server default_queue = batch
set server log_events = 2047
set server mail_from = adm
set server node_check_rate = 150
set server tcp_timeout = 300
set server job_stat_rate = 300
set server poll_jobs = True
set server down_on_error = True
set server mom_job_sync = True
set server keep_completed = 0
set server next_job_number = 142
set server moab_array_compatible = True
set server nppcu = 1
set server timeout_for_job_delete = 120
set server timeout_for_job_requeue = 120
set server note_append_on_error = True
答案 1 :(得分:0)
解决方案见 https://cmayes.wordpress.com/2012/12/15/single-host-torque-pbs/ :按照该文章的说明,在 /etc/hosts 中添加相应的条目。