1. System Environment
a) CentOS 7.8
b) Two machines, with planned IPs 10.10.0.20 and 10.10.0.21
c) Hostnames node1 and node2, respectively
d) Firewall and SELinux disabled on both machines
e) Passwordless SSH, NIS, and NFS already configured
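For reference, the hostname resolution assumed above can be provided by identical /etc/hosts entries on both machines; a minimal sketch using the IPs planned here:
10.10.0.20  node1
10.10.0.21  node2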
2. Configuring munge and Slurm
a) Install dependencies
yum install -y epel-release
yum install -y gtk2 gtk2-devel munge munge-devel python python3
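The chown commands below assume a slurm user exists on both machines (with NIS configured, the account may already be shared). If it does not exist, a minimal sketch for creating it; the UID/GID of 1109 is an arbitrary example, but it must be identical on all nodes:
groupadd -g 1109 slurm
useradd -u 1109 -g slurm slurm  #same UID/GID on node1 and node2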
b) Configure munge #required on both machines
chown slurm:slurm /etc/munge
chown slurm:slurm /var/run/munge
chown slurm:slurm /var/lib/munge
chown slurm:slurm /var/log/munge
create-munge-key #this step is only needed on node1
scp /etc/munge/munge.key node2:/etc/munge/
chown slurm:slurm /etc/munge/munge.key
su - slurm #start munged as the slurm user on all nodes
munged
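Before moving on, munge can be verified with its standard test commands (the cross-node check relies on the passwordless SSH configured earlier):
munge -n | unmunge            #local encode/decode test; STATUS should be Success (0)
munge -n | ssh node2 unmunge  #cross-node test from node1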
c) Configure Slurm #identical configuration on all nodes
tar xvf slurm-20.11.5.tar.bz2
cd slurm-20.11.5/
./configure
make -j3
make install -j3
cp etc/{slurmctld.service,slurmdbd.service,slurmd.service} /usr/lib/systemd/system/
vi /usr/local/etc/slurm.conf #the configuration file is given in the PS section
scp /usr/local/etc/slurm.conf node2:/usr/local/etc/
chown slurm:slurm /var/spool/
systemctl daemon-reload #let systemd pick up the newly copied unit files
systemctl start slurmctld #the head node starts slurmctld and slurmd
systemctl start slurmd #compute nodes only need slurmd
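Optionally, the daemons can be enabled so they survive reboots, and node registration can be checked with scontrol:
systemctl enable slurmctld  #node1 only
systemctl enable slurmd     #all nodes
scontrol show nodes         #both nodes should register and report State=IDLE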
3. Testing
a) Basic command test
[root@node1 ~]# sinfo
PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
control up infinite 1 idle node1
compute* up infinite 2 idle node[1-2]
[root@node1 ~]# srun -N 2 -l hostname # -N 2 is the number of nodes to run on
0: node1
1: node2
At this point the Slurm cluster setup is complete.
b) MPI test
vi test.c #the test program is given in the PS section
mpicc -o test test.c #compile the MPI program
vi tj.sh #the job script is given in the PS section
sbatch tj.sh #submit the job
squeue #check job status
[root@node1 ~]# cat test.out #view the job output
node2: Hello world from process 2
number of processes: 4
...node1: Hello world from process 0
node2: Hello world from process 3
node1: Hello world from process 1
4. PS (appendix)
a) tj.sh job script
#!/bin/sh
#SBATCH -o /root/test.out #write results to test.out
#SBATCH --nodes=2 #number of nodes: 2
#SBATCH --ntasks-per-node=2 #2 tasks per node, 4 MPI ranks in total
mpirun /root/test
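For reference, --nodes=2 with --ntasks-per-node=2 produces the 4 MPI ranks seen in the test output above. If the MPI library was built with Slurm PMI support (an assumption, not shown in this setup), srun can launch the program directly inside the script instead of mpirun:
srun --mpi=pmi2 /root/test  #assumes the MPI library supports PMI-2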
b) slurm.conf configuration file
SlurmctldHost=node1 #head node
MpiDefault=none
ProctrackType=proctrack/pgid #if generating the file from the configurator website, remember to adjust this
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm #the Slurm management user
StateSaveLocation=/var/spool
SwitchType=switch/none
TaskPlugin=task/affinity
InactiveLimit=0
KillWait=30
MinJobAge=300
SlurmctldTimeout=120
SlurmdTimeout=300
Waittime=0
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core
AccountingStorageType=accounting_storage/none
AccountingStoreJobComment=YES
ClusterName=siton #cluster name
JobCompType=jobcomp/none
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=info
SlurmdDebug=info
NodeName=node1,node2 CPUs=4 RealMemory=2 Sockets=4 CoresPerSocket=1 ThreadsPerCore=1 State=UNKNOWN
# NodeName lists the compute nodes; CPUs (formerly Procs), Sockets, CoresPerSocket, and
# ThreadsPerCore should match the hardware as reported by lscpu or /proc/cpuinfo;
# RealMemory is the amount of memory (in MB) made available to Slurm; State=UNKNOWN
# applies at cluster startup, after which the state changes to idle.
PartitionName=control Nodes=node1 Default=NO MaxTime=INFINITE State=UP
PartitionName=compute Nodes=node1,node2 Default=YES MaxTime=INFINITE State=UP
# Two partitions are defined, control and compute; Default=YES marks compute as the
# default partition for jobs (only one partition can be the default, which matches the
# * after compute in the sinfo output above).
The configuration file can also be generated with https://slurm.schedmd.com/configurator.html
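A convenient way to obtain correct values for the NodeName line is slurmd's built-in hardware probe, which prints a configuration line for the machine it runs on:
slurmd -C
# prints something like (values depend on the machine):
# NodeName=node1 CPUs=4 Boards=1 SocketsPerBoard=4 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=3790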
c) MPI test program
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <time.h>
int main(int argc, char* argv[])
{
    int myid, numprocs, namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);                            /* initialize the MPI environment */
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);              /* rank of this process */
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);          /* total number of processes */
    MPI_Get_processor_name(processor_name, &namelen);  /* hostname of this rank */

    if (myid == 0)
        printf("number of processes: %d\n...", numprocs);
    printf("%s: Hello world from process %d \n", processor_name, myid);

    MPI_Finalize();
    return 0;
}
5. Advanced (GPU)
Modify the slurm.conf file:
GresTypes=gpu
NodeName=slave3 Sockets=2 Procs=32 CoresPerSocket=8 ThreadsPerCore=2 RealMemory=3000 Gres=gpu:tesla:2 State=UNKNOWN NodeAddr=10.135.12.29
In addition, the GPU information must be configured on the slave3 machine itself by editing the /usr/local/etc/gres.conf file:
Name=gpu Type=tesla File=/dev/nvidia0
Name=gpu Type=tesla File=/dev/nvidia1
In the Slurm job script, add a gres option to request GPU resources (a full script sketch follows below):
#SBATCH --gres=gpu:tesla:2
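A minimal GPU job script sketch assembled from the pieces above (the output path is an illustrative example; with gres.conf in place, Slurm sets CUDA_VISIBLE_DEVICES to the allocated devices):
#!/bin/sh
#SBATCH -o /root/gpu_test.out   #illustrative output path
#SBATCH --nodes=1
#SBATCH --gres=gpu:tesla:2      #request two tesla GPUs on one node
echo $CUDA_VISIBLE_DEVICES      #should list the allocated GPUs, e.g. 0,1
nvidia-smi                      #confirm the GPUs visible to the job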