Last Update: April 4, 2006
概要: 本ページは,MPI(Message Passing Interface)を使った並列計算プログラムを手っ取り早く構築するための自分用マニュアルである。「この通り実行したら動きました」という程度の文書なので,勘違いや間違いも含まれている可能性がある。そういうものを見つけた方は,ぜひ下記の連絡先までご指摘下さい。
[1] 幸谷智紀, Vine LinuxによるPC clusterの構築, 2003年3月20日
[2] 幸谷智紀, Vine LinuxによるPC clusterの構築 Version 2, 2004年2月28日
[3] Fedora Project
[4] MPI Forum
[5] MPICH
[6] LAM/MPI
導入したマシン(4台)のスペックとネットワーク構成(NIS Domain: "cs-pccluster3")は次の通り。
下線部を入力 or 確認すること。
[root@cs-room443-d01 user01]# export http_proxy=http://proxy_ip_addr:port/
[root@cs-room443-d01 user01]# yum install lam
(略)
lam-7.1.1-11.x86_64.rpm 100% |=========================| 112 kB 00:07
(略)
libaio-devel-0.3.106-2.2. 100% |=========================| 6.6 kB 00:00
(略)
libaio-0.3.106-2.2.x86_64 100% |=========================| 7.6 kB 00:00
(略)
=============================================================================
Package Arch Version Repository Size
=============================================================================
Installing:
lam x86_64 2:7.1.1-11 core 3.1 M
Installing for dependencies:
libaio x86_64 0.3.106-2.2 core 19 k
libaio-devel x86_64 0.3.106-2.2 core 11 k
Transaction Summary
=============================================================================
Install 3 Package(s)
Update 0 Package(s)
Remove 0 Package(s)
Total download size: 3.2 M
Is this ok [y/N]: y
Downloading Packages:
(略)
(1/3): libaio-devel-0.3.1 100% |=========================| 11 kB 00:00
(2/3): libaio-0.3.106-2.2 100% |=========================| 19 kB 00:00
(3/3): lam-7.1.1-11.x86_6 100% |=========================| 3.1 MB 00:47
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
Installing: libaio ######################### [1/3]
Installing: libaio-devel ######################### [2/3]
Installing: lam ######################### [3/3]
Installed: lam.x86_64 2:7.1.1-11
Dependency Installed: libaio.x86_64 0:0.3.106-2.2 libaio-devel.x86_64 0:0.3.106-2.2
Complete!
[root@cs-room443-d01 user01]# /sbin/ldconfig -v | grep mpi <-- 何故かldconfigが自動実行されない場合があるらしいので,念のため手動で実行し,MPIライブラリが登録されたことを確認する
liblamf77mpi.so.0 -> liblamf77mpi.so.0.0.0
liblammpi++.so.0 -> liblammpi++.so.0.0.0
libmpi.so.0 -> libmpi.so.0.0.0
[root@cs-room443-d01 user01]# /sbin/chkconfig --list nfs
nfs 0:off 1:off 2:off 3:off 4:off 5:off 6:off
[root@cs-room443-d01 user01]# /sbin/chkconfig nfs on
[root@cs-room443-d01 user01]# /sbin/chkconfig --list nfs
nfs 0:off 1:off 2:on 3:on 4:on 5:on 6:off
[root@cs-room443-d01 user01]# /sbin/service nfs start
NFS サービスを起動中: [ OK ]
NFS クォータを起動中: [ OK ]
NFS デーモンを起動中: [ OK ]
NFS mountd を起動中: [ OK ]
[root@cs-room443-d01 user01]# cat /etc/exports
# Apr. 3, 2006 Tomonori Kouya
/home 192.168.1.0/255.255.255.0(rw,async)
/usr/local 192.168.1.0/255.255.255.0(rw,async)
[root@cs-room443-d01 user01]# /usr/sbin/exportfs -a -v
exporting 192.168.1.0/255.255.255.0:/usr/local
exporting 192.168.1.0/255.255.255.0:/home
[root@cs-room443-d01 user01]# /usr/sbin/exportfs -v
/usr/local 192.168.1.0/255.255.255.0(rw,async,wdelay,root_squash)
/home 192.168.1.0/255.255.255.0(rw,async,wdelay,root_squash)
[root@cs-room43-d02 user01]# /sbin/chkconfig --list nfs
nfs 0:off 1:off 2:off 3:off 4:off 5:off 6:off
[root@cs-room43-d02 user01]# /sbin/chkconfig nfs on
[root@cs-room43-d02 user01]# /sbin/chkconfig --list nfs
nfs 0:off 1:off 2:on 3:on 4:on 5:on 6:off
[root@cs-room43-d02 user01]# cat /etc/fstab
(略)
cs-room443-d01-in:/usr/local /usr/local nfs rw,hard,intr 0 0
cs-room443-d01-in:/home /home nfs rw,hard,intr 0 0
[root@cs-room43-d02 user01]# mount /usr/local
[root@cs-room43-d02 user01]# mount
(略)
cs-room443-d01-in:/usr/local on /usr/local type nfs (rw,hard,intr,addr=192.168.1.21)
[root@cs-room443-d01 user01]# yum install ypserv
(略)
---> Downloading header for ypserv to pack into transaction set.
ypserv-2.19-0.x86_64.rpm 100% |=========================| 19 kB 00:00
(略)
=============================================================================
Package Arch Version Repository Size
=============================================================================
Installing:
ypserv x86_64 2.19-0 core 140 k
(略)
Installing: ypserv ######################### [1/1]
Installed: ypserv.x86_64 0:2.19-0
Complete!
[root@cs-room443-d01 user01]# /sbin/chkconfig --list
(略)
portmap 0:off 1:off 2:off 3:on 4:on 5:on 6:off
(略)
ypbind 0:off 1:off 2:on 3:on 4:on 5:on 6:off
yppasswdd 0:off 1:off 2:off 3:off 4:off 5:off 6:off
ypserv 0:off 1:off 2:off 3:off 4:off 5:off 6:off
ypxfrd 0:off 1:off 2:off 3:off 4:off 5:off 6:off
[root@cs-room443-d01 user01]# /sbin/chkconfig ypbind on
[root@cs-room443-d01 user01]# /sbin/chkconfig yppasswdd on
[root@cs-room443-d01 user01]# /sbin/chkconfig ypserv on
[root@cs-room443-d01 user01]# /sbin/chkconfig ypxfrd on
[root@cs-room443-d01 user01]# /sbin/chkconfig --list
ypbind 0:off 1:off 2:on 3:on 4:on 5:on 6:off
yppasswdd 0:off 1:off 2:on 3:on 4:on 5:on 6:off
ypserv 0:off 1:off 2:on 3:on 4:on 5:on 6:off
ypxfrd 0:off 1:off 2:on 3:on 4:on 5:on 6:off
[root@cs-room443-d01 user01]# cat /etc/yp.conf
(略)
domain cs-pccluster3 server cs-room443-d01-in
[root@cs-room443-d01 user01]# cat /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=cs-room443-d01
NISDOMAIN="cs-pccluster3"
[root@cs-room443-d01 user01]# domainname cs-pccluster3
[root@cs-room443-d01 user01]# domainname
cs-pccluster3
[root@cs-room443-d01 yp]# cat /etc/nsswitch.conf
(略)
#passwd: files
passwd: db files nisplus nis
#shadow: files
shadow: db files nisplus nis
#group: files
group: db files nisplus nis
hosts: db files nisplus nis dns
(略)
[root@cs-room443-d01 yp]# cd /var/yp
[root@cs-room443-d01 yp]# /sbin/service ypserv start
YP サーバサービスを起動中: [ OK ]
[root@cs-room443-d01 yp]# /sbin/service ypbind start
NIS ドメインにバインド中: [ OK ]
NIS ドメインサーバを検索中
[root@cs-room443-d01 yp]# /sbin/service yppasswdd start
YP パスワードサービスを起動中: [ OK ]
[root@cs-room443-d01 yp]# /sbin/service ypxfrd start
YP マップサーバを起動中: [ OK ]
[root@cs-room443-d01 yp]# make
gmake[1]: Entering directory `/var/yp/cs-pccluster3'
Updating netid.byname...
gmake[1]: Leaving directory `/var/yp/cs-pccluster3'
[root@cs-room443-d01 yp]# ypcat passwd
user01:(略):500:500:User 01:/home/user01:/bin/bash
[root@cs-room443-d01 yp]# ypcat hosts
192.168.2.3 cs-room443-d03
192.168.2.4 cs-room443-d04
133.88.120.88 cs-room443-d01-out cs-room443-d01.cs.sist.ac.jp
192.168.1.22 cs-room443-d02-in
127.0.0.1 localhost localhost.localdomain
192.168.1.23 cs-room443-d03-in
127.0.0.1 localhost localhost.localdomain
192.168.1.24 cs-room443-d04-in
192.168.2.1 cs-room443-d01
192.168.2.2 cs-room443-d02
133.88.120.88 cs-room443-d01-out cs-room443-d01.cs.sist.ac.jp
192.168.1.21 cs-room443-d01-in
[root@cs-room443-d01 yp]# reboot
(再起動後)
[root@cs-room443-d01 user01]# domainname
cs-pccluster3
[root@cs-room443-d01 user01]# ypcat passwd
user01:(略):500:500:User 01:/home/user01:/bin/bash
[root@cs-room443-d01 user01]# ypcat hosts
192.168.2.3 cs-room443-d03
192.168.2.4 cs-room443-d04
133.88.120.88 cs-room443-d01-out cs-room443-d01.cs.sist.ac.jp
192.168.1.22 cs-room443-d02-in
127.0.0.1 localhost localhost.localdomain
192.168.1.23 cs-room443-d03-in
127.0.0.1 localhost localhost.localdomain
192.168.1.24 cs-room443-d04-in
192.168.2.1 cs-room443-d01
192.168.2.2 cs-room443-d02
133.88.120.88 cs-room443-d01-out cs-room443-d01.cs.sist.ac.jp
192.168.1.21 cs-room443-d01-in
[root@cs-room443-d01 user01]# /usr/sbin/exportfs -v
/usr/local 192.168.1.0/255.255.255.0(rw,async,wdelay,root_squash)
/home 192.168.1.0/255.255.255.0(rw,async,wdelay,root_squash)
[root@cs-room443-d01 user01]#
[root@cs-room443-d02 user01]# cat /etc/nsswitch.conf
(略)
#passwd: files
passwd: db files nisplus nis
#shadow: files
shadow: db files nisplus nis
#group: files
group: db files nisplus nis
#hosts: db files nisplus nis dns
#hosts: files dns
hosts: db files nisplus nis dns
(略)
[root@cs-room43-d02 user01]# cat /etc/yp.conf
(略)
domain cs-pccluster3 server cs-room443-d01-in
[root@cs-room43-d02 user01]# /sbin/chkconfig --list ypbind
ypbind 0:off 1:off 2:off 3:off 4:off 5:off 6:off
[root@cs-room43-d02 user01]# /sbin/chkconfig ypbind on
[root@cs-room43-d02 user01]# /sbin/chkconfig --list ypbind
ypbind 0:off 1:off 2:on 3:on 4:on 5:on 6:off
[root@cs-room43-d02 user01]# cat /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=cs-room43-d02
GATEWAY=192.168.11.254
NISDOMAIN="cs-pccluster3"
[root@cs-room43-d02 user01]# cat /etc/fstab
(略)
cs-room443-d01-in:/usr/local /usr/local nfs rw,hard,intr 0 0
cs-room443-d01-in:/home /home nfs rw,hard,intr 0 0
(再起動後)
[root@cs-room43-d02 user01]# ypcat passwd
user01:(略):500:500:User 01:/home/user01:/bin/bash
[root@cs-room43-d02 user01]# ypcat hosts
192.168.2.3 cs-room443-d03
192.168.2.4 cs-room443-d04
192.168.1.22 cs-room443-d02-in
127.0.0.1 localhost localhost.localdomain
192.168.1.23 cs-room443-d03-in
127.0.0.1 localhost localhost.localdomain
192.168.1.24 cs-room443-d04-in
192.168.2.1 cs-room443-d01
192.168.2.2 cs-room443-d02
192.168.1.21 cs-room443-d01-in
[root@cs-room43-d02 user01]# mount
(略)
cs-room443-d01-in:/usr/local on /usr/local type nfs (rw,hard,intr,addr=192.168.1.21)
cs-room443-d01-in:/home on /home type nfs (rw,hard,intr,addr=192.168.1.21)
[root@cs-room443-d01 user01]# cat /etc/lam/lam-bhost.def
(略)
cs-room443-d01
cs-room443-d02
cs-room443-d03
cs-room443-d04
[user01@cs-room443-d01 ~]$ lamboot -v <-- MPIプログラム使用時は必ず実行
LAM 7.1.1/MPI 2 C++/ROMIO - Indiana University
n-1<5522> ssi:boot:base:linear: booting n0 (cs-room443-d01)
n-1<5522> ssi:boot:base:linear: booting n1 (cs-room443-d02)
user01@cs-room443-d02's password:パスワード入力
user01@cs-room443-d02's password:パスワード入力
n-1<5522> ssi:boot:base:linear: booting n2 (cs-room443-d03)
user01@cs-room443-d03's password:パスワード入力
user01@cs-room443-d03's password:パスワード入力
n-1<5522> ssi:boot:base:linear: booting n3 (cs-room443-d04)
user01@cs-room443-d04's password:パスワード入力
user01@cs-room443-d04's password:パスワード入力
n-1<5522> ssi:boot:base:linear: finished
[user01@cs-room443-d01 ~]$ lamnodes
n0 cs-room443-d01:1:origin,this_node
n1 cs-room443-d02:1:
n2 cs-room443-d03:1:
n3 cs-room443-d04:1:
[user01@cs-room443-d01 ~]$ cat mpi_hellow.c
#include <stdio.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
int myrank, numprocs, length_name;
char nodename[128];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Get_processor_name(nodename, &length_name);
printf("Hellow, MPI! (%0d/%0d)-- %s\n", myrank, numprocs, nodename);
MPI_Finalize();
return 0;
}
[user01@cs-room443-d01 ~]$ mpicc mpi_hellow.c <-- MPIプログラムのコンパイル
[user01@cs-room443-d01 ~]$ mpirun -np 8 ./a.out <--- 実行(8PEs使用時)
Hellow, MPI! (0/8)-- cs-room443-d01
Hellow, MPI! (2/8)-- cs-room443-d03
Hellow, MPI! (1/8)-- cs-room443-d02
Hellow, MPI! (3/8)-- cs-room443-d04
Hellow, MPI! (6/8)-- cs-room443-d03
Hellow, MPI! (5/8)-- cs-room443-d02
Hellow, MPI! (7/8)-- cs-room443-d04
Hellow, MPI! (4/8)-- cs-room443-d01
[user01@cs-room443-d01 ~]$ lamhalt -v <-- 終了時
LAM 7.1.1/MPI 2 C++/ROMIO - Indiana University
Shutting down LAM
hreq: waiting for HALT ACKs from remote LAM daemons
hreq: received HALT_ACK from n2 (cs-room443-d03)
hreq: received HALT_ACK from n1 (cs-room443-d02)
hreq: received HALT_ACK from n3 (cs-room443-d04)
hreq: received HALT_ACK from n0 (cs-room443-d01)
LAM halted
2 CPUs / Nodeで使う時には,/etc/lam/lam-bhost.defの各ホスト名の後に"cpu=2"と指定しておく。その後lambootをやり直すと,lamnodesで各ノードのCPU数が2と表示される。
[user01@cs-room443-d01 ~]$ cat /etc/lam/lam-bhost.def
cs-room443-d01 cpu=2
cs-room443-d02 cpu=2
cs-room443-d03 cpu=2
cs-room443-d04 cpu=2
[user01@cs-room443-d01 ~]$ lamnodes
n0 cs-room443-d01:2:origin,this_node
n1 cs-room443-d02:2:
n2 cs-room443-d03:2:
n3 cs-room443-d04:2:
[user01@cs-room443-d01 ~]$ mpirun -np 8 ./a.out
Hellow, MPI! (6/8)-- cs-room443-d04
Hellow, MPI! (0/8)-- cs-room443-d01
Hellow, MPI! (4/8)-- cs-room443-d03
Hellow, MPI! (1/8)-- cs-room443-d01
Hellow, MPI! (7/8)-- cs-room443-d04
Hellow, MPI! (2/8)-- cs-room443-d02
Hellow, MPI! (5/8)-- cs-room443-d03
Hellow, MPI! (3/8)-- cs-room443-d02
基本的には上記LAM編と一緒。各マシンにLAMがインストールされていない状態で,ソースからmpichをインストールしてセットアップする。