Para fazer o gridftp ser feito pela interface certa (ele dá o erro "Expected /host/
target but got /org=doegrid//nome da maquina.sprace.org.br"):
[root@spraid02 ~]# vim /opt/d-cache/config/gridftpdoor.batch
create dmg.cells.services.login.LoginManager GFTP-${thisHostname} \
"${gsiFtpPortNumber} \
-listen=200.136.80.7\
Update dos nós
Já que iremos parar a farm, por que não fazer update dos pacotes instalados nas máquinas?
Criando o mirror na sprace:
[root@@sprace ~]# nohup nice rsync -avzlH --delete --exclude=sites/Fermi --exclude=errata/debuginfo --exclude=errata/obsolete rsync://rsync.scientificlinux.org/scientific/45/i386 /export/linux/SL_45_i386&
[root@@sprace ~]# yum install createrepo
[root@@sprace ~]# mkdir /var/www/html/linux
[root@@sprace ~]# mkdir /var/www/html/linux/scientific
[root@@sprace ~]# mkdir /var/www/html/linux/scientific/45
[root@@sprace ~]# mkdir /var/www/html/linux/scientific/45/errata
[root@@sprace ~]# ln -s /export/linux/SL_45_i386/errata/SL/RPMS/ /var/www/html/linux/scientific/45/errata/SL
[root@@sprace ~]# ln -s /export/linux/SL_45_i386/SL/RPMS/ /var/www/html/linux/scientific/45/SL
[root@@sprace ~]# createrepo /var/www/html/linux/scientific/45/errata/SL
[root@@sprace ~]# createrepo /var/www/html/linux/scientific/45/SL/
[root@@sprace ~]# vim /etc/httpd/conf/httpd.conf
#_____________Repositorio do Scientific Linux__________
Options +Indexes
[root@@sprace ~]# /etc/init.d/httpd restart
Agora devemos configurar os clientes. Desabilitando o repositório SL padrão:
[root@@sprace ~]# for ((i=1; i<19; i++)) ; do ssh 192.168.1.$i "sed -i 's/enabled=1/enabled=0/g' /etc/yum.repos.d/sl.repo /etc/yum.repos.d/sl-errata.repo"; done
[root@@sprace ~]# for ((i=21; i<84; i++)) ; do ssh 192.168.1.$i "sed -i 's/enabled=1/enabled=0/g' /etc/yum.repos.d/sl.repo /etc/yum.repos.d/sl-errata.repo"; done
agora inserimos nosso repositório
[root@@sprace ~]# for ((i=1; i<19; i++)) ; do scp -r /export/linux/SL_45_i386/sprace*.repo 192.168.1.$i:/etc/yum.repos.d/. ; done
[root@@sprace ~]# for ((i=21; i<84; i++)) ; do scp -r /export/linux/SL_45_i386/sprace*.repo 192.168.1.$i:/etc/yum.repos.d/. ; done
fazendo a atualização
[root@@sprace ~]# for ((i=1; i<19; i++)) ; do ssh 192.168.1.$i 'yum -y clean all; yum -y update yum; yum -y update' ; done
[root@@sprace ~]# for ((i=21; i<84; i++)) ; do ssh 192.168.1.$i 'yum -y clean all; yum -y update yum; yum -y update' ; done
Backup do database da spdc00
Parar o dcache, pnfs
[root@@spraid ~]# /opt/d-cache/bin/dcache-pool stop
[root@@spraid ~]# /opt/d-cache/bin/dcache-core stop
na spdc00
[root@@spdc00 ~]# /opt/d-cache/bin/dcache-core stop
[root@@spdc00 ~]# /opt/pnfs/bin/pnfs stop
[root@@spdc00 ~]# pg_dump -U pnfsserver admin >admin.dump ;pg_dump -U pnfsserver data1 >data1.dump
[root@@spdc00 ~]# scp /root/*.dump mdias@@osg-se.sprace.org.br:/home/mdias/.
Backup da spgrid e spraid:
[root@@spgrid ~]# cd /raid0/spgrid_backup/
[root@@spgrid spgrid_backup]# dd if=/dev/sda of=spgrid.img bs=512k conv=noerror
o mesmo para spraid:
[root@@spraid ~]# mkdir /raid0/spraid_backup
[root@@spraid ~]# cd /raid0/spraid_backup/
[root@@spraid spraid_backup]# dd if=/dev/sda of=spraid.img bs=512k conv=noerror
A atualização dos backups pode ser feita diretamente nas imagens dd:
Neste exemplo iremos montar o /var. Determine o número de cilindros da máquina:
[root@@spgrid ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda3 2.0G 695M 1.2G 37% /
/dev/sda1 248M 13M 223M 6% /boot
none 1014M 0 1014M 0% /dev/shm
/dev/sda7 2.0G 45M 1.9G 3% /tmp
/dev/sda5 9.9G 5.7G 3.7G 61% /usr
/dev/sda8 12G 6.2G 4.8G 57% /usr/local
/dev/sda6 4.0G 1.5G 2.4G 38% /var
[root@@spgrid ~]# fdisk -l
64 heads, 32 sectors/track, 34680 cylinders
na spraid
[root@@spraid ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 2.0G 902M 1012M 48% /
none 1013M 0 1013M 0% /dev/shm
/dev/sda7 1012M 34M 927M 4% /tmp
/dev/sda5 9.9G 3.2G 6.2G 34% /usr
/dev/sda8 15G 2.5G 12G 18% /usr/local
/dev/sda6 2.0G 540M 1.4G 29% /var
[root@@spraid ~]# fdisk -l /dev/sda
255 heads, 63 sectors/track, 4425 cylinders
usando a saída do fdisk acima:
[root@@spgrid ~]# fdisk -l -u -C 34680 /raid0/spgrid_backup/spgrid.img
/raid0/spgrid_backup/spgrid.img6 34078752 42467327 4194288 83 Linux
multiplique o Start por 512 bytes e monte
[root@@spgrid ~]# bc
34078752*512
17448321024
quit
[root@@spgrid ~]# mount -o loop,offset=17448321024 -t ext3 /raid0/spgrid_backup/spgrid.img /root/test2/
sincronize a pasta /var com a imagem dd
[root@@spgrid ~]# rsync -avx --delete /var/ /root/test2/
[root@@spgrid ~]# umount /root/test2
Atualizando o backup
Atualizando o backup da spgrid para a data antes do upgrade
[root@@spgrid mdias]# mount -o loop,offset=16384 -t ext3 /raid0/spgrid_backup/spgrid.img /root/test
[root@@spgrid mdias]# rsync -avx --delete /boot/ /root/test/
[root@@spgrid mdias]# umount /root/test
[root@@spgrid mdias]# mount -o loop,offset=4563402752 -t ext3 /raid0/spgrid_backup/spgrid.img /root/test2
[root@@spgrid mdias]# rsync -avx --delete / /root/test2/
[root@@spgrid mdias]# umount /root/test2
[root@@spgrid mdias]# mount -o loop,offset=6710902784 -t ext3 /raid0/spgrid_backup/spgrid.img /root/test2
[root@@spgrid mdias]# rsync -avx --delete /usr/ /root/test2/
[root@@spgrid mdias]# umount /root/test2
[root@@spgrid mdias]# mount -o loop,offset=17448321024 -t ext3 /raid0/spgrid_backup/spgrid.img /root/teste
[root@@spgrid mdias]# rsync -avx --delete /var/ /root/teste/
[root@@spgrid mdias]# umount /root/teste
[root@@spgrid ~]# mount -o loop,offset=21743288320 -t ext3 /raid0/spgrid_backup/spgrid.img /root/teste2
[root@@spgrid ~]# rsync -avx --delete /tmp/ /root/teste2/
[root@@spgrid ~]# umount /root/teste2
[root@@spgrid mdias]# mount -o loop,offset=23890771968 -t ext3 /raid0/spgrid_backup/spgrid.img /root/teste
[root@@spgrid mdias]# rsync -avx --delete /usr/local/ /root/teste/
[root@@spgrid ~]# umount /root/teste
Problemas no condor
Nos nodes, temos que modificar o /etc/fstab
sed -i 's/spg00/osg-ce/g' /etc/fstab
e lembrar de editar o /etc/hosts e /etc/resolv.conf, mudando de if.usp.br para sprace.org.br.
Na osg-ce editar tudo sobre security em
[root@@osg-ce ~]# vim /opt/osg-0.8.0/condor/local.osg-ce/condor_config.local
# Security setup to use pool password
fazer a configuracao do
vim /OSG/condor/etc/condor_config
como em
mkdir /scratch/condor;mkdir /scratch/OSG;chmod a+rw /scratch/OSG; chown condor:condor /scratch/condor
cd /scratch/condor;mkdir execute log spool
chown condor:condor execute log spool
ln -s /OSG/condor/etc/condor_config condor_config
vim condor_config.local
#############################################
#
# Local Condor Configuration File for osg-ce
#
############################################
DAEMON_LIST = MASTER, COLLECTOR, NEGOTIATOR, STARTD, SCHEDD
MASTER = $(SBIN)/condor_master
COLLECTOR = $(SBIN)/condor_collector
NEGOTIATOR = $(SBIN)/condor_negotiator
STARTD = $(SBIN)/condor_startd
SCHEDD = $(SBIN)/condor_schedd
NETWORK_INTERFACE = 192.168.1.150
START_LOCAL_UNIVERSE = TotalLocalJobsRunning < 20 || GridMonitorJob =?= TRUE
chown condor:condor condor_config.local
Problemas com o GUMS
Devido a problemas com o pacman tivemos que reinstalar o osg inteiro:
cd /opt/pacman-3.21/
source setup.sh
cd /OSG/
pacman -get OSG:ce
source setup.sh
pacman -get OSG:Globus-Condor-Setup
pacman -get OSG:ManagedFork
$VDT_LOCATION/vdt/setup/configure_globus_gatekeeper --managed-fork y --server y
pacman -get OSG:gums
cp /opt/osg-0.8.0/post-install/gsi-authz.conf /etc/grid-security/.
cp /opt/osg-0.8.0/post-install/prima-authz.conf /etc/grid-security/.
/opt/osg-0.8.0/vdt/sbin/vdt-register-service -name gums-host-cron --enable
/opt/osg-0.8.0/vdt/sbin/vdt-control --on gums-host-cron
vim $VDT_LOCATION/MonaLisa/Service/VDTFarm/ml.properties
lia.Monitor.group=OSG
vim /OSG/MonaLisa/Service/CMD/site_env
CONDOR_LOCATION=/opt/osg-0.8.0/condor
export CONDOR_LOCATION
CONDOR_CONFIG=/opt/osg-0.8.0/condor/etc/condor_config
export CONDOR_CONFIG
CONDOR_LOCAL_DIR=/scratch/condor
export CONDOR_LOCAL_DIR
vim /OSG/MonaLisa/Service/CMD/ml_env
FARM_NAME=SPRACE
vdt-register-service --name MLD --enable
/OSG/vdt/setup/configure_prima_gt4 --enable --gums-server osg-ce.sprace.org.br
vim /OSG/condor/etc/condor_config
chown globus: /etc/grid-security/containerkey.pem /etc/grid-security/containercert.pem
/OSG/tomcat/v55/webapps/gums/WEB-INF/scripts/addMySQLAdmin "/DC=org/DC=doegrids/OU=People/CN=Marco Dias 280904"
/opt/osg-0.8.0/vdt/setup/configure_mds -secure
cd /OSG/tomcat/v55/webapps/gums/WEB-INF/scripts
./gums-create-config --osg-template
Para que o /OSG/verify/site_verify.sh rodasse sem erro, devemos editar o /OSG/globus/etc/grid3-info.conf:
/opt/osg-0.8.0/monitoring/configure-osg.sh
Ainda é preciso fazer o Prima compativel com o GUMS
> cd /etc/grid-security/http
> mv httpkey.pem httpkey.pem.sav
> mv httpcert.pem httpcert.pem.sav
> cd ..
> cp hostkey.pem http/httpkey.pem
> cp hostcert.pem http/httpcert.pem
> chown -R daemon:daemon http
Alguns diretórios onde ficam os softwares tiveram as permissões ajustadas:
chmod 1777 /raid0/OSG/data
chmod 1777 /raid0/OSG/app
chmod 1777 /raid0/OSG/pool
o 1 (sticky bit) significa que, mesmo com permissão 777 (read, write, exec), somente o dono de um arquivo pode removê-lo ou renomeá-lo dentro deste diretório.
Problemas com o CEmon e o GIP
[root@osg-ce ~]# $VDT_LOCATION/vdt/setup/configure_cemon --consumer https://osg-ress-1.fnal.gov:8443/ig/services/CEInfoCollector --topic OSG_CE
[root@osg-ce ~]# /opt/osg-0.8.0/vdt/setup/configure_gip
[root@osg-ce ~]# vdt-register-service --name gris --enable
Configuracoes na Spraid01
Depois da instalacao,
Complementando a instalação:
[root@@spraid01 ~]# vim /etc/yp.conf
domain grid server 192.168.1.150
ypserver 192.168.1.150
[root@@spraid01 ~]# vim /etc/nsswitch.conf
passwd: files nis
shadow: files nis
group: files nis
publickey: nisplus
aliases: files nisplus
[root@@spraid01 ~]# vim /etc/sysconfig/network
NISDOMAIN=grid
[root@@spraid01 ~]# mv /etc/ntp.conf /etc/ntp_conf.bck
[root@@spraid01 ~]# vim /etc/ntp.conf
server 192.168.1.150
authenticate no
driftfile /var/lib/ntp/drift
[root@@spraid01 ~]# vim /etc/ntp/step-tickers
192.168.1.150
Montagem do diretorio OSG. Primeiro na osg-ce
[root@@osg-se /]# scp /etc/hosts 200.136.80.30:/etc/.
[root@@spraid01 ~]# echo "/spraid01 /etc/auto.spraid01 --timeout=30" >> /etc/auto.master
[root@@spraid01 ~]# echo "OSG -rw,soft,bg,rsize=8192,wsize=8192,tcp 192.168.1.150:/OSG" > /etc/auto.spraid01
[root@@spraid01 ~]# chkconfig autofs on
[root@@spraid01 ~]# ln -s /spraid01/OSG /OSG
Na osg-se
[root@@osg-se /]# scp /root/dcache-server-1.8.0-8.noarch.rpm /root/dcache-client-1.8.0-0.noarch.rpm /root/ganglia-monitor-core-gmond-2.5.4-8.i386.rpm /root/jdk-6u2-ea-bin-b02-linux-i586-12_apr_2007-rpm.bin 200.136.80.30:
Instalando o ganglia
[root@@spraid01 ~]# groupadd -g 104 ganglia;useradd -d /var/lib/ganglia -s /bin/false -g ganglia -u 107 ganglia
[root@@spraid01 ~]# rpm -ivh /root/ganglia-monitor-core-gmond-2.5.4-8.i386.rpm
[root@@spraid01 ~]# mv /etc/gmond.conf /etc/gmond_conf.bck
[root@@spraid01 ~]# vim /etc/gmond.conf
name "SPGRID Cluster"
owner "SPRACE-HEP"
url "http://osg-ce.sprace.org.br"
num_nodes 86
setuid ganglia
location "0,5,0"
mcast_if eth1
Instalando java
[root@@spraid01 ~]# cd /root/
[root@@spraid01 ~]# chmod 755 jdk-6u2-ea-bin-b02-linux-i586-12_apr_2007-rpm.bin
[root@@spraid01 ~]# ./jdk-6u2-ea-bin-b02-linux-i586-12_apr_2007-rpm.bin
Passando os certificados da sprace para a ftp-01:
[root@@sprace mdias]# scp -r /home/mdias/ftp-01/ 200.136.80.30:
Instalando os certificados:
[root@@spraid01 ~]# mkdir /etc/grid-security
[root@@spraid01 ~]# cp /root/ftp-01/host* /etc/grid-security/.
[root@@spraid01 ~]# chmod 444 /etc/grid-security/hostcert.pem
[root@@spraid01 ~]# chmod 400 /etc/grid-security/hostkey.pem
Instalacao do dcache:
[root@@spraid01 ~]# vim /etc/profile
export JAVA_HOME=/usr/java/jdk1.6.0_02
[root@@spraid01 ~]# rpm -ivh /usr/local/src/dcache-client-1.8.0-0.noarch.rpm /usr/local/src/dcache-server-1.8.0-8.noarch.rpm
[root@@spraid01 ~]# cp /opt/d-cache/etc/dCacheSetup.template /opt/d-cache/config/dCacheSetup
[root@@spraid01 ~]# vim /opt/d-cache/config/dCacheSetup
serviceLocatorHost=osg-se.sprace.org.br
java="/usr/java/jdk1.6.0_02/bin/java"
useGPlazmaAuthorizationModule=true
useGPlazmaAuthorizationCell=false
[root@@spraid01 ~]# cp /opt/d-cache/etc/node_config.template /opt/d-cache/etc/node_config
[root@@spraid01 ~]# vim /opt/d-cache/etc/node_config
NODE_TYPE=pool
SERVER_ID=sprace.org.br
ADMIN_NODE=osg-se.sprace.org.br
GRIDFTP=yes
poolManager=yes
[root@@spraid01 ~]# vim /opt/d-cache/etc/dcachesrm-gplazma.policy
saml-vo-mapping="ON" #era OFF
saml-vo-mapping-priority="1"
kpwd-priority="2" #era 3
grid-mapfile-priority="3" #era 4
gplazmalite-vorole-mapping-priority="4" #era 2
mappingServiceUrl="https://osg-ce.sprace.org.br:8443/gums/services/GUMSAuthorizationServicePort"
[root@@spraid01 ~]# scp osg-se.sprace.org.br:/opt/d-cache/etc/dcache.kpwd /opt/d-cache/etc/dcache.kpwd
[root@@spraid01 ~]# more /opt/d-cache/etc/dcache.kpwd|grep sprace.org.br|sed 's/login/authorize/g' > /etc/grid-security/storage-authzdb
[root@@spraid01 ~]# echo "/raid2 1649 no" > /opt/d-cache/etc/pool_path
[root@@spraid01 ~]# echo "/raid3 1649 no" >> /opt/d-cache/etc/pool_path
[root@@spraid01 ~]# echo "/raid4 1649 no" >> /opt/d-cache/etc/pool_path
[root@@spraid01 ~]# echo "/raid5 1649 no" >> /opt/d-cache/etc/pool_path
[root@@spraid01 ~]# /opt/d-cache/install/install.sh
[root@@spraid01 ~]# cp /opt/d-cache/bin/dcache-core /etc/init.d/.
[root@@spraid01 ~]# cp /opt/d-cache/bin/dcache-pool /etc/init.d/.
[root@@spraid01 ~]# chkconfig --add dcache-core
[root@@spraid01 ~]# chkconfig --add dcache-pool
[root@@spraid01 ~]# chkconfig dcache-core on
[root@@spraid01 ~]# chkconfig dcache-pool on
0)atualizar backup da spgrid e spraid
1)comunicar as listas sobre a
parada: cms-t2@@fnal.gov, hn-cms-gridAnnounce@@cern.ch, goc@@opensciencegrid.org
2)desligar o gatekeeper na spgrid, para não aceitar novos jobs
#/etc/init.d/xinetd stop
3)backup dos certificados de grid da spraid
#scp -pr /etc/grid-security sprace:/root/sprace/.
4)backup dos databases da spdc00
[mdias@@spdc00 ~]$ pg_dump -U pnfsserver admin >admin.dump
[mdias@@spdc00 ~]$ pg_dump -U pnfsserver data1 >data1.dump
[mdias@@spdc00 ~]$ scp data1.dump admin.dump
root@@osg-se.sprace.org.br:/root/.
5)shutdown da farm
6)Instalar a nova controladora RAID na spgrid e maior quantidade de
memória na spraid.
8)Instalacao do Scientific Linux na spgrid. Esquema de particionamento possível:
/ ->5Gb
/boot -> 500Mb
/tmp ->1Gb
/usr -> 10 Gb
/var -> o resto (linkar simbolicamente o /opt dentro do /var)
e na spraid (não formatar
partições /raid#) e novo d-cache, nas duas maquinas:
# wget
http://www.dcache.org/downloads/1.8.0/dcache-server-1.8.0-8.noarch.rpm;
wget http://www.dcache.org/downloads/1.8.0/dcache-client-1.8.0-0.noarch.rpm
# wget
http://www.java.net/download/jdk6/6u2/promoted/b02/binaries/jdk-6u2-ea-bin-b02-linux-i586-12_apr_2007-rpm.bin
# chmod 755 jdk-6u2-ea-bin-b02-linux-i586-12_apr_2007-rpm.bin
# ./jdk-6u2-ea-bin-b02-linux-i586-12_apr_2007-rpm.bin
# vim /etc/profile
export JAVA_HOME=/usr/java/jdk1.6.0_02
# rpm -ivh dcache-client-1.8.0-0.noarch.rpm
dcache-server-1.8.0-8.noarch.rpm
# cp /opt/d-cache/etc/dCacheSetup.template
/opt/d-cache/config/dCacheSetup
# vim /opt/d-cache/config/dCacheSetup
serviceLocatorHost=osg-se.sprace.org.br
java="/usr/java/jdk1.6.0_02/bin/java"
useGPlazmaAuthorizationModule=true
useGPlazmaAuthorizationCell=false
[root@@spgrid04 ~]# mkdir /scratch/teste
[root@@spgrid04 ~]# cp /opt/d-cache/etc/node_config.template
/opt/d-cache/etc/node_config
[root@@spgrid04 ~]# vim /opt/d-cache/etc/node_config
NODE_TYPE=pool
SERVER_ID=sprace.org.br
ADMIN_NODE=osg-se.sprace.org.br
GSIDCAP=no
SRM=yes
DCAP=no
GRIDFTP=yes
poolManager=yes
o resto tudo "no"
[root@@ftp-01 ~]# vim /opt/d-cache/etc/dcachesrm-gplazma.policy
saml-vo-mapping="ON"
kpwd="ON"
grid-mapfile="OFF"
gplazmalite-vorole-mapping="OFF"
saml-vo-mapping-priority="1"
kpwd-priority="2"
grid-mapfile-priority="3"
gplazmalite-vorole-mapping-priority="4"
mappingServiceUrl="https://osg-ce.sprace.org.br:8443/gums/services/GUMSAuthorizationServicePort"
# mkdir /etc/grid-security
na spgrid:
#scp -r 192.168.1.200:/home/mdias/ftp-01 /etc/grid-security
quando for instalar a spraid:
scp -pr sprace:/root/sprace /etc/grid-security
# chmod 444 /etc/grid-security/hostcert.pem
# chmod 400 /etc/grid-security/hostkey.pem
# more /opt/d-cache/etc/dcache.kpwd|grep sprace.org.br|sed
's/login/authorize/g' > /etc/grid-security/storage-authzdb
# scp osg-se.sprace.org.br:/opt/d-cache/etc/dcache.kpwd
/opt/d-cache/etc/dcache.kpwd
# echo "/scratch/teste 2 no" > /opt/d-cache/etc/pool_path
# /opt/d-cache/install/install.sh
9)Colocar nos racks a osg-ce e a osg-se.
a)Troca dos ips da rede interna (/etc/sysconfig-network/ifcg-eth#) pelos
ip's da spgrid e spdc00, respectivamente.
b)na osg-ce, acertar os arquivos
/var/named/chroot/var/named/sprace.org.br.zone e
/var/named/chroot/var/named/80.136.200.in-addr.arpa.zone para que
reflitam a nova configuração.
c)
[root@@osg-ce ~]# vim /etc/yp.conf
domain grid server 192.168.1.150
[root@@osg-ce ~]# vim /etc/gmond.conf
url "http://osg-ce.sprace.org.br/"
trusted_hosts 200.136.80.4
10)Migração dos databases para a osg-se
[root@@osg-se ~]# /etc/init.d/dcache-core stop
[root@@osg-se ~]# /etc/init.d/pnfs stop
[root@@osg-se ~]# dropdb -U postgres admin
[root@@osg-se ~]# dropdb -U postgres data1
[root@@osg-se ~]# createdb -U postgres admin
[root@@osg-se ~]# createdb -U postgres data1
[root@@osg-se ~]# psql -U postgres admin < admin.dump
[root@@osg-se ~]# psql -U postgres data1 < data1.dump
(verificar também o /pnfs/fs/admin/etc/config/dCache/dcache.conf)
[root@@osg-se etc]# echo "sprace.org.br" > /pnfs/fs/admin/etc/config/serverId
[root@@osg-se etc]# echo "osg-se.sprace.org.br" >/pnfs/fs/admin/etc/config/serverName