Hello all,

I need your help to solve a problem I have been dealing with for the past few days. I have configured a 2-node cluster using a shared SBD device stored on a 3PAR 7200 StoreServ, with 4-path multipathing set up for this device. What I'm seeing is that when a node is powered off, or even when I test the fencing method with the "crm node fence <node_name>" command, the requested node is powered off correctly, but the surviving node keeps trying to fence it through the SBD device after the fence timeout (90s) has expired. Only when the second node is back up and running does the cluster become responsive again and both nodes are OK. I have configured the following as requested:


And I'm using the following versions of openais and pacemaker:

nm-rep-srv01:~ # rpm -qa | grep openais
openais-1.1.4-5.13.6
libopenais3-1.1.4-5.13.6
pacemaker-mgmt-client-2.1.0-0.8.74
libpacemaker3-1.1.9-0.19.102
pacemaker-mgmt-2.1.2-0.7.40
drbd-pacemaker-8.4.1-0.11.6
perl-XML-NamespaceSupport-1.09-1.22
pacemaker-1.1.9-0.19.102

and the configuration is the following:

node nm-rep-srv01 \
attributes standby="off"
node nm-rep-srv02 \
attributes standby="off"
primitive clvm ocf:lvm2:clvmd \
operations $id="clvm-operations" \
op start interval="0" timeout="90" \
op stop interval="0" timeout="100" \
params daemon_timeout="160" daemon_options="-d2" \
meta target-role="Started"
primitive dlm ocf:pacemaker:controld \
operations $id="dlm-operations" \
op monitor interval="60s" timeout="60" start-delay="0" \
op start interval="0" timeout="120s" \
op stop interval="0" timeout="120s" \
params daemon="dlm_controld.pcmk" args="-q 0" configdir="/sys/kernel/config" \
meta target-role="Started"
primitive nfs-vip ocf:heartbeat:IPaddr2 \
params ip="10.40.1.153" cidr_netmask="25" \
op start interval="0" timeout="20s" \
op monitor interval="30s" timeout="60s" on-fail="standby" \
op stop interval="0" timeout="20s"
primitive p_exportfs_root ocf:heartbeat:exportfs \
params fsid="0" directory="/nfs" options="rw,crossmnt" clientspec="10.40.1.128/255.255.255.128" \
op monitor interval="30s"
primitive p_fs_rep1 ocf:heartbeat:Filesystem \
params device="/dev/nfs/rep1" directory="/nfs/rep1" fstype="ext3" \
op monitor interval="10s"
primitive p_fs_repdw1 ocf:heartbeat:Filesystem \
params device="/dev/nfs/repdw1" directory="/nfs/repdw1" fstype="ext3" \
op monitor interval="10s"
primitive p_lsb_nfsserver lsb:nfsserver \
op monitor on-fail="standby" interval="30s"
primitive p_lvm_nfs ocf:heartbeat:LVM \
params volgrpname="nfs" \
op monitor interval="30s"
primitive ping_DG ocf:pacemaker:ping \
params dampen="5s" host_list="10.205.4.3" name="pingd" \
op monitor interval="20s" timeout="60s" on-fail="standby" \
meta target-role="started"
primitive rep-vip ocf:heartbeat:IPaddr2 \
params ip="10.205.4.34" cidr_netmask="26" \
operations $id="rep-vip-operations" \
op start interval="0" timeout="20s" \
op monitor interval="10s" timeout="60s" on-fail="standby" \
op stop interval="0" timeout="20s"
primitive serviceweaver lsb:serviceweaver \
op monitor interval="10s" timeout="20s" on-fail="standby" start-delay="0" \
op start interval="0" timeout="600s" start-delay="0" \
op stop interval="0" timeout="500s" start-delay="0"
primitive stonith_sbd stonith:external/sbd \
meta target-role="Started" \
operations $id="stonith_sbd-operations" \
op start interval="0" timeout="20" \
params sbd_device="/dev/mapper/sbd_device_part1"
group dlm-clvm-ping dlm clvm ping_DG
group g_nfs nfs-vip p_lvm_nfs p_lsb_nfsserver p_fs_rep1 p_fs_repdw1 p_exportfs_root rep-vip serviceweaver \
meta is-managed="true" target-role="Started"
clone dlm-clvm-clone dlm-clvm-ping \
meta target-role="Started" ordered="true"
order ord-clvm-nfs-rep inf: dlm-clvm-clone g_nfs
property $id="cib-bootstrap-options" \
dc-version="1.1.9-2db99f1" \
cluster-infrastructure="classic openais (with plugin)" \
expected-quorum-votes="2" \
no-quorum-policy="ignore" \
stop-orphan-actions="false" \
stop-orphan-resources="false" \
default-action-timeout="60s" \
default-resource-stickiness="10000" \
stonith-timeout="90s" \
last-lrm-refresh="1480415956" \
stonith-action="poweroff" \
stonith-enabled="true"
rsc_defaults $id="rsc-options" \
resource-stickiness="200"
op_defaults $id="op_defaults-options" \
on-fail="fence"
Could anyone help me with this problem?