Skip to content

Commit d1bec37

Browse files
committed
Fix: TEST the gossip test was failing because a low-low node was expecting news from a top-level one.
1 parent ecf7fa7 commit d1bec37

11 files changed

+18
-60
lines changed

opsbro/gossip.py

+13-39
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,6 @@ def find_group_node(self, group, hkey):
356356
group_nodes.sort()
357357

358358
idx = bisect.bisect_right(group_nodes, hkey) - 1
359-
# logger.debug("IDX %d" % idx, hkey, kv_nodes, len(kv_nodes))
360359
nuuid = group_nodes[idx]
361360
return nuuid
362361

@@ -686,6 +685,18 @@ def set_suspect(self, suspect):
686685
self.stack_suspect_broadcast(node)
687686

688687

688+
# Define a function that will wait 10s to let the other nodes know that we did leave
689+
# and then ask for a clean stop of the daemon
690+
@staticmethod
691+
def _bailout_after_leave():
692+
wait_time = 10
693+
logger.info('Waiting out %s seconds before exiting as we are set in leave state' % wait_time)
694+
time.sleep(10)
695+
logger.info('Exiting from a self leave message')
696+
# Will set self.interrupted = True on every thread that loops
697+
stopper.do_stop('Exiting from a leave massage')
698+
699+
689700
# Someone ask us about a leave node, so believe it
690701
# Leave node are about all states, so we don't filter by current state
691702
# if the incarnation is ok, we believe it
@@ -739,19 +750,7 @@ def set_leave(self, leaved, force=False):
739750
self._set_myself_atomic_property('state', state)
740751
self.increase_incarnation_and_broadcast()
741752

742-
743-
# Define a function that will wait 10s to let the others nodes know that we did leave
744-
# and then ask for a clean stop of the daemon
745-
def bailout_after_leave(self):
746-
wait_time = 10
747-
logger.info('Waiting out %s seconds before exiting as we are set in leave state' % wait_time)
748-
time.sleep(10)
749-
logger.info('Exiting from a self leave message')
750-
# Will set self.interrupted = True to every thread that loop
751-
stopper.do_stop('Exiting from a leave massage')
752-
753-
754-
threader.create_and_launch(bailout_after_leave, args=(self,), name='Exiting agent after set to leave', part='agent')
753+
threader.create_and_launch(self._bailout_after_leave, name='Exiting agent after set to leave', part='agent')
755754
return
756755

757756
logger.info('LEAVING: The node %s is leaving' % node['name'])
@@ -1026,12 +1025,6 @@ def ping_another_nodes(self):
10261025
# but talk to us
10271026
# also exclude leave nodes, because they said they are not here anymore ^^
10281027
def ping_another(self):
1029-
# Only launch one parallel ping in the same time, max2 if we have thread
1030-
# that mess up with this flag :)
1031-
# if self.ping_another_in_progress:
1032-
# return
1033-
# self.ping_another_in_progress = True
1034-
10351028
possible_nodes = self.__get_valid_nodes_to_ping()
10361029

10371030
# first previously deads
@@ -1048,7 +1041,6 @@ def ping_another(self):
10481041
other = random.choice(possible_nodes)
10491042
self.__do_ping(other)
10501043
# Ok we did finish to ping another
1051-
# self.ping_another_in_progress = False
10521044

10531045

10541046
# Launch a ping to another node and if fail set it as suspect
@@ -1061,7 +1053,6 @@ def __do_ping(self, other):
10611053
if zonemgr.is_top_zone_from(self.zone, other_zone_name):
10621054
ping_zone = self.zone
10631055
ping_payload = {'type': PACKET_TYPES.PING, 'seqno': 0, 'node': other['uuid'], 'from_zone': self.zone, 'from': self.uuid}
1064-
# print "PREPARE PING", ping_payload, other
10651056
message = jsoner.dumps(ping_payload)
10661057
encrypter = libstore.get_encrypter()
10671058
enc_message = encrypter.encrypt(message, dest_zone_name=ping_zone)
@@ -1090,7 +1081,6 @@ def __do_ping(self, other):
10901081
self.set_suspect(other)
10911082
except (socket.timeout, socket.gaierror) as exp:
10921083
logger.info("PING: error joining the other node %s:%s : %s. Switching to a indirect ping mode." % (addr, port, exp))
1093-
# with self.nodes_lock:
10941084
possible_relays = [n for n in self.nodes.values() if
10951085
n['uuid'] != self.uuid
10961086
and n != other
@@ -1222,7 +1212,6 @@ def do_indirect_ping(self, tgt, _from, addr):
12221212
if zonemgr.is_top_zone_from(self.zone, nfrom_zone):
12231213
nfrom_zone = self.zone
12241214
enc_ret_msg = encrypter.encrypt(ret_msg, dest_zone_name=nfrom_zone)
1225-
# sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) # UDP
12261215
sock.sendto(enc_ret_msg, addr)
12271216
sock.close()
12281217
except (socket.timeout, socket.gaierror) as exp:
@@ -1601,16 +1590,13 @@ def get_nodes_for_push_pull_response(self, other_node_zone):
16011590
# set the node as dead, and broadcast the information to everyone
16021591
def look_at_deads(self):
16031592
# suspect a node for 5 * log(n+1) * interval
1604-
# with self.nodes_lock:
16051593
node_scale = math.ceil(math.log10(float(len(self.nodes) + 1)))
16061594
probe_interval = 1
16071595
suspicion_mult = 5
16081596
suspect_timeout = suspicion_mult * node_scale * probe_interval
16091597
leave_timeout = suspect_timeout * 30 # something like 300s
16101598

1611-
# print "SUSPECT timeout", suspect_timeout
16121599
now = int(time.time())
1613-
# with self.nodes_lock:
16141600
for node in self.nodes.values():
16151601
# Only look at suspect nodes of course...
16161602
if node['state'] != NODE_STATES.SUSPECT:
@@ -1688,9 +1674,6 @@ def create_leave_msg(self, node):
16881674
return r
16891675

16901676

1691-
# def create_new_ts_msg(self, key):
1692-
# return {'type': '/ts/new', 'from': self.uuid, 'key': key}
1693-
16941677
def stack_alive_broadcast(self, node):
16951678
msg = self.create_alive_msg(node)
16961679
# Node messages are before all others
@@ -1710,12 +1693,6 @@ def stack_event_broadcast(self, payload, prioritary=False):
17101693
return
17111694

17121695

1713-
# def stack_new_ts_broadcast(self, key):
1714-
# msg = self.create_new_ts_msg(key)
1715-
# b = {'send': 0, 'msg': msg, 'groups': 'ts'}
1716-
# broadcaster.append(b)
1717-
# return
1718-
17191696
def stack_suspect_broadcast(self, node):
17201697
msg = self.create_suspect_msg(node)
17211698
# Node messages are before all others
@@ -1829,7 +1806,6 @@ def get_name():
18291806

18301807
@http_export('/agent/leave/:nuuid', protected=True)
18311808
def set_node_leave(nuuid):
1832-
# with self.nodes_lock:
18331809
node = self.nodes.get(nuuid, None)
18341810
if node is None:
18351811
logger.error('Asking us to set as leave the node %s but we cannot find it' % (nuuid))
@@ -1841,8 +1817,6 @@ def set_node_leave(nuuid):
18411817
@http_export('/agent/members')
18421818
def agent_members():
18431819
response.content_type = 'application/json'
1844-
# with self.nodes_lock:
1845-
# nodes = copy.copy(self.nodes)
18461820
return self.nodes
18471821

18481822

test/docker-files/docker-file-DUO4-gossip-zones-multi-1.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-2.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-3.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-4.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-PYTHON3-1.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
#RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-PYTHON3-2.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
#RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-PYTHON3-3.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
#RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/docker-files/docker-file-DUO4-gossip-zones-multi-PYTHON3-4.txt

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ WORKDIR /root/opsbro-oss
99

1010
RUN python setup.py install
1111

12-
# Ask for an encrypted test
13-
#RUN opsbro gossip zone key import --zone internet --key "NGNjZWI2ZmEyMzEyMTFlOA=="
1412

1513
RUN rm -fr /etc/opsbro/zones/*
1614
ADD test/test-files/test-gossip-zones-multi/zones/* /etc/opsbro/zones/

test/test_duo4_gossip_zones_multi.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ if [ "$NODE_NB" == "3" ]; then
158158
exit_if_no_crash "Node 3 exit"
159159
fi
160160

161-
# node4 customer-1=> 1,3,4 (only the proxy of a higer zone)
161+
# node4 customer-1=> 3,4 (only the proxy of a direct higher zone)
162162
if [ "$NODE_NB" == "4" ]; then
163-
assert_member "node-1" "lan"
163+
assert_not_member "node-1"
164164
assert_not_member "node-2"
165165
assert_member "node-3" "internet"
166166
# Do not have access to higher events

test/test_duo9_gossip_zones_multi_encrypted.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ function assert_not_member {
115115
# node1 => every one
116116
# node2 => every one
117117
# node3 => 1,3,4 (only the proxy of a higher zone)
118-
# node4 => 1,3,4 (only the proxy of a higer zone)
118+
# node4 => 3,4 (only the proxy of a directly higher zone)
119119
if [ "$NODE_NB" == "1" ]; then
120120
assert_member "node-1" "lan"
121121
#wait_event_with_timeout "node-1-EVENT" 60
@@ -167,9 +167,9 @@ if [ "$NODE_NB" == "3" ]; then
167167
exit_if_no_crash "Node 3 exit"
168168
fi
169169

170-
# node4 customer-1=> 1,3,4 (only the proxy of a higer zone)
170+
# node4 customer-1=> 3,4 (only the proxy of a direct higher zone)
171171
if [ "$NODE_NB" == "4" ]; then
172-
assert_member "node-1" "lan"
172+
assert_not_member "node-1"
173173
assert_not_member "node-2"
174174
assert_member "node-3" "internet"
175175
# Do not have access to higher events

0 commit comments

Comments
 (0)