Line data Source code
1 : /*
2 : * Copyright 2004-2024 the Pacemaker project contributors
3 : *
4 : * The version control history for this file may have further details.
5 : *
6 : * This source code is licensed under the GNU Lesser General Public License
7 : * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 : */
9 :
10 : #include <crm_internal.h>
11 :
12 : #include <sys/time.h>
13 : #include <sys/resource.h>
14 :
15 : #include <crm/common/xml.h>
16 :
17 : #include <crm/common/mainloop.h>
18 : #include <crm/cluster/internal.h>
19 : #include <crm/cluster/election_internal.h>
20 : #include <crm/crm.h>
21 :
// Seconds: lifetime of the cached uptime and length of a storm-detection window
#define STORM_INTERVAL 2
23 :
24 : struct election_s {
25 : enum election_result state;
26 : guint count; // How many times local node has voted
27 : char *name; // Descriptive name for this election
28 : char *uname; // Local node's name
29 : GSourceFunc cb; // Function to call if election is won
30 : GHashTable *voted; // Key = node name, value = how node voted
31 : mainloop_timer_t *timeout; // When to abort if all votes not received
32 : int election_wins; // Track wins, for storm detection
33 : bool wrote_blackbox; // Write a storm blackbox at most once
34 : time_t expires; // When storm detection period ends
35 : time_t last_election_loss; // When dampening period ends
36 : };
37 :
38 : static void
39 0 : election_complete(election_t *e)
40 : {
41 0 : e->state = election_won;
42 0 : if (e->cb != NULL) {
43 0 : e->cb(e);
44 : }
45 0 : election_reset(e);
46 0 : }
47 :
48 : static gboolean
49 0 : election_timer_cb(gpointer user_data)
50 : {
51 0 : election_t *e = user_data;
52 :
53 0 : crm_info("%s timed out, declaring local node as winner", e->name);
54 0 : election_complete(e);
55 0 : return FALSE;
56 : }
57 :
58 : /*!
59 : * \brief Get current state of an election
60 : *
61 : * \param[in] e Election object
62 : *
63 : * \return Current state of \e
64 : */
65 : enum election_result
66 0 : election_state(const election_t *e)
67 : {
68 0 : return (e == NULL)? election_error : e->state;
69 : }
70 :
71 : /*!
72 : * \brief Create a new election object
73 : *
74 : * Every node that wishes to participate in an election must create an election
75 : * object. Typically, this should be done once, at start-up. A caller should
76 : * only create a single election object.
77 : *
78 : * \param[in] name Label for election (for logging)
79 : * \param[in] uname Local node's name
80 : * \param[in] period_ms How long to wait for all peers to vote
81 : * \param[in] cb Function to call if local node wins election
82 : *
83 : * \return Newly allocated election object on success, NULL on error
84 : * \note The caller is responsible for freeing the returned value using
85 : * election_fini().
86 : */
87 : election_t *
88 0 : election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
89 : {
90 0 : election_t *e = NULL;
91 :
92 : static guint count = 0;
93 :
94 0 : CRM_CHECK(uname != NULL, return NULL);
95 :
96 0 : e = calloc(1, sizeof(election_t));
97 0 : if (e == NULL) {
98 0 : crm_perror(LOG_CRIT, "Cannot create election");
99 0 : return NULL;
100 : }
101 :
102 0 : e->uname = strdup(uname);
103 0 : if (e->uname == NULL) {
104 0 : crm_perror(LOG_CRIT, "Cannot create election");
105 0 : free(e);
106 0 : return NULL;
107 : }
108 :
109 0 : e->name = name? crm_strdup_printf("election-%s", name)
110 0 : : crm_strdup_printf("election-%u", count++);
111 0 : e->cb = cb;
112 0 : e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
113 : election_timer_cb, e);
114 0 : crm_trace("Created %s", e->name);
115 0 : return e;
116 : }
117 :
118 : /*!
119 : * \brief Disregard any previous vote by specified peer
120 : *
121 : * This discards any recorded vote from a specified peer. Election users should
122 : * call this whenever a voting peer becomes inactive.
123 : *
124 : * \param[in,out] e Election object
125 : * \param[in] uname Name of peer to disregard
126 : */
127 : void
128 0 : election_remove(election_t *e, const char *uname)
129 : {
130 0 : if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
131 0 : crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
132 0 : g_hash_table_remove(e->voted, uname);
133 : }
134 0 : }
135 :
136 : /*!
137 : * \brief Stop election timer and disregard all votes
138 : *
139 : * \param[in,out] e Election object
140 : */
141 : void
142 0 : election_reset(election_t *e)
143 : {
144 0 : if (e != NULL) {
145 0 : crm_trace("Resetting election %s", e->name);
146 0 : mainloop_timer_stop(e->timeout);
147 0 : if (e->voted) {
148 0 : crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
149 0 : g_hash_table_destroy(e->voted);
150 0 : e->voted = NULL;
151 : }
152 : }
153 0 : }
154 :
155 : /*!
156 : * \brief Free an election object
157 : *
158 : * Free all memory associated with an election object, stopping its
159 : * election timer (if running).
160 : *
161 : * \param[in,out] e Election object
162 : */
163 : void
164 0 : election_fini(election_t *e)
165 : {
166 0 : if (e != NULL) {
167 0 : election_reset(e);
168 0 : crm_trace("Destroying %s", e->name);
169 0 : mainloop_timer_del(e->timeout);
170 0 : free(e->uname);
171 0 : free(e->name);
172 0 : free(e);
173 : }
174 0 : }
175 :
176 : static void
177 0 : election_timeout_start(election_t *e)
178 : {
179 0 : if (e != NULL) {
180 0 : mainloop_timer_start(e->timeout);
181 : }
182 0 : }
183 :
184 : /*!
185 : * \brief Stop an election's timer, if running
186 : *
187 : * \param[in,out] e Election object
188 : */
189 : void
190 0 : election_timeout_stop(election_t *e)
191 : {
192 0 : if (e != NULL) {
193 0 : mainloop_timer_stop(e->timeout);
194 : }
195 0 : }
196 :
197 : /*!
198 : * \brief Change an election's timeout (restarting timer if running)
199 : *
200 : * \param[in,out] e Election object
201 : * \param[in] period New timeout
202 : */
203 : void
204 0 : election_timeout_set_period(election_t *e, guint period)
205 : {
206 0 : if (e != NULL) {
207 0 : mainloop_timer_set_period(e->timeout, period);
208 : } else {
209 0 : crm_err("No election defined");
210 : }
211 0 : }
212 :
213 : static int
214 0 : get_uptime(struct timeval *output)
215 : {
216 : static time_t expires = 0;
217 : static struct rusage info;
218 :
219 0 : time_t tm_now = time(NULL);
220 :
221 0 : if (expires < tm_now) {
222 0 : int rc = 0;
223 :
224 0 : info.ru_utime.tv_sec = 0;
225 0 : info.ru_utime.tv_usec = 0;
226 0 : rc = getrusage(RUSAGE_SELF, &info);
227 :
228 0 : output->tv_sec = 0;
229 0 : output->tv_usec = 0;
230 :
231 0 : if (rc < 0) {
232 0 : crm_perror(LOG_ERR, "Could not calculate the current uptime");
233 0 : expires = 0;
234 0 : return -1;
235 : }
236 :
237 0 : crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
238 : (long)info.ru_utime.tv_usec);
239 : }
240 :
241 0 : expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */
242 0 : output->tv_sec = info.ru_utime.tv_sec;
243 0 : output->tv_usec = info.ru_utime.tv_usec;
244 :
245 0 : return 1;
246 : }
247 :
/*!
 * \internal
 * \brief Compare the local node's uptime against a peer's
 *
 * Seconds are compared first; microseconds only break a tie in seconds.
 *
 * \param[in] your_age  Uptime reported by the peer
 *
 * \return 1 if the local uptime is greater (we win), -1 if it is smaller
 *         (we lose), or 0 if both are equal
 * \note If the local uptime cannot be determined, it is compared as {0, 0}.
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    // The return value is deliberately ignored: on failure our_age is {0, 0}
    get_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
273 :
274 : /*!
275 : * \brief Start a new election by offering local node's candidacy
276 : *
277 : * Broadcast a "vote" election message containing the local node's ID,
278 : * (incremented) election counter, and uptime, and start the election timer.
279 : *
280 : * \param[in,out] e Election object
281 : *
282 : * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
283 : * all active peers do so, or if the election times out, the local node
284 : * wins the election. (If we lose to any peer vote, we will stop the
285 : * timer, so a timeout means we did not lose -- either some peer did not
286 : * vote, or we did not call election_check() in time.)
287 : */
288 : void
289 0 : election_vote(election_t *e)
290 : {
291 : struct timeval age;
292 0 : xmlNode *vote = NULL;
293 : crm_node_t *our_node;
294 :
295 0 : if (e == NULL) {
296 0 : crm_trace("Election vote requested, but no election available");
297 0 : return;
298 : }
299 :
300 0 : our_node = pcmk__get_node(0, e->uname, NULL,
301 : pcmk__node_search_cluster_member);
302 0 : if (!pcmk__cluster_is_node_active(our_node)) {
303 0 : crm_trace("Cannot vote in %s yet: local node not connected to cluster",
304 : e->name);
305 0 : return;
306 : }
307 :
308 0 : election_reset(e);
309 0 : e->state = election_in_progress;
310 0 : vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
311 :
312 0 : e->count++;
313 0 : crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
314 0 : crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
315 :
316 : // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
317 0 : get_uptime(&age);
318 0 : crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
319 : PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
320 :
321 0 : pcmk__cluster_send_message(NULL, crm_msg_crmd, vote);
322 0 : free_xml(vote);
323 :
324 0 : crm_debug("Started %s round %d", e->name, e->count);
325 0 : election_timeout_start(e);
326 0 : return;
327 : }
328 :
329 : /*!
330 : * \brief Check whether local node has won an election
331 : *
332 : * If all known peers have sent no-vote messages, stop the election timer, set
333 : * the election state to won, and call any registered win callback.
334 : *
335 : * \param[in,out] e Election object
336 : *
337 : * \return TRUE if local node has won, FALSE otherwise
338 : * \note If all known peers have sent no-vote messages, but the election owner
339 : * does not call this function, the election will not be won (and the
340 : * callback will not be called) until the election times out.
341 : * \note This should be called when election_count_vote() returns
342 : * \c election_in_progress.
343 : */
344 : bool
345 0 : election_check(election_t *e)
346 : {
347 0 : int voted_size = 0;
348 0 : int num_members = 0;
349 :
350 0 : if (e == NULL) {
351 0 : crm_trace("Election check requested, but no election available");
352 0 : return FALSE;
353 : }
354 0 : if (e->voted == NULL) {
355 0 : crm_trace("%s check requested, but no votes received yet", e->name);
356 0 : return FALSE;
357 : }
358 :
359 0 : voted_size = g_hash_table_size(e->voted);
360 0 : num_members = pcmk__cluster_num_active_nodes();
361 :
362 : /* in the case of #voted > #members, it is better to
363 : * wait for the timeout and give the cluster time to
364 : * stabilize
365 : */
366 0 : if (voted_size >= num_members) {
367 : /* we won and everyone has voted */
368 0 : election_timeout_stop(e);
369 0 : if (voted_size > num_members) {
370 : GHashTableIter gIter;
371 : const crm_node_t *node;
372 0 : char *key = NULL;
373 :
374 0 : crm_warn("Received too many votes in %s", e->name);
375 0 : g_hash_table_iter_init(&gIter, crm_peer_cache);
376 0 : while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
377 0 : if (pcmk__cluster_is_node_active(node)) {
378 0 : crm_warn("* expected vote: %s", node->uname);
379 : }
380 : }
381 :
382 0 : g_hash_table_iter_init(&gIter, e->voted);
383 0 : while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
384 0 : crm_warn("* actual vote: %s", key);
385 : }
386 :
387 : }
388 :
389 0 : crm_info("%s won by local node", e->name);
390 0 : election_complete(e);
391 0 : return TRUE;
392 :
393 : } else {
394 0 : crm_debug("%s still waiting on %d of %d votes",
395 : e->name, num_members - voted_size, num_members);
396 : }
397 :
398 0 : return FALSE;
399 : }
400 :
// Seconds after an election loss during which wins against peers are ignored
#define LOSS_DAMPEN 2
402 :
/* Fields extracted from one election message. The string members point into
 * the message they were parsed from and are not separately allocated.
 */
struct vote {
    const char *op;             // Message operation (vote or no-vote)
    const char *from;           // Name of the sending node
    const char *version;        // Version advertised by the sender
    const char *election_owner; // ID of the node that started the round
    int election_id;            // Round number according to the owner
    struct timeval age;         // Sender's uptime (vote messages only)
};
411 :
412 : /*!
413 : * \brief Unpack an election message
414 : *
415 : * \param[in] e Election object (for logging only)
416 : * \param[in] message Election message XML
417 : * \param[out] vote Parsed fields from message
418 : *
419 : * \return TRUE if election message and election are valid, FALSE otherwise
420 : * \note The parsed struct's pointer members are valid only for the lifetime of
421 : * the message argument.
422 : */
423 : static bool
424 0 : parse_election_message(const election_t *e, const xmlNode *message,
425 : struct vote *vote)
426 : {
427 0 : CRM_CHECK(message && vote, return FALSE);
428 :
429 0 : vote->election_id = -1;
430 0 : vote->age.tv_sec = -1;
431 0 : vote->age.tv_usec = -1;
432 :
433 0 : vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
434 0 : vote->from = crm_element_value(message, PCMK__XA_SRC);
435 0 : vote->version = crm_element_value(message, PCMK_XA_VERSION);
436 0 : vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
437 :
438 0 : crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
439 :
440 0 : if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
441 0 : || (vote->election_owner == NULL) || (vote->election_id < 0)) {
442 :
443 0 : crm_warn("Invalid %s message from %s in %s ",
444 : (vote->op? vote->op : "election"),
445 : (vote->from? vote->from : "unspecified node"),
446 : (e? e->name : "election"));
447 0 : return FALSE;
448 : }
449 :
450 : // Op-specific validation
451 :
452 0 : if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
453 : /* Only vote ops have uptime.
454 : Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
455 : */
456 0 : crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
457 : PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
458 0 : if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
459 0 : crm_warn("Cannot count %s %s from %s because it is missing uptime",
460 : (e? e->name : "election"), vote->op, vote->from);
461 0 : return FALSE;
462 : }
463 :
464 0 : } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
465 0 : crm_info("Cannot process %s message from %s because %s is not a known election op",
466 : (e? e->name : "election"), vote->from, vote->op);
467 0 : return FALSE;
468 : }
469 :
470 : // Election validation
471 :
472 0 : if (e == NULL) {
473 0 : crm_info("Cannot count %s from %s because no election available",
474 : vote->op, vote->from);
475 0 : return FALSE;
476 : }
477 :
478 : /* If the membership cache is NULL, we REALLY shouldn't be voting --
479 : * the question is how we managed to get here.
480 : */
481 0 : if (crm_peer_cache == NULL) {
482 0 : crm_info("Cannot count %s %s from %s because no peer information available",
483 : e->name, vote->op, vote->from);
484 0 : return FALSE;
485 : }
486 0 : return TRUE;
487 : }
488 :
489 : static void
490 0 : record_vote(election_t *e, struct vote *vote)
491 : {
492 0 : CRM_ASSERT(e && vote && vote->from && vote->op);
493 :
494 0 : if (e->voted == NULL) {
495 0 : e->voted = pcmk__strkey_table(free, free);
496 : }
497 0 : pcmk__insert_dup(e->voted, vote->from, vote->op);
498 0 : }
499 :
500 : static void
501 0 : send_no_vote(crm_node_t *peer, struct vote *vote)
502 : {
503 : // @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd
504 :
505 0 : xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
506 : CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
507 :
508 0 : crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
509 0 : crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
510 :
511 0 : pcmk__cluster_send_message(peer, crm_msg_crmd, novote);
512 0 : free_xml(novote);
513 0 : }
514 :
515 : /*!
516 : * \brief Process an election message (vote or no-vote) from a peer
517 : *
518 : * \param[in,out] e Election object
519 : * \param[in] message Election message XML from peer
520 : * \param[in] can_win Whether local node is eligible to win
521 : *
522 : * \return Election state after new vote is considered
523 : * \note If the peer message is a vote, and we prefer the peer to win, this will
524 : * send a no-vote reply to the peer.
525 : * \note The situations "we lost to this vote" from "this is a late no-vote
526 : * after we've already lost" both return election_lost. If a caller needs
527 : * to distinguish them, it should save the current state before calling
528 : * this function, and then compare the result.
529 : */
530 : enum election_result
531 0 : election_count_vote(election_t *e, const xmlNode *message, bool can_win)
532 : {
533 0 : int log_level = LOG_INFO;
534 0 : gboolean done = FALSE;
535 0 : gboolean we_lose = FALSE;
536 0 : const char *reason = "unknown";
537 0 : bool we_are_owner = FALSE;
538 0 : crm_node_t *our_node = NULL, *your_node = NULL;
539 0 : time_t tm_now = time(NULL);
540 : struct vote vote;
541 :
542 0 : CRM_CHECK(message != NULL, return election_error);
543 0 : if (parse_election_message(e, message, &vote) == FALSE) {
544 0 : return election_error;
545 : }
546 :
547 0 : your_node = pcmk__get_node(0, vote.from, NULL,
548 : pcmk__node_search_cluster_member);
549 0 : our_node = pcmk__get_node(0, e->uname, NULL,
550 : pcmk__node_search_cluster_member);
551 0 : we_are_owner = (our_node != NULL)
552 0 : && pcmk__str_eq(our_node->uuid, vote.election_owner,
553 : pcmk__str_none);
554 :
555 0 : if (!can_win) {
556 0 : reason = "Not eligible";
557 0 : we_lose = TRUE;
558 :
559 0 : } else if (!pcmk__cluster_is_node_active(our_node)) {
560 0 : reason = "We are not part of the cluster";
561 0 : log_level = LOG_ERR;
562 0 : we_lose = TRUE;
563 :
564 0 : } else if (we_are_owner && (vote.election_id != e->count)) {
565 0 : log_level = LOG_TRACE;
566 0 : reason = "Superseded";
567 0 : done = TRUE;
568 :
569 0 : } else if (!pcmk__cluster_is_node_active(your_node)) {
570 : /* Possibly we cached the message in the FSA queue at a point that it wasn't */
571 0 : reason = "Peer is not part of our cluster";
572 0 : log_level = LOG_WARNING;
573 0 : done = TRUE;
574 :
575 0 : } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
576 0 : || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
577 : /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
578 : * for us to win
579 : */
580 0 : if (!we_are_owner) {
581 0 : crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
582 : e->name, vote.election_id, vote.op, vote.from,
583 : vote.election_owner);
584 0 : return election_error;
585 : }
586 0 : if (e->state != election_in_progress) {
587 : // Should only happen if we already lost
588 0 : crm_debug("Not counting %s round %d %s from %s because no election in progress",
589 : e->name, vote.election_id, vote.op, vote.from);
590 0 : return e->state;
591 : }
592 0 : record_vote(e, &vote);
593 0 : reason = "Recorded";
594 0 : done = TRUE;
595 :
596 : } else {
597 : // A peer vote requires a comparison to determine which node is better
598 0 : int age_result = compare_age(vote.age);
599 0 : int version_result = compare_version(vote.version, CRM_FEATURE_SET);
600 :
601 0 : if (version_result < 0) {
602 0 : reason = "Version";
603 0 : we_lose = TRUE;
604 :
605 0 : } else if (version_result > 0) {
606 0 : reason = "Version";
607 :
608 0 : } else if (age_result < 0) {
609 0 : reason = "Uptime";
610 0 : we_lose = TRUE;
611 :
612 0 : } else if (age_result > 0) {
613 0 : reason = "Uptime";
614 :
615 0 : } else if (strcasecmp(e->uname, vote.from) > 0) {
616 0 : reason = "Host name";
617 0 : we_lose = TRUE;
618 :
619 : } else {
620 0 : reason = "Host name";
621 : }
622 : }
623 :
624 0 : if (e->expires < tm_now) {
625 0 : e->election_wins = 0;
626 0 : e->expires = tm_now + STORM_INTERVAL;
627 :
628 0 : } else if (done == FALSE && we_lose == FALSE) {
629 0 : int peers = 1 + g_hash_table_size(crm_peer_cache);
630 :
631 : /* If every node has to vote down every other node, thats N*(N-1) total elections
632 : * Allow some leeway before _really_ complaining
633 : */
634 0 : e->election_wins++;
635 0 : if (e->election_wins > (peers * peers)) {
636 0 : crm_warn("%s election storm detected: %d wins in %d seconds",
637 : e->name, e->election_wins, STORM_INTERVAL);
638 0 : e->election_wins = 0;
639 0 : e->expires = tm_now + STORM_INTERVAL;
640 0 : if (e->wrote_blackbox == FALSE) {
641 : /* It's questionable whether a black box (from every node in the
642 : * cluster) would be truly helpful in diagnosing an election
643 : * storm. It's also highly doubtful a production environment
644 : * would get multiple election storms from distinct causes, so
645 : * saving one blackbox per process lifetime should be
646 : * sufficient. Alternatives would be to save a timestamp of the
647 : * last blackbox write instead of a boolean, and write a new one
648 : * if some amount of time has passed; or to save a storm count,
649 : * write a blackbox on every Nth occurrence.
650 : */
651 0 : crm_write_blackbox(0, NULL);
652 0 : e->wrote_blackbox = TRUE;
653 : }
654 : }
655 : }
656 :
657 0 : if (done) {
658 0 : do_crm_log(log_level + 1,
659 : "Processed %s round %d %s (current round %d) from %s (%s)",
660 : e->name, vote.election_id, vote.op, e->count, vote.from,
661 : reason);
662 0 : return e->state;
663 :
664 0 : } else if (we_lose == FALSE) {
665 : /* We track the time of the last election loss to implement an election
666 : * dampening period, reducing the likelihood of an election storm. If
667 : * this node has lost within the dampening period, don't start a new
668 : * election, even if we win against a peer's vote -- the peer we lost to
669 : * should win again.
670 : *
671 : * @TODO This has a problem case: if an election winner immediately
672 : * leaves the cluster, and a new election is immediately called, all
673 : * nodes could lose, with no new winner elected. The ideal solution
674 : * would be to tie the election structure with the peer caches, which
675 : * would allow us to clear the dampening when the previous winner
676 : * leaves (and would allow other improvements as well).
677 : */
678 0 : if ((e->last_election_loss == 0)
679 0 : || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
680 :
681 0 : do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
682 : e->name, vote.election_id, vote.election_owner, vote.op,
683 : vote.from, reason);
684 :
685 0 : e->last_election_loss = 0;
686 0 : election_timeout_stop(e);
687 :
688 : /* Start a new election by voting down this, and other, peers */
689 0 : e->state = election_start;
690 0 : return e->state;
691 : } else {
692 0 : char *loss_time = ctime(&e->last_election_loss);
693 :
694 0 : if (loss_time) {
695 : // Show only HH:MM:SS
696 0 : loss_time += 11;
697 0 : loss_time[8] = '\0';
698 : }
699 0 : crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
700 : e->name, vote.election_id, vote.election_owner, vote.from,
701 : LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
702 : }
703 : }
704 :
705 0 : e->last_election_loss = tm_now;
706 :
707 0 : do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
708 : e->name, vote.election_id, vote.election_owner, vote.op,
709 : vote.from, reason);
710 :
711 0 : election_reset(e);
712 0 : send_no_vote(your_node, &vote);
713 0 : e->state = election_lost;
714 0 : return e->state;
715 : }
716 :
717 : /*!
718 : * \brief Reset any election dampening currently in effect
719 : *
720 : * \param[in,out] e Election object to clear
721 : */
722 : void
723 0 : election_clear_dampening(election_t *e)
724 : {
725 0 : e->last_election_loss = 0;
726 0 : }
|