LCOV - code coverage report
Current view: top level - cluster - election.c (source / functions) Hit Total Coverage
Test: Pacemaker code coverage Lines: 0 287 0.0 %
Date: 2024-05-07 11:09:47 Functions: 0 19 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2004-2024 the Pacemaker project contributors
       3             :  *
       4             :  * The version control history for this file may have further details.
       5             :  *
       6             :  * This source code is licensed under the GNU Lesser General Public License
       7             :  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
       8             :  */
       9             : 
      10             : #include <crm_internal.h>
      11             : 
      12             : #include <sys/time.h>
      13             : #include <sys/resource.h>
      14             : 
      15             : #include <crm/common/xml.h>
      16             : 
      17             : #include <crm/common/mainloop.h>
      18             : #include <crm/cluster/internal.h>
      19             : #include <crm/cluster/election_internal.h>
      20             : #include <crm/crm.h>
      21             : 
      22             : #define STORM_INTERVAL   2      /* in seconds */
      23             : 
      24             : struct election_s {
      25             :     enum election_result state;
      26             :     guint count;        // How many times local node has voted
      27             :     char *name;         // Descriptive name for this election
      28             :     char *uname;        // Local node's name
      29             :     GSourceFunc cb;     // Function to call if election is won
      30             :     GHashTable *voted;  // Key = node name, value = how node voted
      31             :     mainloop_timer_t *timeout; // When to abort if all votes not received
      32             :     int election_wins;         // Track wins, for storm detection
      33             :     bool wrote_blackbox;       // Write a storm blackbox at most once
      34             :     time_t expires;            // When storm detection period ends
      35             :     time_t last_election_loss; // When dampening period ends
      36             : };
      37             : 
      38             : static void
      39           0 : election_complete(election_t *e)
      40             : {
      41           0 :     e->state = election_won;
      42           0 :     if (e->cb != NULL) {
      43           0 :         e->cb(e);
      44             :     }
      45           0 :     election_reset(e);
      46           0 : }
      47             : 
      48             : static gboolean
      49           0 : election_timer_cb(gpointer user_data)
      50             : {
      51           0 :     election_t *e = user_data;
      52             : 
      53           0 :     crm_info("%s timed out, declaring local node as winner", e->name);
      54           0 :     election_complete(e);
      55           0 :     return FALSE;
      56             : }
      57             : 
      58             : /*!
      59             :  * \brief Get current state of an election
      60             :  *
      61             :  * \param[in] e  Election object
      62             :  *
      63             :  * \return Current state of \p e
      64             :  */
      65             : enum election_result
      66           0 : election_state(const election_t *e)
      67             : {
      68           0 :     return (e == NULL)? election_error : e->state;
      69             : }
      70             : 
      71             : /*!
      72             :  * \brief Create a new election object
      73             :  *
      74             :  * Every node that wishes to participate in an election must create an election
      75             :  * object. Typically, this should be done once, at start-up. A caller should
      76             :  * only create a single election object.
      77             :  *
      78             :  * \param[in] name       Label for election (for logging)
      79             :  * \param[in] uname      Local node's name
      80             :  * \param[in] period_ms  How long to wait for all peers to vote
      81             :  * \param[in] cb         Function to call if local node wins election
      82             :  *
      83             :  * \return Newly allocated election object on success, NULL on error
      84             :  * \note The caller is responsible for freeing the returned value using
      85             :  *       election_fini().
      86             :  */
      87             : election_t *
      88           0 : election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
      89             : {
      90           0 :     election_t *e = NULL;
      91             : 
      92             :     static guint count = 0;
      93             : 
      94           0 :     CRM_CHECK(uname != NULL, return NULL);
      95             : 
      96           0 :     e = calloc(1, sizeof(election_t));
      97           0 :     if (e == NULL) {
      98           0 :         crm_perror(LOG_CRIT, "Cannot create election");
      99           0 :         return NULL;
     100             :     }
     101             : 
     102           0 :     e->uname = strdup(uname);
     103           0 :     if (e->uname == NULL) {
     104           0 :         crm_perror(LOG_CRIT, "Cannot create election");
     105           0 :         free(e);
     106           0 :         return NULL;
     107             :     }
     108             : 
     109           0 :     e->name = name? crm_strdup_printf("election-%s", name)
     110           0 :                   : crm_strdup_printf("election-%u", count++);
     111           0 :     e->cb = cb;
     112           0 :     e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
     113             :                                     election_timer_cb, e);
     114           0 :     crm_trace("Created %s", e->name);
     115           0 :     return e;
     116             : }
     117             : 
     118             : /*!
     119             :  * \brief Disregard any previous vote by specified peer
     120             :  *
     121             :  * This discards any recorded vote from a specified peer. Election users should
     122             :  * call this whenever a voting peer becomes inactive.
     123             :  *
     124             :  * \param[in,out] e      Election object
     125             :  * \param[in]     uname  Name of peer to disregard
     126             :  */
     127             : void
     128           0 : election_remove(election_t *e, const char *uname)
     129             : {
     130           0 :     if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
     131           0 :         crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
     132           0 :         g_hash_table_remove(e->voted, uname);
     133             :     }
     134           0 : }
     135             : 
     136             : /*!
     137             :  * \brief Stop election timer and disregard all votes
     138             :  *
     139             :  * \param[in,out] e  Election object
     140             :  */
     141             : void
     142           0 : election_reset(election_t *e)
     143             : {
     144           0 :     if (e != NULL) {
     145           0 :         crm_trace("Resetting election %s", e->name);
     146           0 :         mainloop_timer_stop(e->timeout);
     147           0 :         if (e->voted) {
     148           0 :             crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
     149           0 :             g_hash_table_destroy(e->voted);
     150           0 :             e->voted = NULL;
     151             :         }
     152             :     }
     153           0 : }
     154             : 
     155             : /*!
     156             :  * \brief Free an election object
     157             :  *
     158             :  * Free all memory associated with an election object, stopping its
     159             :  * election timer (if running).
     160             :  *
     161             :  * \param[in,out] e  Election object
     162             :  */
     163             : void
     164           0 : election_fini(election_t *e)
     165             : {
     166           0 :     if (e != NULL) {
     167           0 :         election_reset(e);
     168           0 :         crm_trace("Destroying %s", e->name);
     169           0 :         mainloop_timer_del(e->timeout);
     170           0 :         free(e->uname);
     171           0 :         free(e->name);
     172           0 :         free(e);
     173             :     }
     174           0 : }
     175             : 
     176             : static void
     177           0 : election_timeout_start(election_t *e)
     178             : {
     179           0 :     if (e != NULL) {
     180           0 :         mainloop_timer_start(e->timeout);
     181             :     }
     182           0 : }
     183             : 
     184             : /*!
     185             :  * \brief Stop an election's timer, if running
     186             :  *
     187             :  * \param[in,out] e  Election object
     188             :  */
     189             : void
     190           0 : election_timeout_stop(election_t *e)
     191             : {
     192           0 :     if (e != NULL) {
     193           0 :         mainloop_timer_stop(e->timeout);
     194             :     }
     195           0 : }
     196             : 
     197             : /*!
     198             :  * \brief Change an election's timeout (restarting timer if running)
     199             :  *
     200             :  * \param[in,out] e       Election object
     201             :  * \param[in]     period  New timeout
     202             :  */
     203             : void
     204           0 : election_timeout_set_period(election_t *e, guint period)
     205             : {
     206           0 :     if (e != NULL) {
     207           0 :         mainloop_timer_set_period(e->timeout, period);
     208             :     } else {
     209           0 :         crm_err("No election defined");
     210             :     }
     211           0 : }
     212             : 
     213             : static int
     214           0 : get_uptime(struct timeval *output)
     215             : {
     216             :     static time_t expires = 0;
     217             :     static struct rusage info;
     218             : 
     219           0 :     time_t tm_now = time(NULL);
     220             : 
     221           0 :     if (expires < tm_now) {
     222           0 :         int rc = 0;
     223             : 
     224           0 :         info.ru_utime.tv_sec = 0;
     225           0 :         info.ru_utime.tv_usec = 0;
     226           0 :         rc = getrusage(RUSAGE_SELF, &info);
     227             : 
     228           0 :         output->tv_sec = 0;
     229           0 :         output->tv_usec = 0;
     230             : 
     231           0 :         if (rc < 0) {
     232           0 :             crm_perror(LOG_ERR, "Could not calculate the current uptime");
     233           0 :             expires = 0;
     234           0 :             return -1;
     235             :         }
     236             : 
     237           0 :         crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
     238             :                   (long)info.ru_utime.tv_usec);
     239             :     }
     240             : 
     241           0 :     expires = tm_now + STORM_INTERVAL;  /* N seconds after the last _access_ */
     242           0 :     output->tv_sec = info.ru_utime.tv_sec;
     243           0 :     output->tv_usec = info.ru_utime.tv_usec;
     244             : 
     245           0 :     return 1;
     246             : }
     247             : 
     248             : static int
     249           0 : compare_age(struct timeval your_age)
     250             : {
     251             :     struct timeval our_age;
     252             : 
     253           0 :     get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
     254             : 
     255           0 :     if (our_age.tv_sec > your_age.tv_sec) {
     256           0 :         crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
     257           0 :         return 1;
     258           0 :     } else if (our_age.tv_sec < your_age.tv_sec) {
     259           0 :         crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
     260           0 :         return -1;
     261           0 :     } else if (our_age.tv_usec > your_age.tv_usec) {
     262           0 :         crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
     263             :                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
     264           0 :         return 1;
     265           0 :     } else if (our_age.tv_usec < your_age.tv_usec) {
     266           0 :         crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
     267             :                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
     268           0 :         return -1;
     269             :     }
     270             : 
     271           0 :     return 0;
     272             : }
     273             : 
     274             : /*!
     275             :  * \brief Start a new election by offering local node's candidacy
     276             :  *
     277             :  * Broadcast a "vote" election message containing the local node's ID,
     278             :  * (incremented) election counter, and uptime, and start the election timer.
     279             :  *
     280             :  * \param[in,out] e  Election object
     281             :  *
     282             :  * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
     283             :  *       all active peers do so, or if the election times out, the local node
     284             :  *       wins the election. (If we lose to any peer vote, we will stop the
     285             :  *       timer, so a timeout means we did not lose -- either some peer did not
     286             :  *       vote, or we did not call election_check() in time.)
     287             :  */
     288             : void
     289           0 : election_vote(election_t *e)
     290             : {
     291             :     struct timeval age;
     292           0 :     xmlNode *vote = NULL;
     293             :     crm_node_t *our_node;
     294             : 
     295           0 :     if (e == NULL) {
     296           0 :         crm_trace("Election vote requested, but no election available");
     297           0 :         return;
     298             :     }
     299             : 
     300           0 :     our_node = pcmk__get_node(0, e->uname, NULL,
     301             :                               pcmk__node_search_cluster_member);
     302           0 :     if (!pcmk__cluster_is_node_active(our_node)) {
     303           0 :         crm_trace("Cannot vote in %s yet: local node not connected to cluster",
     304             :                   e->name);
     305           0 :         return;
     306             :     }
     307             : 
     308           0 :     election_reset(e);
     309           0 :     e->state = election_in_progress;
     310           0 :     vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
     311             : 
     312           0 :     e->count++;
     313           0 :     crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
     314           0 :     crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
     315             : 
     316             :     // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
     317           0 :     get_uptime(&age);
     318           0 :     crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
     319             :                         PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
     320             : 
     321           0 :     pcmk__cluster_send_message(NULL, crm_msg_crmd, vote);
     322           0 :     free_xml(vote);
     323             : 
     324           0 :     crm_debug("Started %s round %d", e->name, e->count);
     325           0 :     election_timeout_start(e);
     326           0 :     return;
     327             : }
     328             : 
     329             : /*!
     330             :  * \brief Check whether local node has won an election
     331             :  *
     332             :  * If all known peers have sent no-vote messages, stop the election timer, set
     333             :  * the election state to won, and call any registered win callback.
     334             :  *
     335             :  * \param[in,out] e  Election object
     336             :  *
     337             :  * \return TRUE if local node has won, FALSE otherwise
     338             :  * \note If all known peers have sent no-vote messages, but the election owner
     339             :  *       does not call this function, the election will not be won (and the
     340             :  *       callback will not be called) until the election times out.
     341             :  * \note This should be called when election_count_vote() returns
     342             :  *       \c election_in_progress.
     343             :  */
     344             : bool
     345           0 : election_check(election_t *e)
     346             : {
     347           0 :     int voted_size = 0;
     348           0 :     int num_members = 0;
     349             : 
     350           0 :     if (e == NULL) {
     351           0 :         crm_trace("Election check requested, but no election available");
     352           0 :         return FALSE;
     353             :     }
     354           0 :     if (e->voted == NULL) {
     355           0 :         crm_trace("%s check requested, but no votes received yet", e->name);
     356           0 :         return FALSE;
     357             :     }
     358             : 
     359           0 :     voted_size = g_hash_table_size(e->voted);
     360           0 :     num_members = pcmk__cluster_num_active_nodes();
     361             : 
     362             :     /* in the case of #voted > #members, it is better to
     363             :      *   wait for the timeout and give the cluster time to
     364             :      *   stabilize
     365             :      */
     366           0 :     if (voted_size >= num_members) {
     367             :         /* we won and everyone has voted */
     368           0 :         election_timeout_stop(e);
     369           0 :         if (voted_size > num_members) {
     370             :             GHashTableIter gIter;
     371             :             const crm_node_t *node;
     372           0 :             char *key = NULL;
     373             : 
     374           0 :             crm_warn("Received too many votes in %s", e->name);
     375           0 :             g_hash_table_iter_init(&gIter, crm_peer_cache);
     376           0 :             while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
     377           0 :                 if (pcmk__cluster_is_node_active(node)) {
     378           0 :                     crm_warn("* expected vote: %s", node->uname);
     379             :                 }
     380             :             }
     381             : 
     382           0 :             g_hash_table_iter_init(&gIter, e->voted);
     383           0 :             while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
     384           0 :                 crm_warn("* actual vote: %s", key);
     385             :             }
     386             : 
     387             :         }
     388             : 
     389           0 :         crm_info("%s won by local node", e->name);
     390           0 :         election_complete(e);
     391           0 :         return TRUE;
     392             : 
     393             :     } else {
     394           0 :         crm_debug("%s still waiting on %d of %d votes",
     395             :                   e->name, num_members - voted_size, num_members);
     396             :     }
     397             : 
     398           0 :     return FALSE;
     399             : }
     400             : 
     401             : #define LOSS_DAMPEN 2           /* in seconds */
     402             : 
     403             : struct vote {
     404             :     const char *op;
     405             :     const char *from;
     406             :     const char *version;
     407             :     const char *election_owner;
     408             :     int election_id;
     409             :     struct timeval age;
     410             : };
     411             : 
     412             : /*!
     413             :  * \brief Unpack an election message
     414             :  *
     415             :  * \param[in] e        Election object (for logging only)
     416             :  * \param[in] message  Election message XML
     417             :  * \param[out] vote    Parsed fields from message
     418             :  *
     419             :  * \return TRUE if election message and election are valid, FALSE otherwise
     420             :  * \note The parsed struct's pointer members are valid only for the lifetime of
     421             :  *       the message argument.
     422             :  */
     423             : static bool
     424           0 : parse_election_message(const election_t *e, const xmlNode *message,
     425             :                        struct vote *vote)
     426             : {
     427           0 :     CRM_CHECK(message && vote, return FALSE);
     428             : 
     429           0 :     vote->election_id = -1;
     430           0 :     vote->age.tv_sec = -1;
     431           0 :     vote->age.tv_usec = -1;
     432             : 
     433           0 :     vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
     434           0 :     vote->from = crm_element_value(message, PCMK__XA_SRC);
     435           0 :     vote->version = crm_element_value(message, PCMK_XA_VERSION);
     436           0 :     vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
     437             : 
     438           0 :     crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
     439             : 
     440           0 :     if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
     441           0 :         || (vote->election_owner == NULL) || (vote->election_id < 0)) {
     442             : 
     443           0 :         crm_warn("Invalid %s message from %s in %s ",
     444             :                  (vote->op? vote->op : "election"),
     445             :                  (vote->from? vote->from : "unspecified node"),
     446             :                  (e? e->name : "election"));
     447           0 :         return FALSE;
     448             :     }
     449             : 
     450             :     // Op-specific validation
     451             : 
     452           0 :     if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
     453             :         /* Only vote ops have uptime.
     454             :            Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
     455             :          */
     456           0 :         crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
     457             :                                   PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
     458           0 :         if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
     459           0 :             crm_warn("Cannot count %s %s from %s because it is missing uptime",
     460             :                      (e? e->name : "election"), vote->op, vote->from);
     461           0 :             return FALSE;
     462             :         }
     463             : 
     464           0 :     } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
     465           0 :         crm_info("Cannot process %s message from %s because %s is not a known election op",
     466             :                  (e? e->name : "election"), vote->from, vote->op);
     467           0 :         return FALSE;
     468             :     }
     469             : 
     470             :     // Election validation
     471             : 
     472           0 :     if (e == NULL) {
     473           0 :         crm_info("Cannot count %s from %s because no election available",
     474             :                  vote->op, vote->from);
     475           0 :         return FALSE;
     476             :     }
     477             : 
     478             :     /* If the membership cache is NULL, we REALLY shouldn't be voting --
     479             :      * the question is how we managed to get here.
     480             :      */
     481           0 :     if (crm_peer_cache == NULL) {
     482           0 :         crm_info("Cannot count %s %s from %s because no peer information available",
     483             :                  e->name, vote->op, vote->from);
     484           0 :         return FALSE;
     485             :     }
     486           0 :     return TRUE;
     487             : }
     488             : 
     489             : static void
     490           0 : record_vote(election_t *e, struct vote *vote)
     491             : {
     492           0 :     CRM_ASSERT(e && vote && vote->from && vote->op);
     493             : 
     494           0 :     if (e->voted == NULL) {
     495           0 :         e->voted = pcmk__strkey_table(free, free);
     496             :     }
     497           0 :     pcmk__insert_dup(e->voted, vote->from, vote->op);
     498           0 : }
     499             : 
     500             : static void
     501           0 : send_no_vote(crm_node_t *peer, struct vote *vote)
     502             : {
     503             :     // @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd
     504             : 
     505           0 :     xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
     506             :                                      CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
     507             : 
     508           0 :     crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
     509           0 :     crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
     510             : 
     511           0 :     pcmk__cluster_send_message(peer, crm_msg_crmd, novote);
     512           0 :     free_xml(novote);
     513           0 : }
     514             : 
     515             : /*!
     516             :  * \brief Process an election message (vote or no-vote) from a peer
     517             :  *
     518             :  * \param[in,out] e        Election object
     519             :  * \param[in]     message  Election message XML from peer
     520             :  * \param[in]     can_win  Whether local node is eligible to win
     521             :  *
     522             :  * \return Election state after new vote is considered
     523             :  * \note If the peer message is a vote, and we prefer the peer to win, this will
     524             :  *       send a no-vote reply to the peer.
     525             :  * \note The situations "we lost to this vote" and "this is a late no-vote
     526             :  *       after we've already lost" both return election_lost. If a caller needs
     527             :  *       to distinguish them, it should save the current state before calling
     528             :  *       this function, and then compare the result.
     529             :  */
     530             : enum election_result
     531           0 : election_count_vote(election_t *e, const xmlNode *message, bool can_win)
     532             : {
     533           0 :     int log_level = LOG_INFO;
     534           0 :     gboolean done = FALSE;
     535           0 :     gboolean we_lose = FALSE;
     536           0 :     const char *reason = "unknown";
     537           0 :     bool we_are_owner = FALSE;
     538           0 :     crm_node_t *our_node = NULL, *your_node = NULL;
     539           0 :     time_t tm_now = time(NULL);
     540             :     struct vote vote;
     541             : 
     542           0 :     CRM_CHECK(message != NULL, return election_error);
     543           0 :     if (parse_election_message(e, message, &vote) == FALSE) {
     544           0 :         return election_error;
     545             :     }
     546             : 
     547           0 :     your_node = pcmk__get_node(0, vote.from, NULL,
     548             :                                pcmk__node_search_cluster_member);
     549           0 :     our_node = pcmk__get_node(0, e->uname, NULL,
     550             :                               pcmk__node_search_cluster_member);
     551           0 :     we_are_owner = (our_node != NULL)
     552           0 :                    && pcmk__str_eq(our_node->uuid, vote.election_owner,
     553             :                                    pcmk__str_none);
     554             : 
     555           0 :     if (!can_win) {
     556           0 :         reason = "Not eligible";
     557           0 :         we_lose = TRUE;
     558             : 
     559           0 :     } else if (!pcmk__cluster_is_node_active(our_node)) {
     560           0 :         reason = "We are not part of the cluster";
     561           0 :         log_level = LOG_ERR;
     562           0 :         we_lose = TRUE;
     563             : 
     564           0 :     } else if (we_are_owner && (vote.election_id != e->count)) {
     565           0 :         log_level = LOG_TRACE;
     566           0 :         reason = "Superseded";
     567           0 :         done = TRUE;
     568             : 
     569           0 :     } else if (!pcmk__cluster_is_node_active(your_node)) {
     570             :         /* Possibly we cached the message in the FSA queue at a point when the peer wasn't part of our cluster */
     571           0 :         reason = "Peer is not part of our cluster";
     572           0 :         log_level = LOG_WARNING;
     573           0 :         done = TRUE;
     574             : 
     575           0 :     } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
     576           0 :                || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
     577             :         /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
     578             :          * for us to win
     579             :          */
     580           0 :         if (!we_are_owner) {
     581           0 :             crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
     582             :                      e->name, vote.election_id, vote.op, vote.from,
     583             :                      vote.election_owner);
     584           0 :             return election_error;
     585             :         }
     586           0 :         if (e->state != election_in_progress) {
     587             :             // Should only happen if we already lost
     588           0 :             crm_debug("Not counting %s round %d %s from %s because no election in progress",
     589             :                       e->name, vote.election_id, vote.op, vote.from);
     590           0 :             return e->state;
     591             :         }
     592           0 :         record_vote(e, &vote);
     593           0 :         reason = "Recorded";
     594           0 :         done = TRUE;
     595             : 
     596             :     } else {
     597             :         // A peer vote requires a comparison to determine which node is better
     598           0 :         int age_result = compare_age(vote.age);
     599           0 :         int version_result = compare_version(vote.version, CRM_FEATURE_SET);
     600             : 
     601           0 :         if (version_result < 0) {
     602           0 :             reason = "Version";
     603           0 :             we_lose = TRUE;
     604             : 
     605           0 :         } else if (version_result > 0) {
     606           0 :             reason = "Version";
     607             : 
     608           0 :         } else if (age_result < 0) {
     609           0 :             reason = "Uptime";
     610           0 :             we_lose = TRUE;
     611             : 
     612           0 :         } else if (age_result > 0) {
     613           0 :             reason = "Uptime";
     614             : 
     615           0 :         } else if (strcasecmp(e->uname, vote.from) > 0) {
     616           0 :             reason = "Host name";
     617           0 :             we_lose = TRUE;
     618             : 
     619             :         } else {
     620           0 :             reason = "Host name";
     621             :         }
     622             :     }
     623             : 
     624           0 :     if (e->expires < tm_now) {
     625           0 :         e->election_wins = 0;
     626           0 :         e->expires = tm_now + STORM_INTERVAL;
     627             : 
     628           0 :     } else if (done == FALSE && we_lose == FALSE) {
     629           0 :         int peers = 1 + g_hash_table_size(crm_peer_cache);
     630             : 
     631             :         /* If every node has to vote down every other node, that's N*(N-1) total elections.
     632             :          * Allow some leeway before _really_ complaining
     633             :          */
     634           0 :         e->election_wins++;
     635           0 :         if (e->election_wins > (peers * peers)) {
     636           0 :             crm_warn("%s election storm detected: %d wins in %d seconds",
     637             :                      e->name, e->election_wins, STORM_INTERVAL);
     638           0 :             e->election_wins = 0;
     639           0 :             e->expires = tm_now + STORM_INTERVAL;
     640           0 :             if (e->wrote_blackbox == FALSE) {
     641             :                 /* It's questionable whether a black box (from every node in the
     642             :                  * cluster) would be truly helpful in diagnosing an election
     643             :                  * storm. It's also highly doubtful a production environment
     644             :                  * would get multiple election storms from distinct causes, so
     645             :                  * saving one blackbox per process lifetime should be
     646             :                  * sufficient. Alternatives would be to save a timestamp of the
     647             :                  * last blackbox write instead of a boolean, and write a new one
     648             :                  * if some amount of time has passed; or to save a storm count,
     649             :                  * write a blackbox on every Nth occurrence.
     650             :                  */
     651           0 :                 crm_write_blackbox(0, NULL);
     652           0 :                 e->wrote_blackbox = TRUE;
     653             :             }
     654             :         }
     655             :     }
     656             : 
     657           0 :     if (done) {
     658           0 :         do_crm_log(log_level + 1,
     659             :                    "Processed %s round %d %s (current round %d) from %s (%s)",
     660             :                    e->name, vote.election_id, vote.op, e->count, vote.from,
     661             :                    reason);
     662           0 :         return e->state;
     663             : 
     664           0 :     } else if (we_lose == FALSE) {
     665             :         /* We track the time of the last election loss to implement an election
     666             :          * dampening period, reducing the likelihood of an election storm. If
     667             :          * this node has lost within the dampening period, don't start a new
     668             :          * election, even if we win against a peer's vote -- the peer we lost to
     669             :          * should win again.
     670             :          *
     671             :          * @TODO This has a problem case: if an election winner immediately
     672             :          * leaves the cluster, and a new election is immediately called, all
     673             :          * nodes could lose, with no new winner elected. The ideal solution
     674             :          * would be to tie the election structure with the peer caches, which
     675             :          * would allow us to clear the dampening when the previous winner
     676             :          * leaves (and would allow other improvements as well).
     677             :          */
     678           0 :         if ((e->last_election_loss == 0)
     679           0 :             || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
     680             : 
     681           0 :             do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
     682             :                        e->name, vote.election_id, vote.election_owner, vote.op,
     683             :                        vote.from, reason);
     684             : 
     685           0 :             e->last_election_loss = 0;
     686           0 :             election_timeout_stop(e);
     687             : 
     688             :             /* Start a new election by voting down this, and other, peers */
     689           0 :             e->state = election_start;
     690           0 :             return e->state;
     691             :         } else {
     692           0 :             char *loss_time = ctime(&e->last_election_loss);
     693             : 
     694           0 :             if (loss_time) {
     695             :                 // Show only HH:MM:SS
     696           0 :                 loss_time += 11;
     697           0 :                 loss_time[8] = '\0';
     698             :             }
     699           0 :             crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
     700             :                      e->name, vote.election_id, vote.election_owner, vote.from,
     701             :                      LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
     702             :         }
     703             :     }
     704             : 
     705           0 :     e->last_election_loss = tm_now;
     706             : 
     707           0 :     do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
     708             :                e->name, vote.election_id, vote.election_owner, vote.op,
     709             :                vote.from, reason);
     710             : 
     711           0 :     election_reset(e);
     712           0 :     send_no_vote(your_node, &vote);
     713           0 :     e->state = election_lost;
     714           0 :     return e->state;
     715             : }
     716             : 
     717             : /*!
     718             :  * \brief Reset any election dampening currently in effect
     719             :  *
     720             :  * Zeroes the stored time of the last election loss. Vote counting treats a
     721             :  * zero loss time as "no loss recorded", so the next pass against a peer is
     722             :  * no longer ignored on account of the dampening period.
     723             :  *
     724             :  * \param[in,out] e  Election object to clear (a NULL argument is ignored)
     725             :  */
     726             : void
     727           0 : election_clear_dampening(election_t *e)
     728             : {
     729           0 :     if (e == NULL) {
     730           0 :         return; // No election object, so nothing to clear
     731             :     }
     732           0 :     e->last_election_loss = 0;
     733           0 : }

Generated by: LCOV version 1.14