Line data Source code
1 : /*
2 : * Copyright 2004-2024 the Pacemaker project contributors
3 : *
4 : * The version control history for this file may have further details.
5 : *
6 : * This source code is licensed under the GNU Lesser General Public License
7 : * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 : */
9 :
10 : #include <crm_internal.h>
11 :
12 : #ifndef _GNU_SOURCE
13 : # define _GNU_SOURCE
14 : #endif
15 :
16 : #include <inttypes.h> // PRIu32
17 : #include <sys/param.h>
18 : #include <sys/types.h>
19 : #include <stdio.h>
20 : #include <unistd.h>
21 : #include <string.h>
22 : #include <glib.h>
23 : #include <crm/common/ipc.h>
24 : #include <crm/common/xml_internal.h>
25 : #include <crm/cluster/internal.h>
26 : #include <crm/common/xml.h>
27 : #include <crm/stonith-ng.h>
28 : #include "crmcluster_private.h"
29 :
30 : /* The peer cache remembers cluster nodes that have been seen.
31 : * This is managed mostly automatically by libcluster, based on
32 : * cluster membership events.
33 : *
34 : * Because cluster nodes can have conflicting names or UUIDs,
35 : * the hash table key is a uniquely generated ID.
36 : *
37 : * @COMPAT When this is internal, rename to cluster_node_member_cache and make
38 : * static.
39 : */
40 : GHashTable *crm_peer_cache = NULL;
41 :
42 : /*
43 : * The remote peer cache tracks pacemaker_remote nodes. While the
44 : * value has the same type as the peer cache's, it is tracked separately for
45 : * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
46 : * so the name (which is also the UUID) is used as the hash table key; there
47 : * is no equivalent of membership events, so management is not automatic; and
48 : * most users of the peer cache need to exclude pacemaker_remote nodes.
49 : *
50 : * That said, using a single cache would be more logical and less error-prone,
51 : * so it would be a good idea to merge them one day.
52 : *
53 : * libcluster provides two avenues for populating the cache:
54 : * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
55 : * directly manage it, while refresh_remote_nodes() populates it via the CIB.
56 : */
57 : GHashTable *crm_remote_peer_cache = NULL;
58 :
59 : /*
60 : * The CIB cluster node cache tracks cluster nodes that have been seen in
61 : * the CIB. It is useful mainly when a caller needs to know about a node that
62 : * may no longer be in the membership, but doesn't want to add the node to the
63 : * main peer cache tables.
64 : */
65 : static GHashTable *cluster_node_cib_cache = NULL;
66 :
67 : unsigned long long crm_peer_seq = 0;
68 : gboolean crm_have_quorum = FALSE;
69 : static bool autoreap = true;
70 :
71 : // Flag setting and clearing for crm_node_t:flags
72 :
/*
 * Set flag bits in a peer's flags member.
 *
 * The new value comes from pcmk__set_flags_as(), which is also handed the
 * calling function and line, LOG_TRACE, the label "Peer", the peer's name, and
 * the stringified flag expression (presumably for logging).
 *
 * The peer argument is evaluated more than once, so it must not have side
 * effects.
 */
#define set_peer_flags(peer, flags_to_set) do {                             \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                           "Peer", (peer)->uname,           \
                                           (peer)->flags, (flags_to_set),   \
                                           #flags_to_set);                  \
    } while (0)
79 :
/*
 * Clear flag bits in a peer's flags member.
 *
 * The new value comes from pcmk__clear_flags_as(), which is also handed the
 * calling function and line, LOG_TRACE, the label "Peer", the peer's name, and
 * the stringified flag expression (presumably for logging).
 *
 * The peer argument is evaluated more than once, so it must not have side
 * effects.
 */
#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                             "Peer", (peer)->uname,           \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)
87 :
88 : static void update_peer_uname(crm_node_t *node, const char *uname);
89 : static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
90 :
91 : /*!
92 : * \internal
93 : * \brief Get the number of Pacemaker Remote nodes that have been seen
94 : *
95 : * \return Number of cached Pacemaker Remote nodes
96 : */
97 : unsigned int
98 0 : pcmk__cluster_num_remote_nodes(void)
99 : {
100 0 : if (crm_remote_peer_cache == NULL) {
101 0 : return 0U;
102 : }
103 0 : return g_hash_table_size(crm_remote_peer_cache);
104 : }
105 :
106 : /*!
107 : * \internal
108 : * \brief Get a remote node cache entry, creating it if necessary
109 : *
110 : * \param[in] node_name Name of remote node
111 : *
112 : * \return Cache entry for node on success, or \c NULL (and set \c errno)
113 : * otherwise
114 : *
115 : * \note When creating a new entry, this will leave the node state undetermined.
116 : * The caller should also call \c pcmk__update_peer_state() if the state
117 : * is known.
118 : * \note Because this can add and remove cache entries, callers should not
119 : * assume any previously obtained cache entry pointers remain valid.
120 : */
121 : crm_node_t *
122 0 : pcmk__cluster_lookup_remote_node(const char *node_name)
123 : {
124 : crm_node_t *node;
125 0 : char *node_name_copy = NULL;
126 :
127 0 : if (node_name == NULL) {
128 0 : errno = EINVAL;
129 0 : return NULL;
130 : }
131 :
132 : /* It's theoretically possible that the node was added to the cluster peer
133 : * cache before it was known to be a Pacemaker Remote node. Remove that
134 : * entry unless it has a node ID, which means the name actually is
135 : * associated with a cluster node. (@TODO return an error in that case?)
136 : */
137 0 : node = pcmk__search_node_caches(0, node_name,
138 : pcmk__node_search_cluster_member);
139 0 : if ((node != NULL) && (node->uuid == NULL)) {
140 : /* node_name could be a pointer into the cache entry being removed, so
141 : * reassign it to a copy before the original gets freed
142 : */
143 0 : node_name_copy = strdup(node_name);
144 0 : if (node_name_copy == NULL) {
145 0 : errno = ENOMEM;
146 0 : return NULL;
147 : }
148 0 : node_name = node_name_copy;
149 0 : pcmk__cluster_forget_cluster_node(0, node_name);
150 : }
151 :
152 : /* Return existing cache entry if one exists */
153 0 : node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
154 0 : if (node) {
155 0 : free(node_name_copy);
156 0 : return node;
157 : }
158 :
159 : /* Allocate a new entry */
160 0 : node = calloc(1, sizeof(crm_node_t));
161 0 : if (node == NULL) {
162 0 : free(node_name_copy);
163 0 : return NULL;
164 : }
165 :
166 : /* Populate the essential information */
167 0 : set_peer_flags(node, crm_remote_node);
168 0 : node->uuid = strdup(node_name);
169 0 : if (node->uuid == NULL) {
170 0 : free(node);
171 0 : errno = ENOMEM;
172 0 : free(node_name_copy);
173 0 : return NULL;
174 : }
175 :
176 : /* Add the new entry to the cache */
177 0 : g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
178 0 : crm_trace("added %s to remote cache", node_name);
179 :
180 : /* Update the entry's uname, ensuring peer status callbacks are called */
181 0 : update_peer_uname(node, node_name);
182 0 : free(node_name_copy);
183 0 : return node;
184 : }
185 :
186 : /*!
187 : * \internal
188 : * \brief Remove a node from the Pacemaker Remote node cache
189 : *
190 : * \param[in] node_name Name of node to remove from cache
191 : *
192 : * \note The caller must be careful not to use \p node_name after calling this
193 : * function if it might be a pointer into the cache entry being removed.
194 : */
195 : void
196 0 : pcmk__cluster_forget_remote_node(const char *node_name)
197 : {
198 : /* Do a lookup first, because node_name could be a pointer within the entry
199 : * being removed -- we can't log it *after* removing it.
200 : */
201 0 : if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
202 0 : crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
203 0 : g_hash_table_remove(crm_remote_peer_cache, node_name);
204 : }
205 0 : }
206 :
207 : /*!
208 : * \internal
209 : * \brief Return node status based on a CIB status entry
210 : *
211 : * \param[in] node_state XML of node state
212 : *
213 : * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in
214 : * \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise
215 : * \note Unlike most boolean XML attributes, this one defaults to true, for
216 : * backward compatibility with older controllers that don't set it.
217 : */
218 : static const char *
219 0 : remote_state_from_cib(const xmlNode *node_state)
220 : {
221 0 : bool status = false;
222 :
223 0 : if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
224 0 : &status) == pcmk_rc_ok) && !status) {
225 0 : return CRM_NODE_LOST;
226 : } else {
227 0 : return CRM_NODE_MEMBER;
228 : }
229 : }
230 :
/* User data handed to remote_cache_refresh_helper() for each result of a
 * Pacemaker Remote node XPath search
 */
struct refresh_data {
    const char *field;  /* XML attribute to check for node name */
    gboolean has_state; /* whether to update node state based on XML */
};
236 :
237 : /*!
238 : * \internal
239 : * \brief Process one pacemaker_remote node xpath search result
240 : *
241 : * \param[in] result XML search result
242 : * \param[in] user_data what to look for in the XML
243 : */
244 : static void
245 0 : remote_cache_refresh_helper(xmlNode *result, void *user_data)
246 : {
247 0 : const struct refresh_data *data = user_data;
248 0 : const char *remote = crm_element_value(result, data->field);
249 0 : const char *state = NULL;
250 : crm_node_t *node;
251 :
252 0 : CRM_CHECK(remote != NULL, return);
253 :
254 : /* Determine node's state, if the result has it */
255 0 : if (data->has_state) {
256 0 : state = remote_state_from_cib(result);
257 : }
258 :
259 : /* Check whether cache already has entry for node */
260 0 : node = g_hash_table_lookup(crm_remote_peer_cache, remote);
261 :
262 0 : if (node == NULL) {
263 : /* Node is not in cache, so add a new entry for it */
264 0 : node = pcmk__cluster_lookup_remote_node(remote);
265 0 : CRM_ASSERT(node);
266 0 : if (state) {
267 0 : pcmk__update_peer_state(__func__, node, state, 0);
268 : }
269 :
270 0 : } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
271 : /* Node is in cache and hasn't been updated already, so mark it clean */
272 0 : clear_peer_flags(node, crm_node_dirty);
273 0 : if (state) {
274 0 : pcmk__update_peer_state(__func__, node, state, 0);
275 : }
276 : }
277 : }
278 :
279 : static void
280 0 : mark_dirty(gpointer key, gpointer value, gpointer user_data)
281 : {
282 0 : set_peer_flags((crm_node_t *) value, crm_node_dirty);
283 0 : }
284 :
285 : static gboolean
286 0 : is_dirty(gpointer key, gpointer value, gpointer user_data)
287 : {
288 0 : return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
289 : }
290 :
291 : /*!
292 : * \internal
293 : * \brief Repopulate the remote node cache based on CIB XML
294 : *
295 : * \param[in] cib CIB XML to parse
296 : */
297 : static void
298 0 : refresh_remote_nodes(xmlNode *cib)
299 : {
300 : struct refresh_data data;
301 :
302 0 : pcmk__cluster_init_node_caches();
303 :
304 : /* First, we mark all existing cache entries as dirty,
305 : * so that later we can remove any that weren't in the CIB.
306 : * We don't empty the cache, because we need to detect changes in state.
307 : */
308 0 : g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
309 :
310 : /* Look for guest nodes and remote nodes in the status section */
311 0 : data.field = PCMK_XA_ID;
312 0 : data.has_state = TRUE;
313 0 : crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
314 : remote_cache_refresh_helper, &data);
315 :
316 : /* Look for guest nodes and remote nodes in the configuration section,
317 : * because they may have just been added and not have a status entry yet.
318 : * In that case, the cached node state will be left NULL, so that the
319 : * peer status callback isn't called until we're sure the node started
320 : * successfully.
321 : */
322 0 : data.field = PCMK_XA_VALUE;
323 0 : data.has_state = FALSE;
324 0 : crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
325 : remote_cache_refresh_helper, &data);
326 0 : data.field = PCMK_XA_ID;
327 0 : data.has_state = FALSE;
328 0 : crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
329 : remote_cache_refresh_helper, &data);
330 :
331 : /* Remove all old cache entries that weren't seen in the CIB */
332 0 : g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
333 0 : }
334 :
335 : /*!
336 : * \internal
337 : * \brief Check whether a node is an active cluster node
338 : *
339 : * Remote nodes are never considered active. This guarantees that they can never
340 : * become DC.
341 : *
342 : * \param[in] node Node to check
343 : *
344 : * \return \c true if the node is an active cluster node, or \c false otherwise
345 : */
346 : bool
347 0 : pcmk__cluster_is_node_active(const crm_node_t *node)
348 : {
349 0 : const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
350 :
351 0 : if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
352 0 : return false;
353 : }
354 :
355 0 : switch (cluster_layer) {
356 0 : case pcmk_cluster_layer_corosync:
357 : #if SUPPORT_COROSYNC
358 0 : return pcmk__corosync_is_peer_active(node);
359 : #else
360 : break;
361 : #endif // SUPPORT_COROSYNC
362 0 : default:
363 0 : break;
364 : }
365 :
366 0 : crm_err("Unhandled cluster layer: %s",
367 : pcmk_cluster_layer_text(cluster_layer));
368 0 : return false;
369 : }
370 :
371 : /*!
372 : * \internal
373 : * \brief Check if a node's entry should be removed from the cluster node cache
374 : *
375 : * A node should be removed from the cache if it's inactive and matches another
376 : * \c crm_node_t (the search object). The node is considered a mismatch if any
377 : * of the following are true:
378 : * * The search object is \c NULL.
379 : * * The search object has an ID set and the cached node's ID does not match it.
380 : * * The search object does not have an ID set, and the cached node's name does
381 : * not match the search node's name. (If both names are \c NULL, it's a
382 : * match.)
383 : *
384 : * Otherwise, the node is considered a match.
385 : *
386 : * Note that if the search object has both an ID and a name set, the name is
387 : * ignored for matching purposes.
388 : *
389 : * \param[in] key Ignored
390 : * \param[in] value \c crm_node_t object from cluster node cache
391 : * \param[in] user_data \c crm_node_t object to match against (search object)
392 : *
393 : * \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
394 : * or \c FALSE otherwise
395 : */
396 : static gboolean
397 0 : should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
398 : {
399 0 : crm_node_t *node = value;
400 0 : crm_node_t *search = user_data;
401 :
402 0 : if (search == NULL) {
403 0 : return FALSE;
404 : }
405 0 : if ((search->id != 0) && (node->id != search->id)) {
406 0 : return FALSE;
407 : }
408 0 : if ((search->id == 0)
409 0 : && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
410 : // @TODO Consider name even if ID is set?
411 0 : return FALSE;
412 : }
413 0 : if (pcmk__cluster_is_node_active(value)) {
414 0 : return FALSE;
415 : }
416 :
417 0 : crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
418 : "cache",
419 : pcmk__s(node->uname, "(unknown)"), node->id);
420 0 : return TRUE;
421 : }
422 :
423 : /*!
424 : * \internal
425 : * \brief Remove one or more inactive nodes from the cluster node cache
426 : *
427 : * All inactive nodes matching \p id and \p node_name as described in
428 : * \c should_forget_cluster_node documentation are removed from the cache.
429 : *
430 : * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
431 : * from the cache regardless of ID and name. This differs from clearing the
432 : * cache, in that entries for active nodes are preserved.
433 : *
434 : * \param[in] id ID of node to remove from cache (0 to ignore)
435 : * \param[in] node_name Name of node to remove from cache (ignored if \p id is
436 : * nonzero)
437 : *
438 : * \note \p node_name is not modified directly, but it will be freed if it's a
439 : * pointer into a cache entry that is removed.
440 : */
441 : void
442 0 : pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
443 : {
444 0 : crm_node_t search = { 0, };
445 0 : char *criterion = NULL; // For logging
446 0 : guint matches = 0;
447 :
448 0 : if (crm_peer_cache == NULL) {
449 0 : crm_trace("Membership cache not initialized, ignoring removal request");
450 0 : return;
451 : }
452 :
453 0 : search.id = id;
454 0 : search.uname = pcmk__str_copy(node_name); // May log after original freed
455 :
456 0 : if (id > 0) {
457 0 : criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
458 :
459 0 : } else if (node_name != NULL) {
460 0 : criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
461 : }
462 :
463 0 : matches = g_hash_table_foreach_remove(crm_peer_cache,
464 : should_forget_cluster_node, &search);
465 0 : if (matches > 0) {
466 0 : if (criterion != NULL) {
467 0 : crm_notice("Removed %u inactive node%s with %s from the membership "
468 : "cache",
469 : matches, pcmk__plural_s(matches), criterion);
470 : } else {
471 0 : crm_notice("Removed all (%u) inactive cluster nodes from the "
472 : "membership cache",
473 : matches);
474 : }
475 :
476 : } else {
477 0 : crm_info("No inactive cluster nodes%s%s to remove from the membership "
478 : "cache",
479 : ((criterion != NULL)? " with" : ""), pcmk__s(criterion, ""));
480 : }
481 :
482 0 : free(search.uname);
483 0 : free(criterion);
484 : }
485 :
486 : static void
487 0 : count_peer(gpointer key, gpointer value, gpointer user_data)
488 : {
489 0 : unsigned int *count = user_data;
490 0 : crm_node_t *node = value;
491 :
492 0 : if (pcmk__cluster_is_node_active(node)) {
493 0 : *count = *count + 1;
494 : }
495 0 : }
496 :
497 : /*!
498 : * \internal
499 : * \brief Get the number of active cluster nodes that have been seen
500 : *
501 : * Remote nodes are never considered active. This guarantees that they can never
502 : * become DC.
503 : *
504 : * \return Number of active nodes in the cluster node cache
505 : */
506 : unsigned int
507 0 : pcmk__cluster_num_active_nodes(void)
508 : {
509 0 : unsigned int count = 0;
510 :
511 0 : if (crm_peer_cache != NULL) {
512 0 : g_hash_table_foreach(crm_peer_cache, count_peer, &count);
513 : }
514 0 : return count;
515 : }
516 :
517 : static void
518 0 : destroy_crm_node(gpointer data)
519 : {
520 0 : crm_node_t *node = data;
521 :
522 0 : crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
523 :
524 0 : free(node->uname);
525 0 : free(node->state);
526 0 : free(node->uuid);
527 0 : free(node->expected);
528 0 : free(node->conn_host);
529 0 : free(node);
530 0 : }
531 :
532 : /*!
533 : * \internal
534 : * \brief Initialize node caches
535 : */
536 : void
537 0 : pcmk__cluster_init_node_caches(void)
538 : {
539 0 : if (crm_peer_cache == NULL) {
540 0 : crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
541 : }
542 :
543 0 : if (crm_remote_peer_cache == NULL) {
544 0 : crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
545 : }
546 :
547 0 : if (cluster_node_cib_cache == NULL) {
548 0 : cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
549 : }
550 0 : }
551 :
552 : /*!
553 : * \internal
554 : * \brief Initialize node caches
555 : */
556 : void
557 0 : pcmk__cluster_destroy_node_caches(void)
558 : {
559 0 : if (crm_peer_cache != NULL) {
560 0 : crm_trace("Destroying peer cache with %d members",
561 : g_hash_table_size(crm_peer_cache));
562 0 : g_hash_table_destroy(crm_peer_cache);
563 0 : crm_peer_cache = NULL;
564 : }
565 :
566 0 : if (crm_remote_peer_cache != NULL) {
567 0 : crm_trace("Destroying remote peer cache with %d members",
568 : pcmk__cluster_num_remote_nodes());
569 0 : g_hash_table_destroy(crm_remote_peer_cache);
570 0 : crm_remote_peer_cache = NULL;
571 : }
572 :
573 0 : if (cluster_node_cib_cache != NULL) {
574 0 : crm_trace("Destroying configured cluster node cache with %d members",
575 : g_hash_table_size(cluster_node_cib_cache));
576 0 : g_hash_table_destroy(cluster_node_cib_cache);
577 0 : cluster_node_cib_cache = NULL;
578 : }
579 0 : }
580 :
581 : static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
582 : const void *) = NULL;
583 :
584 : /*!
585 : * \internal
586 : * \brief Set a client function that will be called after peer status changes
587 : *
588 : * \param[in] dispatch Pointer to function to use as callback
589 : *
590 : * \note Client callbacks should do only client-specific handling. Callbacks
591 : * must not add or remove entries in the peer caches.
592 : */
593 : void
594 0 : pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
595 : crm_node_t *, const void *))
596 : {
597 : // @TODO Improve documentation of peer_status_callback
598 0 : peer_status_callback = dispatch;
599 0 : }
600 :
601 : /*!
602 : * \internal
603 : * \brief Tell the library whether to automatically reap lost nodes
604 : *
605 : * If \c true (the default), calling \c crm_update_peer_proc() will also update
606 : * the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the
607 : * peer state will reap peers whose state changes to anything other than
608 : * \c CRM_NODE_MEMBER.
609 : *
610 : * Callers should leave this enabled unless they plan to manage the cache
611 : * separately on their own.
612 : *
613 : * \param[in] enable \c true to enable automatic reaping, \c false to disable
614 : */
615 : void
616 0 : pcmk__cluster_set_autoreap(bool enable)
617 : {
618 0 : autoreap = enable;
619 0 : }
620 :
621 : static void
622 0 : dump_peer_hash(int level, const char *caller)
623 : {
624 : GHashTableIter iter;
625 0 : const char *id = NULL;
626 0 : crm_node_t *node = NULL;
627 :
628 0 : g_hash_table_iter_init(&iter, crm_peer_cache);
629 0 : while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
630 0 : do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
631 : }
632 0 : }
633 :
634 : static gboolean
635 0 : hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
636 : {
637 0 : return value == user_data;
638 : }
639 :
640 : /*!
641 : * \internal
642 : * \brief Search cluster member node cache
643 : *
644 : * \param[in] id If not 0, cluster node ID to search for
645 : * \param[in] uname If not NULL, node name to search for
646 : * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
647 : * node ID to search for
648 : *
649 : * \return Cluster node cache entry if found, otherwise NULL
650 : */
651 : static crm_node_t *
652 0 : search_cluster_member_cache(unsigned int id, const char *uname,
653 : const char *uuid)
654 : {
655 : GHashTableIter iter;
656 0 : crm_node_t *node = NULL;
657 0 : crm_node_t *by_id = NULL;
658 0 : crm_node_t *by_name = NULL;
659 :
660 0 : CRM_ASSERT(id > 0 || uname != NULL);
661 :
662 0 : pcmk__cluster_init_node_caches();
663 :
664 0 : if (uname != NULL) {
665 0 : g_hash_table_iter_init(&iter, crm_peer_cache);
666 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
667 0 : if(node->uname && strcasecmp(node->uname, uname) == 0) {
668 0 : crm_trace("Name match: %s = %p", node->uname, node);
669 0 : by_name = node;
670 0 : break;
671 : }
672 : }
673 : }
674 :
675 0 : if (id > 0) {
676 0 : g_hash_table_iter_init(&iter, crm_peer_cache);
677 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
678 0 : if(node->id == id) {
679 0 : crm_trace("ID match: %u = %p", node->id, node);
680 0 : by_id = node;
681 0 : break;
682 : }
683 : }
684 :
685 0 : } else if (uuid != NULL) {
686 0 : g_hash_table_iter_init(&iter, crm_peer_cache);
687 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
688 0 : if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
689 0 : crm_trace("UUID match: %s = %p", node->uuid, node);
690 0 : by_id = node;
691 0 : break;
692 : }
693 : }
694 : }
695 :
696 0 : node = by_id; /* Good default */
697 0 : if(by_id == by_name) {
698 : /* Nothing to do if they match (both NULL counts) */
699 0 : crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
700 :
701 0 : } else if(by_id == NULL && by_name) {
702 0 : crm_trace("Only one: %p for %u/%s", by_name, id, uname);
703 :
704 0 : if(id && by_name->id) {
705 0 : dump_peer_hash(LOG_WARNING, __func__);
706 0 : crm_crit("Node %u and %u share the same name '%s'",
707 : id, by_name->id, uname);
708 0 : node = NULL; /* Create a new one */
709 :
710 : } else {
711 0 : node = by_name;
712 : }
713 :
714 0 : } else if(by_name == NULL && by_id) {
715 0 : crm_trace("Only one: %p for %u/%s", by_id, id, uname);
716 :
717 0 : if(uname && by_id->uname) {
718 0 : dump_peer_hash(LOG_WARNING, __func__);
719 0 : crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
720 : uname, by_id->uname, id, uname);
721 : }
722 :
723 0 : } else if(uname && by_id->uname) {
724 0 : if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
725 0 : crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
726 0 : g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
727 :
728 : } else {
729 0 : crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
730 0 : dump_peer_hash(LOG_INFO, __func__);
731 0 : crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
732 : TRUE);
733 : }
734 :
735 0 : } else if(id && by_name->id) {
736 0 : crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
737 :
738 : } else {
739 : /* Simple merge */
740 :
741 : /* Only corosync-based clusters use node IDs. The functions that call
742 : * pcmk__update_peer_state() and crm_update_peer_proc() only know
743 : * nodeid, so 'by_id' is authoritative when merging.
744 : */
745 0 : dump_peer_hash(LOG_DEBUG, __func__);
746 :
747 0 : crm_info("Merging %p into %p", by_name, by_id);
748 0 : g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
749 : }
750 :
751 0 : return node;
752 : }
753 :
754 : /*!
755 : * \internal
756 : * \brief Search caches for a node (cluster or Pacemaker Remote)
757 : *
758 : * \param[in] id If not 0, cluster node ID to search for
759 : * \param[in] uname If not NULL, node name to search for
760 : * \param[in] flags Group of enum pcmk__node_search_flags
761 : *
762 : * \return Node cache entry if found, otherwise NULL
763 : */
764 : crm_node_t *
765 0 : pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
766 : {
767 0 : crm_node_t *node = NULL;
768 :
769 0 : CRM_ASSERT(id > 0 || uname != NULL);
770 :
771 0 : pcmk__cluster_init_node_caches();
772 :
773 0 : if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
774 0 : node = g_hash_table_lookup(crm_remote_peer_cache, uname);
775 : }
776 :
777 0 : if ((node == NULL)
778 0 : && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
779 :
780 0 : node = search_cluster_member_cache(id, uname, NULL);
781 : }
782 :
783 0 : if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
784 0 : char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
785 :
786 0 : node = find_cib_cluster_node(id_str, uname);
787 0 : free(id_str);
788 : }
789 :
790 0 : return node;
791 : }
792 :
793 : /*!
794 : * \internal
795 : * \brief Purge a node from cache (both cluster and Pacemaker Remote)
796 : *
797 : * \param[in] node_name If not NULL, purge only nodes with this name
798 : * \param[in] node_id If not 0, purge cluster nodes only if they have this ID
799 : *
800 : * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
801 : * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
802 : * nodes that match \p node_name will be purged, and cluster nodes that
803 : * match both \p node_name and \p node_id will be purged.
804 : * \note The caller must be careful not to use \p node_name after calling this
805 : * function if it might be a pointer into a cache entry being removed.
806 : */
807 : void
808 0 : pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
809 : {
810 0 : char *node_name_copy = NULL;
811 :
812 0 : if ((node_name == NULL) && (node_id == 0U)) {
813 0 : return;
814 : }
815 :
816 : // Purge from Pacemaker Remote node cache
817 0 : if ((node_name != NULL)
818 0 : && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
819 : /* node_name could be a pointer into the cache entry being purged,
820 : * so reassign it to a copy before the original gets freed
821 : */
822 0 : node_name_copy = pcmk__str_copy(node_name);
823 0 : node_name = node_name_copy;
824 :
825 0 : crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
826 0 : g_hash_table_remove(crm_remote_peer_cache, node_name);
827 : }
828 :
829 0 : pcmk__cluster_forget_cluster_node(node_id, node_name);
830 0 : free(node_name_copy);
831 : }
832 :
#if SUPPORT_COROSYNC
/* Remove inactive cluster peer cache entries that have the same name as the
 * given node (compared case-insensitively) but a different nonzero ID.
 *
 * Nothing is removed when the given node lacks an ID or a name, or when
 * pcmk__corosync_has_nodelist() is false. Active nodes are never removed.
 *
 * Returns the number of entries removed.
 */
static guint
remove_conflicting_peer(crm_node_t *node)
{
    guint matches = 0;  // Same type as the return value
    GHashTableIter iter;
    crm_node_t *existing_node = NULL;

    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if (existing_node->id > 0
            && existing_node->id != node->id
            && existing_node->uname != NULL
            && strcasecmp(existing_node->uname, node->uname) == 0) {

            if (pcmk__cluster_is_node_active(existing_node)) {
                continue;
            }

            crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                     existing_node->id, existing_node->uname, node->id);

            // Removing through the iterator keeps the iteration valid
            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif
871 :
872 : /*!
873 : * \internal
874 : * \brief Get a cluster node cache entry, possibly creating one if not found
875 : *
876 : * If \c pcmk__node_search_cluster_member is set in \p flags, the return value
877 : * is guaranteed not to be \c NULL. A new cache entry is created if one does not
878 : * already exist.
879 : *
880 : * \param[in] id If not 0, cluster node ID to search for
881 : * \param[in] uname If not NULL, node name to search for
882 : * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
883 : * node ID to search for
884 : * \param[in] flags Group of enum pcmk__node_search_flags
885 : *
886 : * \return (Possibly newly created) cluster node cache entry
887 : */
888 : /* coverity[-alloc] Memory is referenced in one or both hashtables */
889 : crm_node_t *
890 0 : pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
891 : uint32_t flags)
892 : {
893 0 : crm_node_t *node = NULL;
894 0 : char *uname_lookup = NULL;
895 :
896 0 : CRM_ASSERT(id > 0 || uname != NULL);
897 :
898 0 : pcmk__cluster_init_node_caches();
899 :
900 : // Check the Pacemaker Remote node cache first
901 0 : if (pcmk_is_set(flags, pcmk__node_search_remote)) {
902 0 : node = g_hash_table_lookup(crm_remote_peer_cache, uname);
903 0 : if (node != NULL) {
904 0 : return node;
905 : }
906 : }
907 :
908 0 : if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
909 0 : return NULL;
910 : }
911 :
912 0 : node = search_cluster_member_cache(id, uname, uuid);
913 :
914 : /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
915 : * we need to do a lookup of the node name using the id in the cluster membership. */
916 0 : if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
917 0 : uname_lookup = pcmk__cluster_node_name(id);
918 : }
919 :
920 0 : if (uname_lookup) {
921 0 : uname = uname_lookup;
922 0 : crm_trace("Inferred a name of '%s' for node %u", uname, id);
923 :
924 : /* try to turn up the node one more time now that we know the uname. */
925 0 : if (node == NULL) {
926 0 : node = search_cluster_member_cache(id, uname, uuid);
927 : }
928 : }
929 :
930 0 : if (node == NULL) {
931 0 : char *uniqueid = crm_generate_uuid();
932 :
933 0 : node = pcmk__assert_alloc(1, sizeof(crm_node_t));
934 :
935 0 : crm_info("Created entry %s/%p for node %s/%u (%d total)",
936 : uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
937 0 : g_hash_table_replace(crm_peer_cache, uniqueid, node);
938 : }
939 :
940 0 : if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
941 0 : crm_info("Node %u is now known as %s", id, uname);
942 : }
943 :
944 0 : if(id > 0 && node->id == 0) {
945 0 : node->id = id;
946 : }
947 :
948 0 : if (uname && (node->uname == NULL)) {
949 0 : update_peer_uname(node, uname);
950 : }
951 :
952 0 : if(node->uuid == NULL) {
953 0 : if (uuid == NULL) {
954 0 : uuid = pcmk__cluster_node_uuid(node);
955 : }
956 :
957 0 : if (uuid) {
958 0 : crm_info("Node %u has uuid %s", id, uuid);
959 :
960 : } else {
961 0 : crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
962 : }
963 : }
964 :
965 0 : free(uname_lookup);
966 :
967 0 : return node;
968 : }
969 :
970 : /*!
971 : * \internal
972 : * \brief Update a node's uname
973 : *
974 : * \param[in,out] node Node object to update
975 : * \param[in] uname New name to set
976 : *
977 : * \note This function should not be called within a peer cache iteration,
978 : * because in some cases it can remove conflicting cache entries,
979 : * which would invalidate the iterator.
980 : */
981 : static void
982 0 : update_peer_uname(crm_node_t *node, const char *uname)
983 : {
984 0 : CRM_CHECK(uname != NULL,
985 : crm_err("Bug: can't update node name without name"); return);
986 0 : CRM_CHECK(node != NULL,
987 : crm_err("Bug: can't update node name to %s without node", uname);
988 : return);
989 :
990 0 : if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
991 0 : crm_debug("Node uname '%s' did not change", uname);
992 0 : return;
993 : }
994 :
995 0 : for (const char *c = uname; *c; ++c) {
996 0 : if ((*c >= 'A') && (*c <= 'Z')) {
997 0 : crm_warn("Node names with capitals are discouraged, consider changing '%s'",
998 : uname);
999 0 : break;
1000 : }
1001 : }
1002 :
1003 0 : pcmk__str_update(&node->uname, uname);
1004 :
1005 0 : if (peer_status_callback != NULL) {
1006 0 : peer_status_callback(crm_status_uname, node, NULL);
1007 : }
1008 :
1009 : #if SUPPORT_COROSYNC
1010 0 : if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
1011 0 : && !pcmk_is_set(node->flags, crm_remote_node)) {
1012 :
1013 0 : remove_conflicting_peer(node);
1014 : }
1015 : #endif
1016 : }
1017 :
1018 : /*!
1019 : * \internal
1020 : * \brief Get log-friendly string equivalent of a process flag
1021 : *
1022 : * \param[in] proc Process flag
1023 : *
1024 : * \return Log-friendly string equivalent of \p proc
1025 : */
1026 : static inline const char *
1027 0 : proc2text(enum crm_proc_flag proc)
1028 : {
1029 0 : const char *text = "unknown";
1030 :
1031 0 : switch (proc) {
1032 0 : case crm_proc_none:
1033 0 : text = "none";
1034 0 : break;
1035 0 : case crm_proc_cpg:
1036 0 : text = "corosync-cpg";
1037 0 : break;
1038 : }
1039 0 : return text;
1040 : }
1041 :
1042 : /*!
1043 : * \internal
1044 : * \brief Update a node's process information (and potentially state)
1045 : *
1046 : * \param[in] source Caller's function name (for log messages)
1047 : * \param[in,out] node Node object to update
1048 : * \param[in] flag Bitmask of new process information
1049 : * \param[in] status node status (online, offline, etc.)
1050 : *
1051 : * \return NULL if any node was reaped from peer caches, value of node otherwise
1052 : *
1053 : * \note If this function returns NULL, the supplied node object was likely
1054 : * freed and should not be used again. This function should not be
1055 : * called within a cache iteration if reaping is possible, otherwise
1056 : * reaping could invalidate the iterator.
1057 : */
1058 : crm_node_t *
1059 0 : crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
1060 : {
1061 0 : uint32_t last = 0;
1062 0 : gboolean changed = FALSE;
1063 :
1064 0 : CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
1065 : source, proc2text(flag), status);
1066 : return NULL);
1067 :
1068 : /* Pacemaker doesn't spawn processes on remote nodes */
1069 0 : if (pcmk_is_set(node->flags, crm_remote_node)) {
1070 0 : return node;
1071 : }
1072 :
1073 0 : last = node->processes;
1074 0 : if (status == NULL) {
1075 0 : node->processes = flag;
1076 0 : if (node->processes != last) {
1077 0 : changed = TRUE;
1078 : }
1079 :
1080 0 : } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1081 0 : if ((node->processes & flag) != flag) {
1082 0 : node->processes = pcmk__set_flags_as(__func__, __LINE__,
1083 : LOG_TRACE, "Peer process",
1084 0 : node->uname, node->processes,
1085 : flag, "processes");
1086 0 : changed = TRUE;
1087 : }
1088 :
1089 0 : } else if (node->processes & flag) {
1090 0 : node->processes = pcmk__clear_flags_as(__func__, __LINE__,
1091 : LOG_TRACE, "Peer process",
1092 0 : node->uname, node->processes,
1093 : flag, "processes");
1094 0 : changed = TRUE;
1095 : }
1096 :
1097 0 : if (changed) {
1098 0 : if (status == NULL && flag <= crm_proc_none) {
1099 0 : crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
1100 : node->id);
1101 : } else {
1102 0 : crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
1103 : proc2text(flag), status);
1104 : }
1105 :
1106 0 : if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1107 0 : node->when_online = time(NULL);
1108 :
1109 : } else {
1110 0 : node->when_online = 0;
1111 : }
1112 :
1113 : /* Call the client callback first, then update the peer state,
1114 : * in case the node will be reaped
1115 : */
1116 0 : if (peer_status_callback != NULL) {
1117 0 : peer_status_callback(crm_status_processes, node, &last);
1118 : }
1119 :
1120 : /* The client callback shouldn't touch the peer caches,
1121 : * but as a safety net, bail if the peer cache was destroyed.
1122 : */
1123 0 : if (crm_peer_cache == NULL) {
1124 0 : return NULL;
1125 : }
1126 :
1127 0 : if (autoreap) {
1128 0 : const char *peer_state = NULL;
1129 :
1130 0 : if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1131 0 : peer_state = CRM_NODE_MEMBER;
1132 : } else {
1133 0 : peer_state = CRM_NODE_LOST;
1134 : }
1135 0 : node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1136 : }
1137 : } else {
1138 0 : crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
1139 : proc2text(flag), status);
1140 : }
1141 0 : return node;
1142 : }
1143 :
1144 : /*!
1145 : * \internal
1146 : * \brief Update a cluster node cache entry's expected join state
1147 : *
1148 : * \param[in] source Caller's function name (for logging)
1149 : * \param[in,out] node Node to update
1150 : * \param[in] expected Node's new join state
1151 : */
1152 : void
1153 0 : pcmk__update_peer_expected(const char *source, crm_node_t *node,
1154 : const char *expected)
1155 : {
1156 0 : char *last = NULL;
1157 0 : gboolean changed = FALSE;
1158 :
1159 0 : CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
1160 : return);
1161 :
1162 : /* Remote nodes don't participate in joins */
1163 0 : if (pcmk_is_set(node->flags, crm_remote_node)) {
1164 0 : return;
1165 : }
1166 :
1167 0 : last = node->expected;
1168 0 : if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1169 0 : node->expected = strdup(expected);
1170 0 : changed = TRUE;
1171 : }
1172 :
1173 0 : if (changed) {
1174 0 : crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
1175 : expected, last);
1176 0 : free(last);
1177 : } else {
1178 0 : crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
1179 : node->id, expected);
1180 : }
1181 : }
1182 :
1183 : /*!
1184 : * \internal
1185 : * \brief Update a node's state and membership information
1186 : *
1187 : * \param[in] source Caller's function name (for log messages)
1188 : * \param[in,out] node Node object to update
1189 : * \param[in] state Node's new state
1190 : * \param[in] membership Node's new membership ID
1191 : * \param[in,out] iter If not NULL, pointer to node's peer cache iterator
1192 : *
1193 : * \return NULL if any node was reaped, value of node otherwise
1194 : *
1195 : * \note If this function returns NULL, the supplied node object was likely
1196 : * freed and should not be used again. This function may be called from
1197 : * within a peer cache iteration if the iterator is supplied.
1198 : */
1199 : static crm_node_t *
1200 0 : update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
1201 : uint64_t membership, GHashTableIter *iter)
1202 : {
1203 : gboolean is_member;
1204 :
1205 0 : CRM_CHECK(node != NULL,
1206 : crm_err("Could not set state for unknown host to %s"
1207 : CRM_XS " source=%s", state, source);
1208 : return NULL);
1209 :
1210 0 : is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
1211 0 : if (is_member) {
1212 0 : node->when_lost = 0;
1213 0 : if (membership) {
1214 0 : node->last_seen = membership;
1215 : }
1216 : }
1217 :
1218 0 : if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1219 0 : char *last = node->state;
1220 :
1221 0 : if (is_member) {
1222 0 : node->when_member = time(NULL);
1223 :
1224 : } else {
1225 0 : node->when_member = 0;
1226 : }
1227 :
1228 0 : node->state = strdup(state);
1229 0 : crm_notice("Node %s state is now %s " CRM_XS
1230 : " nodeid=%u previous=%s source=%s", node->uname, state,
1231 : node->id, (last? last : "unknown"), source);
1232 0 : if (peer_status_callback != NULL) {
1233 0 : peer_status_callback(crm_status_nstate, node, last);
1234 : }
1235 0 : free(last);
1236 :
1237 0 : if (autoreap && !is_member
1238 0 : && !pcmk_is_set(node->flags, crm_remote_node)) {
1239 : /* We only autoreap from the peer cache, not the remote peer cache,
1240 : * because the latter should be managed only by
1241 : * refresh_remote_nodes().
1242 : */
1243 0 : if(iter) {
1244 0 : crm_notice("Purged 1 peer with " PCMK_XA_ID
1245 : "=%u and/or uname=%s from the membership cache",
1246 : node->id, node->uname);
1247 0 : g_hash_table_iter_remove(iter);
1248 :
1249 : } else {
1250 0 : pcmk__cluster_forget_cluster_node(node->id, node->uname);
1251 : }
1252 0 : node = NULL;
1253 : }
1254 :
1255 : } else {
1256 0 : crm_trace("Node %s state is unchanged (%s) " CRM_XS
1257 : " nodeid=%u source=%s", node->uname, state, node->id, source);
1258 : }
1259 0 : return node;
1260 : }
1261 :
1262 : /*!
1263 : * \brief Update a node's state and membership information
1264 : *
1265 : * \param[in] source Caller's function name (for log messages)
1266 : * \param[in,out] node Node object to update
1267 : * \param[in] state Node's new state
1268 : * \param[in] membership Node's new membership ID
1269 : *
1270 : * \return NULL if any node was reaped, value of node otherwise
1271 : *
1272 : * \note If this function returns NULL, the supplied node object was likely
1273 : * freed and should not be used again. This function should not be
1274 : * called within a cache iteration if reaping is possible,
1275 : * otherwise reaping could invalidate the iterator.
1276 : */
1277 : crm_node_t *
1278 0 : pcmk__update_peer_state(const char *source, crm_node_t *node,
1279 : const char *state, uint64_t membership)
1280 : {
1281 0 : return update_peer_state_iter(source, node, state, membership, NULL);
1282 : }
1283 :
1284 : /*!
1285 : * \internal
1286 : * \brief Reap all nodes from cache whose membership information does not match
1287 : *
1288 : * \param[in] membership Membership ID of nodes to keep
1289 : */
1290 : void
1291 0 : pcmk__reap_unseen_nodes(uint64_t membership)
1292 : {
1293 : GHashTableIter iter;
1294 0 : crm_node_t *node = NULL;
1295 :
1296 0 : crm_trace("Reaping unseen nodes...");
1297 0 : g_hash_table_iter_init(&iter, crm_peer_cache);
1298 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1299 0 : if (node->last_seen != membership) {
1300 0 : if (node->state) {
1301 : /*
1302 : * Calling update_peer_state_iter() allows us to
1303 : * remove the node from crm_peer_cache without
1304 : * invalidating our iterator
1305 : */
1306 0 : update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1307 : membership, &iter);
1308 :
1309 : } else {
1310 0 : crm_info("State of node %s[%u] is still unknown",
1311 : node->uname, node->id);
1312 : }
1313 : }
1314 : }
1315 0 : }
1316 :
1317 : static crm_node_t *
1318 0 : find_cib_cluster_node(const char *id, const char *uname)
1319 : {
1320 : GHashTableIter iter;
1321 0 : crm_node_t *node = NULL;
1322 0 : crm_node_t *by_id = NULL;
1323 0 : crm_node_t *by_name = NULL;
1324 :
1325 0 : if (uname) {
1326 0 : g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1327 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1328 0 : if (node->uname && strcasecmp(node->uname, uname) == 0) {
1329 0 : crm_trace("Name match: %s = %p", node->uname, node);
1330 0 : by_name = node;
1331 0 : break;
1332 : }
1333 : }
1334 : }
1335 :
1336 0 : if (id) {
1337 0 : g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1338 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1339 0 : if(strcasecmp(node->uuid, id) == 0) {
1340 0 : crm_trace("ID match: %s= %p", id, node);
1341 0 : by_id = node;
1342 0 : break;
1343 : }
1344 : }
1345 : }
1346 :
1347 0 : node = by_id; /* Good default */
1348 0 : if (by_id == by_name) {
1349 : /* Nothing to do if they match (both NULL counts) */
1350 0 : crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1351 :
1352 0 : } else if (by_id == NULL && by_name) {
1353 0 : crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1354 :
1355 0 : if (id) {
1356 0 : node = NULL;
1357 :
1358 : } else {
1359 0 : node = by_name;
1360 : }
1361 :
1362 0 : } else if (by_name == NULL && by_id) {
1363 0 : crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1364 :
1365 0 : if (uname) {
1366 0 : node = NULL;
1367 : }
1368 :
1369 0 : } else if (uname && by_id->uname
1370 0 : && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1371 : /* Multiple nodes have the same uname in the CIB.
1372 : * Return by_id. */
1373 :
1374 0 : } else if (id && by_name->uuid
1375 0 : && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1376 : /* Multiple nodes have the same id in the CIB.
1377 : * Return by_name. */
1378 0 : node = by_name;
1379 :
1380 : } else {
1381 0 : node = NULL;
1382 : }
1383 :
1384 0 : if (node == NULL) {
1385 0 : crm_debug("Couldn't find node%s%s%s%s",
1386 : id? " " : "",
1387 : id? id : "",
1388 : uname? " with name " : "",
1389 : uname? uname : "");
1390 : }
1391 :
1392 0 : return node;
1393 : }
1394 :
1395 : static void
1396 0 : cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1397 : {
1398 0 : const char *id = crm_element_value(xml_node, PCMK_XA_ID);
1399 0 : const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
1400 0 : crm_node_t * node = NULL;
1401 :
1402 0 : CRM_CHECK(id != NULL && uname !=NULL, return);
1403 0 : node = find_cib_cluster_node(id, uname);
1404 :
1405 0 : if (node == NULL) {
1406 0 : char *uniqueid = crm_generate_uuid();
1407 :
1408 0 : node = pcmk__assert_alloc(1, sizeof(crm_node_t));
1409 :
1410 0 : node->uname = pcmk__str_copy(uname);
1411 0 : node->uuid = pcmk__str_copy(id);
1412 :
1413 0 : g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
1414 :
1415 0 : } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1416 0 : pcmk__str_update(&node->uname, uname);
1417 :
1418 : /* Node is in cache and hasn't been updated already, so mark it clean */
1419 0 : clear_peer_flags(node, crm_node_dirty);
1420 : }
1421 :
1422 : }
1423 :
1424 : static void
1425 0 : refresh_cluster_node_cib_cache(xmlNode *cib)
1426 : {
1427 0 : pcmk__cluster_init_node_caches();
1428 :
1429 0 : g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
1430 :
1431 0 : crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
1432 : cluster_node_cib_cache_refresh_helper, NULL);
1433 :
1434 : // Remove all old cache entries that weren't seen in the CIB
1435 0 : g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
1436 0 : }
1437 :
1438 : void
1439 0 : pcmk__refresh_node_caches_from_cib(xmlNode *cib)
1440 : {
1441 0 : refresh_remote_nodes(cib);
1442 0 : refresh_cluster_node_cib_cache(cib);
1443 0 : }
1444 :
1445 : // Deprecated functions kept only for backward API compatibility
1446 : // LCOV_EXCL_START
1447 :
1448 : #include <crm/cluster/compat.h>
1449 :
1450 : int
1451 : crm_terminate_member(int nodeid, const char *uname, void *unused)
1452 : {
1453 : return stonith_api_kick(nodeid, uname, 120, TRUE);
1454 : }
1455 :
1456 : int
1457 : crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1458 : {
1459 : return stonith_api_kick(nodeid, uname, 120, TRUE);
1460 : }
1461 :
1462 : crm_node_t *
1463 : crm_get_peer(unsigned int id, const char *uname)
1464 : {
1465 : return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster_member);
1466 : }
1467 :
1468 : crm_node_t *
1469 : crm_get_peer_full(unsigned int id, const char *uname, int flags)
1470 : {
1471 : return pcmk__get_node(id, uname, NULL, flags);
1472 : }
1473 :
/* Deprecated compatibility wrapper: returns the value of
 * pcmk__cluster_num_remote_nodes(), capped at INT_MAX so that it fits the
 * int return type.
 */
int
crm_remote_peer_cache_size(void)
{
    unsigned int count = pcmk__cluster_num_remote_nodes();

    if (count < INT_MAX) {
        return (int) count;
    }
    return INT_MAX;
}
1481 :
1482 : void
1483 : crm_remote_peer_cache_refresh(xmlNode *cib)
1484 : {
1485 : refresh_remote_nodes(cib);
1486 : }
1487 :
1488 : crm_node_t *
1489 : crm_remote_peer_get(const char *node_name)
1490 : {
1491 : return pcmk__cluster_lookup_remote_node(node_name);
1492 : }
1493 :
/* Deprecated compatibility wrapper around
 * pcmk__cluster_forget_remote_node().
 */
void
crm_remote_peer_cache_remove(const char *node_name)
{
    pcmk__cluster_forget_remote_node(node_name);
    return;
}
1499 :
1500 : gboolean
1501 : crm_is_peer_active(const crm_node_t * node)
1502 : {
1503 : return pcmk__cluster_is_node_active(node);
1504 : }
1505 :
1506 : guint
1507 : crm_active_peers(void)
1508 : {
1509 : return pcmk__cluster_num_active_nodes();
1510 : }
1511 :
1512 : guint
1513 : reap_crm_member(uint32_t id, const char *name)
1514 : {
1515 : int matches = 0;
1516 : crm_node_t search = { 0, };
1517 :
1518 : if (crm_peer_cache == NULL) {
1519 : crm_trace("Membership cache not initialized, ignoring purge request");
1520 : return 0;
1521 : }
1522 :
1523 : search.id = id;
1524 : search.uname = pcmk__str_copy(name);
1525 : matches = g_hash_table_foreach_remove(crm_peer_cache,
1526 : should_forget_cluster_node, &search);
1527 : if(matches) {
1528 : crm_notice("Purged %d peer%s with " PCMK_XA_ID
1529 : "=%u%s%s from the membership cache",
1530 : matches, pcmk__plural_s(matches), search.id,
1531 : (search.uname? " and/or uname=" : ""),
1532 : (search.uname? search.uname : ""));
1533 :
1534 : } else {
1535 : crm_info("No peers with " PCMK_XA_ID
1536 : "=%u%s%s to purge from the membership cache",
1537 : search.id, (search.uname? " and/or uname=" : ""),
1538 : (search.uname? search.uname : ""));
1539 : }
1540 :
1541 : free(search.uname);
1542 : return matches;
1543 : }
1544 :
/* Deprecated compatibility wrapper around pcmk__cluster_init_node_caches(). */
void
crm_peer_init(void)
{
    pcmk__cluster_init_node_caches();
    return;
}
1550 :
/* Deprecated compatibility wrapper around
 * pcmk__cluster_destroy_node_caches().
 */
void
crm_peer_destroy(void)
{
    pcmk__cluster_destroy_node_caches();
    return;
}
1556 :
1557 : void
1558 : crm_set_autoreap(gboolean enable)
1559 : {
1560 : pcmk__cluster_set_autoreap(enable);
1561 : }
1562 :
1563 : void
1564 : crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
1565 : {
1566 : pcmk__cluster_set_status_callback(dispatch);
1567 : }
1568 :
1569 : // LCOV_EXCL_STOP
1570 : // End deprecated API
|