Line data Source code
1 : /*
2 : * Copyright 2004-2024 the Pacemaker project contributors
3 : *
4 : * The version control history for this file may have further details.
5 : *
6 : * This source code is licensed under the GNU General Public License version 2
7 : * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 : */
9 :
10 : #include <crm_internal.h>
11 : #include <crm/common/xml.h>
12 : #include <crm/common/xml_internal.h>
13 : #include <pacemaker-internal.h>
14 : #include <pacemaker.h>
15 : #include "libpacemaker_private.h"
16 :
17 : /*!
18 : * \internal
19 : * \brief Check whether a node is available to run resources
20 : *
21 : * \param[in] node Node to check
22 : * \param[in] consider_score If true, consider a negative score unavailable
23 : * \param[in] consider_guest If true, consider a guest node unavailable whose
24 : * resource will not be active
25 : *
26 : * \return true if node is online and not shutting down, unclean, or in standby
27 : * or maintenance mode, otherwise false
28 : */
29 : bool
30 0 : pcmk__node_available(const pcmk_node_t *node, bool consider_score,
31 : bool consider_guest)
32 : {
33 0 : if ((node == NULL) || (node->details == NULL) || !node->details->online
34 0 : || node->details->shutdown || node->details->unclean
35 0 : || node->details->standby || node->details->maintenance) {
36 0 : return false;
37 : }
38 :
39 0 : if (consider_score && (node->weight < 0)) {
40 0 : return false;
41 : }
42 :
43 : // @TODO Go through all callers to see which should set consider_guest
44 0 : if (consider_guest && pcmk__is_guest_or_bundle_node(node)) {
45 0 : pcmk_resource_t *guest = node->details->remote_rsc->container;
46 :
47 0 : if (guest->fns->location(guest, NULL, FALSE) == NULL) {
48 0 : return false;
49 : }
50 : }
51 :
52 0 : return true;
53 : }
54 :
55 : /*!
56 : * \internal
57 : * \brief Copy a hash table of node objects
58 : *
59 : * \param[in] nodes Hash table to copy
60 : *
61 : * \return New copy of nodes (or NULL if nodes is NULL)
62 : */
63 : GHashTable *
64 0 : pcmk__copy_node_table(GHashTable *nodes)
65 : {
66 0 : GHashTable *new_table = NULL;
67 : GHashTableIter iter;
68 0 : pcmk_node_t *node = NULL;
69 :
70 0 : if (nodes == NULL) {
71 0 : return NULL;
72 : }
73 0 : new_table = pcmk__strkey_table(NULL, free);
74 0 : g_hash_table_iter_init(&iter, nodes);
75 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
76 0 : pcmk_node_t *new_node = pe__copy_node(node);
77 :
78 0 : g_hash_table_insert(new_table, (gpointer) new_node->details->id,
79 : new_node);
80 : }
81 0 : return new_table;
82 : }
83 :
84 : /*!
85 : * \internal
86 : * \brief Free a table of node tables
87 : *
88 : * \param[in,out] data Table to free
89 : *
90 : * \note This is a \c GDestroyNotify wrapper for \c g_hash_table_destroy().
91 : */
92 : static void
93 0 : destroy_node_tables(gpointer data)
94 : {
95 0 : g_hash_table_destroy((GHashTable *) data);
96 0 : }
97 :
98 : /*!
99 : * \internal
100 : * \brief Recursively copy the node tables of a resource
101 : *
102 : * Build a hash table containing copies of the allowed nodes tables of \p rsc
103 : * and its entire tree of descendants. The key is the resource ID, and the value
104 : * is a copy of the resource's node table.
105 : *
106 : * \param[in] rsc Resource whose node table to copy
107 : * \param[in,out] copy Where to store the copied node tables
108 : *
109 : * \note \p *copy should be \c NULL for the top-level call.
110 : * \note The caller is responsible for freeing \p copy using
111 : * \c g_hash_table_destroy().
112 : */
113 : void
114 0 : pcmk__copy_node_tables(const pcmk_resource_t *rsc, GHashTable **copy)
115 : {
116 0 : CRM_ASSERT((rsc != NULL) && (copy != NULL));
117 :
118 0 : if (*copy == NULL) {
119 0 : *copy = pcmk__strkey_table(NULL, destroy_node_tables);
120 : }
121 :
122 0 : g_hash_table_insert(*copy, rsc->id,
123 0 : pcmk__copy_node_table(rsc->allowed_nodes));
124 :
125 0 : for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) {
126 0 : pcmk__copy_node_tables((const pcmk_resource_t *) iter->data, copy);
127 : }
128 0 : }
129 :
130 : /*!
131 : * \internal
132 : * \brief Recursively restore the node tables of a resource from backup
133 : *
134 : * Given a hash table containing backup copies of the allowed nodes tables of
135 : * \p rsc and its entire tree of descendants, replace the resources' current
136 : * node tables with the backed-up copies.
137 : *
138 : * \param[in,out] rsc Resource whose node tables to restore
139 : * \param[in] backup Table of backup node tables (created by
140 : * \c pcmk__copy_node_tables())
141 : *
142 : * \note This function frees the resources' current node tables.
143 : */
144 : void
145 0 : pcmk__restore_node_tables(pcmk_resource_t *rsc, GHashTable *backup)
146 : {
147 0 : CRM_ASSERT((rsc != NULL) && (backup != NULL));
148 :
149 0 : g_hash_table_destroy(rsc->allowed_nodes);
150 :
151 : // Copy to avoid danger with multiple restores
152 0 : rsc->allowed_nodes = g_hash_table_lookup(backup, rsc->id);
153 0 : rsc->allowed_nodes = pcmk__copy_node_table(rsc->allowed_nodes);
154 :
155 0 : for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
156 0 : pcmk__restore_node_tables((pcmk_resource_t *) iter->data, backup);
157 : }
158 0 : }
159 :
160 : /*!
161 : * \internal
162 : * \brief Copy a list of node objects
163 : *
164 : * \param[in] list List to copy
165 : * \param[in] reset Set copies' scores to 0
166 : *
167 : * \return New list of shallow copies of nodes in original list
168 : */
169 : GList *
170 0 : pcmk__copy_node_list(const GList *list, bool reset)
171 : {
172 0 : GList *result = NULL;
173 :
174 0 : for (const GList *iter = list; iter != NULL; iter = iter->next) {
175 0 : pcmk_node_t *new_node = NULL;
176 0 : pcmk_node_t *this_node = iter->data;
177 :
178 0 : new_node = pe__copy_node(this_node);
179 0 : if (reset) {
180 0 : new_node->weight = 0;
181 : }
182 0 : result = g_list_prepend(result, new_node);
183 : }
184 0 : return result;
185 : }
186 :
187 : /*!
188 : * \internal
189 : * \brief Compare two nodes for assignment preference
190 : *
191 : * Given two nodes, check which one is more preferred by assignment criteria
192 : * such as node score and utilization.
193 : *
194 : * \param[in] a First node to compare
195 : * \param[in] b Second node to compare
196 : * \param[in] data Node to prefer if all else equal
197 : *
198 : * \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are
199 : * equally preferred
200 : */
201 : static gint
202 0 : compare_nodes(gconstpointer a, gconstpointer b, gpointer data)
203 : {
204 0 : const pcmk_node_t *node1 = (const pcmk_node_t *) a;
205 0 : const pcmk_node_t *node2 = (const pcmk_node_t *) b;
206 0 : const pcmk_node_t *preferred = (const pcmk_node_t *) data;
207 :
208 0 : int node1_score = -PCMK_SCORE_INFINITY;
209 0 : int node2_score = -PCMK_SCORE_INFINITY;
210 :
211 0 : int result = 0;
212 :
213 0 : if (a == NULL) {
214 0 : return 1;
215 : }
216 0 : if (b == NULL) {
217 0 : return -1;
218 : }
219 :
220 : // Compare node scores
221 :
222 0 : if (pcmk__node_available(node1, false, false)) {
223 0 : node1_score = node1->weight;
224 : }
225 0 : if (pcmk__node_available(node2, false, false)) {
226 0 : node2_score = node2->weight;
227 : }
228 :
229 0 : if (node1_score > node2_score) {
230 0 : crm_trace("%s before %s (score %d > %d)",
231 : pcmk__node_name(node1), pcmk__node_name(node2),
232 : node1_score, node2_score);
233 0 : return -1;
234 : }
235 :
236 0 : if (node1_score < node2_score) {
237 0 : crm_trace("%s after %s (score %d < %d)",
238 : pcmk__node_name(node1), pcmk__node_name(node2),
239 : node1_score, node2_score);
240 0 : return 1;
241 : }
242 :
243 : // If appropriate, compare node utilization
244 :
245 0 : if (pcmk__str_eq(node1->details->data_set->placement_strategy,
246 : PCMK_VALUE_MINIMAL, pcmk__str_casei)) {
247 0 : goto equal;
248 : }
249 :
250 0 : if (pcmk__str_eq(node1->details->data_set->placement_strategy,
251 : PCMK_VALUE_BALANCED, pcmk__str_casei)) {
252 :
253 0 : result = pcmk__compare_node_capacities(node1, node2);
254 0 : if (result < 0) {
255 0 : crm_trace("%s before %s (greater capacity by %d attributes)",
256 : pcmk__node_name(node1), pcmk__node_name(node2),
257 : result * -1);
258 0 : return -1;
259 0 : } else if (result > 0) {
260 0 : crm_trace("%s after %s (lower capacity by %d attributes)",
261 : pcmk__node_name(node1), pcmk__node_name(node2), result);
262 0 : return 1;
263 : }
264 : }
265 :
266 : // Compare number of resources already assigned to node
267 :
268 0 : if (node1->details->num_resources < node2->details->num_resources) {
269 0 : crm_trace("%s before %s (%d resources < %d)",
270 : pcmk__node_name(node1), pcmk__node_name(node2),
271 : node1->details->num_resources, node2->details->num_resources);
272 0 : return -1;
273 :
274 0 : } else if (node1->details->num_resources > node2->details->num_resources) {
275 0 : crm_trace("%s after %s (%d resources > %d)",
276 : pcmk__node_name(node1), pcmk__node_name(node2),
277 : node1->details->num_resources, node2->details->num_resources);
278 0 : return 1;
279 : }
280 :
281 : // Check whether one node is already running desired resource
282 :
283 0 : if (preferred != NULL) {
284 0 : if (pcmk__same_node(preferred, node1)) {
285 0 : crm_trace("%s before %s (preferred node)",
286 : pcmk__node_name(node1), pcmk__node_name(node2));
287 0 : return -1;
288 0 : } else if (pcmk__same_node(preferred, node2)) {
289 0 : crm_trace("%s after %s (not preferred node)",
290 : pcmk__node_name(node1), pcmk__node_name(node2));
291 0 : return 1;
292 : }
293 : }
294 :
295 : // If all else is equal, prefer node with lowest-sorting name
296 0 : equal:
297 0 : result = strcmp(node1->details->uname, node2->details->uname);
298 0 : if (result < 0) {
299 0 : crm_trace("%s before %s (name)",
300 : pcmk__node_name(node1), pcmk__node_name(node2));
301 0 : return -1;
302 0 : } else if (result > 0) {
303 0 : crm_trace("%s after %s (name)",
304 : pcmk__node_name(node1), pcmk__node_name(node2));
305 0 : return 1;
306 : }
307 :
308 0 : crm_trace("%s == %s", pcmk__node_name(node1), pcmk__node_name(node2));
309 0 : return 0;
310 : }
311 :
312 : /*!
313 : * \internal
314 : * \brief Sort a list of nodes by assigment preference
315 : *
316 : * \param[in,out] nodes Node list to sort
317 : * \param[in] active_node Node where resource being assigned is active
318 : *
319 : * \return New head of sorted list
320 : */
321 : GList *
322 0 : pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node)
323 : {
324 0 : return g_list_sort_with_data(nodes, compare_nodes, active_node);
325 : }
326 :
327 : /*!
328 : * \internal
329 : * \brief Check whether any node is available to run resources
330 : *
331 : * \param[in] nodes Nodes to check
332 : *
333 : * \return true if any node in \p nodes is available to run resources,
334 : * otherwise false
335 : */
336 : bool
337 0 : pcmk__any_node_available(GHashTable *nodes)
338 : {
339 : GHashTableIter iter;
340 0 : const pcmk_node_t *node = NULL;
341 :
342 0 : if (nodes == NULL) {
343 0 : return false;
344 : }
345 0 : g_hash_table_iter_init(&iter, nodes);
346 0 : while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
347 0 : if (pcmk__node_available(node, true, false)) {
348 0 : return true;
349 : }
350 : }
351 0 : return false;
352 : }
353 :
354 : /*!
355 : * \internal
356 : * \brief Apply node health values for all nodes in cluster
357 : *
358 : * \param[in,out] scheduler Scheduler data
359 : */
360 : void
361 0 : pcmk__apply_node_health(pcmk_scheduler_t *scheduler)
362 : {
363 0 : int base_health = 0;
364 : enum pcmk__health_strategy strategy;
365 : const char *strategy_str =
366 0 : pcmk__cluster_option(scheduler->config_hash,
367 : PCMK_OPT_NODE_HEALTH_STRATEGY);
368 :
369 0 : strategy = pcmk__parse_health_strategy(strategy_str);
370 0 : if (strategy == pcmk__health_strategy_none) {
371 0 : return;
372 : }
373 0 : crm_info("Applying node health strategy '%s'", strategy_str);
374 :
375 : // The progressive strategy can use a base health score
376 0 : if (strategy == pcmk__health_strategy_progressive) {
377 0 : base_health = pe__health_score(PCMK_OPT_NODE_HEALTH_BASE, scheduler);
378 : }
379 :
380 0 : for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
381 0 : pcmk_node_t *node = (pcmk_node_t *) iter->data;
382 0 : int health = pe__sum_node_health_scores(node, base_health);
383 :
384 : // An overall health score of 0 has no effect
385 0 : if (health == 0) {
386 0 : continue;
387 : }
388 0 : crm_info("Overall system health of %s is %d",
389 : pcmk__node_name(node), health);
390 :
391 : // Use node health as a location score for each resource on the node
392 0 : for (GList *r = scheduler->resources; r != NULL; r = r->next) {
393 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) r->data;
394 :
395 0 : bool constrain = true;
396 :
397 0 : if (health < 0) {
398 : /* Negative health scores do not apply to resources with
399 : * PCMK_META_ALLOW_UNHEALTHY_NODES=true.
400 : */
401 0 : constrain = !crm_is_true(g_hash_table_lookup(rsc->meta,
402 : PCMK_META_ALLOW_UNHEALTHY_NODES));
403 : }
404 0 : if (constrain) {
405 0 : pcmk__new_location(strategy_str, rsc, health, NULL, node);
406 : } else {
407 0 : pcmk__rsc_trace(rsc, "%s is immune from health ban on %s",
408 : rsc->id, pcmk__node_name(node));
409 : }
410 : }
411 : }
412 : }
413 :
414 : /*!
415 : * \internal
416 : * \brief Check for a node in a resource's parent's allowed nodes
417 : *
418 : * \param[in] rsc Resource whose parent should be checked
419 : * \param[in] node Node to check for
420 : *
421 : * \return Equivalent of \p node from \p rsc's parent's allowed nodes if any,
422 : * otherwise NULL
423 : */
424 : pcmk_node_t *
425 0 : pcmk__top_allowed_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
426 : {
427 0 : GHashTable *allowed_nodes = NULL;
428 :
429 0 : if ((rsc == NULL) || (node == NULL)) {
430 0 : return NULL;
431 0 : } else if (rsc->parent == NULL) {
432 0 : allowed_nodes = rsc->allowed_nodes;
433 : } else {
434 0 : allowed_nodes = rsc->parent->allowed_nodes;
435 : }
436 0 : return g_hash_table_lookup(allowed_nodes, node->details->id);
437 : }
|