Line data Source code
1 : /*
2 : * Copyright 2004-2024 the Pacemaker project contributors
3 : *
4 : * The version control history for this file may have further details.
5 : *
6 : * This source code is licensed under the GNU General Public License version 2
7 : * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 : */
9 :
10 : /* This file is intended for code usable with both clone instances and bundle
11 : * replica containers.
12 : */
13 :
14 : #include <crm_internal.h>
15 : #include <crm/common/xml.h>
16 : #include <pacemaker-internal.h>
17 : #include "libpacemaker_private.h"
18 :
19 : /*!
20 : * \internal
21 : * \brief Check whether a node is allowed to run an instance
22 : *
23 : * \param[in] instance Clone instance or bundle container to check
24 : * \param[in] node Node to check
25 : * \param[in] max_per_node Maximum number of instances allowed to run on a node
26 : *
27 : * \return true if \p node is allowed to run \p instance, otherwise false
28 : */
29 : static bool
30 0 : can_run_instance(const pcmk_resource_t *instance, const pcmk_node_t *node,
31 : int max_per_node)
32 : {
33 0 : pcmk_node_t *allowed_node = NULL;
34 :
35 0 : if (pcmk_is_set(instance->flags, pcmk_rsc_removed)) {
36 0 : pcmk__rsc_trace(instance, "%s cannot run on %s: orphaned",
37 : instance->id, pcmk__node_name(node));
38 0 : return false;
39 : }
40 :
41 0 : if (!pcmk__node_available(node, false, false)) {
42 0 : pcmk__rsc_trace(instance,
43 : "%s cannot run on %s: node cannot run resources",
44 : instance->id, pcmk__node_name(node));
45 0 : return false;
46 : }
47 :
48 0 : allowed_node = pcmk__top_allowed_node(instance, node);
49 0 : if (allowed_node == NULL) {
50 0 : crm_warn("%s cannot run on %s: node not allowed",
51 : instance->id, pcmk__node_name(node));
52 0 : return false;
53 : }
54 :
55 0 : if (allowed_node->weight < 0) {
56 0 : pcmk__rsc_trace(instance,
57 : "%s cannot run on %s: parent score is %s there",
58 : instance->id, pcmk__node_name(node),
59 : pcmk_readable_score(allowed_node->weight));
60 0 : return false;
61 : }
62 :
63 0 : if (allowed_node->count >= max_per_node) {
64 0 : pcmk__rsc_trace(instance,
65 : "%s cannot run on %s: node already has %d instance%s",
66 : instance->id, pcmk__node_name(node), max_per_node,
67 : pcmk__plural_s(max_per_node));
68 0 : return false;
69 : }
70 :
71 0 : pcmk__rsc_trace(instance, "%s can run on %s (%d already running)",
72 : instance->id, pcmk__node_name(node), allowed_node->count);
73 0 : return true;
74 : }
75 :
76 : /*!
77 : * \internal
78 : * \brief Ban a clone instance or bundle replica from unavailable allowed nodes
79 : *
80 : * \param[in,out] instance Clone instance or bundle replica to ban
81 : * \param[in] max_per_node Maximum instances allowed to run on a node
82 : */
83 : static void
84 0 : ban_unavailable_allowed_nodes(pcmk_resource_t *instance, int max_per_node)
85 : {
86 0 : if (instance->allowed_nodes != NULL) {
87 : GHashTableIter iter;
88 0 : pcmk_node_t *node = NULL;
89 :
90 0 : g_hash_table_iter_init(&iter, instance->allowed_nodes);
91 0 : while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
92 0 : if (!can_run_instance(instance, node, max_per_node)) {
93 0 : pcmk__rsc_trace(instance, "Banning %s from unavailable node %s",
94 : instance->id, pcmk__node_name(node));
95 0 : node->weight = -PCMK_SCORE_INFINITY;
96 0 : for (GList *child_iter = instance->children;
97 0 : child_iter != NULL; child_iter = child_iter->next) {
98 0 : pcmk_resource_t *child = child_iter->data;
99 0 : pcmk_node_t *child_node = NULL;
100 :
101 0 : child_node = g_hash_table_lookup(child->allowed_nodes,
102 0 : node->details->id);
103 0 : if (child_node != NULL) {
104 0 : pcmk__rsc_trace(instance,
105 : "Banning %s child %s "
106 : "from unavailable node %s",
107 : instance->id, child->id,
108 : pcmk__node_name(node));
109 0 : child_node->weight = -PCMK_SCORE_INFINITY;
110 : }
111 : }
112 : }
113 : }
114 : }
115 0 : }
116 :
117 : /*!
118 : * \internal
119 : * \brief Create a hash table with a single node in it
120 : *
121 : * \param[in] node Node to copy into new table
122 : *
123 : * \return Newly created hash table containing a copy of \p node
124 : * \note The caller is responsible for freeing the result with
125 : * g_hash_table_destroy().
126 : */
127 : static GHashTable *
128 0 : new_node_table(pcmk_node_t *node)
129 : {
130 0 : GHashTable *table = pcmk__strkey_table(NULL, free);
131 :
132 0 : node = pe__copy_node(node);
133 0 : g_hash_table_insert(table, (gpointer) node->details->id, node);
134 0 : return table;
135 : }
136 :
137 : /*!
138 : * \internal
139 : * \brief Apply a resource's parent's colocation scores to a node table
140 : *
141 : * \param[in] rsc Resource whose colocations should be applied
142 : * \param[in,out] nodes Node table to apply colocations to
143 : */
144 : static void
145 0 : apply_parent_colocations(const pcmk_resource_t *rsc, GHashTable **nodes)
146 : {
147 0 : GList *colocations = pcmk__this_with_colocations(rsc);
148 :
149 0 : for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
150 0 : const pcmk__colocation_t *colocation = iter->data;
151 0 : pcmk_resource_t *other = colocation->primary;
152 0 : float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
153 :
154 0 : other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
155 : colocation, factor,
156 : pcmk__coloc_select_default);
157 : }
158 0 : g_list_free(colocations);
159 0 : colocations = pcmk__with_this_colocations(rsc);
160 :
161 0 : for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
162 0 : const pcmk__colocation_t *colocation = iter->data;
163 0 : pcmk_resource_t *other = colocation->dependent;
164 0 : float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
165 :
166 0 : if (!pcmk__colocation_has_influence(colocation, rsc)) {
167 0 : continue;
168 : }
169 0 : other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
170 : colocation, factor,
171 : pcmk__coloc_select_nonnegative);
172 : }
173 0 : g_list_free(colocations);
174 0 : }
175 :
176 : /*!
177 : * \internal
178 : * \brief Compare clone or bundle instances based on colocation scores
179 : *
180 : * Determine the relative order in which two clone or bundle instances should be
181 : * assigned to nodes, considering the scores of colocation constraints directly
182 : * or indirectly involving them.
183 : *
184 : * \param[in] instance1 First instance to compare
185 : * \param[in] instance2 Second instance to compare
186 : *
187 : * \return A negative number if \p instance1 should be assigned first,
188 : * a positive number if \p instance2 should be assigned first,
189 : * or 0 if assignment order doesn't matter
190 : */
191 : static int
192 0 : cmp_instance_by_colocation(const pcmk_resource_t *instance1,
193 : const pcmk_resource_t *instance2)
194 : {
195 0 : int rc = 0;
196 0 : pcmk_node_t *node1 = NULL;
197 0 : pcmk_node_t *node2 = NULL;
198 0 : pcmk_node_t *current_node1 = pcmk__current_node(instance1);
199 0 : pcmk_node_t *current_node2 = pcmk__current_node(instance2);
200 0 : GHashTable *colocated_scores1 = NULL;
201 0 : GHashTable *colocated_scores2 = NULL;
202 :
203 0 : CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
204 : && (instance2 != NULL) && (instance2->parent != NULL)
205 : && (current_node1 != NULL) && (current_node2 != NULL));
206 :
207 : // Create node tables initialized with each node
208 0 : colocated_scores1 = new_node_table(current_node1);
209 0 : colocated_scores2 = new_node_table(current_node2);
210 :
211 : // Apply parental colocations
212 0 : apply_parent_colocations(instance1, &colocated_scores1);
213 0 : apply_parent_colocations(instance2, &colocated_scores2);
214 :
215 : // Find original nodes again, with scores updated for colocations
216 0 : node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
217 0 : node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
218 :
219 : // Compare nodes by updated scores
220 0 : if (node1->weight < node2->weight) {
221 0 : crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
222 : instance1->id, node1->weight, pcmk__node_name(node1),
223 : instance2->id, node2->weight, pcmk__node_name(node2));
224 0 : rc = 1;
225 :
226 0 : } else if (node1->weight > node2->weight) {
227 0 : crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
228 : instance1->id, node1->weight, pcmk__node_name(node1),
229 : instance2->id, node2->weight, pcmk__node_name(node2));
230 0 : rc = -1;
231 : }
232 :
233 0 : g_hash_table_destroy(colocated_scores1);
234 0 : g_hash_table_destroy(colocated_scores2);
235 0 : return rc;
236 : }
237 :
238 : /*!
239 : * \internal
240 : * \brief Check whether a resource or any of its children are failed
241 : *
242 : * \param[in] rsc Resource to check
243 : *
244 : * \return true if \p rsc or any of its children are failed, otherwise false
245 : */
246 : static bool
247 0 : did_fail(const pcmk_resource_t *rsc)
248 : {
249 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
250 0 : return true;
251 : }
252 0 : for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
253 0 : if (did_fail((const pcmk_resource_t *) iter->data)) {
254 0 : return true;
255 : }
256 : }
257 0 : return false;
258 : }
259 :
260 : /*!
261 : * \internal
262 : * \brief Check whether a node is allowed to run a resource
263 : *
264 : * \param[in] rsc Resource to check
265 : * \param[in,out] node Node to check (will be set NULL if not allowed)
266 : *
267 : * \return true if *node is either NULL or allowed for \p rsc, otherwise false
268 : */
269 : static bool
270 0 : node_is_allowed(const pcmk_resource_t *rsc, pcmk_node_t **node)
271 : {
272 0 : if (*node != NULL) {
273 0 : pcmk_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
274 0 : (*node)->details->id);
275 :
276 0 : if ((allowed == NULL) || (allowed->weight < 0)) {
277 0 : pcmk__rsc_trace(rsc, "%s: current location (%s) is unavailable",
278 : rsc->id, pcmk__node_name(*node));
279 0 : *node = NULL;
280 0 : return false;
281 : }
282 : }
283 0 : return true;
284 : }
285 :
286 : /*!
287 : * \internal
288 : * \brief Compare two clone or bundle instances' instance numbers
289 : *
290 : * \param[in] a First instance to compare
291 : * \param[in] b Second instance to compare
292 : *
293 : * \return A negative number if \p a's instance number is lower,
294 : * a positive number if \p b's instance number is lower,
295 : * or 0 if their instance numbers are the same
296 : */
297 : gint
298 0 : pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
299 : {
300 0 : const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
301 0 : const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
302 0 : char *div1 = NULL;
303 0 : char *div2 = NULL;
304 :
305 0 : CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
306 :
307 : // Clone numbers are after a colon, bundle numbers after a dash
308 0 : div1 = strrchr(instance1->id, ':');
309 0 : if (div1 == NULL) {
310 0 : div1 = strrchr(instance1->id, '-');
311 : }
312 0 : div2 = strrchr(instance2->id, ':');
313 0 : if (div2 == NULL) {
314 0 : div2 = strrchr(instance2->id, '-');
315 : }
316 0 : CRM_ASSERT((div1 != NULL) && (div2 != NULL));
317 :
318 0 : return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
319 : }
320 :
321 : /*!
322 : * \internal
323 : * \brief Compare clone or bundle instances according to assignment order
324 : *
325 : * Compare two clone or bundle instances according to the order they should be
326 : * assigned to nodes, preferring (in order):
327 : *
328 : * - Active instance that is less multiply active
329 : * - Instance that is not active on a disallowed node
330 : * - Instance with higher configured priority
331 : * - Active instance whose current node can run resources
332 : * - Active instance whose parent is allowed on current node
333 : * - Active instance whose current node has fewer other instances
334 : * - Active instance
335 : * - Instance that isn't failed
336 : * - Instance whose colocations result in higher score on current node
337 : * - Instance with lower ID in lexicographic order
338 : *
339 : * \param[in] a First instance to compare
340 : * \param[in] b Second instance to compare
341 : *
342 : * \return A negative number if \p a should be assigned first,
343 : * a positive number if \p b should be assigned first,
344 : * or 0 if assignment order doesn't matter
345 : */
346 : gint
347 0 : pcmk__cmp_instance(gconstpointer a, gconstpointer b)
348 : {
349 0 : int rc = 0;
350 0 : pcmk_node_t *node1 = NULL;
351 0 : pcmk_node_t *node2 = NULL;
352 0 : unsigned int nnodes1 = 0;
353 0 : unsigned int nnodes2 = 0;
354 :
355 0 : bool can1 = true;
356 0 : bool can2 = true;
357 :
358 0 : const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
359 0 : const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
360 :
361 0 : CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
362 :
363 0 : node1 = instance1->fns->active_node(instance1, &nnodes1, NULL);
364 0 : node2 = instance2->fns->active_node(instance2, &nnodes2, NULL);
365 :
366 : /* If both instances are running and at least one is multiply
367 : * active, prefer instance that's running on fewer nodes.
368 : */
369 0 : if ((nnodes1 > 0) && (nnodes2 > 0)) {
370 0 : if (nnodes1 < nnodes2) {
371 0 : crm_trace("Assign %s (active on %d) before %s (active on %d): "
372 : "less multiply active",
373 : instance1->id, nnodes1, instance2->id, nnodes2);
374 0 : return -1;
375 :
376 0 : } else if (nnodes1 > nnodes2) {
377 0 : crm_trace("Assign %s (active on %d) after %s (active on %d): "
378 : "more multiply active",
379 : instance1->id, nnodes1, instance2->id, nnodes2);
380 0 : return 1;
381 : }
382 : }
383 :
384 : /* An instance that is either inactive or active on an allowed node is
385 : * preferred over an instance that is active on a no-longer-allowed node.
386 : */
387 0 : can1 = node_is_allowed(instance1, &node1);
388 0 : can2 = node_is_allowed(instance2, &node2);
389 0 : if (can1 && !can2) {
390 0 : crm_trace("Assign %s before %s: not active on a disallowed node",
391 : instance1->id, instance2->id);
392 0 : return -1;
393 :
394 0 : } else if (!can1 && can2) {
395 0 : crm_trace("Assign %s after %s: active on a disallowed node",
396 : instance1->id, instance2->id);
397 0 : return 1;
398 : }
399 :
400 : // Prefer instance with higher configured priority
401 0 : if (instance1->priority > instance2->priority) {
402 0 : crm_trace("Assign %s before %s: priority (%d > %d)",
403 : instance1->id, instance2->id,
404 : instance1->priority, instance2->priority);
405 0 : return -1;
406 :
407 0 : } else if (instance1->priority < instance2->priority) {
408 0 : crm_trace("Assign %s after %s: priority (%d < %d)",
409 : instance1->id, instance2->id,
410 : instance1->priority, instance2->priority);
411 0 : return 1;
412 : }
413 :
414 : // Prefer active instance
415 0 : if ((node1 == NULL) && (node2 == NULL)) {
416 0 : crm_trace("No assignment preference for %s vs. %s: inactive",
417 : instance1->id, instance2->id);
418 0 : return 0;
419 :
420 0 : } else if (node1 == NULL) {
421 0 : crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
422 0 : return 1;
423 :
424 0 : } else if (node2 == NULL) {
425 0 : crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
426 0 : return -1;
427 : }
428 :
429 : // Prefer instance whose current node can run resources
430 0 : can1 = pcmk__node_available(node1, false, false);
431 0 : can2 = pcmk__node_available(node2, false, false);
432 0 : if (can1 && !can2) {
433 0 : crm_trace("Assign %s before %s: current node can run resources",
434 : instance1->id, instance2->id);
435 0 : return -1;
436 :
437 0 : } else if (!can1 && can2) {
438 0 : crm_trace("Assign %s after %s: current node can't run resources",
439 : instance1->id, instance2->id);
440 0 : return 1;
441 : }
442 :
443 : // Prefer instance whose parent is allowed to run on instance's current node
444 0 : node1 = pcmk__top_allowed_node(instance1, node1);
445 0 : node2 = pcmk__top_allowed_node(instance2, node2);
446 0 : if ((node1 == NULL) && (node2 == NULL)) {
447 0 : crm_trace("No assignment preference for %s vs. %s: "
448 : "parent not allowed on either instance's current node",
449 : instance1->id, instance2->id);
450 0 : return 0;
451 :
452 0 : } else if (node1 == NULL) {
453 0 : crm_trace("Assign %s after %s: parent not allowed on current node",
454 : instance1->id, instance2->id);
455 0 : return 1;
456 :
457 0 : } else if (node2 == NULL) {
458 0 : crm_trace("Assign %s before %s: parent allowed on current node",
459 : instance1->id, instance2->id);
460 0 : return -1;
461 : }
462 :
463 : // Prefer instance whose current node is running fewer other instances
464 0 : if (node1->count < node2->count) {
465 0 : crm_trace("Assign %s before %s: fewer active instances on current node",
466 : instance1->id, instance2->id);
467 0 : return -1;
468 :
469 0 : } else if (node1->count > node2->count) {
470 0 : crm_trace("Assign %s after %s: more active instances on current node",
471 : instance1->id, instance2->id);
472 0 : return 1;
473 : }
474 :
475 : // Prefer instance that isn't failed
476 0 : can1 = did_fail(instance1);
477 0 : can2 = did_fail(instance2);
478 0 : if (!can1 && can2) {
479 0 : crm_trace("Assign %s before %s: not failed",
480 : instance1->id, instance2->id);
481 0 : return -1;
482 0 : } else if (can1 && !can2) {
483 0 : crm_trace("Assign %s after %s: failed",
484 : instance1->id, instance2->id);
485 0 : return 1;
486 : }
487 :
488 : // Prefer instance with higher cumulative colocation score on current node
489 0 : rc = cmp_instance_by_colocation(instance1, instance2);
490 0 : if (rc != 0) {
491 0 : return rc;
492 : }
493 :
494 : // Prefer instance with lower instance number
495 0 : rc = pcmk__cmp_instance_number(instance1, instance2);
496 0 : if (rc < 0) {
497 0 : crm_trace("Assign %s before %s: instance number",
498 : instance1->id, instance2->id);
499 0 : } else if (rc > 0) {
500 0 : crm_trace("Assign %s after %s: instance number",
501 : instance1->id, instance2->id);
502 : } else {
503 0 : crm_trace("No assignment preference for %s vs. %s",
504 : instance1->id, instance2->id);
505 : }
506 0 : return rc;
507 : }
508 :
509 : /*!
510 : * \internal
511 : * \brief Increment the parent's instance count after assigning an instance
512 : *
513 : * An instance's parent tracks how many instances have been assigned to each
514 : * node via its pcmk_node_t:count member. After assigning an instance to a node,
515 : * find the corresponding node in the parent's allowed table and increment it.
516 : *
517 : * \param[in,out] instance Instance whose parent to update
518 : * \param[in] assigned_to Node to which the instance was assigned
519 : */
520 : static void
521 0 : increment_parent_count(pcmk_resource_t *instance,
522 : const pcmk_node_t *assigned_to)
523 : {
524 0 : pcmk_node_t *allowed = NULL;
525 :
526 0 : if (assigned_to == NULL) {
527 0 : return;
528 : }
529 0 : allowed = pcmk__top_allowed_node(instance, assigned_to);
530 :
531 0 : if (allowed == NULL) {
532 : /* The instance is allowed on the node, but its parent isn't. This
533 : * shouldn't be possible if the resource is managed, and we won't be
534 : * able to limit the number of instances assigned to the node.
535 : */
536 0 : CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pcmk_rsc_managed));
537 :
538 : } else {
539 0 : allowed->count++;
540 : }
541 : }
542 :
543 : /*!
544 : * \internal
545 : * \brief Assign an instance to a node
546 : *
547 : * \param[in,out] instance Clone instance or bundle replica container
548 : * \param[in] prefer If not NULL, attempt early assignment to this
549 : * node, if still the best choice; otherwise,
550 : * perform final assignment
551 : * \param[in] max_per_node Assign at most this many instances to one node
552 : *
553 : * \return Node to which \p instance is assigned
554 : */
555 : static const pcmk_node_t *
556 0 : assign_instance(pcmk_resource_t *instance, const pcmk_node_t *prefer,
557 : int max_per_node)
558 : {
559 0 : pcmk_node_t *chosen = NULL;
560 :
561 0 : pcmk__rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
562 : ((prefer == NULL)? "no node" : prefer->details->uname));
563 :
564 0 : if (pcmk_is_set(instance->flags, pcmk_rsc_assigning)) {
565 0 : pcmk__rsc_debug(instance,
566 : "Assignment loop detected involving %s colocations",
567 : instance->id);
568 0 : return NULL;
569 : }
570 0 : ban_unavailable_allowed_nodes(instance, max_per_node);
571 :
572 : // Failed early assignments are reversible (stop_if_fail=false)
573 0 : chosen = instance->cmds->assign(instance, prefer, (prefer == NULL));
574 0 : increment_parent_count(instance, chosen);
575 0 : return chosen;
576 : }
577 :
578 : /*!
579 : * \internal
580 : * \brief Try to assign an instance to its current node early
581 : *
582 : * \param[in] rsc Clone or bundle being assigned (for logs only)
583 : * \param[in] instance Clone instance or bundle replica container
584 : * \param[in] current Instance's current node
585 : * \param[in] max_per_node Maximum number of instances per node
586 : * \param[in] available Number of instances still available for assignment
587 : *
588 : * \return \c true if \p instance was successfully assigned to its current node,
589 : * or \c false otherwise
590 : */
591 : static bool
592 0 : assign_instance_early(const pcmk_resource_t *rsc, pcmk_resource_t *instance,
593 : const pcmk_node_t *current, int max_per_node,
594 : int available)
595 : {
596 0 : const pcmk_node_t *chosen = NULL;
597 0 : int reserved = 0;
598 :
599 0 : pcmk_resource_t *parent = instance->parent;
600 0 : GHashTable *allowed_orig = NULL;
601 0 : GHashTable *allowed_orig_parent = parent->allowed_nodes;
602 0 : const pcmk_node_t *allowed_node = NULL;
603 :
604 0 : pcmk__rsc_trace(instance, "Trying to assign %s to its current node %s",
605 : instance->id, pcmk__node_name(current));
606 :
607 0 : allowed_node = g_hash_table_lookup(instance->allowed_nodes,
608 0 : current->details->id);
609 0 : if (!pcmk__node_available(allowed_node, true, false)) {
610 0 : pcmk__rsc_info(instance,
611 : "Not assigning %s to current node %s: unavailable",
612 : instance->id, pcmk__node_name(current));
613 0 : return false;
614 : }
615 :
616 : /* On each iteration, if instance gets assigned to a node other than its
617 : * current one, we reserve one instance for the chosen node, unassign
618 : * instance, restore instance's original node tables, and try again. This
619 : * way, instances are proportionally assigned to nodes based on preferences,
620 : * but shuffling of specific instances is minimized. If a node will be
621 : * assigned instances at all, it preferentially receives instances that are
622 : * currently active there.
623 : *
624 : * parent->allowed_nodes tracks the number of instances assigned to each
625 : * node. If a node already has max_per_node instances assigned,
626 : * ban_unavailable_allowed_nodes() marks it as unavailable.
627 : *
628 : * In the end, we restore the original parent->allowed_nodes to undo the
629 : * changes to counts during tentative assignments. If we successfully
630 : * assigned instance to its current node, we increment that node's counter.
631 : */
632 :
633 : // Back up the allowed node tables of instance and its children recursively
634 0 : pcmk__copy_node_tables(instance, &allowed_orig);
635 :
636 : // Update instances-per-node counts in a scratch table
637 0 : parent->allowed_nodes = pcmk__copy_node_table(parent->allowed_nodes);
638 :
639 0 : while (reserved < available) {
640 0 : chosen = assign_instance(instance, current, max_per_node);
641 :
642 0 : if (pcmk__same_node(chosen, current)) {
643 : // Successfully assigned to current node
644 0 : break;
645 : }
646 :
647 : // Assignment updates scores, so restore to original state
648 0 : pcmk__rsc_debug(instance, "Rolling back node scores for %s",
649 : instance->id);
650 0 : pcmk__restore_node_tables(instance, allowed_orig);
651 :
652 0 : if (chosen == NULL) {
653 : // Assignment failed, so give up
654 0 : pcmk__rsc_info(instance,
655 : "Not assigning %s to current node %s: unavailable",
656 : instance->id, pcmk__node_name(current));
657 0 : pcmk__set_rsc_flags(instance, pcmk_rsc_unassigned);
658 0 : break;
659 : }
660 :
661 : // We prefer more strongly to assign an instance to the chosen node
662 0 : pcmk__rsc_debug(instance,
663 : "Not assigning %s to current node %s: %s is better",
664 : instance->id, pcmk__node_name(current),
665 : pcmk__node_name(chosen));
666 :
667 : // Reserve one instance for the chosen node and try again
668 0 : if (++reserved >= available) {
669 0 : pcmk__rsc_info(instance,
670 : "Not assigning %s to current node %s: "
671 : "other assignments are more important",
672 : instance->id, pcmk__node_name(current));
673 :
674 : } else {
675 0 : pcmk__rsc_debug(instance,
676 : "Reserved an instance of %s for %s. Retrying "
677 : "assignment of %s to %s",
678 : rsc->id, pcmk__node_name(chosen), instance->id,
679 : pcmk__node_name(current));
680 : }
681 :
682 : // Clear this assignment (frees chosen); leave instance counts in parent
683 0 : pcmk__unassign_resource(instance);
684 0 : chosen = NULL;
685 : }
686 :
687 0 : g_hash_table_destroy(allowed_orig);
688 :
689 : // Restore original instances-per-node counts
690 0 : g_hash_table_destroy(parent->allowed_nodes);
691 0 : parent->allowed_nodes = allowed_orig_parent;
692 :
693 0 : if (chosen == NULL) {
694 : // Couldn't assign instance to current node
695 0 : return false;
696 : }
697 0 : pcmk__rsc_trace(instance, "Assigned %s to current node %s",
698 : instance->id, pcmk__node_name(current));
699 0 : increment_parent_count(instance, chosen);
700 0 : return true;
701 : }
702 :
703 : /*!
704 : * \internal
705 : * \brief Reset the node counts of a resource's allowed nodes to zero
706 : *
707 : * \param[in,out] rsc Resource to reset
708 : *
709 : * \return Number of nodes that are available to run resources
710 : */
711 : static unsigned int
712 0 : reset_allowed_node_counts(pcmk_resource_t *rsc)
713 : {
714 0 : unsigned int available_nodes = 0;
715 0 : pcmk_node_t *node = NULL;
716 : GHashTableIter iter;
717 :
718 0 : g_hash_table_iter_init(&iter, rsc->allowed_nodes);
719 0 : while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
720 0 : node->count = 0;
721 0 : if (pcmk__node_available(node, false, false)) {
722 0 : available_nodes++;
723 : }
724 : }
725 0 : return available_nodes;
726 : }
727 :
728 : /*!
729 : * \internal
730 : * \brief Check whether an instance has a preferred node
731 : *
732 : * \param[in] instance Clone instance or bundle replica container
733 : * \param[in] optimal_per_node Optimal number of instances per node
734 : *
735 : * \return Instance's current node if still available, otherwise NULL
736 : */
737 : static const pcmk_node_t *
738 0 : preferred_node(const pcmk_resource_t *instance, int optimal_per_node)
739 : {
740 0 : const pcmk_node_t *node = NULL;
741 0 : const pcmk_node_t *parent_node = NULL;
742 :
743 : // Check whether instance is active, healthy, and not yet assigned
744 0 : if ((instance->running_on == NULL)
745 0 : || !pcmk_is_set(instance->flags, pcmk_rsc_unassigned)
746 0 : || pcmk_is_set(instance->flags, pcmk_rsc_failed)) {
747 0 : return NULL;
748 : }
749 :
750 : // Check whether instance's current node can run resources
751 0 : node = pcmk__current_node(instance);
752 0 : if (!pcmk__node_available(node, true, false)) {
753 0 : pcmk__rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
754 : instance->id, pcmk__node_name(node));
755 0 : return NULL;
756 : }
757 :
758 : // Check whether node already has optimal number of instances assigned
759 0 : parent_node = pcmk__top_allowed_node(instance, node);
760 0 : if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) {
761 0 : pcmk__rsc_trace(instance,
762 : "Not assigning %s to %s early "
763 : "(optimal instances already assigned)",
764 : instance->id, pcmk__node_name(node));
765 0 : return NULL;
766 : }
767 :
768 0 : return node;
769 : }
770 :
771 : /*!
772 : * \internal
773 : * \brief Assign collective instances to nodes
774 : *
775 : * \param[in,out] collective Clone or bundle resource being assigned
776 : * \param[in,out] instances List of clone instances or bundle containers
777 : * \param[in] max_total Maximum instances to assign in total
778 : * \param[in] max_per_node Maximum instances to assign to any one node
779 : */
780 : void
781 0 : pcmk__assign_instances(pcmk_resource_t *collective, GList *instances,
782 : int max_total, int max_per_node)
783 : {
784 : // Reuse node count to track number of assigned instances
785 0 : unsigned int available_nodes = reset_allowed_node_counts(collective);
786 :
787 0 : int optimal_per_node = 0;
788 0 : int assigned = 0;
789 0 : GList *iter = NULL;
790 0 : pcmk_resource_t *instance = NULL;
791 0 : const pcmk_node_t *current = NULL;
792 :
793 0 : if (available_nodes > 0) {
794 0 : optimal_per_node = max_total / available_nodes;
795 : }
796 0 : if (optimal_per_node < 1) {
797 0 : optimal_per_node = 1;
798 : }
799 :
800 0 : pcmk__rsc_debug(collective,
801 : "Assigning up to %d %s instance%s to up to %u node%s "
802 : "(at most %d per host, %d optimal)",
803 : max_total, collective->id, pcmk__plural_s(max_total),
804 : available_nodes, pcmk__plural_s(available_nodes),
805 : max_per_node, optimal_per_node);
806 :
807 : // Assign as many instances as possible to their current location
808 0 : for (iter = instances; (iter != NULL) && (assigned < max_total);
809 0 : iter = iter->next) {
810 0 : int available = max_total - assigned;
811 :
812 0 : instance = iter->data;
813 0 : if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
814 0 : continue; // Already assigned
815 : }
816 :
817 0 : current = preferred_node(instance, optimal_per_node);
818 0 : if ((current != NULL)
819 0 : && assign_instance_early(collective, instance, current,
820 : max_per_node, available)) {
821 0 : assigned++;
822 : }
823 : }
824 :
825 0 : pcmk__rsc_trace(collective, "Assigned %d of %d instance%s to current node",
826 : assigned, max_total, pcmk__plural_s(max_total));
827 :
828 0 : for (iter = instances; iter != NULL; iter = iter->next) {
829 0 : instance = (pcmk_resource_t *) iter->data;
830 :
831 0 : if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
832 0 : continue; // Already assigned
833 : }
834 :
835 0 : if (instance->running_on != NULL) {
836 0 : current = pcmk__current_node(instance);
837 0 : if (pcmk__top_allowed_node(instance, current) == NULL) {
838 0 : const char *unmanaged = "";
839 :
840 0 : if (!pcmk_is_set(instance->flags, pcmk_rsc_managed)) {
841 0 : unmanaged = "Unmanaged resource ";
842 : }
843 0 : crm_notice("%s%s is running on %s which is no longer allowed",
844 : unmanaged, instance->id, pcmk__node_name(current));
845 : }
846 : }
847 :
848 0 : if (assigned >= max_total) {
849 0 : pcmk__rsc_debug(collective,
850 : "Not assigning %s because maximum %d instances "
851 : "already assigned",
852 : instance->id, max_total);
853 0 : resource_location(instance, NULL, -PCMK_SCORE_INFINITY,
854 : "collective_limit_reached", collective->cluster);
855 :
856 0 : } else if (assign_instance(instance, NULL, max_per_node) != NULL) {
857 0 : assigned++;
858 : }
859 : }
860 :
861 0 : pcmk__rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
862 : assigned, max_total, pcmk__plural_s(max_total),
863 : collective->id);
864 0 : }
865 :
/* Bit flags summarizing what the instances of a collective are doing. Each
 * flag occupies its own bit so that flags can be combined with bitwise OR.
 */
enum instance_state {
    instance_starting   = 0x01,
    instance_stopping   = 0x02,

    /* This indicates that some instance is restarting. It's not the same as
     * instance_starting|instance_stopping, which would indicate that some
     * instance is starting, and some instance (not necessarily the same one) is
     * stopping.
     */
    instance_restarting = 0x04,

    instance_active     = 0x08,

    // Every flag above, combined
    instance_all        = instance_starting
                          |instance_stopping
                          |instance_restarting
                          |instance_active,
};
882 :
883 : /*!
884 : * \internal
885 : * \brief Check whether an instance is active, starting, and/or stopping
886 : *
887 : * \param[in] instance Clone instance or bundle replica container
888 : * \param[in,out] state Whether any instance is starting, stopping, etc.
889 : */
890 : static void
891 0 : check_instance_state(const pcmk_resource_t *instance, uint32_t *state)
892 : {
893 0 : const GList *iter = NULL;
894 0 : uint32_t instance_state = 0; // State of just this instance
895 :
896 : // No need to check further if all conditions have already been detected
897 0 : if (pcmk_all_flags_set(*state, instance_all)) {
898 0 : return;
899 : }
900 :
901 : // If instance is a collective (a cloned group), check its children instead
902 0 : if (instance->variant > pcmk_rsc_variant_primitive) {
903 0 : for (iter = instance->children;
904 0 : (iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
905 0 : iter = iter->next) {
906 0 : check_instance_state((const pcmk_resource_t *) iter->data, state);
907 : }
908 0 : return;
909 : }
910 :
911 : // If we get here, instance is a primitive
912 :
913 0 : if (instance->running_on != NULL) {
914 0 : instance_state |= instance_active;
915 : }
916 :
917 : // Check each of the instance's actions for runnable start or stop
918 0 : for (iter = instance->actions;
919 0 : (iter != NULL) && !pcmk_all_flags_set(instance_state,
920 : instance_starting
921 : |instance_stopping);
922 0 : iter = iter->next) {
923 :
924 0 : const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
925 0 : const bool optional = pcmk_is_set(action->flags, pcmk_action_optional);
926 :
927 0 : if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) {
928 0 : if (!optional
929 0 : && pcmk_is_set(action->flags, pcmk_action_runnable)) {
930 :
931 0 : pcmk__rsc_trace(instance, "Instance is starting due to %s",
932 : action->uuid);
933 0 : instance_state |= instance_starting;
934 : } else {
935 0 : pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)",
936 : action->uuid, instance->id,
937 : (optional? "optional" : "unrunnable"));
938 : }
939 :
940 0 : } else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task,
941 : pcmk__str_none)) {
942 : /* Only stop actions can be pseudo-actions for primitives. That
943 : * indicates that the node they are on is being fenced, so the stop
944 : * is implied rather than actually executed.
945 : */
946 0 : if (!optional
947 0 : && pcmk_any_flags_set(action->flags, pcmk_action_pseudo
948 : |pcmk_action_runnable)) {
949 0 : pcmk__rsc_trace(instance, "Instance is stopping due to %s",
950 : action->uuid);
951 0 : instance_state |= instance_stopping;
952 : } else {
953 0 : pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)",
954 : action->uuid, instance->id,
955 : (optional? "optional" : "unrunnable"));
956 : }
957 : }
958 : }
959 :
960 0 : if (pcmk_all_flags_set(instance_state,
961 : instance_starting|instance_stopping)) {
962 0 : instance_state |= instance_restarting;
963 : }
964 0 : *state |= instance_state;
965 : }
966 :
967 : /*!
968 : * \internal
969 : * \brief Create actions for collective resource instances
970 : *
971 : * \param[in,out] collective Clone or bundle resource to create actions for
972 : * \param[in,out] instances List of clone instances or bundle containers
973 : */
974 : void
975 0 : pcmk__create_instance_actions(pcmk_resource_t *collective, GList *instances)
976 : {
977 0 : uint32_t state = 0;
978 :
979 0 : pcmk_action_t *stop = NULL;
980 0 : pcmk_action_t *stopped = NULL;
981 :
982 0 : pcmk_action_t *start = NULL;
983 0 : pcmk_action_t *started = NULL;
984 :
985 0 : pcmk__rsc_trace(collective, "Creating collective instance actions for %s",
986 : collective->id);
987 :
988 : // Create actions for each instance appropriate to its variant
989 0 : for (GList *iter = instances; iter != NULL; iter = iter->next) {
990 0 : pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
991 :
992 0 : instance->cmds->create_actions(instance);
993 0 : check_instance_state(instance, &state);
994 : }
995 :
996 : // Create pseudo-actions for rsc start and started
997 0 : start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START,
998 0 : !pcmk_is_set(state, instance_starting),
999 0 : true);
1000 0 : started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING,
1001 0 : !pcmk_is_set(state, instance_starting),
1002 0 : false);
1003 0 : started->priority = PCMK_SCORE_INFINITY;
1004 0 : if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
1005 0 : pcmk__set_action_flags(started, pcmk_action_runnable);
1006 : }
1007 :
1008 : // Create pseudo-actions for rsc stop and stopped
1009 0 : stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP,
1010 0 : !pcmk_is_set(state, instance_stopping),
1011 0 : true);
1012 0 : stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED,
1013 0 : !pcmk_is_set(state, instance_stopping),
1014 0 : true);
1015 0 : stopped->priority = PCMK_SCORE_INFINITY;
1016 0 : if (!pcmk_is_set(state, instance_restarting)) {
1017 0 : pcmk__set_action_flags(stop, pcmk_action_migratable);
1018 : }
1019 :
1020 0 : if (collective->variant == pcmk_rsc_variant_clone) {
1021 0 : pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
1022 : stopped);
1023 : }
1024 0 : }
1025 :
1026 : /*!
1027 : * \internal
1028 : * \brief Get a list of clone instances or bundle replica containers
1029 : *
1030 : * \param[in] rsc Clone or bundle resource
1031 : *
1032 : * \return Clone instances if \p rsc is a clone, or a newly created list of
1033 : * \p rsc's replica containers if \p rsc is a bundle
1034 : * \note The caller must call free_instance_list() on the result when the list
1035 : * is no longer needed.
1036 : */
1037 : static inline GList *
1038 0 : get_instance_list(const pcmk_resource_t *rsc)
1039 : {
1040 0 : if (rsc->variant == pcmk_rsc_variant_bundle) {
1041 0 : return pe__bundle_containers(rsc);
1042 : } else {
1043 0 : return rsc->children;
1044 : }
1045 : }
1046 :
1047 : /*!
1048 : * \internal
1049 : * \brief Free any memory created by get_instance_list()
1050 : *
1051 : * \param[in] rsc Clone or bundle resource passed to get_instance_list()
1052 : * \param[in,out] list Return value of get_instance_list() for \p rsc
1053 : */
1054 : static inline void
1055 0 : free_instance_list(const pcmk_resource_t *rsc, GList *list)
1056 : {
1057 0 : if (list != rsc->children) {
1058 0 : g_list_free(list);
1059 : }
1060 0 : }
1061 :
1062 : /*!
1063 : * \internal
1064 : * \brief Check whether an instance is compatible with a role and node
1065 : *
1066 : * \param[in] instance Clone instance or bundle replica container
1067 : * \param[in] node Instance must match this node
1068 : * \param[in] role If not pcmk_role_unknown, instance must match this role
1069 : * \param[in] current If true, compare instance's original node and role,
1070 : * otherwise compare assigned next node and role
1071 : *
1072 : * \return true if \p instance is compatible with \p node and \p role,
1073 : * otherwise false
1074 : */
1075 : bool
1076 0 : pcmk__instance_matches(const pcmk_resource_t *instance, const pcmk_node_t *node,
1077 : enum rsc_role_e role, bool current)
1078 : {
1079 0 : pcmk_node_t *instance_node = NULL;
1080 :
1081 0 : CRM_CHECK((instance != NULL) && (node != NULL), return false);
1082 :
1083 0 : if ((role != pcmk_role_unknown)
1084 0 : && (role != instance->fns->state(instance, current))) {
1085 0 : pcmk__rsc_trace(instance,
1086 : "%s is not a compatible instance (role is not %s)",
1087 : instance->id, pcmk_role_text(role));
1088 0 : return false;
1089 : }
1090 :
1091 0 : if (!is_set_recursive(instance, pcmk_rsc_blocked, true)) {
1092 : // We only want instances that haven't failed
1093 0 : instance_node = instance->fns->location(instance, NULL, current);
1094 : }
1095 :
1096 0 : if (instance_node == NULL) {
1097 0 : pcmk__rsc_trace(instance,
1098 : "%s is not a compatible instance "
1099 : "(not assigned to a node)",
1100 : instance->id);
1101 0 : return false;
1102 : }
1103 :
1104 0 : if (!pcmk__same_node(instance_node, node)) {
1105 0 : pcmk__rsc_trace(instance,
1106 : "%s is not a compatible instance "
1107 : "(assigned to %s not %s)",
1108 : instance->id, pcmk__node_name(instance_node),
1109 : pcmk__node_name(node));
1110 0 : return false;
1111 : }
1112 :
1113 0 : return true;
1114 : }
1115 :
// Role text for log messages ("matching" when no particular role is required)
#define display_role(r) \
    (((r) != pcmk_role_unknown)? pcmk_role_text(r) : "matching")
1118 :
1119 : /*!
1120 : * \internal
1121 : * \brief Find an instance that matches a given resource by node and role
1122 : *
1123 : * \param[in] match_rsc Resource that instance must match (for logging only)
1124 : * \param[in] rsc Clone or bundle resource to check for matching instance
1125 : * \param[in] node Instance must match this node
1126 : * \param[in] role If not pcmk_role_unknown, instance must match this role
1127 : * \param[in] current If true, compare instance's original node and role,
1128 : * otherwise compare assigned next node and role
1129 : *
1130 : * \return \p rsc instance matching \p node and \p role if any, otherwise NULL
1131 : */
1132 : static pcmk_resource_t *
1133 0 : find_compatible_instance_on_node(const pcmk_resource_t *match_rsc,
1134 : const pcmk_resource_t *rsc,
1135 : const pcmk_node_t *node, enum rsc_role_e role,
1136 : bool current)
1137 : {
1138 0 : GList *instances = NULL;
1139 :
1140 0 : instances = get_instance_list(rsc);
1141 0 : for (GList *iter = instances; iter != NULL; iter = iter->next) {
1142 0 : pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1143 :
1144 0 : if (pcmk__instance_matches(instance, node, role, current)) {
1145 0 : pcmk__rsc_trace(match_rsc,
1146 : "Found %s %s instance %s compatible with %s on %s",
1147 : display_role(role), rsc->id, instance->id,
1148 : match_rsc->id, pcmk__node_name(node));
1149 0 : free_instance_list(rsc, instances); // Only frees list, not contents
1150 0 : return instance;
1151 : }
1152 : }
1153 0 : free_instance_list(rsc, instances);
1154 :
1155 0 : pcmk__rsc_trace(match_rsc,
1156 : "No %s %s instance found compatible with %s on %s",
1157 : display_role(role), rsc->id, match_rsc->id,
1158 : pcmk__node_name(node));
1159 0 : return NULL;
1160 : }
1161 :
1162 : /*!
1163 : * \internal
1164 : * \brief Find a clone instance or bundle container compatible with a resource
1165 : *
1166 : * \param[in] match_rsc Resource that instance must match
1167 : * \param[in] rsc Clone or bundle resource to check for matching instance
1168 : * \param[in] role If not pcmk_role_unknown, instance must match this role
1169 : * \param[in] current If true, compare instance's original node and role,
1170 : * otherwise compare assigned next node and role
1171 : *
1172 : * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
1173 : * if any, otherwise NULL
1174 : */
1175 : pcmk_resource_t *
1176 0 : pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc,
1177 : const pcmk_resource_t *rsc, enum rsc_role_e role,
1178 : bool current)
1179 : {
1180 0 : pcmk_resource_t *instance = NULL;
1181 0 : GList *nodes = NULL;
1182 0 : const pcmk_node_t *node = NULL;
1183 :
1184 : // If match_rsc has a node, check only that node
1185 0 : node = match_rsc->fns->location(match_rsc, NULL, current);
1186 0 : if (node != NULL) {
1187 0 : return find_compatible_instance_on_node(match_rsc, rsc, node, role,
1188 : current);
1189 : }
1190 :
1191 : // Otherwise check for an instance matching any of match_rsc's allowed nodes
1192 0 : nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes),
1193 : NULL);
1194 0 : for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
1195 0 : iter = iter->next) {
1196 0 : instance = find_compatible_instance_on_node(match_rsc, rsc,
1197 0 : (pcmk_node_t *) iter->data,
1198 : role, current);
1199 : }
1200 :
1201 0 : if (instance == NULL) {
1202 0 : pcmk__rsc_debug(rsc, "No %s instance found compatible with %s",
1203 : rsc->id, match_rsc->id);
1204 : }
1205 0 : g_list_free(nodes);
1206 0 : return instance;
1207 : }
1208 :
1209 : /*!
1210 : * \internal
1211 : * \brief Unassign an instance if mandatory ordering has no interleave match
1212 : *
1213 : * \param[in] first 'First' action in an ordering
1214 : * \param[in] then 'Then' action in an ordering
1215 : * \param[in,out] then_instance 'Then' instance that has no interleave match
1216 : * \param[in] type Group of enum pcmk__action_relation_flags
1217 : * \param[in] current If true, "then" action is stopped or demoted
1218 : *
1219 : * \return true if \p then_instance was unassigned, otherwise false
1220 : */
1221 : static bool
1222 0 : unassign_if_mandatory(const pcmk_action_t *first, const pcmk_action_t *then,
1223 : pcmk_resource_t *then_instance, uint32_t type,
1224 : bool current)
1225 : {
1226 : // Allow "then" instance to go down even without an interleave match
1227 0 : if (current) {
1228 0 : pcmk__rsc_trace(then->rsc,
1229 : "%s has no instance to order before stopping "
1230 : "or demoting %s",
1231 : first->rsc->id, then_instance->id);
1232 :
1233 : /* If the "first" action must be runnable, but there is no "first"
1234 : * instance, the "then" instance must not be allowed to come up.
1235 : */
1236 0 : } else if (pcmk_any_flags_set(type, pcmk__ar_unrunnable_first_blocks
1237 : |pcmk__ar_first_implies_then)) {
1238 0 : pcmk__rsc_info(then->rsc,
1239 : "Inhibiting %s from being active "
1240 : "because there is no %s instance to interleave",
1241 : then_instance->id, first->rsc->id);
1242 0 : return pcmk__assign_resource(then_instance, NULL, true, true);
1243 : }
1244 0 : return false;
1245 : }
1246 :
1247 : /*!
1248 : * \internal
1249 : * \brief Find first matching action for a clone instance or bundle container
1250 : *
1251 : * \param[in] action Action in an interleaved ordering
1252 : * \param[in] instance Clone instance or bundle container being interleaved
1253 : * \param[in] action_name Action to look for
1254 : * \param[in] node If not NULL, require action to be on this node
1255 : * \param[in] for_first If true, \p instance is the 'first' resource in the
1256 : * ordering, otherwise it is the 'then' resource
1257 : *
1258 : * \return First action for \p instance (or in some cases if \p instance is a
1259 : * bundle container, its containerized resource) that matches
1260 : * \p action_name and \p node if any, otherwise NULL
1261 : */
1262 : static pcmk_action_t *
1263 0 : find_instance_action(const pcmk_action_t *action, const pcmk_resource_t *instance,
1264 : const char *action_name, const pcmk_node_t *node,
1265 : bool for_first)
1266 : {
1267 0 : const pcmk_resource_t *rsc = NULL;
1268 0 : pcmk_action_t *matching_action = NULL;
1269 :
1270 : /* If instance is a bundle container, sometimes we should interleave the
1271 : * action for the container itself, and sometimes for the containerized
1272 : * resource.
1273 : *
1274 : * For example, given "start bundle A then bundle B", B likely requires the
1275 : * service inside A's container to be active, rather than just the
1276 : * container, so we should interleave the action for A's containerized
1277 : * resource. On the other hand, it's possible B's container itself requires
1278 : * something from A, so we should interleave the action for B's container.
1279 : *
1280 : * Essentially, for 'first', we should use the containerized resource for
1281 : * everything except stop, and for 'then', we should use the container for
1282 : * everything except promote and demote (which can only be performed on the
1283 : * containerized resource).
1284 : */
1285 0 : if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP,
1286 : PCMK_ACTION_STOPPED, NULL))
1287 :
1288 0 : || (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE,
1289 : PCMK_ACTION_PROMOTED,
1290 : PCMK_ACTION_DEMOTE,
1291 : PCMK_ACTION_DEMOTED, NULL))) {
1292 :
1293 0 : rsc = pe__get_rsc_in_container(instance);
1294 : }
1295 0 : if (rsc == NULL) {
1296 0 : rsc = instance; // No containerized resource, use instance itself
1297 : } else {
1298 0 : node = NULL; // Containerized actions are on bundle-created guest
1299 : }
1300 :
1301 0 : matching_action = find_first_action(rsc->actions, NULL, action_name, node);
1302 0 : if (matching_action != NULL) {
1303 0 : return matching_action;
1304 : }
1305 :
1306 0 : if (pcmk_is_set(instance->flags, pcmk_rsc_removed)
1307 0 : || pcmk__str_any_of(action_name, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE,
1308 : NULL)) {
1309 0 : crm_trace("No %s action found for %s%s",
1310 : action_name,
1311 : pcmk_is_set(instance->flags, pcmk_rsc_removed)? "orphan " : "",
1312 : instance->id);
1313 : } else {
1314 0 : crm_err("No %s action found for %s to interleave (bug?)",
1315 : action_name, instance->id);
1316 : }
1317 0 : return NULL;
1318 : }
1319 :
1320 : /*!
1321 : * \internal
1322 : * \brief Get the original action name of a bundle or clone action
1323 : *
1324 : * Given an action for a bundle or clone, get the original action name,
1325 : * mapping notify to the action being notified, and if the instances are
1326 : * primitives, mapping completion actions to the action that was completed
1327 : * (for example, stopped to stop).
1328 : *
1329 : * \param[in] action Clone or bundle action to check
1330 : *
1331 : * \return Original action name for \p action
1332 : */
1333 : static const char *
1334 0 : orig_action_name(const pcmk_action_t *action)
1335 : {
1336 : // Any instance will do
1337 0 : const pcmk_resource_t *instance = action->rsc->children->data;
1338 :
1339 0 : char *action_type = NULL;
1340 0 : const char *action_name = action->task;
1341 0 : enum action_tasks orig_task = pcmk_action_unspecified;
1342 :
1343 0 : if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
1344 : PCMK_ACTION_NOTIFIED, NULL)) {
1345 : // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
1346 0 : CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
1347 : return pcmk_action_text(pcmk_action_unspecified));
1348 0 : action_name = strstr(action_type, "_notify_");
1349 0 : CRM_CHECK(action_name != NULL,
1350 : return pcmk_action_text(pcmk_action_unspecified));
1351 0 : action_name += strlen("_notify_");
1352 : }
1353 0 : orig_task = get_complex_task(instance, action_name);
1354 0 : free(action_type);
1355 0 : return pcmk_action_text(orig_task);
1356 : }
1357 :
1358 : /*!
1359 : * \internal
1360 : * \brief Update two interleaved actions according to an ordering between them
1361 : *
1362 : * Given information about an ordering of two interleaved actions, update the
1363 : * actions' flags (and runnable_before members if appropriate) as appropriate
1364 : * for the ordering. Effects may cascade to other orderings involving the
1365 : * actions as well.
1366 : *
1367 : * \param[in,out] first 'First' action in an ordering
1368 : * \param[in,out] then 'Then' action in an ordering
1369 : * \param[in] node If not NULL, limit scope of ordering to this node
1370 : * \param[in] filter Action flags to limit scope of certain updates (may
1371 : * include pcmk_action_optional to affect only
1372 : * mandatory actions, and pcmk_action_runnable to
1373 : * affect only runnable actions)
1374 : * \param[in] type Group of enum pcmk__action_relation_flags to apply
1375 : *
1376 : * \return Group of enum pcmk__updated flags indicating what was updated
1377 : */
1378 : static uint32_t
1379 0 : update_interleaved_actions(pcmk_action_t *first, pcmk_action_t *then,
1380 : const pcmk_node_t *node, uint32_t filter,
1381 : uint32_t type)
1382 : {
1383 0 : GList *instances = NULL;
1384 0 : uint32_t changed = pcmk__updated_none;
1385 0 : const char *orig_first_task = orig_action_name(first);
1386 :
1387 : // Stops and demotes must be interleaved with instance on current node
1388 0 : bool current = pcmk__ends_with(first->uuid, "_" PCMK_ACTION_STOPPED "_0")
1389 0 : || pcmk__ends_with(first->uuid,
1390 : "_" PCMK_ACTION_DEMOTED "_0");
1391 :
1392 : // Update the specified actions for each "then" instance individually
1393 0 : instances = get_instance_list(then->rsc);
1394 0 : for (GList *iter = instances; iter != NULL; iter = iter->next) {
1395 0 : pcmk_resource_t *first_instance = NULL;
1396 0 : pcmk_resource_t *then_instance = iter->data;
1397 :
1398 0 : pcmk_action_t *first_action = NULL;
1399 0 : pcmk_action_t *then_action = NULL;
1400 :
1401 : // Find a "first" instance to interleave with this "then" instance
1402 0 : first_instance = pcmk__find_compatible_instance(then_instance,
1403 0 : first->rsc,
1404 : pcmk_role_unknown,
1405 : current);
1406 :
1407 0 : if (first_instance == NULL) { // No instance can be interleaved
1408 0 : if (unassign_if_mandatory(first, then, then_instance, type,
1409 : current)) {
1410 0 : pcmk__set_updated_flags(changed, first, pcmk__updated_then);
1411 : }
1412 0 : continue;
1413 : }
1414 :
1415 0 : first_action = find_instance_action(first, first_instance,
1416 : orig_first_task, node, true);
1417 0 : if (first_action == NULL) {
1418 0 : continue;
1419 : }
1420 :
1421 0 : then_action = find_instance_action(then, then_instance, then->task,
1422 : node, false);
1423 0 : if (then_action == NULL) {
1424 0 : continue;
1425 : }
1426 :
1427 0 : if (order_actions(first_action, then_action, type)) {
1428 0 : pcmk__set_updated_flags(changed, first,
1429 : pcmk__updated_first|pcmk__updated_then);
1430 : }
1431 :
1432 0 : changed |= then_instance->cmds->update_ordered_actions(
1433 : first_action, then_action, node,
1434 0 : first_instance->cmds->action_flags(first_action, node), filter,
1435 0 : type, then->rsc->cluster);
1436 : }
1437 0 : free_instance_list(then->rsc, instances);
1438 0 : return changed;
1439 : }
1440 :
1441 : /*!
1442 : * \internal
1443 : * \brief Check whether two actions in an ordering can be interleaved
1444 : *
1445 : * \param[in] first 'First' action in the ordering
1446 : * \param[in] then 'Then' action in the ordering
1447 : *
1448 : * \return true if \p first and \p then can be interleaved, otherwise false
1449 : */
1450 : static bool
1451 0 : can_interleave_actions(const pcmk_action_t *first, const pcmk_action_t *then)
1452 : {
1453 0 : bool interleave = false;
1454 0 : pcmk_resource_t *rsc = NULL;
1455 :
1456 0 : if ((first->rsc == NULL) || (then->rsc == NULL)) {
1457 0 : crm_trace("Not interleaving %s with %s: not resource actions",
1458 : first->uuid, then->uuid);
1459 0 : return false;
1460 : }
1461 :
1462 0 : if (first->rsc == then->rsc) {
1463 0 : crm_trace("Not interleaving %s with %s: same resource",
1464 : first->uuid, then->uuid);
1465 0 : return false;
1466 : }
1467 :
1468 0 : if ((first->rsc->variant < pcmk_rsc_variant_clone)
1469 0 : || (then->rsc->variant < pcmk_rsc_variant_clone)) {
1470 0 : crm_trace("Not interleaving %s with %s: not clones or bundles",
1471 : first->uuid, then->uuid);
1472 0 : return false;
1473 : }
1474 :
1475 0 : if (pcmk__ends_with(then->uuid, "_stop_0")
1476 0 : || pcmk__ends_with(then->uuid, "_demote_0")) {
1477 0 : rsc = first->rsc;
1478 : } else {
1479 0 : rsc = then->rsc;
1480 : }
1481 :
1482 0 : interleave = crm_is_true(g_hash_table_lookup(rsc->meta,
1483 : PCMK_META_INTERLEAVE));
1484 0 : pcmk__rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
1485 : first->uuid, then->uuid, (interleave? "" : "not "),
1486 : rsc->id);
1487 0 : return interleave;
1488 : }
1489 :
1490 : /*!
1491 : * \internal
1492 : * \brief Update non-interleaved instance actions according to an ordering
1493 : *
1494 : * Given information about an ordering of two non-interleaved actions, update
1495 : * the actions' flags (and runnable_before members if appropriate) as
1496 : * appropriate for the ordering. Effects may cascade to other orderings
1497 : * involving the actions as well.
1498 : *
1499 : * \param[in,out] instance Clone instance or bundle container
1500 : * \param[in,out] first "First" action in ordering
1501 : * \param[in] then "Then" action in ordering (for \p instance's parent)
1502 : * \param[in] node If not NULL, limit scope of ordering to this node
1503 : * \param[in] flags Action flags for \p first for ordering purposes
1504 : * \param[in] filter Action flags to limit scope of certain updates (may
1505 : * include pcmk_action_optional to affect only
1506 : * mandatory actions, and pcmk_action_runnable to
1507 : * affect only runnable actions)
1508 : * \param[in] type Group of enum pcmk__action_relation_flags to apply
1509 : *
1510 : * \return Group of enum pcmk__updated flags indicating what was updated
1511 : */
1512 : static uint32_t
1513 0 : update_noninterleaved_actions(pcmk_resource_t *instance, pcmk_action_t *first,
1514 : const pcmk_action_t *then, const pcmk_node_t *node,
1515 : uint32_t flags, uint32_t filter, uint32_t type)
1516 : {
1517 0 : pcmk_action_t *instance_action = NULL;
1518 0 : uint32_t instance_flags = 0;
1519 0 : uint32_t changed = pcmk__updated_none;
1520 :
1521 : // Check whether instance has an equivalent of "then" action
1522 0 : instance_action = find_first_action(instance->actions, NULL, then->task,
1523 : node);
1524 0 : if (instance_action == NULL) {
1525 0 : return changed;
1526 : }
1527 :
1528 : // Check whether action is runnable
1529 0 : instance_flags = instance->cmds->action_flags(instance_action, node);
1530 0 : if (!pcmk_is_set(instance_flags, pcmk_action_runnable)) {
1531 0 : return changed;
1532 : }
1533 :
1534 : // If so, update actions for the instance
1535 0 : changed = instance->cmds->update_ordered_actions(first, instance_action,
1536 : node, flags, filter, type,
1537 : instance->cluster);
1538 :
1539 : // Propagate any changes to later actions
1540 0 : if (pcmk_is_set(changed, pcmk__updated_then)) {
1541 0 : for (GList *after_iter = instance_action->actions_after;
1542 0 : after_iter != NULL; after_iter = after_iter->next) {
1543 0 : pcmk__related_action_t *after = after_iter->data;
1544 :
1545 0 : pcmk__update_action_for_orderings(after->action, instance->cluster);
1546 : }
1547 : }
1548 :
1549 0 : return changed;
1550 : }
1551 :
1552 : /*!
1553 : * \internal
1554 : * \brief Update two actions according to an ordering between them
1555 : *
1556 : * Given information about an ordering of two clone or bundle actions, update
1557 : * the actions' flags (and runnable_before members if appropriate) as
1558 : * appropriate for the ordering. Effects may cascade to other orderings
1559 : * involving the actions as well.
1560 : *
1561 : * \param[in,out] first 'First' action in an ordering
1562 : * \param[in,out] then 'Then' action in an ordering
1563 : * \param[in] node If not NULL, limit scope of ordering to this node
1564 : * (only used when interleaving instances)
1565 : * \param[in] flags Action flags for \p first for ordering purposes
1566 : * \param[in] filter Action flags to limit scope of certain updates (may
1567 : * include pcmk_action_optional to affect only
1568 : * mandatory actions, and pcmk_action_runnable to
1569 : * affect only runnable actions)
1570 : * \param[in] type Group of enum pcmk__action_relation_flags to apply
1571 : * \param[in,out] scheduler Scheduler data
1572 : *
1573 : * \return Group of enum pcmk__updated flags indicating what was updated
1574 : */
1575 : uint32_t
1576 0 : pcmk__instance_update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then,
1577 : const pcmk_node_t *node, uint32_t flags,
1578 : uint32_t filter, uint32_t type,
1579 : pcmk_scheduler_t *scheduler)
1580 : {
1581 0 : CRM_ASSERT((first != NULL) && (then != NULL) && (scheduler != NULL));
1582 :
1583 0 : if (then->rsc == NULL) {
1584 0 : return pcmk__updated_none;
1585 :
1586 0 : } else if (can_interleave_actions(first, then)) {
1587 0 : return update_interleaved_actions(first, then, node, filter, type);
1588 :
1589 : } else {
1590 0 : uint32_t changed = pcmk__updated_none;
1591 0 : GList *instances = get_instance_list(then->rsc);
1592 :
1593 : // Update actions for the clone or bundle resource itself
1594 0 : changed |= pcmk__update_ordered_actions(first, then, node, flags,
1595 : filter, type, scheduler);
1596 :
1597 : // Update the 'then' clone instances or bundle containers individually
1598 0 : for (GList *iter = instances; iter != NULL; iter = iter->next) {
1599 0 : pcmk_resource_t *instance = iter->data;
1600 :
1601 0 : changed |= update_noninterleaved_actions(instance, first, then,
1602 : node, flags, filter, type);
1603 : }
1604 0 : free_instance_list(then->rsc, instances);
1605 0 : return changed;
1606 : }
1607 : }
1608 :
/* Clear a flag in a local summary of an action's flags (not in the action
 * itself), tracing the change under the ID of the action's resource.
 *
 * flags must be a modifiable lvalue. The arguments are parenthesized in the
 * expansion so that arbitrary expressions may be passed safely.
 */
#define pe__clear_action_summary_flags(flags, action, flag) do {        \
        (flags) = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                       "Action summary",                \
                                       (action)->rsc->id,               \
                                       (flags), (flag), #flag);         \
    } while (0)
1614 :
1615 : /*!
1616 : * \internal
1617 : * \brief Return action flags for a given clone or bundle action
1618 : *
1619 : * \param[in,out] action Action for a clone or bundle
1620 : * \param[in] instances Clone instances or bundle containers
1621 : * \param[in] node If not NULL, limit effects to this node
1622 : *
1623 : * \return Flags appropriate to \p action on \p node
1624 : */
1625 : uint32_t
1626 0 : pcmk__collective_action_flags(pcmk_action_t *action, const GList *instances,
1627 : const pcmk_node_t *node)
1628 : {
1629 0 : bool any_runnable = false;
1630 0 : const char *action_name = orig_action_name(action);
1631 :
1632 : // Set original assumptions (optional and runnable may be cleared below)
1633 0 : uint32_t flags = pcmk_action_optional
1634 : |pcmk_action_runnable
1635 : |pcmk_action_pseudo;
1636 :
1637 0 : for (const GList *iter = instances; iter != NULL; iter = iter->next) {
1638 0 : const pcmk_resource_t *instance = iter->data;
1639 0 : const pcmk_node_t *instance_node = NULL;
1640 0 : pcmk_action_t *instance_action = NULL;
1641 : uint32_t instance_flags;
1642 :
1643 : // Node is relevant only to primitive instances
1644 0 : if (instance->variant == pcmk_rsc_variant_primitive) {
1645 0 : instance_node = node;
1646 : }
1647 :
1648 0 : instance_action = find_first_action(instance->actions, NULL,
1649 : action_name, instance_node);
1650 0 : if (instance_action == NULL) {
1651 0 : pcmk__rsc_trace(action->rsc, "%s has no %s action on %s",
1652 : instance->id, action_name, pcmk__node_name(node));
1653 0 : continue;
1654 : }
1655 :
1656 0 : pcmk__rsc_trace(action->rsc, "%s has %s for %s on %s",
1657 : instance->id, instance_action->uuid, action_name,
1658 : pcmk__node_name(node));
1659 :
1660 0 : instance_flags = instance->cmds->action_flags(instance_action, node);
1661 :
1662 : // If any instance action is mandatory, so is the collective action
1663 0 : if (pcmk_is_set(flags, pcmk_action_optional)
1664 0 : && !pcmk_is_set(instance_flags, pcmk_action_optional)) {
1665 0 : pcmk__rsc_trace(instance, "%s is mandatory because %s is",
1666 : action->uuid, instance_action->uuid);
1667 0 : pe__clear_action_summary_flags(flags, action,
1668 : pcmk_action_optional);
1669 0 : pcmk__clear_action_flags(action, pcmk_action_optional);
1670 : }
1671 :
1672 : // If any instance action is runnable, so is the collective action
1673 0 : if (pcmk_is_set(instance_flags, pcmk_action_runnable)) {
1674 0 : any_runnable = true;
1675 : }
1676 : }
1677 :
1678 0 : if (!any_runnable) {
1679 0 : pcmk__rsc_trace(action->rsc,
1680 : "%s is not runnable because no instance can run %s",
1681 : action->uuid, action_name);
1682 0 : pe__clear_action_summary_flags(flags, action, pcmk_action_runnable);
1683 0 : if (node == NULL) {
1684 0 : pcmk__clear_action_flags(action, pcmk_action_runnable);
1685 : }
1686 : }
1687 :
1688 0 : return flags;
1689 : }
|