Line data Source code
1 : /*
2 : * Copyright 2004-2024 the Pacemaker project contributors
3 : *
4 : * The version control history for this file may have further details.
5 : *
6 : * This source code is licensed under the GNU General Public License version 2
7 : * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 : */
9 :
10 : #include <crm_internal.h>
11 :
12 : #include <stdbool.h>
13 : #include <stdint.h> // uint8_t, uint32_t
14 :
15 : #include <crm/common/xml.h>
16 : #include <pacemaker-internal.h>
17 :
18 : #include "libpacemaker_private.h"
19 :
20 : static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
21 : bool optional);
22 : static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
23 : bool optional);
24 : static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
25 : bool optional);
26 : static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
27 : bool optional);
28 : static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node,
29 : bool optional);
30 :
31 : #define RSC_ROLE_MAX (pcmk_role_promoted + 1)
32 :
33 : static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
34 : /* This array lists the immediate next role when transitioning from one role
35 : * to a target role. For example, when going from Stopped to Promoted, the
36 : * next role is Unpromoted, because the resource must be started before it
37 : * can be promoted. The current state then becomes Started, which is fed
38 : * into this array again, giving a next role of Promoted.
39 : *
40 : * Current role Immediate next role Final target role
41 : * ------------ ------------------- -----------------
42 : */
43 : /* Unknown */ { pcmk_role_unknown, /* Unknown */
44 : pcmk_role_stopped, /* Stopped */
45 : pcmk_role_stopped, /* Started */
46 : pcmk_role_stopped, /* Unpromoted */
47 : pcmk_role_stopped, /* Promoted */
48 : },
49 : /* Stopped */ { pcmk_role_stopped, /* Unknown */
50 : pcmk_role_stopped, /* Stopped */
51 : pcmk_role_started, /* Started */
52 : pcmk_role_unpromoted, /* Unpromoted */
53 : pcmk_role_unpromoted, /* Promoted */
54 : },
55 : /* Started */ { pcmk_role_stopped, /* Unknown */
56 : pcmk_role_stopped, /* Stopped */
57 : pcmk_role_started, /* Started */
58 : pcmk_role_unpromoted, /* Unpromoted */
59 : pcmk_role_promoted, /* Promoted */
60 : },
61 : /* Unpromoted */ { pcmk_role_stopped, /* Unknown */
62 : pcmk_role_stopped, /* Stopped */
63 : pcmk_role_stopped, /* Started */
64 : pcmk_role_unpromoted, /* Unpromoted */
65 : pcmk_role_promoted, /* Promoted */
66 : },
67 : /* Promoted */ { pcmk_role_stopped, /* Unknown */
68 : pcmk_role_unpromoted, /* Stopped */
69 : pcmk_role_unpromoted, /* Started */
70 : pcmk_role_unpromoted, /* Unpromoted */
71 : pcmk_role_promoted, /* Promoted */
72 : },
73 : };
74 :
75 : /*!
76 : * \internal
77 : * \brief Function to schedule actions needed for a role change
78 : *
79 : * \param[in,out] rsc Resource whose role is changing
80 : * \param[in,out] node Node where resource will be in its next role
81 : * \param[in] optional Whether scheduled actions should be optional
82 : */
83 : typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node,
84 : bool optional);
85 :
86 : static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
87 : /* This array lists the function needed to transition directly from one role
88 : * to another. NULL indicates that nothing is needed.
89 : *
90 : * Current role Transition function Next role
91 : * ------------ ------------------- ----------
92 : */
93 : /* Unknown */ { assert_role_error, /* Unknown */
94 : stop_resource, /* Stopped */
95 : assert_role_error, /* Started */
96 : assert_role_error, /* Unpromoted */
97 : assert_role_error, /* Promoted */
98 : },
99 : /* Stopped */ { assert_role_error, /* Unknown */
100 : NULL, /* Stopped */
101 : start_resource, /* Started */
102 : start_resource, /* Unpromoted */
103 : assert_role_error, /* Promoted */
104 : },
105 : /* Started */ { assert_role_error, /* Unknown */
106 : stop_resource, /* Stopped */
107 : NULL, /* Started */
108 : NULL, /* Unpromoted */
109 : promote_resource, /* Promoted */
110 : },
111 : /* Unpromoted */ { assert_role_error, /* Unknown */
112 : stop_resource, /* Stopped */
113 : stop_resource, /* Started */
114 : NULL, /* Unpromoted */
115 : promote_resource, /* Promoted */
116 : },
117 : /* Promoted */ { assert_role_error, /* Unknown */
118 : demote_resource, /* Stopped */
119 : demote_resource, /* Started */
120 : demote_resource, /* Unpromoted */
121 : NULL, /* Promoted */
122 : },
123 : };
124 :
125 : /*!
126 : * \internal
127 : * \brief Get a list of a resource's allowed nodes sorted by node score
128 : *
129 : * \param[in] rsc Resource to check
130 : *
131 : * \return List of allowed nodes sorted by node score
132 : */
133 : static GList *
134 0 : sorted_allowed_nodes(const pcmk_resource_t *rsc)
135 : {
136 0 : if (rsc->allowed_nodes != NULL) {
137 0 : GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
138 :
139 0 : if (nodes != NULL) {
140 0 : return pcmk__sort_nodes(nodes, pcmk__current_node(rsc));
141 : }
142 : }
143 0 : return NULL;
144 : }
145 :
146 : /*!
147 : * \internal
148 : * \brief Assign a resource to its best allowed node, if possible
149 : *
150 : * \param[in,out] rsc Resource to choose a node for
151 : * \param[in] prefer If not \c NULL, prefer this node when all else
152 : * equal
153 : * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
154 : * node, set next role to stopped and update
155 : * existing actions
156 : *
157 : * \return true if \p rsc could be assigned to a node, otherwise false
158 : *
159 : * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
160 : * completely undo the assignment. A successful assignment can be either
161 : * undone or left alone as final. A failed assignment has the same effect
162 : * as calling pcmk__unassign_resource(); there are no side effects on
163 : * roles or actions.
164 : */
165 : static bool
166 0 : assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
167 : bool stop_if_fail)
168 : {
169 0 : GList *nodes = NULL;
170 0 : pcmk_node_t *chosen = NULL;
171 0 : pcmk_node_t *best = NULL;
172 0 : const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
173 :
174 0 : if (prefer == NULL) {
175 0 : prefer = most_free_node;
176 : }
177 :
178 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
179 : // We've already finished assignment of resources to nodes
180 0 : return rsc->allocated_to != NULL;
181 : }
182 :
183 : // Sort allowed nodes by score
184 0 : nodes = sorted_allowed_nodes(rsc);
185 0 : if (nodes != NULL) {
186 0 : best = (pcmk_node_t *) nodes->data; // First node has best score
187 : }
188 :
189 0 : if ((prefer != NULL) && (nodes != NULL)) {
190 : // Get the allowed node version of prefer
191 0 : chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
192 :
193 0 : if (chosen == NULL) {
194 0 : pcmk__rsc_trace(rsc, "Preferred node %s for %s was unknown",
195 : pcmk__node_name(prefer), rsc->id);
196 :
197 : /* Favor the preferred node as long as its score is at least as good as
198 : * the best allowed node's.
199 : *
200 : * An alternative would be to favor the preferred node even if the best
201 : * node is better, when the best node's score is less than INFINITY.
202 : */
203 0 : } else if (chosen->weight < best->weight) {
204 0 : pcmk__rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
205 : pcmk__node_name(chosen), rsc->id);
206 0 : chosen = NULL;
207 :
208 0 : } else if (!pcmk__node_available(chosen, true, false)) {
209 0 : pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable",
210 : pcmk__node_name(chosen), rsc->id);
211 0 : chosen = NULL;
212 :
213 : } else {
214 0 : pcmk__rsc_trace(rsc,
215 : "Chose preferred node %s for %s "
216 : "(ignoring %d candidates)",
217 : pcmk__node_name(chosen), rsc->id,
218 : g_list_length(nodes));
219 : }
220 : }
221 :
222 0 : if ((chosen == NULL) && (best != NULL)) {
223 : /* Either there is no preferred node, or the preferred node is not
224 : * suitable, but another node is allowed to run the resource.
225 : */
226 :
227 0 : chosen = best;
228 :
229 0 : if (!pcmk__is_unique_clone(rsc->parent)
230 0 : && (chosen->weight > 0) // Zero not acceptable
231 0 : && pcmk__node_available(chosen, false, false)) {
232 : /* If the resource is already running on a node, prefer that node if
233 : * it is just as good as the chosen node.
234 : *
235 : * We don't do this for unique clone instances, because
236 : * pcmk__assign_instances() has already assigned instances to their
237 : * running nodes when appropriate, and if we get here, we don't want
238 : * remaining unassigned instances to prefer a node that's already
239 : * running another instance.
240 : */
241 0 : pcmk_node_t *running = pcmk__current_node(rsc);
242 :
243 0 : if (running == NULL) {
244 : // Nothing to do
245 :
246 0 : } else if (!pcmk__node_available(running, true, false)) {
247 0 : pcmk__rsc_trace(rsc,
248 : "Current node for %s (%s) can't run resources",
249 : rsc->id, pcmk__node_name(running));
250 :
251 : } else {
252 0 : int nodes_with_best_score = 1;
253 :
254 0 : for (GList *iter = nodes->next; iter; iter = iter->next) {
255 0 : pcmk_node_t *allowed = (pcmk_node_t *) iter->data;
256 :
257 0 : if (allowed->weight != chosen->weight) {
258 : // The nodes are sorted by score, so no more are equal
259 0 : break;
260 : }
261 0 : if (pcmk__same_node(allowed, running)) {
262 : // Scores are equal, so prefer the current node
263 0 : chosen = allowed;
264 : }
265 0 : nodes_with_best_score++;
266 : }
267 :
268 0 : if (nodes_with_best_score > 1) {
269 0 : uint8_t log_level = LOG_INFO;
270 :
271 0 : if (chosen->weight >= PCMK_SCORE_INFINITY) {
272 0 : log_level = LOG_WARNING;
273 : }
274 0 : do_crm_log(log_level,
275 : "Chose %s for %s from %d nodes with score %s",
276 : pcmk__node_name(chosen), rsc->id,
277 : nodes_with_best_score,
278 : pcmk_readable_score(chosen->weight));
279 : }
280 : }
281 : }
282 :
283 0 : pcmk__rsc_trace(rsc, "Chose %s for %s from %d candidates",
284 : pcmk__node_name(chosen), rsc->id, g_list_length(nodes));
285 : }
286 :
287 0 : pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
288 0 : g_list_free(nodes);
289 0 : return rsc->allocated_to != NULL;
290 : }
291 :
292 : /*!
293 : * \internal
294 : * \brief Apply a "this with" colocation to a node's allowed node scores
295 : *
296 : * \param[in,out] colocation Colocation to apply
297 : * \param[in,out] rsc Resource being assigned
298 : */
299 : static void
300 0 : apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc)
301 : {
302 0 : GHashTable *archive = NULL;
303 0 : pcmk_resource_t *other = colocation->primary;
304 :
305 : // In certain cases, we will need to revert the node scores
306 0 : if ((colocation->dependent_role >= pcmk_role_promoted)
307 0 : || ((colocation->score < 0)
308 0 : && (colocation->score > -PCMK_SCORE_INFINITY))) {
309 0 : archive = pcmk__copy_node_table(rsc->allowed_nodes);
310 : }
311 :
312 0 : if (pcmk_is_set(other->flags, pcmk_rsc_unassigned)) {
313 0 : pcmk__rsc_trace(rsc,
314 : "%s: Assigning colocation %s primary %s first"
315 : "(score=%d role=%s)",
316 : rsc->id, colocation->id, other->id,
317 : colocation->score,
318 : pcmk_role_text(colocation->dependent_role));
319 0 : other->cmds->assign(other, NULL, true);
320 : }
321 :
322 : // Apply the colocation score to this resource's allowed node scores
323 0 : rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
324 0 : if ((archive != NULL)
325 0 : && !pcmk__any_node_available(rsc->allowed_nodes)) {
326 0 : pcmk__rsc_info(rsc,
327 : "%s: Reverting scores from colocation with %s "
328 : "because no nodes allowed",
329 : rsc->id, other->id);
330 0 : g_hash_table_destroy(rsc->allowed_nodes);
331 0 : rsc->allowed_nodes = archive;
332 0 : archive = NULL;
333 : }
334 0 : if (archive != NULL) {
335 0 : g_hash_table_destroy(archive);
336 : }
337 0 : }
338 :
339 : /*!
340 : * \internal
341 : * \brief Update a Pacemaker Remote node once its connection has been assigned
342 : *
343 : * \param[in] connection Connection resource that has been assigned
344 : */
345 : static void
346 0 : remote_connection_assigned(const pcmk_resource_t *connection)
347 : {
348 0 : pcmk_node_t *remote_node = pcmk_find_node(connection->cluster,
349 0 : connection->id);
350 :
351 0 : CRM_CHECK(remote_node != NULL, return);
352 :
353 0 : if ((connection->allocated_to != NULL)
354 0 : && (connection->next_role != pcmk_role_stopped)) {
355 :
356 0 : crm_trace("Pacemaker Remote node %s will be online",
357 : remote_node->details->id);
358 0 : remote_node->details->online = TRUE;
359 0 : if (remote_node->details->unseen) {
360 : // Avoid unnecessary fence, since we will attempt connection
361 0 : remote_node->details->unclean = FALSE;
362 : }
363 :
364 : } else {
365 0 : crm_trace("Pacemaker Remote node %s will be shut down "
366 : "(%sassigned connection's next role is %s)",
367 : remote_node->details->id,
368 : ((connection->allocated_to == NULL)? "un" : ""),
369 : pcmk_role_text(connection->next_role));
370 0 : remote_node->details->shutdown = TRUE;
371 : }
372 : }
373 :
374 : /*!
375 : * \internal
376 : * \brief Assign a primitive resource to a node
377 : *
378 : * \param[in,out] rsc Resource to assign to a node
379 : * \param[in] prefer Node to prefer, if all else is equal
380 : * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
381 : * node, set next role to stopped and update
382 : * existing actions
383 : *
384 : * \return Node that \p rsc is assigned to, if assigned entirely to one node
385 : *
386 : * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
387 : * completely undo the assignment. A successful assignment can be either
388 : * undone or left alone as final. A failed assignment has the same effect
389 : * as calling pcmk__unassign_resource(); there are no side effects on
390 : * roles or actions.
391 : */
392 : pcmk_node_t *
393 0 : pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
394 : bool stop_if_fail)
395 : {
396 0 : GList *this_with_colocations = NULL;
397 0 : GList *with_this_colocations = NULL;
398 0 : GList *iter = NULL;
399 0 : pcmk__colocation_t *colocation = NULL;
400 :
401 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
402 :
403 : // Never assign a child without parent being assigned first
404 0 : if ((rsc->parent != NULL)
405 0 : && !pcmk_is_set(rsc->parent->flags, pcmk_rsc_assigning)) {
406 0 : pcmk__rsc_debug(rsc, "%s: Assigning parent %s first",
407 : rsc->id, rsc->parent->id);
408 0 : rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail);
409 : }
410 :
411 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
412 : // Assignment has already been done
413 0 : const char *node_name = "no node";
414 :
415 0 : if (rsc->allocated_to != NULL) {
416 0 : node_name = pcmk__node_name(rsc->allocated_to);
417 : }
418 0 : pcmk__rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
419 0 : return rsc->allocated_to;
420 : }
421 :
422 : // Ensure we detect assignment loops
423 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_assigning)) {
424 0 : pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
425 0 : return NULL;
426 : }
427 0 : pcmk__set_rsc_flags(rsc, pcmk_rsc_assigning);
428 :
429 0 : pe__show_node_scores(true, rsc, "Pre-assignment", rsc->allowed_nodes,
430 : rsc->cluster);
431 :
432 0 : this_with_colocations = pcmk__this_with_colocations(rsc);
433 0 : with_this_colocations = pcmk__with_this_colocations(rsc);
434 :
435 : // Apply mandatory colocations first, to satisfy as many as possible
436 0 : for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
437 0 : colocation = iter->data;
438 :
439 0 : if ((colocation->score <= -PCMK_SCORE_INFINITY)
440 0 : || (colocation->score >= PCMK_SCORE_INFINITY)) {
441 0 : apply_this_with(colocation, rsc);
442 : }
443 : }
444 0 : for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
445 0 : colocation = iter->data;
446 :
447 0 : if ((colocation->score <= -PCMK_SCORE_INFINITY)
448 0 : || (colocation->score >= PCMK_SCORE_INFINITY)) {
449 0 : pcmk__add_dependent_scores(colocation, rsc);
450 : }
451 : }
452 :
453 0 : pe__show_node_scores(true, rsc, "Mandatory-colocations",
454 : rsc->allowed_nodes, rsc->cluster);
455 :
456 : // Then apply optional colocations
457 0 : for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
458 0 : colocation = iter->data;
459 :
460 0 : if ((colocation->score > -PCMK_SCORE_INFINITY)
461 0 : && (colocation->score < PCMK_SCORE_INFINITY)) {
462 0 : apply_this_with(colocation, rsc);
463 : }
464 : }
465 0 : for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
466 0 : colocation = iter->data;
467 :
468 0 : if ((colocation->score > -PCMK_SCORE_INFINITY)
469 0 : && (colocation->score < PCMK_SCORE_INFINITY)) {
470 0 : pcmk__add_dependent_scores(colocation, rsc);
471 : }
472 : }
473 :
474 0 : g_list_free(this_with_colocations);
475 0 : g_list_free(with_this_colocations);
476 :
477 0 : if (rsc->next_role == pcmk_role_stopped) {
478 0 : pcmk__rsc_trace(rsc,
479 : "Banning %s from all nodes because it will be stopped",
480 : rsc->id);
481 0 : resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
482 : PCMK_META_TARGET_ROLE, rsc->cluster);
483 :
484 0 : } else if ((rsc->next_role > rsc->role)
485 0 : && !pcmk_is_set(rsc->cluster->flags, pcmk_sched_quorate)
486 0 : && (rsc->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
487 0 : crm_notice("Resource %s cannot be elevated from %s to %s due to "
488 : PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE,
489 : rsc->id, pcmk_role_text(rsc->role),
490 : pcmk_role_text(rsc->next_role));
491 0 : pe__set_next_role(rsc, rsc->role,
492 : PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE);
493 : }
494 :
495 0 : pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
496 : pcmk_sched_output_scores),
497 : rsc, __func__, rsc->allowed_nodes, rsc->cluster);
498 :
499 : // Unmanage resource if fencing is enabled but no device is configured
500 0 : if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
501 0 : && !pcmk_is_set(rsc->cluster->flags, pcmk_sched_have_fencing)) {
502 0 : pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
503 : }
504 :
505 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
506 : // Unmanaged resources stay on their current node
507 0 : const char *reason = NULL;
508 0 : pcmk_node_t *assign_to = NULL;
509 :
510 0 : pe__set_next_role(rsc, rsc->role, "unmanaged");
511 0 : assign_to = pcmk__current_node(rsc);
512 0 : if (assign_to == NULL) {
513 0 : reason = "inactive";
514 0 : } else if (rsc->role == pcmk_role_promoted) {
515 0 : reason = "promoted";
516 0 : } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
517 0 : reason = "failed";
518 : } else {
519 0 : reason = "active";
520 : }
521 0 : pcmk__rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
522 : (assign_to? assign_to->details->uname : "no node"),
523 : reason);
524 0 : pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
525 :
526 0 : } else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_stop_all)) {
527 : // Must stop at some point, but be consistent with stop_if_fail
528 0 : if (stop_if_fail) {
529 0 : pcmk__rsc_debug(rsc,
530 : "Forcing %s to stop: " PCMK_OPT_STOP_ALL_RESOURCES,
531 : rsc->id);
532 : }
533 0 : pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
534 :
535 0 : } else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
536 : // Assignment failed
537 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
538 0 : pcmk__rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
539 0 : } else if ((rsc->running_on != NULL) && stop_if_fail) {
540 0 : pcmk__rsc_info(rsc, "Stopping removed resource %s", rsc->id);
541 : }
542 : }
543 :
544 0 : pcmk__clear_rsc_flags(rsc, pcmk_rsc_assigning);
545 :
546 0 : if (rsc->is_remote_node) {
547 0 : remote_connection_assigned(rsc);
548 : }
549 :
550 0 : return rsc->allocated_to;
551 : }
552 :
553 : /*!
554 : * \internal
555 : * \brief Schedule actions to bring resource down and back to current role
556 : *
557 : * \param[in,out] rsc Resource to restart
558 : * \param[in,out] current Node that resource should be brought down on
559 : * \param[in] need_stop Whether the resource must be stopped
560 : * \param[in] need_promote Whether the resource must be promoted
561 : *
562 : * \return Role that resource would have after scheduled actions are taken
563 : */
564 : static void
565 0 : schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current,
566 : bool need_stop, bool need_promote)
567 : {
568 0 : enum rsc_role_e role = rsc->role;
569 : enum rsc_role_e next_role;
570 0 : rsc_transition_fn fn = NULL;
571 :
572 0 : pcmk__set_rsc_flags(rsc, pcmk_rsc_restarting);
573 :
574 : // Bring resource down to a stop on its current node
575 0 : while (role != pcmk_role_stopped) {
576 0 : next_role = rsc_state_matrix[role][pcmk_role_stopped];
577 0 : pcmk__rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
578 : (need_stop? "required" : "optional"), rsc->id,
579 : pcmk_role_text(role), pcmk_role_text(next_role));
580 0 : fn = rsc_action_matrix[role][next_role];
581 0 : if (fn == NULL) {
582 0 : break;
583 : }
584 0 : fn(rsc, current, !need_stop);
585 0 : role = next_role;
586 : }
587 :
588 : // Bring resource up to its next role on its next node
589 0 : while ((rsc->role <= rsc->next_role) && (role != rsc->role)
590 0 : && !pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
591 0 : bool required = need_stop;
592 :
593 0 : next_role = rsc_state_matrix[role][rsc->role];
594 0 : if ((next_role == pcmk_role_promoted) && need_promote) {
595 0 : required = true;
596 : }
597 0 : pcmk__rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
598 : (required? "required" : "optional"), rsc->id,
599 : pcmk_role_text(role), pcmk_role_text(next_role));
600 0 : fn = rsc_action_matrix[role][next_role];
601 0 : if (fn == NULL) {
602 0 : break;
603 : }
604 0 : fn(rsc, rsc->allocated_to, !required);
605 0 : role = next_role;
606 : }
607 :
608 0 : pcmk__clear_rsc_flags(rsc, pcmk_rsc_restarting);
609 0 : }
610 :
611 : /*!
612 : * \internal
613 : * \brief If a resource's next role is not explicitly specified, set a default
614 : *
615 : * \param[in,out] rsc Resource to set next role for
616 : *
617 : * \return "explicit" if next role was explicitly set, otherwise "implicit"
618 : */
619 : static const char *
620 0 : set_default_next_role(pcmk_resource_t *rsc)
621 : {
622 0 : if (rsc->next_role != pcmk_role_unknown) {
623 0 : return "explicit";
624 : }
625 :
626 0 : if (rsc->allocated_to == NULL) {
627 0 : pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
628 : } else {
629 0 : pe__set_next_role(rsc, pcmk_role_started, "assignment");
630 : }
631 0 : return "implicit";
632 : }
633 :
634 : /*!
635 : * \internal
636 : * \brief Create an action to represent an already pending start
637 : *
638 : * \param[in,out] rsc Resource to create start action for
639 : */
640 : static void
641 0 : create_pending_start(pcmk_resource_t *rsc)
642 : {
643 0 : pcmk_action_t *start = NULL;
644 :
645 0 : pcmk__rsc_trace(rsc,
646 : "Creating action for %s to represent already pending start",
647 : rsc->id);
648 0 : start = start_action(rsc, rsc->allocated_to, TRUE);
649 0 : pcmk__set_action_flags(start, pcmk_action_always_in_graph);
650 0 : }
651 :
652 : /*!
653 : * \internal
654 : * \brief Schedule actions needed to take a resource to its next role
655 : *
656 : * \param[in,out] rsc Resource to schedule actions for
657 : */
658 : static void
659 0 : schedule_role_transition_actions(pcmk_resource_t *rsc)
660 : {
661 0 : enum rsc_role_e role = rsc->role;
662 :
663 0 : while (role != rsc->next_role) {
664 0 : enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
665 0 : rsc_transition_fn fn = NULL;
666 :
667 0 : pcmk__rsc_trace(rsc,
668 : "Creating action to take %s from %s to %s "
669 : "(ending at %s)",
670 : rsc->id, pcmk_role_text(role),
671 : pcmk_role_text(next_role),
672 : pcmk_role_text(rsc->next_role));
673 0 : fn = rsc_action_matrix[role][next_role];
674 0 : if (fn == NULL) {
675 0 : break;
676 : }
677 0 : fn(rsc, rsc->allocated_to, false);
678 0 : role = next_role;
679 : }
680 0 : }
681 :
682 : /*!
683 : * \internal
684 : * \brief Create all actions needed for a given primitive resource
685 : *
686 : * \param[in,out] rsc Primitive resource to create actions for
687 : */
688 : void
689 0 : pcmk__primitive_create_actions(pcmk_resource_t *rsc)
690 : {
691 0 : bool need_stop = false;
692 0 : bool need_promote = false;
693 0 : bool is_moving = false;
694 0 : bool allow_migrate = false;
695 0 : bool multiply_active = false;
696 :
697 0 : pcmk_node_t *current = NULL;
698 0 : unsigned int num_all_active = 0;
699 0 : unsigned int num_clean_active = 0;
700 0 : const char *next_role_source = NULL;
701 :
702 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
703 :
704 0 : next_role_source = set_default_next_role(rsc);
705 0 : pcmk__rsc_trace(rsc,
706 : "Creating all actions for %s transition from %s to %s "
707 : "(%s) on %s",
708 : rsc->id, pcmk_role_text(rsc->role),
709 : pcmk_role_text(rsc->next_role), next_role_source,
710 : pcmk__node_name(rsc->allocated_to));
711 :
712 0 : current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);
713 :
714 0 : g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
715 : rsc);
716 :
717 0 : if ((current != NULL) && (rsc->allocated_to != NULL)
718 0 : && !pcmk__same_node(current, rsc->allocated_to)
719 0 : && (rsc->next_role >= pcmk_role_started)) {
720 :
721 0 : pcmk__rsc_trace(rsc, "Moving %s from %s to %s",
722 : rsc->id, pcmk__node_name(current),
723 : pcmk__node_name(rsc->allocated_to));
724 0 : is_moving = true;
725 0 : allow_migrate = pcmk__rsc_can_migrate(rsc, current);
726 :
727 : // This is needed even if migrating (though I'm not sure why ...)
728 0 : need_stop = true;
729 : }
730 :
731 : // Check whether resource is partially migrated and/or multiply active
732 0 : if ((rsc->partial_migration_source != NULL)
733 0 : && (rsc->partial_migration_target != NULL)
734 0 : && allow_migrate && (num_all_active == 2)
735 0 : && pcmk__same_node(current, rsc->partial_migration_source)
736 0 : && pcmk__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
737 : /* A partial migration is in progress, and the migration target remains
738 : * the same as when the migration began.
739 : */
740 0 : pcmk__rsc_trace(rsc,
741 : "Partial migration of %s from %s to %s will continue",
742 : rsc->id, pcmk__node_name(rsc->partial_migration_source),
743 : pcmk__node_name(rsc->partial_migration_target));
744 :
745 0 : } else if ((rsc->partial_migration_source != NULL)
746 0 : || (rsc->partial_migration_target != NULL)) {
747 : // A partial migration is in progress but can't be continued
748 :
749 0 : if (num_all_active > 2) {
750 : // The resource is migrating *and* multiply active!
751 0 : crm_notice("Forcing recovery of %s because it is migrating "
752 : "from %s to %s and possibly active elsewhere",
753 : rsc->id, pcmk__node_name(rsc->partial_migration_source),
754 : pcmk__node_name(rsc->partial_migration_target));
755 : } else {
756 : // The migration source or target isn't available
757 0 : crm_notice("Forcing recovery of %s because it can no longer "
758 : "migrate from %s to %s",
759 : rsc->id, pcmk__node_name(rsc->partial_migration_source),
760 : pcmk__node_name(rsc->partial_migration_target));
761 : }
762 0 : need_stop = true;
763 0 : rsc->partial_migration_source = rsc->partial_migration_target = NULL;
764 0 : allow_migrate = false;
765 :
766 0 : } else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
767 0 : multiply_active = (num_all_active > 1);
768 : } else {
769 : /* If a resource has PCMK_META_REQUIRES set to PCMK_VALUE_NOTHING or
770 : * PCMK_VALUE_QUORUM, don't consider it active on unclean nodes (similar
771 : * to how all resources behave when PCMK_OPT_STONITH_ENABLED is false).
772 : * We can start such resources elsewhere before fencing completes, and
773 : * if we considered the resource active on the failed node, we would
774 : * attempt recovery for being active on multiple nodes.
775 : */
776 0 : multiply_active = (num_clean_active > 1);
777 : }
778 :
779 0 : if (multiply_active) {
780 0 : const char *class = crm_element_value(rsc->xml, PCMK_XA_CLASS);
781 :
782 : // Resource was (possibly) incorrectly multiply active
783 0 : pcmk__sched_err("%s resource %s might be active on %u nodes (%s)",
784 : pcmk__s(class, "Untyped"), rsc->id, num_all_active,
785 : pcmk_multiply_active_text(rsc->recovery_type));
786 0 : crm_notice("For more information, see \"What are multiply active "
787 : "resources?\" at "
788 : "https://projects.clusterlabs.org/w/clusterlabs/faq/");
789 :
790 0 : switch (rsc->recovery_type) {
791 0 : case pcmk_multiply_active_restart:
792 0 : need_stop = true;
793 0 : break;
794 0 : case pcmk_multiply_active_unexpected:
795 0 : need_stop = true; // stop_resource() will skip expected node
796 0 : pcmk__set_rsc_flags(rsc, pcmk_rsc_stop_unexpected);
797 0 : break;
798 0 : default:
799 0 : break;
800 : }
801 :
802 : } else {
803 0 : pcmk__clear_rsc_flags(rsc, pcmk_rsc_stop_unexpected);
804 : }
805 :
806 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_start_pending)) {
807 0 : create_pending_start(rsc);
808 : }
809 :
810 0 : if (is_moving) {
811 : // Remaining tests are only for resources staying where they are
812 :
813 0 : } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
814 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_stop_if_failed)) {
815 0 : need_stop = true;
816 0 : pcmk__rsc_trace(rsc, "Recovering %s", rsc->id);
817 : } else {
818 0 : pcmk__rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
819 0 : if (rsc->next_role == pcmk_role_promoted) {
820 0 : need_promote = true;
821 : }
822 : }
823 :
824 0 : } else if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
825 0 : pcmk__rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
826 0 : need_stop = true;
827 :
828 0 : } else if ((rsc->role > pcmk_role_started) && (current != NULL)
829 0 : && (rsc->allocated_to != NULL)) {
830 0 : pcmk_action_t *start = NULL;
831 :
832 0 : pcmk__rsc_trace(rsc, "Creating start action for promoted resource %s",
833 : rsc->id);
834 0 : start = start_action(rsc, rsc->allocated_to, TRUE);
835 0 : if (!pcmk_is_set(start->flags, pcmk_action_optional)) {
836 : // Recovery of a promoted resource
837 0 : pcmk__rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
838 0 : need_stop = true;
839 : }
840 : }
841 :
842 : // Create any actions needed to bring resource down and back up to same role
843 0 : schedule_restart_actions(rsc, current, need_stop, need_promote);
844 :
845 : // Create any actions needed to take resource from this role to the next
846 0 : schedule_role_transition_actions(rsc);
847 :
848 0 : pcmk__create_recurring_actions(rsc);
849 :
850 0 : if (allow_migrate) {
851 0 : pcmk__create_migration_actions(rsc, current);
852 : }
853 0 : }
854 :
855 : /*!
856 : * \internal
857 : * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
858 : *
859 : * \param[in] rsc Resource to check
860 : */
861 : static void
862 0 : rsc_avoids_remote_nodes(const pcmk_resource_t *rsc)
863 : {
864 : GHashTableIter iter;
865 0 : pcmk_node_t *node = NULL;
866 :
867 0 : g_hash_table_iter_init(&iter, rsc->allowed_nodes);
868 0 : while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
869 0 : if (node->details->remote_rsc != NULL) {
870 0 : node->weight = -PCMK_SCORE_INFINITY;
871 : }
872 : }
873 0 : }
874 :
875 : /*!
876 : * \internal
877 : * \brief Return allowed nodes as (possibly sorted) list
878 : *
879 : * Convert a resource's hash table of allowed nodes to a list. If printing to
880 : * stdout, sort the list, to keep action ID numbers consistent for regression
881 : * test output (while avoiding the performance hit on a live cluster).
882 : *
883 : * \param[in] rsc Resource to check for allowed nodes
884 : *
885 : * \return List of resource's allowed nodes
886 : * \note Callers should take care not to rely on the list being sorted.
887 : */
888 : static GList *
889 0 : allowed_nodes_as_list(const pcmk_resource_t *rsc)
890 : {
891 0 : GList *allowed_nodes = NULL;
892 :
893 0 : if (rsc->allowed_nodes) {
894 0 : allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
895 : }
896 :
897 0 : if (!pcmk__is_daemon) {
898 0 : allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
899 : }
900 :
901 0 : return allowed_nodes;
902 : }
903 :
904 : /*!
905 : * \internal
906 : * \brief Create implicit constraints needed for a primitive resource
907 : *
908 : * \param[in,out] rsc Primitive resource to create implicit constraints for
909 : */
910 : void
911 0 : pcmk__primitive_internal_constraints(pcmk_resource_t *rsc)
912 : {
913 0 : GList *allowed_nodes = NULL;
914 0 : bool check_unfencing = false;
915 0 : bool check_utilization = false;
916 :
917 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
918 :
919 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
920 0 : pcmk__rsc_trace(rsc,
921 : "Skipping implicit constraints for unmanaged resource "
922 : "%s", rsc->id);
923 0 : return;
924 : }
925 :
926 : // Whether resource requires unfencing
927 0 : check_unfencing = !pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
928 0 : && pcmk_is_set(rsc->cluster->flags,
929 : pcmk_sched_enable_unfencing)
930 0 : && pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing);
931 :
932 : // Whether a non-default placement strategy is used
933 0 : check_utilization = (g_hash_table_size(rsc->utilization) > 0)
934 0 : && !pcmk__str_eq(rsc->cluster->placement_strategy,
935 : PCMK_VALUE_DEFAULT, pcmk__str_casei);
936 :
937 : // Order stops before starts (i.e. restart)
938 0 : pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
939 0 : rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
940 : pcmk__ar_ordered
941 : |pcmk__ar_first_implies_then
942 : |pcmk__ar_intermediate_stop,
943 : rsc->cluster);
944 :
945 : // Promotable ordering: demote before stop, start before promote
946 0 : if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
947 : pcmk_rsc_promotable)
948 0 : || (rsc->role > pcmk_role_unpromoted)) {
949 :
950 0 : pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
951 : NULL,
952 0 : rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
953 : NULL,
954 : pcmk__ar_promoted_then_implies_first, rsc->cluster);
955 :
956 0 : pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
957 : NULL,
958 0 : rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
959 : NULL,
960 : pcmk__ar_unrunnable_first_blocks, rsc->cluster);
961 : }
962 :
963 : // Don't clear resource history if probing on same node
964 0 : pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
965 : NULL, rsc,
966 0 : pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
967 : NULL,
968 : pcmk__ar_if_on_same_node|pcmk__ar_then_cancels_first,
969 : rsc->cluster);
970 :
971 : // Certain checks need allowed nodes
972 0 : if (check_unfencing || check_utilization || (rsc->container != NULL)) {
973 0 : allowed_nodes = allowed_nodes_as_list(rsc);
974 : }
975 :
976 0 : if (check_unfencing) {
977 0 : g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
978 : }
979 :
980 0 : if (check_utilization) {
981 0 : pcmk__create_utilization_constraints(rsc, allowed_nodes);
982 : }
983 :
984 0 : if (rsc->container != NULL) {
985 0 : pcmk_resource_t *remote_rsc = NULL;
986 :
987 0 : if (rsc->is_remote_node) {
988 : // rsc is the implicit remote connection for a guest or bundle node
989 :
990 : /* Guest resources are not allowed to run on Pacemaker Remote nodes,
991 : * to avoid nesting remotes. However, bundles are allowed.
992 : */
993 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
994 0 : rsc_avoids_remote_nodes(rsc->container);
995 : }
996 :
997 : /* If someone cleans up a guest or bundle node's container, we will
998 : * likely schedule a (re-)probe of the container and recovery of the
999 : * connection. Order the connection stop after the container probe,
1000 : * so that if we detect the container running, we will trigger a new
1001 : * transition and avoid the unnecessary recovery.
1002 : */
1003 0 : pcmk__order_resource_actions(rsc->container, PCMK_ACTION_MONITOR,
1004 : rsc, PCMK_ACTION_STOP,
1005 : pcmk__ar_ordered);
1006 :
1007 : /* A user can specify that a resource must start on a Pacemaker Remote
1008 : * node by explicitly configuring it with the container=NODENAME
1009 : * meta-attribute. This is of questionable merit, since location
1010 : * constraints can accomplish the same thing. But we support it, so here
1011 : * we check whether a resource (that is not itself a remote connection)
1012 : * has container set to a remote node or guest node resource.
1013 : */
1014 0 : } else if (rsc->container->is_remote_node) {
1015 0 : remote_rsc = rsc->container;
1016 : } else {
1017 0 : remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
1018 0 : rsc->container);
1019 : }
1020 :
1021 0 : if (remote_rsc != NULL) {
1022 : /* Force the resource on the Pacemaker Remote node instead of
1023 : * colocating the resource with the container resource.
1024 : */
1025 0 : for (GList *item = allowed_nodes; item; item = item->next) {
1026 0 : pcmk_node_t *node = item->data;
1027 :
1028 0 : if (node->details->remote_rsc != remote_rsc) {
1029 0 : node->weight = -PCMK_SCORE_INFINITY;
1030 : }
1031 : }
1032 :
1033 : } else {
1034 : /* This resource is either a filler for a container that does NOT
1035 : * represent a Pacemaker Remote node, or a Pacemaker Remote
1036 : * connection resource for a guest node or bundle.
1037 : */
1038 : int score;
1039 :
1040 0 : crm_trace("Order and colocate %s relative to its container %s",
1041 : rsc->id, rsc->container->id);
1042 :
1043 0 : pcmk__new_ordering(rsc->container,
1044 0 : pcmk__op_key(rsc->container->id,
1045 : PCMK_ACTION_START, 0),
1046 : NULL, rsc,
1047 0 : pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
1048 : NULL,
1049 : pcmk__ar_first_implies_then
1050 : |pcmk__ar_unrunnable_first_blocks,
1051 : rsc->cluster);
1052 :
1053 0 : pcmk__new_ordering(rsc,
1054 0 : pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
1055 : NULL,
1056 : rsc->container,
1057 0 : pcmk__op_key(rsc->container->id,
1058 : PCMK_ACTION_STOP, 0),
1059 : NULL, pcmk__ar_then_implies_first, rsc->cluster);
1060 :
1061 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
1062 0 : score = 10000; /* Highly preferred but not essential */
1063 : } else {
1064 0 : score = PCMK_SCORE_INFINITY; // Force to run on same host
1065 : }
1066 0 : pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
1067 : rsc->container, NULL, NULL,
1068 : pcmk__coloc_influence);
1069 : }
1070 : }
1071 :
1072 0 : if (rsc->is_remote_node
1073 0 : || pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
1074 : /* Remote connections and fencing devices are not allowed to run on
1075 : * Pacemaker Remote nodes
1076 : */
1077 0 : rsc_avoids_remote_nodes(rsc);
1078 : }
1079 0 : g_list_free(allowed_nodes);
1080 : }
1081 :
1082 : /*!
1083 : * \internal
1084 : * \brief Apply a colocation's score to node scores or resource priority
1085 : *
1086 : * Given a colocation constraint, apply its score to the dependent's
1087 : * allowed node scores (if we are still placing resources) or priority (if
1088 : * we are choosing promotable clone instance roles).
1089 : *
1090 : * \param[in,out] dependent Dependent resource in colocation
1091 : * \param[in] primary Primary resource in colocation
1092 : * \param[in] colocation Colocation constraint to apply
1093 : * \param[in] for_dependent true if called on behalf of dependent
1094 : */
1095 : void
1096 0 : pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent,
1097 : const pcmk_resource_t *primary,
1098 : const pcmk__colocation_t *colocation,
1099 : bool for_dependent)
1100 : {
1101 : enum pcmk__coloc_affects filter_results;
1102 :
1103 0 : CRM_ASSERT((dependent != NULL) && (primary != NULL)
1104 : && (colocation != NULL));
1105 :
1106 0 : if (for_dependent) {
1107 : // Always process on behalf of primary resource
1108 0 : primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
1109 0 : return;
1110 : }
1111 :
1112 0 : filter_results = pcmk__colocation_affects(dependent, primary, colocation,
1113 : false);
1114 0 : pcmk__rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
1115 : ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
1116 : dependent->id, primary->id, colocation->id,
1117 : colocation->score,
1118 : filter_results);
1119 :
1120 0 : switch (filter_results) {
1121 0 : case pcmk__coloc_affects_role:
1122 0 : pcmk__apply_coloc_to_priority(dependent, primary, colocation);
1123 0 : break;
1124 0 : case pcmk__coloc_affects_location:
1125 0 : pcmk__apply_coloc_to_scores(dependent, primary, colocation);
1126 0 : break;
1127 0 : default: // pcmk__coloc_affects_nothing
1128 0 : return;
1129 : }
1130 : }
1131 :
1132 : /* Primitive implementation of
1133 : * pcmk_assignment_methods_t:with_this_colocations()
1134 : */
1135 : void
1136 0 : pcmk__with_primitive_colocations(const pcmk_resource_t *rsc,
1137 : const pcmk_resource_t *orig_rsc, GList **list)
1138 : {
1139 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1140 : && (list != NULL));
1141 :
1142 0 : if (rsc == orig_rsc) {
1143 : /* For the resource itself, add all of its own colocations and relevant
1144 : * colocations from its parent (if any).
1145 : */
1146 0 : pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
1147 0 : if (rsc->parent != NULL) {
1148 0 : rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list);
1149 : }
1150 : } else {
1151 : // For an ancestor, add only explicitly configured constraints
1152 0 : for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) {
1153 0 : pcmk__colocation_t *colocation = iter->data;
1154 :
1155 0 : if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1156 0 : pcmk__add_with_this(list, colocation, orig_rsc);
1157 : }
1158 : }
1159 : }
1160 0 : }
1161 :
1162 : /* Primitive implementation of
1163 : * pcmk_assignment_methods_t:this_with_colocations()
1164 : */
1165 : void
1166 0 : pcmk__primitive_with_colocations(const pcmk_resource_t *rsc,
1167 : const pcmk_resource_t *orig_rsc, GList **list)
1168 : {
1169 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1170 : && (list != NULL));
1171 :
1172 0 : if (rsc == orig_rsc) {
1173 : /* For the resource itself, add all of its own colocations and relevant
1174 : * colocations from its parent (if any).
1175 : */
1176 0 : pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
1177 0 : if (rsc->parent != NULL) {
1178 0 : rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list);
1179 : }
1180 : } else {
1181 : // For an ancestor, add only explicitly configured constraints
1182 0 : for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) {
1183 0 : pcmk__colocation_t *colocation = iter->data;
1184 :
1185 0 : if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1186 0 : pcmk__add_this_with(list, colocation, orig_rsc);
1187 : }
1188 : }
1189 : }
1190 0 : }
1191 :
1192 : /*!
1193 : * \internal
1194 : * \brief Return action flags for a given primitive resource action
1195 : *
1196 : * \param[in,out] action Action to get flags for
1197 : * \param[in] node If not NULL, limit effects to this node (ignored)
1198 : *
1199 : * \return Flags appropriate to \p action on \p node
1200 : */
1201 : uint32_t
1202 0 : pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
1203 : {
1204 0 : CRM_ASSERT(action != NULL);
1205 0 : return (uint32_t) action->flags;
1206 : }
1207 :
1208 : /*!
1209 : * \internal
1210 : * \brief Check whether a node is a multiply active resource's expected node
1211 : *
1212 : * \param[in] rsc Resource to check
1213 : * \param[in] node Node to check
1214 : *
1215 : * \return \c true if \p rsc is multiply active with
1216 : * \c PCMK_META_MULTIPLE_ACTIVE set to \c PCMK_VALUE_STOP_UNEXPECTED,
1217 : * and \p node is the node where it will remain active
1218 : * \note This assumes that the resource's next role cannot be changed to stopped
1219 : * after this is called, which should be reasonable if status has already
1220 : * been unpacked and resources have been assigned to nodes.
1221 : */
1222 : static bool
1223 0 : is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
1224 : {
1225 0 : return pcmk_all_flags_set(rsc->flags,
1226 : pcmk_rsc_stop_unexpected|pcmk_rsc_restarting)
1227 0 : && (rsc->next_role > pcmk_role_stopped)
1228 0 : && pcmk__same_node(rsc->allocated_to, node);
1229 : }
1230 :
1231 : /*!
1232 : * \internal
1233 : * \brief Schedule actions needed to stop a resource wherever it is active
1234 : *
1235 : * \param[in,out] rsc Resource being stopped
1236 : * \param[in] node Node where resource is being stopped (ignored)
1237 : * \param[in] optional Whether actions should be optional
1238 : */
1239 : static void
1240 0 : stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1241 : {
1242 0 : for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
1243 0 : pcmk_node_t *current = (pcmk_node_t *) iter->data;
1244 0 : pcmk_action_t *stop = NULL;
1245 :
1246 0 : if (is_expected_node(rsc, current)) {
1247 : /* We are scheduling restart actions for a multiply active resource
1248 : * with PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_STOP_UNEXPECTED, and
1249 : * this is where it should not be stopped.
1250 : */
1251 0 : pcmk__rsc_trace(rsc,
1252 : "Skipping stop of multiply active resource %s "
1253 : "on expected node %s",
1254 : rsc->id, pcmk__node_name(current));
1255 0 : continue;
1256 : }
1257 :
1258 0 : if (rsc->partial_migration_target != NULL) {
1259 : // Continue migration if node originally was and remains target
1260 0 : if (pcmk__same_node(current, rsc->partial_migration_target)
1261 0 : && pcmk__same_node(current, rsc->allocated_to)) {
1262 0 : pcmk__rsc_trace(rsc,
1263 : "Skipping stop of %s on %s "
1264 : "because partial migration there will continue",
1265 : rsc->id, pcmk__node_name(current));
1266 0 : continue;
1267 : } else {
1268 0 : pcmk__rsc_trace(rsc,
1269 : "Forcing stop of %s on %s "
1270 : "because migration target changed",
1271 : rsc->id, pcmk__node_name(current));
1272 0 : optional = false;
1273 : }
1274 : }
1275 :
1276 0 : pcmk__rsc_trace(rsc, "Scheduling stop of %s on %s",
1277 : rsc->id, pcmk__node_name(current));
1278 0 : stop = stop_action(rsc, current, optional);
1279 :
1280 0 : if (rsc->allocated_to == NULL) {
1281 0 : pe_action_set_reason(stop, "node availability", true);
1282 0 : } else if (pcmk_all_flags_set(rsc->flags, pcmk_rsc_restarting
1283 : |pcmk_rsc_stop_unexpected)) {
1284 : /* We are stopping a multiply active resource on a node that is
1285 : * not its expected node, and we are still scheduling restart
1286 : * actions, so the stop is for being multiply active.
1287 : */
1288 0 : pe_action_set_reason(stop, "being multiply active", true);
1289 : }
1290 :
1291 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
1292 0 : pcmk__clear_action_flags(stop, pcmk_action_runnable);
1293 : }
1294 :
1295 0 : if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_remove_after_stop)) {
1296 0 : pcmk__schedule_cleanup(rsc, current, optional);
1297 : }
1298 :
1299 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
1300 0 : pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
1301 : NULL, false, rsc->cluster);
1302 :
1303 0 : order_actions(stop, unfence, pcmk__ar_then_implies_first);
1304 0 : if (!pcmk__node_unfenced(current)) {
1305 0 : pcmk__sched_err("Stopping %s until %s can be unfenced",
1306 : rsc->id, pcmk__node_name(current));
1307 : }
1308 : }
1309 : }
1310 0 : }
1311 :
1312 : /*!
1313 : * \internal
1314 : * \brief Schedule actions needed to start a resource on a node
1315 : *
1316 : * \param[in,out] rsc Resource being started
1317 : * \param[in,out] node Node where resource should be started
1318 : * \param[in] optional Whether actions should be optional
1319 : */
1320 : static void
1321 0 : start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1322 : {
1323 0 : pcmk_action_t *start = NULL;
1324 :
1325 0 : CRM_ASSERT(node != NULL);
1326 :
1327 0 : pcmk__rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
1328 : (optional? "optional" : "required"), rsc->id,
1329 : pcmk__node_name(node), node->weight);
1330 0 : start = start_action(rsc, node, TRUE);
1331 :
1332 0 : pcmk__order_vs_unfence(rsc, node, start, pcmk__ar_first_implies_then);
1333 :
1334 0 : if (pcmk_is_set(start->flags, pcmk_action_runnable) && !optional) {
1335 0 : pcmk__clear_action_flags(start, pcmk_action_optional);
1336 : }
1337 :
1338 0 : if (is_expected_node(rsc, node)) {
1339 : /* This could be a problem if the start becomes necessary for other
1340 : * reasons later.
1341 : */
1342 0 : pcmk__rsc_trace(rsc,
1343 : "Start of multiply active resouce %s "
1344 : "on expected node %s will be a pseudo-action",
1345 : rsc->id, pcmk__node_name(node));
1346 0 : pcmk__set_action_flags(start, pcmk_action_pseudo);
1347 : }
1348 0 : }
1349 :
1350 : /*!
1351 : * \internal
1352 : * \brief Schedule actions needed to promote a resource on a node
1353 : *
1354 : * \param[in,out] rsc Resource being promoted
1355 : * \param[in] node Node where resource should be promoted
1356 : * \param[in] optional Whether actions should be optional
1357 : */
1358 : static void
1359 0 : promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1360 : {
1361 0 : GList *iter = NULL;
1362 0 : GList *action_list = NULL;
1363 0 : bool runnable = true;
1364 :
1365 0 : CRM_ASSERT(node != NULL);
1366 :
1367 : // Any start must be runnable for promotion to be runnable
1368 0 : action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
1369 0 : for (iter = action_list; iter != NULL; iter = iter->next) {
1370 0 : pcmk_action_t *start = (pcmk_action_t *) iter->data;
1371 :
1372 0 : if (!pcmk_is_set(start->flags, pcmk_action_runnable)) {
1373 0 : runnable = false;
1374 : }
1375 : }
1376 0 : g_list_free(action_list);
1377 :
1378 0 : if (runnable) {
1379 0 : pcmk_action_t *promote = promote_action(rsc, node, optional);
1380 :
1381 0 : pcmk__rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
1382 : (optional? "optional" : "required"), rsc->id,
1383 : pcmk__node_name(node));
1384 :
1385 0 : if (is_expected_node(rsc, node)) {
1386 : /* This could be a problem if the promote becomes necessary for
1387 : * other reasons later.
1388 : */
1389 0 : pcmk__rsc_trace(rsc,
1390 : "Promotion of multiply active resouce %s "
1391 : "on expected node %s will be a pseudo-action",
1392 : rsc->id, pcmk__node_name(node));
1393 0 : pcmk__set_action_flags(promote, pcmk_action_pseudo);
1394 : }
1395 : } else {
1396 0 : pcmk__rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
1397 : rsc->id, pcmk__node_name(node));
1398 0 : action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
1399 : true);
1400 0 : for (iter = action_list; iter != NULL; iter = iter->next) {
1401 0 : pcmk_action_t *promote = (pcmk_action_t *) iter->data;
1402 :
1403 0 : pcmk__clear_action_flags(promote, pcmk_action_runnable);
1404 : }
1405 0 : g_list_free(action_list);
1406 : }
1407 0 : }
1408 :
1409 : /*!
1410 : * \internal
1411 : * \brief Schedule actions needed to demote a resource wherever it is active
1412 : *
1413 : * \param[in,out] rsc Resource being demoted
1414 : * \param[in] node Node where resource should be demoted (ignored)
1415 : * \param[in] optional Whether actions should be optional
1416 : */
1417 : static void
1418 0 : demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1419 : {
1420 : /* Since this will only be called for a primitive (possibly as an instance
1421 : * of a collective resource), the resource is multiply active if it is
1422 : * running on more than one node, so we want to demote on all of them as
1423 : * part of recovery, regardless of which one is the desired node.
1424 : */
1425 0 : for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
1426 0 : pcmk_node_t *current = (pcmk_node_t *) iter->data;
1427 :
1428 0 : if (is_expected_node(rsc, current)) {
1429 0 : pcmk__rsc_trace(rsc,
1430 : "Skipping demote of multiply active resource %s "
1431 : "on expected node %s",
1432 : rsc->id, pcmk__node_name(current));
1433 : } else {
1434 0 : pcmk__rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
1435 : (optional? "optional" : "required"), rsc->id,
1436 : pcmk__node_name(current));
1437 0 : demote_action(rsc, current, optional);
1438 : }
1439 : }
1440 0 : }
1441 :
1442 : static void
1443 0 : assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1444 : {
1445 0 : CRM_ASSERT(false);
1446 0 : }
1447 :
1448 : /*!
1449 : * \internal
1450 : * \brief Schedule cleanup of a resource
1451 : *
1452 : * \param[in,out] rsc Resource to clean up
1453 : * \param[in] node Node to clean up on
1454 : * \param[in] optional Whether clean-up should be optional
1455 : */
1456 : void
1457 0 : pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node,
1458 : bool optional)
1459 : {
1460 : /* If the cleanup is required, its orderings are optional, because they're
1461 : * relevant only if both actions are required. Conversely, if the cleanup is
1462 : * optional, the orderings make the then action required if the first action
1463 : * becomes required.
1464 : */
1465 0 : uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
1466 :
1467 0 : CRM_CHECK((rsc != NULL) && (node != NULL), return);
1468 :
1469 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1470 0 : pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
1471 : rsc->id, pcmk__node_name(node));
1472 0 : return;
1473 : }
1474 :
1475 0 : if (node->details->unclean || !node->details->online) {
1476 0 : pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
1477 : rsc->id, pcmk__node_name(node));
1478 0 : return;
1479 : }
1480 :
1481 0 : crm_notice("Scheduling clean-up of %s on %s",
1482 : rsc->id, pcmk__node_name(node));
1483 0 : delete_action(rsc, node, optional);
1484 :
1485 : // stop -> clean-up -> start
1486 0 : pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
1487 : rsc, PCMK_ACTION_DELETE, flag);
1488 0 : pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
1489 : rsc, PCMK_ACTION_START, flag);
1490 : }
1491 :
1492 : /*!
1493 : * \internal
1494 : * \brief Add primitive meta-attributes relevant to graph actions to XML
1495 : *
1496 : * \param[in] rsc Primitive resource whose meta-attributes should be added
1497 : * \param[in,out] xml Transition graph action attributes XML to add to
1498 : */
1499 : void
1500 0 : pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
1501 : {
1502 0 : char *name = NULL;
1503 0 : char *value = NULL;
1504 0 : const pcmk_resource_t *parent = NULL;
1505 :
1506 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1507 : && (xml != NULL));
1508 :
1509 : /* Clone instance numbers get set internally as meta-attributes, and are
1510 : * needed in the transition graph (for example, to tell unique clone
1511 : * instances apart).
1512 : */
1513 0 : value = g_hash_table_lookup(rsc->meta, PCMK__META_CLONE);
1514 0 : if (value != NULL) {
1515 0 : name = crm_meta_name(PCMK__META_CLONE);
1516 0 : crm_xml_add(xml, name, value);
1517 0 : free(name);
1518 : }
1519 :
1520 : // Not sure if this one is really needed ...
1521 0 : value = g_hash_table_lookup(rsc->meta, PCMK_META_REMOTE_NODE);
1522 0 : if (value != NULL) {
1523 0 : name = crm_meta_name(PCMK_META_REMOTE_NODE);
1524 0 : crm_xml_add(xml, name, value);
1525 0 : free(name);
1526 : }
1527 :
1528 : /* The container meta-attribute can be set on the primitive itself or one of
1529 : * its parents (for example, a group inside a container resource), so check
1530 : * them all, and keep the highest one found.
1531 : */
1532 0 : for (parent = rsc; parent != NULL; parent = parent->parent) {
1533 0 : if (parent->container != NULL) {
1534 0 : crm_xml_add(xml, CRM_META "_" PCMK__META_CONTAINER,
1535 0 : parent->container->id);
1536 : }
1537 : }
1538 :
1539 : /* Bundle replica children will get their external-ip set internally as a
1540 : * meta-attribute. The graph action needs it, but under a different naming
1541 : * convention than other meta-attributes.
1542 : */
1543 0 : value = g_hash_table_lookup(rsc->meta, "external-ip");
1544 0 : if (value != NULL) {
1545 0 : crm_xml_add(xml, "pcmk_external_ip", value);
1546 : }
1547 0 : }
1548 :
1549 : // Primitive implementation of pcmk_assignment_methods_t:add_utilization()
1550 : void
1551 0 : pcmk__primitive_add_utilization(const pcmk_resource_t *rsc,
1552 : const pcmk_resource_t *orig_rsc,
1553 : GList *all_rscs, GHashTable *utilization)
1554 : {
1555 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1556 : && (orig_rsc != NULL) && (utilization != NULL));
1557 :
1558 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
1559 0 : return;
1560 : }
1561 :
1562 0 : pcmk__rsc_trace(orig_rsc,
1563 : "%s: Adding primitive %s as colocated utilization",
1564 : orig_rsc->id, rsc->id);
1565 0 : pcmk__release_node_capacity(utilization, rsc);
1566 : }
1567 :
1568 : /*!
1569 : * \internal
1570 : * \brief Get epoch time of node's shutdown attribute (or now if none)
1571 : *
1572 : * \param[in,out] node Node to check
1573 : *
1574 : * \return Epoch time corresponding to shutdown attribute if set or now if not
1575 : */
1576 : static time_t
1577 0 : shutdown_time(pcmk_node_t *node)
1578 : {
1579 0 : const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL,
1580 : pcmk__rsc_node_current);
1581 0 : time_t result = 0;
1582 :
1583 0 : if (shutdown != NULL) {
1584 : long long result_ll;
1585 :
1586 0 : if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
1587 0 : result = (time_t) result_ll;
1588 : }
1589 : }
1590 0 : return (result == 0)? get_effective_time(node->details->data_set) : result;
1591 : }
1592 :
1593 : /*!
1594 : * \internal
1595 : * \brief Ban a resource from a node if it's not locked to the node
1596 : *
1597 : * \param[in] data Node to check
1598 : * \param[in,out] user_data Resource to check
1599 : */
1600 : static void
1601 0 : ban_if_not_locked(gpointer data, gpointer user_data)
1602 : {
1603 0 : const pcmk_node_t *node = (const pcmk_node_t *) data;
1604 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
1605 :
1606 0 : if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
1607 0 : resource_location(rsc, node, -PCMK_SCORE_INFINITY,
1608 : PCMK_OPT_SHUTDOWN_LOCK, rsc->cluster);
1609 : }
1610 0 : }
1611 :
1612 : // Primitive implementation of pcmk_assignment_methods_t:shutdown_lock()
1613 : void
1614 0 : pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc)
1615 : {
1616 0 : const char *class = NULL;
1617 :
1618 0 : CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
1619 :
1620 0 : class = crm_element_value(rsc->xml, PCMK_XA_CLASS);
1621 :
1622 : // Fence devices and remote connections can't be locked
1623 0 : if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
1624 0 : || rsc->is_remote_node) {
1625 0 : return;
1626 : }
1627 :
1628 0 : if (rsc->lock_node != NULL) {
1629 : // The lock was obtained from resource history
1630 :
1631 0 : if (rsc->running_on != NULL) {
1632 : /* The resource was started elsewhere even though it is now
1633 : * considered locked. This shouldn't be possible, but as a
1634 : * failsafe, we don't want to disturb the resource now.
1635 : */
1636 0 : pcmk__rsc_info(rsc,
1637 : "Cancelling shutdown lock "
1638 : "because %s is already active", rsc->id);
1639 0 : pe__clear_resource_history(rsc, rsc->lock_node);
1640 0 : rsc->lock_node = NULL;
1641 0 : rsc->lock_time = 0;
1642 : }
1643 :
1644 : // Only a resource active on exactly one node can be locked
1645 0 : } else if (pcmk__list_of_1(rsc->running_on)) {
1646 0 : pcmk_node_t *node = rsc->running_on->data;
1647 :
1648 0 : if (node->details->shutdown) {
1649 0 : if (node->details->unclean) {
1650 0 : pcmk__rsc_debug(rsc,
1651 : "Not locking %s to unclean %s for shutdown",
1652 : rsc->id, pcmk__node_name(node));
1653 : } else {
1654 0 : rsc->lock_node = node;
1655 0 : rsc->lock_time = shutdown_time(node);
1656 : }
1657 : }
1658 : }
1659 :
1660 0 : if (rsc->lock_node == NULL) {
1661 : // No lock needed
1662 0 : return;
1663 : }
1664 :
1665 0 : if (rsc->cluster->shutdown_lock > 0) {
1666 0 : time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
1667 :
1668 0 : pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
1669 : rsc->id, pcmk__node_name(rsc->lock_node),
1670 : (long long) lock_expiration);
1671 0 : pe__update_recheck_time(++lock_expiration, rsc->cluster,
1672 : "shutdown lock expiration");
1673 : } else {
1674 0 : pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown",
1675 : rsc->id, pcmk__node_name(rsc->lock_node));
1676 : }
1677 :
1678 : // If resource is locked to one node, ban it from all other nodes
1679 0 : g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
1680 : }
|