Line data Source code
1 : /*
2 : * Copyright 2004-2024 the Pacemaker project contributors
3 : *
4 : * The version control history for this file may have further details.
5 : *
6 : * This source code is licensed under the GNU General Public License version 2
7 : * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 : */
9 :
10 : #include <crm_internal.h>
11 :
12 : #include <crm/crm.h>
13 : #include <crm/cib.h>
14 : #include <crm/cib/internal.h>
15 : #include <crm/common/xml.h>
16 : #include <crm/common/xml_internal.h>
17 : #include <crm/common/scheduler_internal.h>
18 :
19 : #include <glib.h>
20 :
21 : #include <crm/pengine/status.h>
22 : #include <pacemaker-internal.h>
23 : #include "libpacemaker_private.h"
24 :
25 : CRM_TRACE_INIT_DATA(pacemaker);
26 :
27 : /*!
28 : * \internal
29 : * \brief Do deferred action checks after assignment
30 : *
31 : * When unpacking the resource history, the scheduler checks for resource
32 : * configurations that have changed since an action was run. However, at that
33 : * time, bundles using the REMOTE_CONTAINER_HACK don't have their final
34 : * parameter information, so instead they add a deferred check to a list. This
35 : * function processes one entry in that list.
36 : *
37 : * \param[in,out] rsc Resource that action history is for
38 : * \param[in,out] node Node that action history is for
39 : * \param[in] rsc_op Action history entry
40 : * \param[in] check Type of deferred check to do
41 : */
42 : static void
43 0 : check_params(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_op,
44 : enum pcmk__check_parameters check)
45 : {
46 0 : const char *reason = NULL;
47 0 : pcmk__op_digest_t *digest_data = NULL;
48 :
49 0 : switch (check) {
50 0 : case pcmk__check_active:
51 0 : if (pcmk__check_action_config(rsc, node, rsc_op)
52 0 : && pe_get_failcount(node, rsc, NULL, pcmk__fc_effective,
53 : NULL)) {
54 0 : reason = "action definition changed";
55 : }
56 0 : break;
57 :
58 0 : case pcmk__check_last_failure:
59 0 : digest_data = rsc_action_digest_cmp(rsc, rsc_op, node,
60 : rsc->cluster);
61 0 : switch (digest_data->rc) {
62 0 : case pcmk__digest_unknown:
63 0 : crm_trace("Resource %s history entry %s on %s has "
64 : "no digest to compare",
65 : rsc->id, pcmk__xe_id(rsc_op), node->details->id);
66 0 : break;
67 0 : case pcmk__digest_match:
68 0 : break;
69 0 : default:
70 0 : reason = "resource parameters have changed";
71 0 : break;
72 : }
73 0 : break;
74 : }
75 0 : if (reason != NULL) {
76 0 : pe__clear_failcount(rsc, node, reason, rsc->cluster);
77 : }
78 0 : }
79 :
80 : /*!
81 : * \internal
82 : * \brief Check whether a resource has failcount clearing scheduled on a node
83 : *
84 : * \param[in] node Node to check
85 : * \param[in] rsc Resource to check
86 : *
87 : * \return true if \p rsc has failcount clearing scheduled on \p node,
88 : * otherwise false
89 : */
90 : static bool
91 0 : failcount_clear_action_exists(const pcmk_node_t *node,
92 : const pcmk_resource_t *rsc)
93 : {
94 0 : GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT,
95 : TRUE);
96 :
97 0 : if (list != NULL) {
98 0 : g_list_free(list);
99 0 : return true;
100 : }
101 0 : return false;
102 : }
103 :
104 : /*!
105 : * \internal
106 : * \brief Ban a resource from a node if it reached its failure threshold there
107 : *
108 : * \param[in,out] data Resource to check failure threshold for
109 : * \param[in] user_data Node to check resource on
110 : */
111 : static void
112 0 : check_failure_threshold(gpointer data, gpointer user_data)
113 : {
114 0 : pcmk_resource_t *rsc = data;
115 0 : const pcmk_node_t *node = user_data;
116 :
117 : // If this is a collective resource, apply recursively to children instead
118 0 : if (rsc->children != NULL) {
119 0 : g_list_foreach(rsc->children, check_failure_threshold, user_data);
120 0 : return;
121 : }
122 :
123 0 : if (!failcount_clear_action_exists(node, rsc)) {
124 : /* Don't force the resource away from this node due to a failcount
125 : * that's going to be cleared.
126 : *
127 : * @TODO Failcount clearing can be scheduled in
128 : * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in
129 : * schedule_resource_actions() via check_params(). This runs well before
130 : * then, so it cannot detect those, meaning we might check the migration
131 : * threshold when we shouldn't. Worst case, we stop or move the
132 : * resource, then move it back in the next transition.
133 : */
134 0 : pcmk_resource_t *failed = NULL;
135 :
136 0 : if (pcmk__threshold_reached(rsc, node, &failed)) {
137 0 : resource_location(failed, node, -PCMK_SCORE_INFINITY,
138 : "__fail_limit__", rsc->cluster);
139 : }
140 : }
141 : }
142 :
143 : /*!
144 : * \internal
145 : * \brief If resource has exclusive discovery, ban node if not allowed
146 : *
147 : * Location constraints have a PCMK_XA_RESOURCE_DISCOVERY option that allows
148 : * users to specify where probes are done for the affected resource. If this is
149 : * set to \c exclusive, probes will only be done on nodes listed in exclusive
150 : * constraints. This function bans the resource from the node if the node is not
151 : * listed.
152 : *
153 : * \param[in,out] data Resource to check
154 : * \param[in] user_data Node to check resource on
155 : */
156 : static void
157 0 : apply_exclusive_discovery(gpointer data, gpointer user_data)
158 : {
159 0 : pcmk_resource_t *rsc = data;
160 0 : const pcmk_node_t *node = user_data;
161 :
162 0 : if (rsc->exclusive_discover
163 0 : || pe__const_top_resource(rsc, false)->exclusive_discover) {
164 0 : pcmk_node_t *match = NULL;
165 :
166 : // If this is a collective resource, apply recursively to children
167 0 : g_list_foreach(rsc->children, apply_exclusive_discovery, user_data);
168 :
169 0 : match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
170 0 : if ((match != NULL)
171 0 : && (match->rsc_discover_mode != pcmk_probe_exclusive)) {
172 0 : match->weight = -PCMK_SCORE_INFINITY;
173 : }
174 : }
175 0 : }
176 :
177 : /*!
178 : * \internal
179 : * \brief Apply stickiness to a resource if appropriate
180 : *
181 : * \param[in,out] data Resource to check for stickiness
182 : * \param[in] user_data Ignored
183 : */
184 : static void
185 0 : apply_stickiness(gpointer data, gpointer user_data)
186 : {
187 0 : pcmk_resource_t *rsc = data;
188 0 : pcmk_node_t *node = NULL;
189 :
190 : // If this is a collective resource, apply recursively to children instead
191 0 : if (rsc->children != NULL) {
192 0 : g_list_foreach(rsc->children, apply_stickiness, NULL);
193 0 : return;
194 : }
195 :
196 : /* A resource is sticky if it is managed, has stickiness configured, and is
197 : * active on a single node.
198 : */
199 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)
200 0 : || (rsc->stickiness < 1) || !pcmk__list_of_1(rsc->running_on)) {
201 0 : return;
202 : }
203 :
204 0 : node = rsc->running_on->data;
205 :
206 : /* In a symmetric cluster, stickiness can always be used. In an
207 : * asymmetric cluster, we have to check whether the resource is still
208 : * allowed on the node, so we don't keep the resource somewhere it is no
209 : * longer explicitly enabled.
210 : */
211 0 : if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_symmetric_cluster)
212 0 : && (g_hash_table_lookup(rsc->allowed_nodes,
213 0 : node->details->id) == NULL)) {
214 0 : pcmk__rsc_debug(rsc,
215 : "Ignoring %s stickiness because the cluster is "
216 : "asymmetric and %s is not explicitly allowed",
217 : rsc->id, pcmk__node_name(node));
218 0 : return;
219 : }
220 :
221 0 : pcmk__rsc_debug(rsc, "Resource %s has %d stickiness on %s",
222 : rsc->id, rsc->stickiness, pcmk__node_name(node));
223 0 : resource_location(rsc, node, rsc->stickiness, "stickiness", rsc->cluster);
224 : }
225 :
226 : /*!
227 : * \internal
228 : * \brief Apply shutdown locks for all resources as appropriate
229 : *
230 : * \param[in,out] scheduler Scheduler data
231 : */
232 : static void
233 0 : apply_shutdown_locks(pcmk_scheduler_t *scheduler)
234 : {
235 0 : if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
236 0 : return;
237 : }
238 0 : for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
239 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
240 :
241 0 : rsc->cmds->shutdown_lock(rsc);
242 : }
243 : }
244 :
245 : /*!
246 : * \internal
247 : * \brief Calculate the number of available nodes in the cluster
248 : *
249 : * \param[in,out] scheduler Scheduler data
250 : */
251 : static void
252 0 : count_available_nodes(pcmk_scheduler_t *scheduler)
253 : {
254 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_no_compat)) {
255 0 : return;
256 : }
257 :
258 : // @COMPAT for API backward compatibility only (cluster does not use value)
259 0 : for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
260 0 : pcmk_node_t *node = (pcmk_node_t *) iter->data;
261 :
262 0 : if ((node != NULL) && (node->weight >= 0) && node->details->online
263 0 : && (node->details->type != node_ping)) {
264 0 : scheduler->max_valid_nodes++;
265 : }
266 : }
267 0 : crm_trace("Online node count: %d", scheduler->max_valid_nodes);
268 : }
269 :
270 : /*
271 : * \internal
272 : * \brief Apply node-specific scheduling criteria
273 : *
274 : * After the CIB has been unpacked, process node-specific scheduling criteria
275 : * including shutdown locks, location constraints, resource stickiness,
276 : * migration thresholds, and exclusive resource discovery.
277 : */
278 : static void
279 0 : apply_node_criteria(pcmk_scheduler_t *scheduler)
280 : {
281 0 : crm_trace("Applying node-specific scheduling criteria");
282 0 : apply_shutdown_locks(scheduler);
283 0 : count_available_nodes(scheduler);
284 0 : pcmk__apply_locations(scheduler);
285 0 : g_list_foreach(scheduler->resources, apply_stickiness, NULL);
286 :
287 0 : for (GList *node_iter = scheduler->nodes; node_iter != NULL;
288 0 : node_iter = node_iter->next) {
289 0 : for (GList *rsc_iter = scheduler->resources; rsc_iter != NULL;
290 0 : rsc_iter = rsc_iter->next) {
291 0 : check_failure_threshold(rsc_iter->data, node_iter->data);
292 0 : apply_exclusive_discovery(rsc_iter->data, node_iter->data);
293 : }
294 : }
295 0 : }
296 :
297 : /*!
298 : * \internal
299 : * \brief Assign resources to nodes
300 : *
301 : * \param[in,out] scheduler Scheduler data
302 : */
303 : static void
304 0 : assign_resources(pcmk_scheduler_t *scheduler)
305 : {
306 0 : GList *iter = NULL;
307 :
308 0 : crm_trace("Assigning resources to nodes");
309 :
310 0 : if (!pcmk__str_eq(scheduler->placement_strategy, PCMK_VALUE_DEFAULT,
311 : pcmk__str_casei)) {
312 0 : pcmk__sort_resources(scheduler);
313 : }
314 0 : pcmk__show_node_capacities("Original", scheduler);
315 :
316 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
317 : /* Assign remote connection resources first (which will also assign any
318 : * colocation dependencies). If the connection is migrating, always
319 : * prefer the partial migration target.
320 : */
321 0 : for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
322 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
323 :
324 0 : if (rsc->is_remote_node) {
325 0 : pcmk__rsc_trace(rsc, "Assigning remote connection resource '%s'",
326 : rsc->id);
327 0 : rsc->cmds->assign(rsc, rsc->partial_migration_target, true);
328 : }
329 : }
330 : }
331 :
332 : /* now do the rest of the resources */
333 0 : for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
334 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
335 :
336 0 : if (!rsc->is_remote_node) {
337 0 : pcmk__rsc_trace(rsc, "Assigning %s resource '%s'",
338 : rsc->xml->name, rsc->id);
339 0 : rsc->cmds->assign(rsc, NULL, true);
340 : }
341 : }
342 :
343 0 : pcmk__show_node_capacities("Remaining", scheduler);
344 0 : }
345 :
346 : /*!
347 : * \internal
348 : * \brief Schedule fail count clearing on online nodes if resource is orphaned
349 : *
350 : * \param[in,out] data Resource to check
351 : * \param[in] user_data Ignored
352 : */
353 : static void
354 0 : clear_failcounts_if_orphaned(gpointer data, gpointer user_data)
355 : {
356 0 : pcmk_resource_t *rsc = data;
357 :
358 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
359 0 : return;
360 : }
361 0 : crm_trace("Clear fail counts for orphaned resource %s", rsc->id);
362 :
363 : /* There's no need to recurse into rsc->children because those
364 : * should just be unassigned clone instances.
365 : */
366 :
367 0 : for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) {
368 0 : pcmk_node_t *node = (pcmk_node_t *) iter->data;
369 0 : pcmk_action_t *clear_op = NULL;
370 :
371 0 : if (!node->details->online) {
372 0 : continue;
373 : }
374 0 : if (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) == 0) {
375 0 : continue;
376 : }
377 :
378 0 : clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
379 : rsc->cluster);
380 :
381 : /* We can't use order_action_then_stop() here because its
382 : * pcmk__ar_guest_allowed breaks things
383 : */
384 0 : pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc),
385 : NULL, pcmk__ar_ordered, rsc->cluster);
386 : }
387 : }
388 :
389 : /*!
390 : * \internal
391 : * \brief Schedule any resource actions needed
392 : *
393 : * \param[in,out] scheduler Scheduler data
394 : */
395 : static void
396 0 : schedule_resource_actions(pcmk_scheduler_t *scheduler)
397 : {
398 : // Process deferred action checks
399 0 : pe__foreach_param_check(scheduler, check_params);
400 0 : pe__free_param_checks(scheduler);
401 :
402 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
403 0 : crm_trace("Scheduling probes");
404 0 : pcmk__schedule_probes(scheduler);
405 : }
406 :
407 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
408 0 : g_list_foreach(scheduler->resources, clear_failcounts_if_orphaned,
409 : NULL);
410 : }
411 :
412 0 : crm_trace("Scheduling resource actions");
413 0 : for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
414 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
415 :
416 0 : rsc->cmds->create_actions(rsc);
417 : }
418 0 : }
419 :
420 : /*!
421 : * \internal
422 : * \brief Check whether a resource or any of its descendants are managed
423 : *
424 : * \param[in] rsc Resource to check
425 : *
426 : * \return true if resource or any descendant is managed, otherwise false
427 : */
428 : static bool
429 0 : is_managed(const pcmk_resource_t *rsc)
430 : {
431 0 : if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
432 0 : return true;
433 : }
434 0 : for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
435 0 : if (is_managed((pcmk_resource_t *) iter->data)) {
436 0 : return true;
437 : }
438 : }
439 0 : return false;
440 : }
441 :
442 : /*!
443 : * \internal
444 : * \brief Check whether any resources in the cluster are managed
445 : *
446 : * \param[in] scheduler Scheduler data
447 : *
448 : * \return true if any resource is managed, otherwise false
449 : */
450 : static bool
451 0 : any_managed_resources(const pcmk_scheduler_t *scheduler)
452 : {
453 0 : for (const GList *iter = scheduler->resources;
454 0 : iter != NULL; iter = iter->next) {
455 0 : if (is_managed((const pcmk_resource_t *) iter->data)) {
456 0 : return true;
457 : }
458 : }
459 0 : return false;
460 : }
461 :
462 : /*!
463 : * \internal
464 : * \brief Check whether a node requires fencing
465 : *
466 : * \param[in] node Node to check
467 : * \param[in] have_managed Whether any resource in cluster is managed
468 : *
469 : * \return true if \p node should be fenced, otherwise false
470 : */
471 : static bool
472 0 : needs_fencing(const pcmk_node_t *node, bool have_managed)
473 : {
474 0 : return have_managed && node->details->unclean
475 0 : && pe_can_fence(node->details->data_set, node);
476 : }
477 :
478 : /*!
479 : * \internal
480 : * \brief Check whether a node requires shutdown
481 : *
482 : * \param[in] node Node to check
483 : *
484 : * \return true if \p node should be shut down, otherwise false
485 : */
486 : static bool
487 0 : needs_shutdown(const pcmk_node_t *node)
488 : {
489 0 : if (pcmk__is_pacemaker_remote_node(node)) {
490 : /* Do not send shutdown actions for Pacemaker Remote nodes.
491 : * @TODO We might come up with a good use for this in the future.
492 : */
493 0 : return false;
494 : }
495 0 : return node->details->online && node->details->shutdown;
496 : }
497 :
498 : /*!
499 : * \internal
500 : * \brief Track and order non-DC fencing
501 : *
502 : * \param[in,out] list List of existing non-DC fencing actions
503 : * \param[in,out] action Fencing action to prepend to \p list
504 : * \param[in] scheduler Scheduler data
505 : *
506 : * \return (Possibly new) head of \p list
507 : */
508 : static GList *
509 0 : add_nondc_fencing(GList *list, pcmk_action_t *action,
510 : const pcmk_scheduler_t *scheduler)
511 : {
512 0 : if (!pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)
513 0 : && (list != NULL)) {
514 : /* Concurrent fencing is disabled, so order each non-DC
515 : * fencing in a chain. If there is any DC fencing or
516 : * shutdown, it will be ordered after the last action in the
517 : * chain later.
518 : */
519 0 : order_actions((pcmk_action_t *) list->data, action, pcmk__ar_ordered);
520 : }
521 0 : return g_list_prepend(list, action);
522 : }
523 :
524 : /*!
525 : * \internal
526 : * \brief Schedule a node for fencing
527 : *
528 : * \param[in,out] node Node that requires fencing
529 : */
530 : static pcmk_action_t *
531 0 : schedule_fencing(pcmk_node_t *node)
532 : {
533 0 : pcmk_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean",
534 0 : FALSE, node->details->data_set);
535 :
536 0 : pcmk__sched_warn("Scheduling node %s for fencing", pcmk__node_name(node));
537 0 : pcmk__order_vs_fence(fencing, node->details->data_set);
538 0 : return fencing;
539 : }
540 :
541 : /*!
542 : * \internal
543 : * \brief Create and order node fencing and shutdown actions
544 : *
545 : * \param[in,out] scheduler Scheduler data
546 : */
547 : static void
548 0 : schedule_fencing_and_shutdowns(pcmk_scheduler_t *scheduler)
549 : {
550 0 : pcmk_action_t *dc_down = NULL;
551 0 : bool integrity_lost = false;
552 0 : bool have_managed = any_managed_resources(scheduler);
553 0 : GList *fencing_ops = NULL;
554 0 : GList *shutdown_ops = NULL;
555 :
556 0 : crm_trace("Scheduling fencing and shutdowns as needed");
557 0 : if (!have_managed) {
558 0 : crm_notice("No fencing will be done until there are resources "
559 : "to manage");
560 : }
561 :
562 : // Check each node for whether it needs fencing or shutdown
563 0 : for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
564 0 : pcmk_node_t *node = (pcmk_node_t *) iter->data;
565 0 : pcmk_action_t *fencing = NULL;
566 :
567 : /* Guest nodes are "fenced" by recovering their container resource,
568 : * so handle them separately.
569 : */
570 0 : if (pcmk__is_guest_or_bundle_node(node)) {
571 0 : if (node->details->remote_requires_reset && have_managed
572 0 : && pe_can_fence(scheduler, node)) {
573 0 : pcmk__fence_guest(node);
574 : }
575 0 : continue;
576 : }
577 :
578 0 : if (needs_fencing(node, have_managed)) {
579 0 : fencing = schedule_fencing(node);
580 :
581 : // Track DC and non-DC fence actions separately
582 0 : if (node->details->is_dc) {
583 0 : dc_down = fencing;
584 : } else {
585 0 : fencing_ops = add_nondc_fencing(fencing_ops, fencing,
586 : scheduler);
587 : }
588 :
589 0 : } else if (needs_shutdown(node)) {
590 0 : pcmk_action_t *down_op = pcmk__new_shutdown_action(node);
591 :
592 : // Track DC and non-DC shutdown actions separately
593 0 : if (node->details->is_dc) {
594 0 : dc_down = down_op;
595 : } else {
596 0 : shutdown_ops = g_list_prepend(shutdown_ops, down_op);
597 : }
598 : }
599 :
600 0 : if ((fencing == NULL) && node->details->unclean) {
601 0 : integrity_lost = true;
602 0 : pcmk__config_warn("Node %s is unclean but cannot be fenced",
603 : pcmk__node_name(node));
604 : }
605 : }
606 :
607 0 : if (integrity_lost) {
608 0 : if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
609 0 : pcmk__config_warn("Resource functionality and data integrity "
610 : "cannot be guaranteed (configure, enable, "
611 : "and test fencing to correct this)");
612 :
613 0 : } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
614 0 : crm_notice("Unclean nodes will not be fenced until quorum is "
615 : "attained or " PCMK_OPT_NO_QUORUM_POLICY " is set to "
616 : PCMK_VALUE_IGNORE);
617 : }
618 : }
619 :
620 0 : if (dc_down != NULL) {
621 : /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
622 : * DC elections. However, we don't want to order non-DC shutdowns before
623 : * a DC *fencing*, because even though we don't want a node that's
624 : * shutting down to become DC, the DC fencing could be ordered before a
625 : * clone stop that's also ordered before the shutdowns, thus leading to
626 : * a graph loop.
627 : */
628 0 : if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN,
629 : pcmk__str_none)) {
630 0 : pcmk__order_after_each(dc_down, shutdown_ops);
631 : }
632 :
633 : // Order any non-DC fencing before any DC fencing or shutdown
634 :
635 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
636 : /* With concurrent fencing, order each non-DC fencing action
637 : * separately before any DC fencing or shutdown.
638 : */
639 0 : pcmk__order_after_each(dc_down, fencing_ops);
640 0 : } else if (fencing_ops != NULL) {
641 : /* Without concurrent fencing, the non-DC fencing actions are
642 : * already ordered relative to each other, so we just need to order
643 : * the DC fencing after the last action in the chain (which is the
644 : * first item in the list).
645 : */
646 0 : order_actions((pcmk_action_t *) fencing_ops->data, dc_down,
647 : pcmk__ar_ordered);
648 : }
649 : }
650 0 : g_list_free(fencing_ops);
651 0 : g_list_free(shutdown_ops);
652 0 : }
653 :
654 : static void
655 0 : log_resource_details(pcmk_scheduler_t *scheduler)
656 : {
657 0 : pcmk__output_t *out = scheduler->priv;
658 0 : GList *all = NULL;
659 :
660 : /* Due to the `crm_mon --node=` feature, out->message() for all the
661 : * resource-related messages expects a list of nodes that we are allowed to
662 : * output information for. Here, we create a wildcard to match all nodes.
663 : */
664 0 : all = g_list_prepend(all, (gpointer) "*");
665 :
666 0 : for (GList *item = scheduler->resources; item != NULL; item = item->next) {
667 0 : pcmk_resource_t *rsc = (pcmk_resource_t *) item->data;
668 :
669 : // Log all resources except inactive orphans
670 0 : if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)
671 0 : || (rsc->role != pcmk_role_stopped)) {
672 0 : out->message(out, pcmk__map_element_name(rsc->xml), 0UL, rsc, all,
673 : all);
674 : }
675 : }
676 :
677 0 : g_list_free(all);
678 0 : }
679 :
680 : static void
681 0 : log_all_actions(pcmk_scheduler_t *scheduler)
682 : {
683 : /* This only ever outputs to the log, so ignore whatever output object was
684 : * previously set and just log instead.
685 : */
686 0 : pcmk__output_t *prev_out = scheduler->priv;
687 0 : pcmk__output_t *out = NULL;
688 :
689 0 : if (pcmk__log_output_new(&out) != pcmk_rc_ok) {
690 0 : return;
691 : }
692 :
693 0 : pe__register_messages(out);
694 0 : pcmk__register_lib_messages(out);
695 0 : pcmk__output_set_log_level(out, LOG_NOTICE);
696 0 : scheduler->priv = out;
697 :
698 0 : out->begin_list(out, NULL, NULL, "Actions");
699 0 : pcmk__output_actions(scheduler);
700 0 : out->end_list(out);
701 0 : out->finish(out, CRM_EX_OK, true, NULL);
702 0 : pcmk__output_free(out);
703 :
704 0 : scheduler->priv = prev_out;
705 : }
706 :
707 : /*!
708 : * \internal
709 : * \brief Log all required but unrunnable actions at trace level
710 : *
711 : * \param[in] scheduler Scheduler data
712 : */
713 : static void
714 0 : log_unrunnable_actions(const pcmk_scheduler_t *scheduler)
715 : {
716 0 : const uint64_t flags = pcmk_action_optional
717 : |pcmk_action_runnable
718 : |pcmk_action_pseudo;
719 :
720 0 : crm_trace("Required but unrunnable actions:");
721 0 : for (const GList *iter = scheduler->actions;
722 0 : iter != NULL; iter = iter->next) {
723 :
724 0 : const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
725 :
726 0 : if (!pcmk_any_flags_set(action->flags, flags)) {
727 0 : pcmk__log_action("\t", action, true);
728 : }
729 : }
730 0 : }
731 :
732 : /*!
733 : * \internal
734 : * \brief Unpack the CIB for scheduling
735 : *
736 : * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked)
737 : * \param[in] flags Scheduler flags to set in addition to defaults
738 : * \param[in,out] scheduler Scheduler data
739 : */
740 : static void
741 0 : unpack_cib(xmlNode *cib, unsigned long long flags, pcmk_scheduler_t *scheduler)
742 : {
743 0 : const char* localhost_save = NULL;
744 :
745 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_have_status)) {
746 0 : crm_trace("Reusing previously calculated cluster status");
747 0 : pcmk__set_scheduler_flags(scheduler, flags);
748 0 : return;
749 : }
750 :
751 0 : if (scheduler->localhost) {
752 0 : localhost_save = scheduler->localhost;
753 : }
754 :
755 0 : CRM_ASSERT(cib != NULL);
756 0 : crm_trace("Calculating cluster status");
757 :
758 : /* This will zero the entire struct without freeing anything first, so
759 : * callers should never call pcmk__schedule_actions() with a populated data
760 : * set unless pcmk_sched_have_status is set (i.e. cluster_status() was
761 : * previously called, whether directly or via pcmk__schedule_actions()).
762 : */
763 0 : set_working_set_defaults(scheduler);
764 :
765 0 : if (localhost_save) {
766 0 : scheduler->localhost = localhost_save;
767 : }
768 :
769 0 : pcmk__set_scheduler_flags(scheduler, flags);
770 0 : scheduler->input = cib;
771 0 : cluster_status(scheduler); // Sets pcmk_sched_have_status
772 : }
773 :
774 : /*!
775 : * \internal
776 : * \brief Run the scheduler for a given CIB
777 : *
778 : * \param[in,out] cib CIB XML to use as scheduler input
779 : * \param[in] flags Scheduler flags to set in addition to defaults
780 : * \param[in,out] scheduler Scheduler data
781 : */
782 : void
783 0 : pcmk__schedule_actions(xmlNode *cib, unsigned long long flags,
784 : pcmk_scheduler_t *scheduler)
785 : {
786 0 : unpack_cib(cib, flags, scheduler);
787 0 : pcmk__set_assignment_methods(scheduler);
788 0 : pcmk__apply_node_health(scheduler);
789 0 : pcmk__unpack_constraints(scheduler);
790 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_validate_only)) {
791 0 : return;
792 : }
793 :
794 0 : if (!pcmk_is_set(scheduler->flags, pcmk_sched_location_only)
795 0 : && pcmk__is_daemon) {
796 0 : log_resource_details(scheduler);
797 : }
798 :
799 0 : apply_node_criteria(scheduler);
800 :
801 0 : if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
802 0 : return;
803 : }
804 :
805 0 : pcmk__create_internal_constraints(scheduler);
806 0 : pcmk__handle_rsc_config_changes(scheduler);
807 0 : assign_resources(scheduler);
808 0 : schedule_resource_actions(scheduler);
809 :
810 : /* Remote ordering constraints need to happen prior to calculating fencing
811 : * because it is one more place we can mark nodes as needing fencing.
812 : */
813 0 : pcmk__order_remote_connection_actions(scheduler);
814 :
815 0 : schedule_fencing_and_shutdowns(scheduler);
816 0 : pcmk__apply_orderings(scheduler);
817 0 : log_all_actions(scheduler);
818 0 : pcmk__create_graph(scheduler);
819 :
820 0 : if (get_crm_log_level() == LOG_TRACE) {
821 0 : log_unrunnable_actions(scheduler);
822 : }
823 : }
824 :
825 : /*!
826 : * \internal
827 : * \brief Initialize scheduler data
828 : *
829 : * Make our own copies of the CIB XML and date/time object, if they're not
830 : * \c NULL. This way we don't have to take ownership of the objects passed via
831 : * the API.
832 : *
833 : * This function is most useful for public API functions that want the caller
834 : * to retain ownership of the CIB object
835 : *
836 : * \param[in,out] out Output object
837 : * \param[in] input The CIB XML to check (if \c NULL, use current CIB)
838 : * \param[in] date Date and time to use in the scheduler (if \c NULL,
839 : * use current date and time). This can be used for
840 : * checking whether a rule is in effect at a certa
841 : * date and time.
842 : * \param[out] scheduler Where to store initialized scheduler data
843 : *
844 : * \return Standard Pacemaker return code
845 : */
846 : int
847 0 : pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
848 : pcmk_scheduler_t **scheduler)
849 : {
850 : // Allows for cleaner syntax than dereferencing the scheduler argument
851 0 : pcmk_scheduler_t *new_scheduler = NULL;
852 :
853 0 : new_scheduler = pe_new_working_set();
854 0 : if (new_scheduler == NULL) {
855 0 : return ENOMEM;
856 : }
857 :
858 0 : pcmk__set_scheduler_flags(new_scheduler,
859 : pcmk_sched_no_counts|pcmk_sched_no_compat);
860 :
861 : // Populate the scheduler data
862 :
863 : // Make our own copy of the given input or fetch the CIB and use that
864 0 : if (input != NULL) {
865 0 : new_scheduler->input = pcmk__xml_copy(NULL, input);
866 0 : if (new_scheduler->input == NULL) {
867 0 : out->err(out, "Failed to copy input XML");
868 0 : pe_free_working_set(new_scheduler);
869 0 : return ENOMEM;
870 : }
871 :
872 : } else {
873 0 : int rc = cib__signon_query(out, NULL, &(new_scheduler->input));
874 :
875 0 : if (rc != pcmk_rc_ok) {
876 0 : pe_free_working_set(new_scheduler);
877 0 : return rc;
878 : }
879 : }
880 :
881 : // Make our own copy of the given crm_time_t object; otherwise
882 : // cluster_status() populates with the current time
883 0 : if (date != NULL) {
884 : // pcmk_copy_time() guarantees non-NULL
885 0 : new_scheduler->now = pcmk_copy_time(date);
886 : }
887 :
888 : // Unpack everything
889 0 : cluster_status(new_scheduler);
890 0 : *scheduler = new_scheduler;
891 :
892 0 : return pcmk_rc_ok;
893 : }
|