LCOV - code coverage report
Current view: top level - pacemaker - pcmk_sched_fencing.c (source / functions) Hit Total Coverage
Test: Pacemaker code coverage Lines: 0 138 0.0 %
Date: 2024-05-07 11:09:47 Functions: 0 9 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2004-2024 the Pacemaker project contributors
       3             :  *
       4             :  * The version control history for this file may have further details.
       5             :  *
       6             :  * This source code is licensed under the GNU General Public License version 2
       7             :  * or later (GPLv2+) WITHOUT ANY WARRANTY.
       8             :  */
       9             : 
      10             : #include <crm_internal.h>
      11             : 
      12             : #include <glib.h>
      13             : 
      14             : #include <crm/crm.h>
      15             : #include <crm/pengine/status.h>
      16             : #include <pacemaker-internal.h>
      17             : #include "libpacemaker_private.h"
      18             : 
      19             : /*!
      20             :  * \internal
      21             :  * \brief Check whether a resource is known on a particular node
      22             :  *
                     :  * The resource's own \c known_on table is looked up by the node's ID
                     :  * first. If nothing is found there and the resource is a primitive
                     :  * whose direct parent is an anonymous clone, the parent's \c known_on
                     :  * table is looked up by the same ID.
                     :  *
      23             :  * \param[in] rsc   Resource to check
      24             :  * \param[in] node  Node to check
      25             :  *
      26             :  * \return TRUE if resource (or parent if an anonymous clone) is known
                     :  *         on \p node, otherwise FALSE
      27             :  */
      28             : static bool
      29           0 : rsc_is_known_on(const pcmk_resource_t *rsc, const pcmk_node_t *node)
      30             : {
      31           0 :    if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
      32           0 :        return TRUE;
      33             : 
      34           0 :    } else if ((rsc->variant == pcmk_rsc_variant_primitive)
      35           0 :               && pcmk__is_anonymous_clone(rsc->parent)
      36           0 :               && (g_hash_table_lookup(rsc->parent->known_on,
      37           0 :                                       node->details->id) != NULL)) {
      38             :        /* We check only the parent, not the uber-parent, because we cannot
      39             :         * assume that the resource is known if it is in an anonymously cloned
      40             :         * group (which may be only partially known).
      41             :         */
      42           0 :        return TRUE;
      43             :    }
      44           0 :    return FALSE;
      45             : }
      46             : 
      47             : /*!
      48             :  * \internal
      49             :  * \brief Order a resource's start and promote actions relative to fencing
      50             :  *
                     :  * Every action of \p rsc is examined according to its \c needs value:
                     :  * - an action that requires fencing is ordered after \p stonith_op;
                     :  * - a start that requires only quorum is ordered after \p stonith_op
                     :  *   when the fence target is among the resource's allowed nodes and
                     :  *   the resource is not known on that target;
                     :  * - an action that requires nothing is left alone.
                     :  *
                     :  * The function returns without ordering anything if \p stonith_op or its
                     :  * node is NULL (the CRM_CHECK guard's failure action is \c return).
                     :  *
      51             :  * \param[in,out] rsc         Resource to be ordered
      52             :  * \param[in,out] stonith_op  Fence action
      53             :  */
      54             : static void
      55           0 : order_start_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
      56             : {
      57             :     pcmk_node_t *target;
      58             : 
      59           0 :     CRM_CHECK(stonith_op && stonith_op->node, return);
      60           0 :     target = stonith_op->node;
      61             : 
      62           0 :     for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
      63           0 :         pcmk_action_t *action = iter->data;
      64             : 
      65           0 :         switch (action->needs) {
      66           0 :             case pcmk_requires_nothing:
      67             :                 // Anything other than start or promote requires nothing
      68           0 :                 break;
      69             : 
      70           0 :             case pcmk_requires_fencing:
      71           0 :                 order_actions(stonith_op, action, pcmk__ar_ordered);
      72           0 :                 break;
      73             : 
      74           0 :             case pcmk_requires_quorum:
      75           0 :                 if (pcmk__str_eq(action->task, PCMK_ACTION_START,
      76             :                                  pcmk__str_none)
      77           0 :                     && (g_hash_table_lookup(rsc->allowed_nodes,
      78           0 :                                             target->details->id) != NULL)
      79           0 :                     && !rsc_is_known_on(rsc, target)) {
      80             : 
      81             :                     /* If we don't know the status of the resource on the node
      82             :                      * we're about to shoot, we have to assume it may be active
      83             :                      * there. Order the resource start after the fencing. This
      84             :                      * is analogous to waiting for all the probes for a resource
      85             :                      * to complete before starting it.
      86             :                      *
      87             :                      * The most likely explanation is that the DC died and took
      88             :                      * its status with it.
      89             :                      */
      90           0 :                     pcmk__rsc_debug(rsc, "Ordering %s after %s recovery",
      91             :                                     action->uuid, pcmk__node_name(target));
      92           0 :                     order_actions(stonith_op, action,
      93             :                                   pcmk__ar_ordered
      94             :                                   |pcmk__ar_unrunnable_first_blocks);
      95             :                 }
      96           0 :                 break;
      97             :         }
      98             :     }
      99             : }
     100             : 
     101             : /*!
     102             :  * \internal
     103             :  * \brief Order a resource's stop and demote actions relative to fencing
     104             :  *
                     :  * Stop actions of \p rsc on the fence target are turned into runnable
                     :  * pseudo-actions. Demote actions on the target are turned into runnable
                     :  * pseudo-actions when the action's node is offline or unclean, or the
                     :  * resource is flagged as failed.
                     :  *
                     :  * When the resource requires fencing, or the target is a guest or bundle
                     :  * node, those implied actions are also ordered after \p stonith_op
                     :  * (for a bundled resource only the top-level parent's stop is ordered).
                     :  *
                     :  * The function returns without doing anything if \p stonith_op or its
                     :  * node is NULL (the CRM_CHECK guard's failure action is \c return).
                     :  *
     105             :  * \param[in,out] rsc         Resource to be ordered
     106             :  * \param[in,out] stonith_op  Fence action
     107             :  */
     108             : static void
     109           0 : order_stop_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
     110             : {
     111           0 :     GList *iter = NULL;
     112           0 :     GList *action_list = NULL;
     113           0 :     bool order_implicit = false;
     114             : 
     115           0 :     pcmk_resource_t *top = uber_parent(rsc);
     116           0 :     pcmk_action_t *parent_stop = NULL;
     117             :     pcmk_node_t *target;
     118             : 
     119           0 :     CRM_CHECK(stonith_op && stonith_op->node, return);
     120           0 :     target = stonith_op->node;
     121             : 
     122             :     /* Get a list of stop actions potentially implied by the fencing */
     123           0 :     action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE);
     124             : 
     125             :     /* If resource requires fencing, implicit actions must occur after fencing.
     126             :      *
     127             :      * Implied stops and demotes of resources running on guest nodes are always
     128             :      * ordered after fencing, even if the resource does not require fencing,
     129             :      * because guest node "fencing" is actually just a resource stop.
     130             :      */
     131           0 :     if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
     132           0 :         || pcmk__is_guest_or_bundle_node(target)) {
     133             : 
     134           0 :         order_implicit = true;
     135             :     }
     136             : 
     137           0 :     if (action_list && order_implicit) {
     138           0 :         parent_stop = find_first_action(top->actions, NULL, PCMK_ACTION_STOP,
     139             :                                         NULL);
     140             :     }
     141             : 
     142           0 :     for (iter = action_list; iter != NULL; iter = iter->next) {
     143           0 :         pcmk_action_t *action = iter->data;
     144             : 
     145             :         // The stop would never complete, so convert it into a pseudo-action.
     146           0 :         pcmk__set_action_flags(action, pcmk_action_pseudo|pcmk_action_runnable);
     147             : 
     148           0 :         if (order_implicit) {
     149             :             /* Order the stonith before the parent stop (if any).
     150             :              *
     151             :              * Also order the stonith before the resource stop, unless the
     152             :              * resource is inside a bundle -- that would cause a graph loop.
     153             :              * We can rely on the parent stop's ordering instead.
     154             :              *
     155             :              * User constraints must not order a resource in a guest node
     156             :              * relative to the guest node container resource. The
     157             :              * pcmk__ar_guest_allowed flag marks constraints as generated by the
     158             :              * cluster and thus immune to that check (and is irrelevant if
     159             :              * target is not a guest).
     160             :              */
     161           0 :             if (!pcmk__is_bundled(rsc)) {
     162           0 :                 order_actions(stonith_op, action, pcmk__ar_guest_allowed);
     163             :             }
     164           0 :             order_actions(stonith_op, parent_stop, pcmk__ar_guest_allowed);
     165             :         }
     166             : 
     167           0 :         if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
     168           0 :             crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
     169             :                        rsc->id, (order_implicit? "after" : "because"),
     170             :                        pcmk__node_name(target));
     171             :         } else {
     172           0 :             crm_info("%s is implicit %s %s is fenced",
     173             :                      action->uuid, (order_implicit? "after" : "because"),
     174             :                      pcmk__node_name(target));
     175             :         }
     176             : 
     177           0 :         if (pcmk_is_set(rsc->flags, pcmk_rsc_notify)) {
     178           0 :             pe__order_notifs_after_fencing(action, rsc, stonith_op);
     179             :         }
     180             : 
     181             : #if 0
     182             :         /* It might be a good idea to stop healthy resources on a node about to
     183             :          * be fenced, when possible.
     184             :          *
     185             :          * However, fencing must be done before a failed resource's
     186             :          * (pseudo-)stop action, so that could create a loop. For example, given
     187             :          * a group of A and B running on node N with a failed stop of B:
     188             :          *
     189             :          *    fence N -> stop B (pseudo-op) -> stop A -> fence N
     190             :          *
     191             :          * The block below creates the stop A -> fence N ordering and therefore
     192             :          * must (at least for now) be disabled. Instead, run the block above and
     193             :          * treat all resources on N as B would be (i.e., as a pseudo-op after
     194             :          * the fencing).
     195             :          *
     196             :          * @TODO Maybe break the "A requires B" dependency in
     197             :          * pcmk__update_action_for_orderings() and use this block for healthy
     198             :          * resources instead of the above.
                     :          *
                     :          * NOTE(review): the disabled code below refers to `node`, which is
                     :          * not declared in this function (the fence target here is
                     :          * `target`), so it would need updating before being re-enabled.
     199             :          */
     200             :          crm_info("Moving healthy resource %s off %s before fencing",
     201             :                   rsc->id, pcmk__node_name(node));
     202             :          pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
     203             :                             strdup(PCMK_ACTION_STONITH), stonith_op,
     204             :                             pcmk__ar_ordered, rsc->cluster);
     205             : #endif
     206             :     }
     207             : 
     208           0 :     g_list_free(action_list);
     209             : 
     210             :     /* Get a list of demote actions potentially implied by the fencing */
     211           0 :     action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE);
     212             : 
     213           0 :     for (iter = action_list; iter != NULL; iter = iter->next) {
     214           0 :         pcmk_action_t *action = iter->data;
     215             : 
     216           0 :         if (!(action->node->details->online) || action->node->details->unclean
     217           0 :             || pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
     218             : 
     219           0 :             if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
     220           0 :                 pcmk__rsc_info(rsc,
     221             :                                "Demote of failed resource %s is implicit "
     222             :                                "after %s is fenced",
     223             :                                rsc->id, pcmk__node_name(target));
     224             :             } else {
     225           0 :                 pcmk__rsc_info(rsc, "%s is implicit after %s is fenced",
     226             :                                action->uuid, pcmk__node_name(target));
     227             :             }
     228             : 
     229             :             /* The demote would never complete and is now implied by the
     230             :              * fencing, so convert it into a pseudo-action.
     231             :              */
     232           0 :             pcmk__set_action_flags(action,
     233             :                                    pcmk_action_pseudo|pcmk_action_runnable);
     234             : 
     235           0 :             if (pcmk__is_bundled(rsc)) {
     236             :                 // Recovery will be ordered as usual after parent's implied stop
     237             : 
     238           0 :             } else if (order_implicit) {
     239           0 :                 order_actions(stonith_op, action,
     240             :                               pcmk__ar_guest_allowed|pcmk__ar_ordered);
     241             :             }
     242             :         }
     243             :     }
     244             : 
     245           0 :     g_list_free(action_list);
     246             : }
     247             : 
     248             : /*!
     249             :  * \internal
     250             :  * \brief Order resource actions properly relative to fencing
     251             :  *
                     :  * A resource with children is handled by recursing into each child; no
                     :  * ordering is created for the parent itself here. A resource without
                     :  * children is skipped if it is unmanaged; otherwise its starts and then
                     :  * its stops/demotes are ordered relative to \p stonith_op.
                     :  *
     252             :  * \param[in,out] rsc         Resource whose actions should be ordered
     253             :  * \param[in,out] stonith_op  Fencing operation to be ordered against
     254             :  */
     255             : static void
     256           0 : rsc_stonith_ordering(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
     257             : {
     258           0 :     if (rsc->children) {
     259           0 :         for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
     260           0 :             pcmk_resource_t *child_rsc = iter->data;
     261             : 
     262           0 :             rsc_stonith_ordering(child_rsc, stonith_op);
     263             :         }
     264             : 
     265           0 :     } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
     266           0 :         pcmk__rsc_trace(rsc,
     267             :                         "Skipping fencing constraints for unmanaged resource: "
     268             :                         "%s", rsc->id);
     269             : 
     270             :     } else {
     271           0 :         order_start_vs_fencing(rsc, stonith_op);
     272           0 :         order_stop_vs_fencing(rsc, stonith_op);
     273             :     }
     274           0 : }
     275             : 
     276             : /*!
     277             :  * \internal
     278             :  * \brief Order all actions appropriately relative to a fencing operation
     279             :  *
     280             :  * Ensure start operations of affected resources are ordered after fencing,
     281             :  * imply stop and demote operations of affected resources by marking them as
     282             :  * pseudo-actions, etc.
     283             :  *
                     :  * Each entry of the scheduler's \c resources list is passed to
                     :  * rsc_stonith_ordering(). The function returns without doing anything if
                     :  * either argument is NULL (the CRM_CHECK guard's failure action is
                     :  * \c return).
                     :  *
     284             :  * \param[in,out] stonith_op  Fencing operation
     285             :  * \param[in,out] scheduler   Scheduler data
     286             :  */
     287             : void
     288           0 : pcmk__order_vs_fence(pcmk_action_t *stonith_op, pcmk_scheduler_t *scheduler)
     289             : {
     290           0 :     CRM_CHECK(stonith_op && scheduler, return);
     291           0 :     for (GList *r = scheduler->resources; r != NULL; r = r->next) {
     292           0 :         rsc_stonith_ordering((pcmk_resource_t *) r->data, stonith_op);
     293             :     }
     294             : }
     295             : 
     296             : /*!
     297             :  * \internal
     298             :  * \brief Order an action after unfencing
     299             :  *
                     :  * Nothing is done unless \p rsc is flagged as needing unfencing, or is a
                     :  * fence device while the cluster has unfencing enabled. In those cases
                     :  * the node's "on" fence operation is obtained and ordered before
                     :  * \p action using \p order, and unfencing is additionally triggered if
                     :  * pcmk__node_unfenced() reports that the node has not been unfenced.
                     :  *
     300             :  * \param[in]     rsc       Resource that action is for
     301             :  * \param[in,out] node      Node that action is on
     302             :  * \param[in,out] action    Action to be ordered after unfencing
     303             :  * \param[in]     order     Ordering flags
     304             :  */
     305             : void
     306           0 : pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node,
     307             :                        pcmk_action_t *action,
     308             :                        enum pcmk__action_relation_flags order)
     309             : {
     310             :     /* When unfencing is in use, we order unfence actions before any probe or
     311             :      * start of resources that require unfencing, and also of fence devices.
     312             :      *
     313             :      * This might seem to violate the principle that fence devices require
     314             :      * only quorum. However, fence agents that unfence often don't have enough
     315             :      * information to even probe or start unless the node is first unfenced.
     316             :      */
     317           0 :     if ((pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
     318           0 :          && pcmk_is_set(rsc->cluster->flags, pcmk_sched_enable_unfencing))
     319           0 :         || pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
     320             : 
     321             :         /* Start with an optional ordering. Requiring unfencing would result in
     322             :          * the node being unfenced, and all its resources being stopped,
     323             :          * whenever a new resource is added -- which would be highly suboptimal.
     324             :          */
     325           0 :         pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL,
     326           0 :                                            FALSE, node->details->data_set);
     327             : 
     328           0 :         order_actions(unfence, action, order);
     329             : 
     330           0 :         if (!pcmk__node_unfenced(node)) {
     331             :             // But unfencing is required if it has never been done
     332           0 :             char *reason = crm_strdup_printf("required by %s %s",
     333           0 :                                              rsc->id, action->task);
     334             : 
     335           0 :             trigger_unfencing(NULL, node, reason, NULL,
     336           0 :                               node->details->data_set);
     337           0 :             free(reason);
     338             :         }
     339             :     }
     340           0 : }
     341             : 
     342             : /*!
     343             :  * \internal
     344             :  * \brief Create pseudo-op for guest node fence, and order relative to it
     345             :  *
                     :  * The pseudo fence op is ordered after, in order of preference: the
                     :  * fencing of the node where the container's stop is scheduled (when that
                     :  * stop is itself a pseudo-action), the container's stop, or the stop of
                     :  * the guest's connection resource. If none of these exists, no such
                     :  * ordering is created. All other resource actions are then ordered
                     :  * against the pseudo-op via pcmk__order_vs_fence().
                     :  *
     346             :  * \param[in,out] node  Guest node to fence
                     :  *
                     :  * \note Only \p node itself is asserted to be non-NULL;
                     :  *       \c node->details->remote_rsc is dereferenced without a check, so
                     :  *       the caller must pass a node that has a connection resource.
     347             :  */
     348             : void
     349           0 : pcmk__fence_guest(pcmk_node_t *node)
     350             : {
     351           0 :     pcmk_resource_t *container = NULL;
     352           0 :     pcmk_action_t *stop = NULL;
     353           0 :     pcmk_action_t *stonith_op = NULL;
     354             : 
     355             :     /* The fence action is just a label; we don't do anything differently for
     356             :      * off vs. reboot. We specify it explicitly, rather than let it default to
     357             :      * cluster's default action, because we are not _initiating_ fencing -- we
     358             :      * are creating a pseudo-event to describe fencing that is already occurring
     359             :      * by other means (container recovery).
     360             :      */
     361           0 :     const char *fence_action = PCMK_ACTION_OFF;
     362             : 
     363           0 :     CRM_ASSERT(node != NULL);
     364             : 
     365             :     /* Check whether guest's container resource has any explicit stop or
     366             :      * start (the stop may be implied by fencing of the guest's host).
     367             :      */
     368           0 :     container = node->details->remote_rsc->container;
     369           0 :     if (container) {
     370           0 :         stop = find_first_action(container->actions, NULL, PCMK_ACTION_STOP,
     371             :                                  NULL);
     372             : 
     373           0 :         if (find_first_action(container->actions, NULL, PCMK_ACTION_START,
     374             :                               NULL)) {
     375           0 :             fence_action = PCMK_ACTION_REBOOT;
     376             :         }
     377             :     }
     378             : 
     379             :     /* Create a fence pseudo-event, so we have an event to order actions
     380             :      * against, and the controller can always detect it.
     381             :      */
     382           0 :     stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
     383           0 :                              FALSE, node->details->data_set);
     384           0 :     pcmk__set_action_flags(stonith_op, pcmk_action_pseudo|pcmk_action_runnable);
     385             : 
     386             :     /* We want to imply stops/demotes after the guest is stopped, not wait until
     387             :      * it is restarted, so we always order pseudo-fencing after stop, not start
     388             :      * (even though start might be closer to what is done for a real reboot).
     389             :      */
     390           0 :     if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk_action_pseudo)) {
     391           0 :         pcmk_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
     392             :                                                      NULL, FALSE,
     393           0 :                                                      node->details->data_set);
     394             : 
     395           0 :         crm_info("Implying guest %s is down (action %d) after %s fencing",
     396             :                  pcmk__node_name(node), stonith_op->id,
     397             :                  pcmk__node_name(stop->node));
     398           0 :         order_actions(parent_stonith_op, stonith_op,
     399             :                       pcmk__ar_unrunnable_first_blocks
     400             :                       |pcmk__ar_first_implies_then);
     401             : 
     402           0 :     } else if (stop) {
     403           0 :         order_actions(stop, stonith_op,
     404             :                       pcmk__ar_unrunnable_first_blocks
     405             :                       |pcmk__ar_first_implies_then);
     406           0 :         crm_info("Implying guest %s is down (action %d) "
     407             :                  "after container %s is stopped (action %d)",
     408             :                  pcmk__node_name(node), stonith_op->id,
     409             :                  container->id, stop->id);
     410             :     } else {
     411             :         /* If we're fencing the guest node but there's no stop for the guest
     412             :          * resource, we must think the guest is already stopped. However, we may
     413             :          * think so because its resource history was just cleaned. To avoid
     414             :          * unnecessarily considering the guest node down if it's really up,
     415             :          * order the pseudo-fencing after any stop of the connection resource,
     416             :          * which will be ordered after any container (re-)probe.
     417             :          */
     418           0 :         stop = find_first_action(node->details->remote_rsc->actions, NULL,
     419             :                                  PCMK_ACTION_STOP, NULL);
     420             : 
     421           0 :         if (stop) {
     422           0 :             order_actions(stop, stonith_op, pcmk__ar_ordered);
     423           0 :             crm_info("Implying guest %s is down (action %d) "
     424             :                      "after connection is stopped (action %d)",
     425             :                      pcmk__node_name(node), stonith_op->id, stop->id);
     426             :         } else {
     427             :             /* Not sure why we're fencing, but everything must already be
     428             :              * cleanly stopped.
     429             :              */
     430           0 :             crm_info("Implying guest %s is down (action %d) ",
     431             :                      pcmk__node_name(node), stonith_op->id);
     432             :         }
     433             :     }
     434             : 
     435             :     // Order/imply other actions relative to pseudo-fence as with real fence
     436           0 :     pcmk__order_vs_fence(stonith_op, node->details->data_set);
     437           0 : }
     438             : 
     439             : /*!
     440             :  * \internal
     441             :  * \brief Check whether node has already been unfenced
     442             :  *
     443             :  * \param[in] node  Node to check
     444             :  *
     445             :  * \return false if the node's #node-unfenced attribute (CRM_ATTR_UNFENCED)
     446             :  *         is "0", otherwise true
                     :  *
                     :  * \note NOTE(review): the comparison passes pcmk__str_null_matches, which
                     :  *       presumably makes an unset (NULL) attribute compare equal to "0".
                     :  *       If so, a node with no such attribute is reported as not unfenced
                     :  *       (false) -- confirm against pcmk__str_eq().
     447             :  */
     448             : bool
     449           0 : pcmk__node_unfenced(const pcmk_node_t *node)
     450             : {
     451           0 :     const char *unfenced = pcmk__node_attr(node, CRM_ATTR_UNFENCED, NULL,
     452             :                                            pcmk__rsc_node_current);
     453             : 
     454           0 :     return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
     455             : }
     456             : 
     457             : /*!
     458             :  * \internal
     459             :  * \brief Order a resource's start and stop relative to unfencing of a node
     460             :  *
                     :  * Two orderings are created against the node's "on" fence operation:
                     :  * the resource's stop before the unfence op, and the unfence op before
                     :  * the resource's start. Both carry the pcmk__ar_if_on_same_node flag.
                     :  *
                     :  * NOTE(review): the untyped (gpointer, gpointer) signature presumably
                     :  * lets this be used as a GLib GFunc-style callback (for example with
                     :  * g_list_foreach()) -- confirm against callers.
                     :  *
     461             :  * \param[in,out] data       Node that could be unfenced
     462             :  * \param[in,out] user_data  Resource to order
     463             :  */
     464             : void
     465           0 : pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
     466             : {
     467           0 :     pcmk_node_t *node = (pcmk_node_t *) data;
     468           0 :     pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
     469             : 
     470           0 :     pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL,
     471             :                                          false, rsc->cluster);
     472             : 
     473           0 :     crm_debug("Ordering any stops of %s before %s, and any starts after",
     474             :               rsc->id, unfence->uuid);
     475             : 
     476             :     /*
     477             :      * It would be more efficient to order clone resources once,
     478             :      * rather than order each instance, but ordering the instance
     479             :      * allows us to avoid unnecessary dependencies that might conflict
     480             :      * with user constraints.
     481             :      *
     482             :      * @TODO: This constraint can still produce a transition loop if the
     483             :      * resource has a stop scheduled on the node being unfenced, and
     484             :      * there is a user ordering constraint to start some other resource
     485             :      * (which will be ordered after the unfence) before stopping this
     486             :      * resource. An example is "start some slow-starting cloned service
     487             :      * before stopping an associated virtual IP that may be moving to
     488             :      * it":
     489             :      *       stop this -> unfencing -> start that -> stop this
     490             :      */
     491           0 :     pcmk__new_ordering(rsc, stop_key(rsc), NULL,
     492           0 :                        NULL, strdup(unfence->uuid), unfence,
     493             :                        pcmk__ar_ordered|pcmk__ar_if_on_same_node,
     494             :                        rsc->cluster);
     495             : 
     496           0 :     pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
     497           0 :                        rsc, start_key(rsc), NULL,
     498             :                        pcmk__ar_first_implies_same_node_then
     499             :                        |pcmk__ar_if_on_same_node,
     500             :                        rsc->cluster);
     501           0 : }

Generated by: LCOV version 1.14