LCOV - code coverage report
Current view: top level - pacemaker - pcmk_sched_probes.c (source / functions) Hit Total Coverage
Test: Pacemaker code coverage Lines: 0 298 0.0 %
Date: 2024-05-07 11:09:47 Functions: 0 15 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2004-2024 the Pacemaker project contributors
       3             :  *
       4             :  * The version control history for this file may have further details.
       5             :  *
       6             :  * This source code is licensed under the GNU General Public License version 2
       7             :  * or later (GPLv2+) WITHOUT ANY WARRANTY.
       8             :  */
       9             : 
      10             : #include <crm_internal.h>
      11             : 
      12             : #include <glib.h>
      13             : 
      14             : #include <crm/crm.h>
      15             : #include <crm/pengine/status.h>
      16             : #include <pacemaker-internal.h>
      17             : #include "libpacemaker_private.h"
      18             : 
      19             : /*!
      20             :  * \internal
      21             :  * \brief Add the expected result to a newly created probe
      22             :  *
      23             :  * \param[in,out] probe  Probe action to add expected result to
      24             :  * \param[in]     rsc    Resource that probe is for
      25             :  * \param[in]     node   Node that probe will run on
      26             :  */
      27             : static void
      28           0 : add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
      29             :                     const pcmk_node_t *node)
      30             : {
      31             :     // Check whether resource is currently active on node
      32           0 :     pcmk_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
      33             : 
      34             :     // The expected result is what we think the resource's current state is
      35           0 :     if (running == NULL) {
      36           0 :         pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
      37             : 
      38           0 :     } else if (rsc->role == pcmk_role_promoted) {
      39           0 :         pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
      40             :     }
      41           0 : }
      42             : 
      43             : /*!
      44             :  * \internal
      45             :  * \brief Create any needed probes on a node for a list of resources
      46             :  *
      47             :  * \param[in,out] rscs  List of resources to create probes for
      48             :  * \param[in,out] node  Node to create probes on
      49             :  *
      50             :  * \return true if any probe was created, otherwise false
      51             :  */
      52             : bool
      53           0 : pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
      54             : {
      55           0 :     bool any_created = false;
      56             : 
      57           0 :     for (GList *iter = rscs; iter != NULL; iter = iter->next) {
      58           0 :         pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
      59             : 
      60           0 :         if (rsc->cmds->create_probe(rsc, node)) {
      61           0 :             any_created = true;
      62             :         }
      63             :     }
      64           0 :     return any_created;
      65             : }
      66             : 
      67             : /*!
      68             :  * \internal
      69             :  * \brief Order one resource's start after another's start-up probe
      70             :  *
      71             :  * \param[in,out] rsc1  Resource that might get start-up probe
      72             :  * \param[in]     rsc2  Resource that might be started
      73             :  */
      74             : static void
      75           0 : probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
      76             : {
      77           0 :     if ((rsc1->allocated_to != NULL)
      78           0 :         && (g_hash_table_lookup(rsc1->known_on,
      79           0 :                                 rsc1->allocated_to->details->id) == NULL)) {
      80             : 
      81           0 :         pcmk__new_ordering(rsc1,
      82           0 :                            pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0),
      83             :                            NULL,
      84           0 :                            rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0),
      85             :                            NULL,
      86             :                            pcmk__ar_ordered, rsc1->cluster);
      87             :     }
      88           0 : }
      89             : 
      90             : /*!
      91             :  * \internal
      92             :  * \brief Check whether a guest resource will stop
      93             :  *
      94             :  * \param[in] node  Guest node to check
      95             :  *
      96             :  * \return true if guest resource will likely stop, otherwise false
      97             :  */
      98             : static bool
      99           0 : guest_resource_will_stop(const pcmk_node_t *node)
     100             : {
     101           0 :     const pcmk_resource_t *guest_rsc = node->details->remote_rsc->container;
     102             : 
     103             :     /* Ideally, we'd check whether the guest has a required stop, but that
     104             :      * information doesn't exist yet, so approximate it ...
     105             :      */
     106           0 :     return node->details->remote_requires_reset
     107           0 :            || node->details->unclean
     108           0 :            || pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
     109           0 :            || (guest_rsc->next_role == pcmk_role_stopped)
     110             : 
     111             :            // Guest is moving
     112           0 :            || ((guest_rsc->role > pcmk_role_stopped)
     113           0 :                && (guest_rsc->allocated_to != NULL)
     114           0 :                && (pcmk__find_node_in_list(guest_rsc->running_on,
     115           0 :                    guest_rsc->allocated_to->details->uname) == NULL));
     116             : }
     117             : 
     118             : /*!
     119             :  * \internal
     120             :  * \brief Create a probe action for a resource on a node
     121             :  *
     122             :  * \param[in,out] rsc   Resource to create probe for
     123             :  * \param[in,out] node  Node to create probe on
     124             :  *
     125             :  * \return Newly created probe action
     126             :  */
     127             : static pcmk_action_t *
     128           0 : probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
     129             : {
     130           0 :     pcmk_action_t *probe = NULL;
     131           0 :     char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
     132             : 
     133           0 :     crm_debug("Scheduling probe of %s %s on %s",
     134             :               pcmk_role_text(rsc->role), rsc->id, pcmk__node_name(node));
     135             : 
     136           0 :     probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
     137             :                           rsc->cluster);
     138           0 :     pcmk__clear_action_flags(probe, pcmk_action_optional);
     139             : 
     140           0 :     pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
     141           0 :     add_expected_result(probe, rsc, node);
     142           0 :     return probe;
     143             : }
     144             : 
     145             : /*!
     146             :  * \internal
     147             :  * \brief Create probes for a resource on a node, if needed
     148             :  *
     149             :  * Schedule any probes needed for a resource on a node.
     150             :  *
     151             :  * \param[in,out] rsc   Resource to create probe for
     152             :  * \param[in,out] node  Node to create probe on
     153             :  *
     154             :  * \return true if any probe was created, otherwise false
     155             :  */
     156             : bool
     157           0 : pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
     158             : {
     159           0 :     uint32_t flags = pcmk__ar_ordered;
     160           0 :     pcmk_action_t *probe = NULL;
     161           0 :     pcmk_node_t *allowed = NULL;
     162           0 :     pcmk_resource_t *top = uber_parent(rsc);
     163           0 :     const char *reason = NULL;
     164             : 
     165           0 :     CRM_ASSERT((rsc != NULL) && (node != NULL));
     166             : 
     167           0 :     if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_probe_resources)) {
     168           0 :         reason = "start-up probes are disabled";
     169           0 :         goto no_probe;
     170             :     }
     171             : 
     172           0 :     if (pcmk__is_pacemaker_remote_node(node)) {
     173           0 :         const char *class = crm_element_value(rsc->xml, PCMK_XA_CLASS);
     174             : 
     175           0 :         if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
     176           0 :             reason = "Pacemaker Remote nodes cannot run stonith agents";
     177           0 :             goto no_probe;
     178             : 
     179           0 :         } else if (pcmk__is_guest_or_bundle_node(node)
     180           0 :                    && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
     181           0 :             reason = "guest nodes cannot run resources containing guest nodes";
     182           0 :             goto no_probe;
     183             : 
     184           0 :         } else if (rsc->is_remote_node) {
     185           0 :             reason = "Pacemaker Remote nodes cannot host remote connections";
     186           0 :             goto no_probe;
     187             :         }
     188             :     }
     189             : 
     190             :     // If this is a collective resource, probes are created for its children
     191           0 :     if (rsc->children != NULL) {
     192           0 :         return pcmk__probe_resource_list(rsc->children, node);
     193             :     }
     194             : 
     195           0 :     if ((rsc->container != NULL) && !rsc->is_remote_node) {
     196           0 :         reason = "resource is inside a container";
     197           0 :         goto no_probe;
     198             : 
     199           0 :     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
     200           0 :         reason = "resource is orphaned";
     201           0 :         goto no_probe;
     202             : 
     203           0 :     } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
     204           0 :         reason = "resource state is already known";
     205           0 :         goto no_probe;
     206             :     }
     207             : 
     208           0 :     allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
     209             : 
     210           0 :     if (rsc->exclusive_discover || top->exclusive_discover) {
     211             :         // Exclusive discovery is enabled ...
     212             : 
     213           0 :         if (allowed == NULL) {
     214             :             // ... but this node is not allowed to run the resource
     215           0 :             reason = "resource has exclusive discovery but is not allowed "
     216             :                      "on node";
     217           0 :             goto no_probe;
     218             : 
     219           0 :         } else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
     220             :             // ... but no constraint marks this node for discovery of resource
     221           0 :             reason = "resource has exclusive discovery but is not enabled "
     222             :                      "on node";
     223           0 :             goto no_probe;
     224             :         }
     225             :     }
     226             : 
     227           0 :     if (allowed == NULL) {
     228           0 :         allowed = node;
     229             :     }
     230           0 :     if (allowed->rsc_discover_mode == pcmk_probe_never) {
     231           0 :         reason = "node has discovery disabled";
     232           0 :         goto no_probe;
     233             :     }
     234             : 
     235           0 :     if (pcmk__is_guest_or_bundle_node(node)) {
     236           0 :         pcmk_resource_t *guest = node->details->remote_rsc->container;
     237             : 
     238           0 :         if (guest->role == pcmk_role_stopped) {
     239             :             // The guest is stopped, so we know no resource is active there
     240           0 :             reason = "node's guest is stopped";
     241           0 :             probe_then_start(guest, top);
     242           0 :             goto no_probe;
     243             : 
     244           0 :         } else if (guest_resource_will_stop(node)) {
     245           0 :             reason = "node's guest will stop";
     246             : 
     247             :             // Order resource start after guest stop (in case it's restarting)
     248           0 :             pcmk__new_ordering(guest,
     249           0 :                                pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
     250             :                                NULL, top,
     251           0 :                                pcmk__op_key(top->id, PCMK_ACTION_START, 0),
     252             :                                NULL, pcmk__ar_ordered, rsc->cluster);
     253           0 :             goto no_probe;
     254             :         }
     255             :     }
     256             : 
     257             :     // We've eliminated all cases where a probe is not needed, so now it is
     258           0 :     probe = probe_action(rsc, node);
     259             : 
     260             :     /* Below, we will order the probe relative to start or reload. If this is a
     261             :      * clone instance, the start or reload is for the entire clone rather than
     262             :      * just the instance. Otherwise, the start or reload is for the resource
     263             :      * itself.
     264             :      */
     265           0 :     if (!pcmk__is_clone(top)) {
     266           0 :         top = rsc;
     267             :     }
     268             : 
     269             :     /* Prevent a start if the resource can't be probed, but don't cause the
     270             :      * resource or entire clone to stop if already active.
     271             :      */
     272           0 :     if (!pcmk_is_set(probe->flags, pcmk_action_runnable)
     273           0 :         && (top->running_on == NULL)) {
     274           0 :         pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks);
     275             :     }
     276             : 
     277             :     // Start or reload after probing the resource
     278           0 :     pcmk__new_ordering(rsc, NULL, probe,
     279           0 :                        top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
     280             :                        flags, rsc->cluster);
     281           0 :     pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
     282             :                        pcmk__ar_ordered, rsc->cluster);
     283             : 
     284           0 :     return true;
     285             : 
     286           0 : no_probe:
     287           0 :     pcmk__rsc_trace(rsc,
     288             :                     "Skipping probe for %s on %s because %s",
     289             :                     rsc->id, node->details->id, reason);
     290           0 :     return false;
     291             : }
     292             : 
     293             : /*!
     294             :  * \internal
     295             :  * \brief Check whether a probe should be ordered before another action
     296             :  *
     297             :  * \param[in] probe  Probe action to check
     298             :  * \param[in] then   Other action to check
     299             :  *
     300             :  * \return true if \p probe should be ordered before \p then, otherwise false
     301             :  */
     302             : static bool
     303           0 : probe_needed_before_action(const pcmk_action_t *probe,
     304             :                            const pcmk_action_t *then)
     305             : {
     306             :     // Probes on a node are performed after unfencing it, not before
     307           0 :     if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
     308           0 :         && pcmk__same_node(probe->node, then->node)) {
     309           0 :         const char *op = g_hash_table_lookup(then->meta,
     310             :                                              PCMK__META_STONITH_ACTION);
     311             : 
     312           0 :         if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
     313           0 :             return false;
     314             :         }
     315             :     }
     316             : 
     317             :     // Probes should be done on a node before shutting it down
     318           0 :     if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
     319           0 :         && (probe->node != NULL) && (then->node != NULL)
     320           0 :         && !pcmk__same_node(probe->node, then->node)) {
     321           0 :         return false;
     322             :     }
     323             : 
     324             :     // Otherwise probes should always be done before any other action
     325           0 :     return true;
     326             : }
     327             : 
     328             : /*!
     329             :  * \internal
     330             :  * \brief Add implicit "probe then X" orderings for "stop then X" orderings
     331             :  *
     332             :  * If the state of a resource is not known yet, a probe will be scheduled,
     333             :  * expecting a "not running" result. If the probe fails, a stop will not be
     334             :  * scheduled until the next transition. Thus, if there are ordering constraints
     335             :  * like "stop this resource then do something else that's not for the same
     336             :  * resource", add implicit "probe this resource then do something" equivalents
     337             :  * so the relation is upheld until we know whether a stop is needed.
     338             :  *
     339             :  * \param[in,out] scheduler  Scheduler data
     340             :  */
     341             : static void
     342           0 : add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
     343             : {
     344           0 :     for (GList *iter = scheduler->ordering_constraints; iter != NULL;
     345           0 :          iter = iter->next) {
     346             : 
     347           0 :         pcmk__action_relation_t *order = iter->data;
     348           0 :         uint32_t order_flags = pcmk__ar_ordered;
     349           0 :         GList *probes = NULL;
     350           0 :         GList *then_actions = NULL;
     351           0 :         pcmk_action_t *first = NULL;
     352           0 :         pcmk_action_t *then = NULL;
     353             : 
     354             :         // Skip disabled orderings
     355           0 :         if (order->flags == pcmk__ar_none) {
     356           0 :             continue;
     357             :         }
     358             : 
     359             :         // Skip non-resource orderings, and orderings for the same resource
     360           0 :         if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) {
     361           0 :             continue;
     362             :         }
     363             : 
     364             :         // Skip invalid orderings (shouldn't be possible)
     365           0 :         first = order->action1;
     366           0 :         then = order->action2;
     367           0 :         if (((first == NULL) && (order->task1 == NULL))
     368           0 :             || ((then == NULL) && (order->task2 == NULL))) {
     369           0 :             continue;
     370             :         }
     371             : 
     372             :         // Skip orderings for first actions other than stop
     373           0 :         if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
     374             :                                              pcmk__str_none)) {
     375           0 :             continue;
     376           0 :         } else if ((first == NULL)
     377           0 :                    && !pcmk__ends_with(order->task1,
     378             :                                        "_" PCMK_ACTION_STOP "_0")) {
     379           0 :             continue;
     380             :         }
     381             : 
     382             :         /* Do not imply a probe ordering for a resource inside of a stopping
     383             :          * container. Otherwise, it might introduce a transition loop, since a
     384             :          * probe could be scheduled after the container starts again.
     385             :          */
     386           0 :         if ((order->rsc2 != NULL) && (order->rsc1->container == order->rsc2)) {
     387             : 
     388           0 :             if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
     389             :                                                pcmk__str_none)) {
     390           0 :                 continue;
     391           0 :             } else if ((then == NULL)
     392           0 :                        && pcmk__ends_with(order->task2,
     393             :                                           "_" PCMK_ACTION_STOP "_0")) {
     394           0 :                 continue;
     395             :             }
     396             :         }
     397             : 
     398             :         // Preserve certain order options for future filtering
     399           0 :         if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) {
     400           0 :             pcmk__set_relation_flags(order_flags,
     401             :                                      pcmk__ar_if_first_unmigratable);
     402             :         }
     403           0 :         if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) {
     404           0 :             pcmk__set_relation_flags(order_flags, pcmk__ar_if_on_same_node);
     405             :         }
     406             : 
     407             :         // Preserve certain order types for future filtering
     408           0 :         if ((order->flags == pcmk__ar_if_required_on_same_node)
     409           0 :             || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
     410           0 :             order_flags = order->flags;
     411             :         }
     412             : 
     413             :         // List all scheduled probes for the first resource
     414           0 :         probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR,
     415             :                                       FALSE);
     416           0 :         if (probes == NULL) { // There aren't any
     417           0 :             continue;
     418             :         }
     419             : 
     420             :         // List all relevant "then" actions
     421           0 :         if (then != NULL) {
     422           0 :             then_actions = g_list_prepend(NULL, then);
     423             : 
     424           0 :         } else if (order->rsc2 != NULL) {
     425           0 :             then_actions = find_actions(order->rsc2->actions, order->task2,
     426             :                                         NULL);
     427           0 :             if (then_actions == NULL) { // There aren't any
     428           0 :                 g_list_free(probes);
     429           0 :                 continue;
     430             :             }
     431             :         }
     432             : 
     433           0 :         crm_trace("Implying 'probe then' orderings for '%s then %s' "
     434             :                   "(id=%d, type=%.6x)",
     435             :                   ((first == NULL)? order->task1 : first->uuid),
     436             :                   ((then == NULL)? order->task2 : then->uuid),
     437             :                   order->id, order->flags);
     438             : 
     439           0 :         for (GList *probe_iter = probes; probe_iter != NULL;
     440           0 :              probe_iter = probe_iter->next) {
     441             : 
     442           0 :             pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
     443             : 
     444           0 :             for (GList *then_iter = then_actions; then_iter != NULL;
     445           0 :                  then_iter = then_iter->next) {
     446             : 
     447           0 :                 pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
     448             : 
     449           0 :                 if (probe_needed_before_action(probe, then)) {
     450           0 :                     order_actions(probe, then, order_flags);
     451             :                 }
     452             :             }
     453             :         }
     454             : 
     455           0 :         g_list_free(then_actions);
     456           0 :         g_list_free(probes);
     457             :     }
     458           0 : }
     459             : 
     460             : /*!
     461             :  * \internal
     462             :  * \brief Add necessary orderings between probe and starts of clone instances
     463             :  *
     464             :  * These are in addition to the ordering with the parent resource that was
     465             :  * added upon creating the probe.
     466             :  *
     467             :  * \param[in,out] probe     Probe as 'first' action in an ordering
     468             :  * \param[in,out] after     'then' action wrapper in the ordering
     469             :  */
     470             : static void
     471           0 : add_start_orderings_for_probe(pcmk_action_t *probe,
     472             :                               pcmk__related_action_t *after)
     473             : {
     474           0 :     uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks;
     475             : 
     476             :     /* Although the ordering between the probe of the clone instance and the
     477             :      * start of its parent has been added in pcmk__probe_rsc_on_node(), we
     478             :      * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
     479             :      * as long as any of the clone instances are running to prevent them from
     480             :      * being unexpectedly stopped.
     481             :      *
     482             :      * On the other hand, we still need to prevent any inactive instances from
     483             :      * starting unless the probe is runnable so that we don't risk starting too
     484             :      * many instances before we know the state on all nodes.
     485             :      */
     486           0 :     if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
     487           0 :         || pcmk_is_set(probe->flags, pcmk_action_runnable)
     488             :         // The order type is already enforced for its parent.
     489           0 :         || pcmk_is_set(after->type, pcmk__ar_unrunnable_first_blocks)
     490           0 :         || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
     491           0 :         || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
     492             :                          pcmk__str_none)) {
     493           0 :         return;
     494             :     }
     495             : 
     496           0 :     crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
     497             :               "then instances of %s@%s'",
     498             :               probe->uuid, pcmk__node_name(probe->node),
     499             :               after->action->uuid, pcmk__node_name(after->action->node));
     500             : 
     501           0 :     for (GList *then_iter = after->action->actions_after; then_iter != NULL;
     502           0 :          then_iter = then_iter->next) {
     503             : 
     504           0 :         pcmk__related_action_t *then = then_iter->data;
     505             : 
     506           0 :         if (then->action->rsc->running_on
     507           0 :             || (pe__const_top_resource(then->action->rsc, false)
     508           0 :                 != after->action->rsc)
     509           0 :             || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
     510             :                              pcmk__str_none)) {
     511           0 :             continue;
     512             :         }
     513             : 
     514           0 :         crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
     515             :                   "then %s@%s' (type=%#.6x)",
     516             :                   probe->uuid, pcmk__node_name(probe->node),
     517             :                   then->action->uuid, pcmk__node_name(then->action->node),
     518             :                   flags);
     519             : 
     520             :         /* Prevent the instance from starting if it can't be probed, but don't
     521             :          * cause any other instances to stop if already active.
     522             :          */
     523           0 :         order_actions(probe, then->action, flags);
     524             :     }
     525             : 
     526           0 :     return;
     527             : }
     528             : 
     529             : /*!
     530             :  * \internal
     531             :  * \brief Order probes before restarts and re-promotes
     532             :  *
     533             :  * If a given ordering is a "probe then start" or "probe then promote" ordering,
     534             :  * add an implicit "probe then stop/demote" ordering in case the action is part
     535             :  * of a restart/re-promote, and do the same recursively for all actions ordered
     536             :  * after the "then" action.
     537             :  *
     538             :  * \param[in,out] probe     Probe as 'first' action in an ordering
     539             :  * \param[in,out] after     'then' action in the ordering (flagged as visited via pcmk_action_detect_loop)
     540             :  */
     541             : static void
     542           0 : add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
     543             : {
     544           0 :     GList *iter = NULL;
     545           0 :     bool interleave = false;
     546           0 :     pcmk_resource_t *compatible_rsc = NULL;
     547             : 
     548             :     // Nothing to do unless a primitive's monitor action is followed by some action
     549           0 :     if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
     550           0 :         || (probe->rsc->variant != pcmk_rsc_variant_primitive)
     551           0 :         || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
     552           0 :         return;
     553             :     }
     554             : 
     555             :     // Visit each 'then' action only once, to avoid recursing forever on ordering loops
     556           0 :     if (pcmk_is_set(after->flags, pcmk_action_detect_loop)) {
     557           0 :         return;
     558             :     }
     559           0 :     pcmk__set_action_flags(after, pcmk_action_detect_loop);
     560             : 
     561           0 :     crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
     562             :               probe->uuid, pcmk__node_name(probe->node),
     563             :               after->uuid, pcmk__node_name(after->node));
     564             : 
     565             :     /* Add restart orderings if "then" is a start or promote of a different
     566             :      * primitive. Collective resources are handled via the recursion below.
     567             :      */
     568           0 :     if ((after->rsc != NULL)
     569           0 :         && (after->rsc->variant == pcmk_rsc_variant_primitive)
     570           0 :         && (probe->rsc != after->rsc)) {
     571             : 
     572           0 :             GList *then_actions = NULL;
     573             : 
     574           0 :             if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
     575           0 :                 then_actions = pe__resource_actions(after->rsc, NULL,
     576             :                                                     PCMK_ACTION_STOP, FALSE);
     577             : 
     578           0 :             } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
     579             :                                     pcmk__str_none)) {
     580           0 :                 then_actions = pe__resource_actions(after->rsc, NULL,
     581             :                                                     PCMK_ACTION_DEMOTE, FALSE);
     582             :             }
     583             : 
     584           0 :             for (iter = then_actions; iter != NULL; iter = iter->next) {
     585           0 :                 pcmk_action_t *then = (pcmk_action_t *) iter->data;
     586             : 
     587             :                 // Skip pseudo-actions (for example, those implied by fencing)
     588           0 :                 if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
     589           0 :                     order_actions(probe, then, pcmk__ar_ordered);
     590             :                 }
     591             :             }
     592           0 :             g_list_free(then_actions);
     593             :     }
     594             : 
     595             :     /* Detect whether "then" is an action of an interleaved clone. For these, we
     596             :      * want to add orderings only for the instance compatible with the probe's resource.
     597             :      */
     598           0 :     if ((after->rsc != NULL)
     599           0 :         && (after->rsc->variant > pcmk_rsc_variant_group)) {
     600           0 :         const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
     601             :                                                        PCMK_META_INTERLEAVE);
     602             : 
     603           0 :         interleave = crm_is_true(interleave_s);
     604           0 :         if (interleave) {
     605           0 :             compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
     606           0 :                                                             after->rsc,
     607             :                                                             pcmk_role_unknown,
     608             :                                                             false);
     609             :         }
     610             :     }
     611             : 
     612             :     /* Now recursively do the same for all actions ordered after "then". This
     613             :      * also handles collective resources since the collective action will be
     614             :      * ordered before its individual instances' actions.
     615             :      */
     616           0 :     for (iter = after->actions_after; iter != NULL; iter = iter->next) {
     617           0 :         pcmk__related_action_t *after_wrapper = iter->data;
     618             : 
     619             :         /* pcmk__ar_first_implies_then is the reason why a required A.start
     620             :          * implies/enforces B.start to be required too, which is the cause of
     621             :          * B.restart/re-promote.
     622             :          *
     623             :          * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
     624             :          * only used for unfencing case, which tends to introduce transition
     625             :          * loops...
     626             :          */
     627           0 :         if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
     628             :             /* The order type between a group/clone and its child such as
     629             :              * B.start -> B_child.start is:
     630             :              * pcmk__ar_then_implies_first_graphed
     631             :              * |pcmk__ar_unrunnable_first_blocks
     632             :              *
     633             :              * Follow such orderings only from a collective resource's action to
     634             :              * an action of one of its direct children; skip everything else.
     635             :              */
     636           0 :             if ((after->rsc == NULL)
     637           0 :                 || (after->rsc->variant < pcmk_rsc_variant_group)
     638           0 :                 || (probe->rsc->parent == after->rsc)
     639           0 :                 || (after_wrapper->action->rsc == NULL)
     640           0 :                 || (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
     641           0 :                 || (after->rsc != after_wrapper->action->rsc->parent)) {
     642           0 :                 continue;
     643             :             }
     644             : 
     645             :             /* Proceed to the children of a group or a non-interleaved clone.
     646             :              * For an interleaved clone, proceed only to the relevant child.
     647             :              */
     648           0 :             if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
     649           0 :                 && ((compatible_rsc == NULL)
     650           0 :                     || (compatible_rsc != after_wrapper->action->rsc))) {
     651           0 :                 continue;
     652             :             }
     653             :         }
     654             : 
     655           0 :         crm_trace("Recursively adding probe restart orderings for "
     656             :                   "'%s@%s then %s@%s' (type=%#.6x)",
     657             :                   after->uuid, pcmk__node_name(after->node),
     658             :                   after_wrapper->action->uuid,
     659             :                   pcmk__node_name(after_wrapper->action->node),
     660             :                   after_wrapper->type);
     661             : 
     662           0 :         add_restart_orderings_for_probe(probe, after_wrapper->action);
     663             :     }
     664             : }
     665             : 
     666             : /*!
     667             :  * \internal
     668             :  * \brief Clear the loop-detection flag (pcmk_action_detect_loop) on all scheduled actions
     669             :  *
     670             :  * \param[in,out] scheduler  Scheduler data whose list of actions is walked
     671             :  */
     672             : static void
     673           0 : clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
     674             : {
     675           0 :     for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
     676           0 :         pcmk_action_t *action = iter->data;
     677             : 
     678           0 :         pcmk__clear_action_flags(action, pcmk_action_detect_loop);
     679             :     }
     680           0 : }
     681             : 
     682             : /*!
     683             :  * \internal
     684             :  * \brief Add start and restart orderings for probes scheduled for a resource
     685             :  *
     686             :  * \param[in,out] data       Resource whose probes should be ordered (a pcmk_resource_t *)
     687             :  * \param[in]     user_data  Unused (present to match the g_list_foreach() callback signature)
     688             :  */
     689             : static void
     690           0 : add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
     691             : {
     692           0 :     pcmk_resource_t *rsc = data;
     693           0 :     GList *probes = NULL;
     694             : 
     695             :     // For non-primitive resources, recurse into each child instead
     696           0 :     if (rsc->variant != pcmk_rsc_variant_primitive) {
     697           0 :         g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
     698             :                        NULL);
     699           0 :         return;
     700             :     }
     701             : 
     702             :     // Collect the resource's monitor actions (the list itself is freed below)
     703           0 :     probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
     704             : 
     705             :     // For each existing "probe then X" ordering, add start and restart orderings
     706           0 :     for (GList *iter = probes; iter != NULL; iter = iter->next) {
     707           0 :         pcmk_action_t *probe = (pcmk_action_t *) iter->data;
     708             : 
     709           0 :         for (GList *then_iter = probe->actions_after; then_iter != NULL;
     710           0 :              then_iter = then_iter->next) {
     711             : 
     712           0 :             pcmk__related_action_t *then = then_iter->data;
     713             : 
     714           0 :             add_start_orderings_for_probe(probe, then);
     715           0 :             add_restart_orderings_for_probe(probe, then->action);
     716           0 :             clear_actions_tracking_flag(rsc->cluster); // reset visited flags before the next ordering
     717             :         }
     718             :     }
     719             : 
     720           0 :     g_list_free(probes);
     721             : }
     722             : 
     723             : /*!
     724             :  * \internal
     725             :  * \brief Add "A then probe B" orderings for "A then B" orderings
     726             :  *
     727             :  * \param[in,out] scheduler  Scheduler data (unused while the body is disabled)
     728             :  *
     729             :  * \note The body is compiled out (#if 0), so this is currently a no-op; see the comment inside for why.
     730             :  */
     731             : static void
     732           0 : order_then_probes(pcmk_scheduler_t *scheduler)
     733             : {
     734             : #if 0
     735             :     /* Given an ordering "A then B", we would prefer to wait for A to be started
     736             :      * before probing B.
     737             :      *
     738             :      * For example, if A is a filesystem which B can't even run without, it
     739             :      * would be helpful if the author of B's agent could assume that A is
     740             :      * running before B.monitor will be called.
     741             :      *
     742             :      * However, we can't _only_ probe after A is running, otherwise we wouldn't
     743             :      * detect the state of B if A could not be started. We can't even do an
     744             :      * opportunistic version of this, because B may be moving:
     745             :      *
     746             :      *   A.stop -> A.start -> B.probe -> B.stop -> B.start
     747             :      *
     748             :      * and if we add B.stop -> A.stop here, we get a loop:
     749             :      *
     750             :      *   A.stop -> A.start -> B.probe -> B.stop -> A.stop
     751             :      *
     752             :      * We could kill the "B.probe -> B.stop" dependency, but that could mean
     753             :      * stopping B "too" soon, because B.start must wait for the probe, and
     754             :      * we don't want to stop B if we can't start it.
     755             :      *
     756             :      * We could add the ordering only if A is an anonymous clone with
     757             :      * clone-max == node-max (since we'll never be moving it). However, we could
     758             :      * still be stopping one instance at the same time as starting another.
     759             :      *
     760             :      * The complexity of checking for allowed conditions combined with the ever
     761             :      * narrowing use case suggests that this code should remain disabled until
     762             :      * a safe set of conditions has been worked out.
     763             :      */
     764             :     for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
     765             :         pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
     766             : 
     767             :         pcmk_action_t *start = NULL;
     768             :         GList *actions = NULL;
     769             :         GList *probes = NULL;
     770             : 
     771             :         actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
     772             : 
     773             :         if (actions) {
     774             :             start = actions->data;
     775             :             g_list_free(actions);
     776             :         }
     777             : 
     778             :         if (start == NULL) {
     779             :             crm_debug("No start action for %s", rsc->id);
     780             :             continue;
     781             :         }
     782             : 
     783             :         probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
     784             : 
     785             :         for (actions = start->actions_before; actions != NULL;
     786             :              actions = actions->next) {
     787             : 
     788             :             pcmk__related_action_t *before = actions->data;
     789             : 
     790             :             pcmk_action_t *first = before->action;
     791             :             pcmk_resource_t *first_rsc = first->rsc;
     792             : 
     793             :             if (first->required_runnable_before) {
     794             :                 for (GList *clone_actions = first->actions_before;
     795             :                      clone_actions != NULL;
     796             :                      clone_actions = clone_actions->next) {
     797             : 
     798             :                     before = clone_actions->data;
     799             : 
     800             :                     crm_trace("Testing '%s then %s' for %s",
     801             :                               first->uuid, before->action->uuid, start->uuid);
     802             : 
     803             :                     CRM_ASSERT(before->action->rsc != NULL);
     804             :                     first_rsc = before->action->rsc;
     805             :                     break;
     806             :                 }
     807             : 
     808             :             } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
     809             :                                      pcmk__str_none)) {
     810             :                 crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
     811             :             }
     812             : 
     813             :             if (first_rsc == NULL) {
     814             :                 continue;
     815             : 
     816             :             } else if (pe__const_top_resource(first_rsc, false)
     817             :                        == pe__const_top_resource(start->rsc, false)) {
     818             :                 crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
     819             :                 continue;
     820             : 
     821             :             } else if (!pcmk__is_clone(pe__const_top_resource(first_rsc,
     822             :                                                               false))) {
     823             :                 crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
     824             :                 continue;
     825             :             }
     826             : 
     827             :             crm_debug("Applying %s before %s %d", first->uuid, start->uuid,
     828             :                       pe__const_top_resource(first_rsc, false)->variant);
     829             : 
     830             :             for (GList *probe_iter = probes; probe_iter != NULL;
     831             :                  probe_iter = probe_iter->next) {
     832             : 
     833             :                 pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
     834             : 
     835             :                 crm_debug("Ordering %s before %s", first->uuid, probe->uuid);
     836             :                 order_actions(first, probe, pcmk__ar_ordered);
     837             :             }
     838             :         }
     839             :     }
     840             : #endif
     841           0 : }
     842             : 
     843             : void
     844           0 : pcmk__order_probes(pcmk_scheduler_t *scheduler)
     845             : {
     846             :     // Add start and restart orderings for "probe then X", for every resource
     847           0 :     g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
     848             :                    NULL);
     849           0 :     add_probe_orderings_for_stops(scheduler);
     850             : 
     851           0 :     order_then_probes(scheduler); // currently a no-op (its body is compiled out)
     852           0 : }
     853             : 
     854             : /*!
     855             :  * \internal
     856             :  * \brief Schedule any probes needed
     857             :  *
     858             :  * \param[in,out] scheduler  Scheduler data
     859             :  *
     860             :  * \note This may also schedule fencing of offline nodes that are failed remote nodes.
     861             :  */
     862             : void
     863           0 : pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
     864             : {
     865             :     // Schedule probes on each node in the cluster as needed
     866           0 :     for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
     867           0 :         pcmk_node_t *node = (pcmk_node_t *) iter->data;
     868           0 :         const char *probed = NULL;
     869             : 
     870           0 :         if (!node->details->online) { // Don't probe offline nodes
     871           0 :             if (pcmk__is_failed_remote_node(node)) {
     872           0 :                 pe_fence_node(scheduler, node,
     873             :                               "the connection is unrecoverable", FALSE);
     874             :             }
     875           0 :             continue;
     876             : 
     877           0 :         } else if (node->details->unclean) { // ... or nodes that need fencing
     878           0 :             continue;
     879             : 
     880           0 :         } else if (!node->details->rsc_discovery_enabled) {
     881             :             // The user requested that probes not be done on this node
     882           0 :             continue;
     883             :         }
     884             : 
     885             :         /* This is no longer needed for live clusters, since the probe_complete
     886             :          * node attribute will never be in the CIB. However this is still useful
     887             :          * for processing old saved CIBs (< 1.1.14), including the reprobe-target_rc
     888             :          * regression test. If the attribute is set but not true, schedule a reprobe action on the node instead.
     889             :          */
     890           0 :         probed = pcmk__node_attr(node, CRM_OP_PROBED, NULL,
     891             :                                  pcmk__rsc_node_current);
     892           0 :         if (probed != NULL && crm_is_true(probed) == FALSE) {
     893           0 :             pcmk_action_t *probe_op = NULL;
     894             : 
     895           0 :             probe_op = custom_action(NULL,
     896             :                                      crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
     897           0 :                                                        node->details->uname),
     898             :                                      CRM_OP_REPROBE, node, FALSE, scheduler);
     899           0 :             pcmk__insert_meta(probe_op, PCMK__META_OP_NO_WAIT, PCMK_VALUE_TRUE);
     900           0 :             continue; // the per-resource probes below are skipped for this node
     901             :         }
     902             : 
     903             :         // Probe each resource in the cluster on this node, as needed
     904           0 :         pcmk__probe_resource_list(scheduler->resources, node);
     905             :     }
     906           0 : }

Generated by: LCOV version 1.14