Merge pull request #849 from juju/application-status-from-api

jujubot · web-flow · commit a531e4bdbc08 · 2023-05-12T13:15:54.000+02:00
#849 #### Description Determining the status of an application is a convoluted and logically somewhat tricky process in Juju. Some applications are reported as being in the `error` state not because of their units' `workload_status` or `agent_status`, but (for instance, in caas models) because of the `operator_status`. Through the API this can only be observed in the `DetailedStatus` of an `Application` coming from the `FullStatus` (and not through our conventional way of getting a delta about it via the AllWatcher). Therefore pylibjuju is unable to see that the application is in the error state, while `juju status` clearly shows that it is, in fact, in the error state. [This LP bug](https://bugs.launchpad.net/juju/+bug/1981833) is an example of this scenario (see the QA steps below to reproduce it). This change remedies this by introducing `application.get_status()`, which calls `FullStatus` to get the most up-to-date status info from the API and includes that in our `derive_status` process. #### QA Steps This requires a caas model, as the issue is not observed in machine models (this is because the `admission-webhook` charm transitions into the error state for different reasons in machine models than in caas models). Additionally, for the same reason, it's not possible to write an integration test, as our tests are set up to run on lxd. Though maybe devising an elaborate setup to simulate that behavior through the API might be possible. Bootstrap a k8s model on microk8s. ``` $ juju bootstrap microk8s removeme ``` Deploy the `admission-webhook` charm. ``` $ juju deploy admission-webhook --channel 1.4/stable ``` It will go into the `error` state. Confirm this with `juju status`. It may take a couple of seconds to get there. Now run `wait_for_idle` as follows. I modified one of the examples in the `examples` folder. 
```python await model.wait_for_idle( apps=["admission-webhook"], status="active", raise_on_blocked=True, raise_on_error=True, timeout=1500, ) ``` As also noted in the reported [LP bug](https://bugs.launchpad.net/juju/+bug/1981833), without this PR this `wait_for_idle` call returns normally, because the application status is computed as `active` (all the units' `workload_status` values are active and their `agent_status` is idle). With this change, it correctly raises `JujuAppError`, because the application's status, as reported through the API, is correctly set to `error`. #### Notes & Discussion This should probably be cherry-picked onto the main branch later on.
diff --git a/juju/application.py b/juju/application.py
@@ -91,10 +91,15 @@ def status(self):
         """
         status = self.safe_data['status']['current']
         if status == "unset":
-            unit_status = []
+            known_statuses = []
             for unit in self.units:
-                unit_status.append(unit.workload_status)
-            return derive_status(unit_status)
+                known_statuses.append(unit.workload_status)
+            # If the self.get_status() is called (i.e. the status
+            # is received by FullStatus from the API) then add
+            # that into this computation as it might be more up
+            # to date (and more severe).
+            known_statuses.append(self._status)
+            return derive_status(known_statuses)
         return status
 
     @property
@@ -445,6 +450,23 @@ async def get_actions(self, schema=False):
             actions = {k: v.description for k, v in actions.items()}
         return actions
 
+    async def get_status(self):
+        """Get the application status, combining self.status with the
+        status reported for this application by the API's FullStatus.
+
+        :return: str status (JujuError is raised if app is not listed)
+        """
+
+        client_facade = client.ClientFacade.from_connection(self.connection)
+
+        full_status = await client_facade.FullStatus(patterns=None)
+        _app = full_status.applications.get(self.name, None)
+        if not _app:
+            raise JujuError(f"application is not in FullStatus : {self.name}")
+
+        self._status = derive_status([self.status, _app.status.status])
+        return self._status
+
     def attach_resource(self, resource_name, file_name, file_obj):
         """Updates the resource for an application by uploading file from
         local disk to the Juju controller.
diff --git a/juju/model.py b/juju/model.py
@@ -264,6 +264,7 @@ def __init__(self, entity_id, model, history_index=-1, connected=True):
         self._history_index = history_index
         self.connected = connected
         self.connection = model.connection()
+        self._status = 'unknown'
 
     def __repr__(self):
         return '<{} entity_id="{}">'.format(type(self).__name__,
@@ -2516,9 +2517,10 @@ def _raise_for_status(entities, status):
                     busy.append(app_name + " (missing)")
                     continue
                 app = self.applications[app_name]
-                if raise_on_error and app.status == "error":
+                app_status = await app.get_status()
+                if raise_on_error and app_status == "error":
                     errors.setdefault("App", []).append(app.name)
-                if raise_on_blocked and app.status == "blocked":
+                if raise_on_blocked and app_status == "blocked":
                     blocks.setdefault("App", []).append(app.name)
                 # Check if wait_for_exact_units flag is used
                 if wait_for_exact_units > 0:
diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py
@@ -13,6 +13,8 @@
 from juju import jasyncio
 from juju.errors import JujuConnectionError
 
+from .. import base
+
 
 def _make_delta(entity, type_, data=None):
     from juju.client.client import Delta
@@ -297,7 +299,7 @@ async def test_timeout(self):
     async def test_wait_for_active_status(self):
         # create a custom apps mock
         from types import SimpleNamespace
-        apps = {"dummy_app": SimpleNamespace(
+        app = SimpleNamespace(
             status="active",
             units=[SimpleNamespace(
                 name="mockunit/0",
@@ -306,7 +308,14 @@ async def test_wait_for_active_status(self):
                 machine=None,
                 agent_status="idle",
             )],
-        )}
+        )
+        # This is a small hack to act like we're getting 'unknown'
+        # from the api (the get_status() call), which shouldn't
+        # change the semantics of this test, as the 'unknown'
+        # has the lowest severity (so the app's 'active' status
+        # will overrule it)
+        app.get_status = base.AsyncMock(return_value='unknown')
+        apps = {"dummy_app": app}
 
         with patch.object(Model, 'applications', new_callable=PropertyMock) as mock_apps:
             mock_apps.return_value = apps