From 847c181ec9b73e51daf39efc5c597eff2e7cdb31 Mon Sep 17 00:00:00 2001 From: Jesse Hallam Date: Wed, 23 May 2018 14:26:35 -0400 Subject: MM-8622: Improved plugin error reporting (#8737) * allow `Wait()`ing on the supervisor In the event the plugin supervisor shuts down a plugin for crashing too many times, the new `Wait()` interface allows the `ActivatePlugin` to accept a callback function to trigger when `supervisor.Wait()` returns. If the supervisor shuts down normally, this callback is invoked with a nil error; otherwise, any error reported by the supervisor is passed along. * improve plugin activation/deactivation logic Avoid triggering activation of previously failed-to-start plugins just because something in the configuration changed. Now, intelligently compare the global enable bit as well as each individual plugin's enabled bit. * expose store to manipulate PluginStatuses * expose API to fetch plugin statuses * keep track of whether or not plugin sandboxing is supported * transition plugin statuses * restore error on plugin activation if already active * don't initialize test plugins until successfully loaded * emit websocket events when plugin statuses change * skip pruning if already initialized * MM-8622: maintain plugin statuses in memory Switch away from persisting plugin statuses to the database, and maintain them in memory instead. This will be followed by a cluster interface to query the in-memory plugin statuses from all cluster nodes. At the same time, rename `cluster_discovery_id` on the `PluginStatus` model object to `cluster_id`. 
* MM-8622: aggregate plugin statuses across cluster * fetch cluster plugin statuses when emitting websocket notification * address unit test fixes after rebasing * relax (poor) racey unit test re: supervisor.Wait() * make store-mocks --- plugin/rpcplugin/rpcplugintest/supervisor.go | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'plugin/rpcplugin/rpcplugintest') diff --git a/plugin/rpcplugin/rpcplugintest/supervisor.go b/plugin/rpcplugin/rpcplugintest/supervisor.go index 2ae065621..d225f96fc 100644 --- a/plugin/rpcplugin/rpcplugintest/supervisor.go +++ b/plugin/rpcplugin/rpcplugintest/supervisor.go @@ -174,6 +174,14 @@ func testSupervisor_PluginCrash(t *testing.T, sp SupervisorProviderFunc) { bundle := model.BundleInfoForPath(dir) supervisor, err := sp(bundle) require.NoError(t, err) + + var supervisorWaitErr error + supervisorWaitDone := make(chan bool, 1) + go func() { + supervisorWaitErr = supervisor.Wait() + close(supervisorWaitDone) + }() + require.NoError(t, supervisor.Start(&api)) failed := false @@ -189,7 +197,21 @@ func testSupervisor_PluginCrash(t *testing.T, sp SupervisorProviderFunc) { time.Sleep(time.Millisecond * 100) } assert.True(t, recovered) + + select { + case <-supervisorWaitDone: + require.Fail(t, "supervisor.Wait() unexpectedly returned") + case <-time.After(500 * time.Millisecond): + } + require.NoError(t, supervisor.Stop()) + + select { + case <-supervisorWaitDone: + require.Nil(t, supervisorWaitErr) + case <-time.After(5000 * time.Millisecond): + require.Fail(t, "supervisor.Wait() failed to return") + } } // Crashed plugins should be relaunched at most three times. 
@@ -239,6 +261,14 @@ func testSupervisor_PluginRepeatedlyCrash(t *testing.T, sp SupervisorProviderFun bundle := model.BundleInfoForPath(dir) supervisor, err := sp(bundle) require.NoError(t, err) + + var supervisorWaitErr error + supervisorWaitDone := make(chan bool, 1) + go func() { + supervisorWaitErr = supervisor.Wait() + close(supervisorWaitDone) + }() + require.NoError(t, supervisor.Start(&api)) for attempt := 1; attempt <= 4; attempt++ { @@ -264,10 +294,19 @@ func testSupervisor_PluginRepeatedlyCrash(t *testing.T, sp SupervisorProviderFun } if attempt < 4 { + require.Nil(t, supervisorWaitErr) require.True(t, recovered, "failed to recover after attempt %d", attempt) } else { require.False(t, recovered, "unexpectedly recovered after attempt %d", attempt) } } + + select { + case <-supervisorWaitDone: + require.NotNil(t, supervisorWaitErr) + case <-time.After(500 * time.Millisecond): + require.Fail(t, "supervisor.Wait() failed to return after plugin crashed") + } + require.NoError(t, supervisor.Stop()) } -- cgit v1.2.3-1-g7c22