/*
OpenIO SDS sqliterepo
Copyright (C) 2014 Worldline, as part of Redcurrant
Copyright (C) 2015-2017 OpenIO SAS, as part of OpenIO SDS

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3.0 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library.
*/

#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

#include <metautils/lib/metautils.h>
#include <sqliterepo/sqliterepo_variables.h>

#include "sqliterepo.h"
#include "hash.h"
#include "election.h"
#include "version.h"
#include "synchro.h"
#include "internals.h"

/* Number of slots in the per-member ring of logged events (the `log` array
 * of struct election_member_s). */
#define EVENTLOG_SIZE 32
/* True when step `e` compares greater than or equal to STEP_SLAVE. */
#define STATUS_FINAL(e) ((e) >= STEP_SLAVE)

/* Applies NAME2CONST to `n` and to the inline name embedded in member `m`.
 * NOTE(review): `m` is not parenthesized, pass a plain identifier. */
#define MEMBER_NAME(n, m) NAME2CONST(n, m->inline_name)

/* TRACE_EXECUTION(M) appends the current function and line to the activity
 * trace of manager M. It expands to nothing unless HAVE_EXTRA_DEBUG is
 * defined. */
#ifdef HAVE_EXTRA_DEBUG
#define TRACE_EXECUTION(M) _manager_record_activity((M), __FUNCTION__, __LINE__)
#else
#define TRACE_EXECUTION(...)
#endif

/* Unsigned integer type for request identifiers.
 * NOTE(review): no use of it is visible in this part of the file. */
typedef guint req_id_t;

/* Events accepted by transition() to drive the state machine of an election
 * member. Each value has a printable name returned by _evt2str(). */
enum event_type_e
{
	/* ping, a.k.a poke, etc */
	EVT_NONE = 0,

	/* interruptions */
	EVT_DISCONNECTED,
	EVT_LEAVE_REQ,
	EVT_SYNC_REQ,
	EVT_LEFT_SELF,
	EVT_LEFT_MASTER,

	/* actions results */
	EVT_GETPEERS_DONE,

	/* outcomes of a GETVERS request */
	EVT_GETVERS_OK,
	EVT_GETVERS_KO,
	EVT_GETVERS_OLD,
	EVT_GETVERS_RACE,

	EVT_MASTER_OK,
	EVT_MASTER_KO,
	EVT_MASTER_BAD,

	EVT_CREATE_OK,
	EVT_CREATE_KO,

	EVT_EXISTS_OK,
	EVT_EXISTS_KO,

	EVT_LIST_OK,
	EVT_LIST_KO,

	EVT_LEAVE_OK,
	EVT_LEAVE_KO,

	EVT_SYNC_OK,
	EVT_SYNC_KO,
};

/* @private
 * Head of a doubly-linked list of election members, chained through their
 * `prev` / `next` fields. The manager holds one beacon per election step. */
struct deque_beacon_s
{
	struct election_member_s *front; /* first member, NULL when empty */
	struct election_member_s *back;  /* last member, NULL when empty */
	guint count;                     /* number of linked members */
};

/* @private
 * One checkpoint of the manager's activity trace, as recorded by
 * _manager_record_activity(). */
struct activity_trace_element_s
{
	gint64 when;      /* value of oio_ext_monotonic_time() at the checkpoint */
	const char *func; /* name of the function, not owned */
	int line;         /* source line of the checkpoint */
};

/* @private
 * State of an election manager, as built by election_manager_create(). */
struct election_manager_s
{
	/* Set to &VTABLE at creation, checked by the public entry points */
	struct election_manager_vtable_s *vtable;

	/* Set by election_manager_set_peering(), not owned */
	struct sqlx_peering_s *peering;

	/* Array of `sync_nb` handles, grown by election_manager_add_sync() */
	struct sqlx_sync_s **sync_tab;
	guint sync_nb;

	/* do not free or change the fields below */
	const struct replication_config_s *config;

	/* GTree<gchar*,GCond*> */
	GTree *conditions;

	/* Thread pool running _completion_router() */
	GThreadPool *completions;

	/* Thread pool running _worker_getpeers() */
	GThreadPool *tasks_getpeers;

	/* GTree<gchar*,struct election_member_s*> */
	GTree *members_by_key;

	/* Taken by _manager_lock(), released by _manager_unlock() */
	GMutex lock;

	/* Trace of actions while the lock was held */
	GArray *activity_trace;

	/* Set by election_manager_exit_all(); also disables activity tracing */
	gboolean exiting;

	/* When set, _manager_unlock() calls sqlx_peering__notify() right after
	 * releasing the lock, then clears the flag */
	gboolean deferred_peering_notify;

	/* One list of members per election step, indexed by the step */
	struct deque_beacon_s members_by_state[STEP_MAX];
};

/* @private
 * One entry of the per-member event log, filled by member_log_event(). */
struct logged_event_s
{
	/* Wall-clock time in milliseconds, reduced modulo 2^48 */
	gint64 time;
	enum event_type_e event   :8; /* event that was handled */
	enum election_step_e pre  :8; /* step before the event */
	enum election_step_e post :8; /* step after the event */
};

/* @private
 * State of one election, i.e. the local membership for one base. Instances
 * are created by _LOCKED_init_member() and indexed in the manager both by
 * key (`members_by_key`) and by step (`members_by_state`). */
struct election_member_s
{
	/* Links in the manager's list for the current step, managed by
	 * _DEQUE_add() and _DEQUE_remove() */
	struct election_member_s *prev;
	struct election_member_s *next;

	struct election_manager_s *manager;
	/* Handle chosen among the manager's `sync_tab` at creation (may be NULL) */
	struct sqlx_sync_s *sync;

	/* Weak pointer to the condition, do not free! */
	GCond *cond;

	/* Since when do we loop between pending states. That value is used by
	 * client threads to decide whether to wait (or not) for a final state. */
	gint64 when_unstable;

	/* last time the status was changed */
	gint64 last_status;

	/* last time a USE request has been sent */
	gint64 last_USE;

	/* last time the app wanted a status */
	gint64 last_atime;

	/* First node of the children sequence (sorted by ID) */
	gchar *master_url;

	guint refcount;

	gint32 master_id; /* ID of the master */
	gint32 local_id; /* ID generated by zookeeper */

	/* Incremented when creating a new ZK node, as a new membership. We use
	 * the `generation_id` to know if watchers are based on the current or an
	 * older membership. */
	guint16 generation_id;

	/* How many times a set of GETVERS requests will be retried (in addition
	 * to the initial trial). */
	guint8 attempts_GETVERS;

	guint8 pending_GETVERS; /* not finished yet */
	guint8 count_GETVERS; /* initially sent */
	guint8 outdated_GETVERS; /* finished, ask RESYNC */
	guint8 concurrent_GETVERS; /* Finished, shows race condition */
	guint8 errors_GETVERS; /* Finished, no result */

	/* Count of events logged so far; wraps, and is reduced modulo
	 * EVENTLOG_SIZE to index `log` */
	guint8 log_index;

	/* Current step; also selects the manager's list holding the member */
	enum election_step_e step : 8;

	/* Peers as they are at the election start */
	gchar **peers;

	unsigned char requested_peers_decache : 1;

	/* flags managing unconventional transitions */
	unsigned char requested_USE : 1;
	unsigned char requested_PIPEFROM : 1;
	unsigned char requested_LEAVE : 1;
	unsigned char requested_LEFT_SELF : 1;
	unsigned char requested_LEFT_MASTER : 1;

	/* Request started, waiting for the status */
	unsigned char pending_PIPEFROM : 1;
	unsigned char pending_ZK_CREATE : 1;
	unsigned char pending_ZK_EXISTS : 1;
	unsigned char pending_ZK_LIST : 1;
	unsigned char pending_ZK_GET : 1;
	unsigned char pending_ZK_DELETE : 1;

	/* Tell whether `local_id` and `master_id` hold a meaningful value */
	unsigned char flag_local_id : 1;
	unsigned char flag_master_id : 1;

	/* Election key, also used as the key in the manager's trees */
	gchar key[OIO_ELECTION_KEY_LIMIT_LENGTH];
	/* Copy of the name (base, type, ns) given at creation */
	struct sqlx_name_inline_s inline_name;

	/* Ring of the latest events, see member_log_event() */
	struct logged_event_s log[EVENTLOG_SIZE];
};

/* Destroy-notify that does nothing, so that the thread-local value below is
 * never released through the GPrivate. */
static void _noop (gpointer p) { (void)p; }

/* Thread-local slot accessed through _thlocal_set_manager() and
 * _thlocal_get_manager(). */
static GPrivate th_local_key_manager = G_PRIVATE_INIT(_noop);

/* Returns a printable name for an election step, or "STEP?" when the value
 * matches none of the listed steps.
 * NOTE(review): presumably ON_ENUM expands to a `case` returning the
 * constant's name -- confirm against its definition. */
static const char * _step2str(const enum election_step_e step) {
	switch (step) {
		ON_ENUM(STEP_,NONE);
		ON_ENUM(STEP_,PEERING);
		ON_ENUM(STEP_,CREATING);
		ON_ENUM(STEP_,WATCHING);
		ON_ENUM(STEP_,LISTING);
		ON_ENUM(STEP_,ASKING);
		ON_ENUM(STEP_,CHECKING_MASTER);
		ON_ENUM(STEP_,CHECKING_SLAVES);
		ON_ENUM(STEP_,DELAYED_CHECKING_MASTER);
		ON_ENUM(STEP_,REFRESH_CHECKING_MASTER);
		ON_ENUM(STEP_,DELAYED_CHECKING_SLAVES);
		ON_ENUM(STEP_,REFRESH_CHECKING_SLAVES);
		ON_ENUM(STEP_,SYNCING);
		ON_ENUM(STEP_,LEAVING);
		ON_ENUM(STEP_,LEAVING_FAILING);
		ON_ENUM(STEP_,FAILED);
		ON_ENUM(STEP_,SLAVE);
		ON_ENUM(STEP_,MASTER);
	}

	/* not a listed step */
	return "STEP?";
}

/* Returns a printable name for an event, or "EVENT?" when the value matches
 * none of the values of enum event_type_e.
 * NOTE(review): presumably ON_ENUM expands to a `case` returning the
 * constant's name -- confirm against its definition. */
static const char * _evt2str(const enum event_type_e evt) {
	switch (evt) {
		ON_ENUM(EVT_,NONE);

		ON_ENUM(EVT_,DISCONNECTED);
		ON_ENUM(EVT_,LEAVE_REQ);
		ON_ENUM(EVT_,SYNC_REQ);
		ON_ENUM(EVT_,LEFT_SELF);
		ON_ENUM(EVT_,LEFT_MASTER);

		ON_ENUM(EVT_,GETPEERS_DONE);

		ON_ENUM(EVT_,GETVERS_OK);
		ON_ENUM(EVT_,GETVERS_KO);
		ON_ENUM(EVT_,GETVERS_OLD);
		ON_ENUM(EVT_,GETVERS_RACE);

		ON_ENUM(EVT_,CREATE_OK);
		ON_ENUM(EVT_,CREATE_KO);

		ON_ENUM(EVT_,MASTER_KO);
		ON_ENUM(EVT_,MASTER_BAD);
		ON_ENUM(EVT_,MASTER_OK);

		ON_ENUM(EVT_,EXISTS_OK);
		ON_ENUM(EVT_,EXISTS_KO);

		ON_ENUM(EVT_,LIST_OK);
		ON_ENUM(EVT_,LIST_KO);

		ON_ENUM(EVT_,LEAVE_OK);
		ON_ENUM(EVT_,LEAVE_KO);

		ON_ENUM(EVT_,SYNC_OK);
		ON_ENUM(EVT_,SYNC_KO);
	}

	/* not a listed event */
	return "EVENT?";
}

/* Tells whether the ZooKeeper return code `zrc` belongs to the set of codes
 * this module treats as a disconnection. */
static gboolean _zoo_disconnected(int zrc) {
	if (zrc == ZRUNTIMEINCONSISTENCY || zrc == ZDATAINCONSISTENCY)
		return TRUE;
	if (zrc == ZMARSHALLINGERROR || zrc == ZUNIMPLEMENTED)
		return TRUE;
	if (zrc == ZINVALIDSTATE || zrc == ZSESSIONEXPIRED)
		return TRUE;
	if (zrc == ZAUTHFAILED)
		return TRUE;
	return FALSE;
}

/* ------------------------------------------------------------------------- */

/* Runs `action`, then logs the event with member_log_event() when `evt` is
 * not EVT_NONE or when `action` changed the step of `member`.
 * `member` and `evt` are evaluated several times and are not parenthesized:
 * pass plain identifiers or constants. */
#define member_log_change(member,evt,action) do { \
	const enum election_step_e pre = member->step; \
	action; \
	const enum election_step_e post = member->step; \
	if (evt != EVT_NONE || pre != post) \
		member_log_event(member, pre, evt); \
} while (0)

static void member_destroy(struct election_member_s *member);

static void _manager_clean(struct election_manager_s *manager);

enum election_mode_e _manager_get_mode (const struct election_manager_s *manager);

const char * _manager_get_local (const struct election_manager_s *manager);

static GError * _election_get_peers(struct election_manager_s *manager,
		const struct sqlx_name_s *n, gboolean nocache, gchar ***peers);

static GError * _election_trigger_RESYNC(struct election_manager_s *manager,
		const struct sqlx_name_s *n);

static GError * _election_init(struct election_manager_s *manager,
		const struct sqlx_name_s *n, enum election_step_e *out_status,
		gboolean *replicated);

static GError * _election_start(struct election_manager_s *manager,
		const struct sqlx_name_s *n);

static GError * _election_exit(struct election_manager_s *manager,
		const struct sqlx_name_s *n);

static enum election_status_e _election_get_status(struct election_manager_s *m,
		const struct sqlx_name_s *n, gchar **master_url);

/* Implementation of the election manager interface provided by this file.
 * Its address also serves as a type tag, checked by the entry points that
 * require this implementation.
 * NOTE(review): the initializers are positional; their order must match the
 * declaration of struct election_manager_vtable_s -- confirm when editing. */
static struct election_manager_vtable_s VTABLE =
{
	_manager_clean,
	_manager_get_mode,
	_manager_get_local,
	_election_get_peers,
	_election_init,
	_election_start,
	_election_exit,
	_election_get_status,
	_election_trigger_RESYNC,
};

static void transition_error(struct election_member_s *member,
		enum event_type_e evt, enum ZOO_ERRORS zrc);

static void transition(struct election_member_s *member,
		enum event_type_e evt_type, void *evt_arg);

static gboolean wait_for_final_status(struct election_member_s *m,
		gint64 deadline);

/* Stores M in the calling thread's slot of `th_local_key_manager`. */
#define _thlocal_set_manager(M) do { \
	g_private_replace (&th_local_key_manager, (M)); \
} while (0)

/* Returns the value stored in the calling thread's slot of
 * `th_local_key_manager`. */
#define _thlocal_get_manager() g_private_get (&th_local_key_manager)

/* Value destructor of the manager's `conditions` tree: clears then frees a
 * heap-allocated GCond. A NULL pointer is ignored. */
static void
_cond_clean (gpointer p)
{
	if (p == NULL)
		return;

	GCond *c = (GCond *) p;
	g_cond_clear (c);
	g_free (c);
}

static inline void
_manager_record_activity(struct election_manager_s *M, const char *fn, int ln)
{
	if (M->exiting) return;

	struct activity_trace_element_s item = {};
	item.when = oio_ext_monotonic_time();
	item.func = fn;
	item.line = ln;
	g_array_append_vals(M->activity_trace, &item, 1);
}

#ifdef HAVE_EXTRA_DEBUG

/* Restarts the activity trace of manager M and records a first checkpoint.
 * Called by _manager_lock() right after the mutex is acquired. The argument
 * is parenthesized so that any pointer expression can be passed. */
#define _manager_save_locked(M) do { \
	g_array_set_size((M)->activity_trace, 0); \
	TRACE_EXECUTION(M); \
} while (0)

/* Emits a NOTICE describing the activity trace of `M` when the time elapsed
 * between its first and last checkpoints exceeds
 * `oio_election_lock_alert_delay`. The message gives the total, then for each
 * checkpoint the delay since the previous one with the function and line.
 * Does nothing when the manager is exiting. The trace must not be empty. */
static void
_manage_dump_activity(struct election_manager_s *M)
{
	if (M->exiting) return;

	const GArray *trace = M->activity_trace;
	EXTRA_ASSERT(trace->len > 0);

	const gint64 first =
		g_array_index(trace, struct activity_trace_element_s, 0).when;
	const gint64 last =
		g_array_index(trace, struct activity_trace_element_s, trace->len - 1).when;
	const gint64 total = last - first;
	if (!(total > oio_election_lock_alert_delay))
		return;

	GString *msg = g_string_sized_new(512);
	g_string_printf(msg, "total=%" G_GINT64_FORMAT, total);

	/* each checkpoint is reported relative to the one before it */
	gint64 previous = first;
	for (guint i = 0; i < trace->len; i++) {
		const struct activity_trace_element_s * const cp =
			&g_array_index(trace, struct activity_trace_element_s, i);
		g_string_append_printf(msg, " (%" G_GINT64_FORMAT "/%s:%d)",
				cp->when - previous, cp->func, cp->line);
		previous = cp->when;
	}

	GRID_NOTICE("LOCK %.*s", (int) msg->len, msg->str);
	g_string_free(msg, TRUE);
}
#else
/* Without HAVE_EXTRA_DEBUG, no activity is traced: both helpers expand to
 * nothing. */
#define _manager_save_locked(...)
#define _manage_dump_activity(...)
#endif

/* Acquires the mutex of manager M, then restarts its activity trace (the
 * latter only has an effect with HAVE_EXTRA_DEBUG). */
#define _manager_lock(M) do { \
	g_mutex_lock(&(M)->lock); \
	_manager_save_locked(M); \
} while (0)

/* Releases the mutex of manager M.
 * Before unlocking, a last checkpoint is traced and the trace is dumped if
 * the lock was held for too long (both only with HAVE_EXTRA_DEBUG). The
 * `deferred_peering_notify` flag is read and cleared while the lock is still
 * held; when it was set, sqlx_peering__notify() is called once the lock has
 * been released.
 * M is evaluated several times; every use is parenthesized so that any
 * side-effect-free pointer expression can be passed. */
#define _manager_unlock(M) do { \
	TRACE_EXECUTION(M); \
	_manage_dump_activity(M); \
	const gboolean _peering_notify = (M)->deferred_peering_notify; \
	(M)->deferred_peering_notify = FALSE; \
	g_mutex_unlock(&(M)->lock); \
	if (_peering_notify) { \
		sqlx_peering__notify((M)->peering); \
	} \
} while (0)

static void _completion_router(gpointer p, struct election_manager_s *M);
static void _worker_getpeers(struct election_member_s *m, struct election_manager_s *M);

/* -------------------------------------------------------------------------- */

/* Unlinks `m` from the manager's list matching its current step and
 * decrements the count of that list. `m` must currently be linked in that
 * list; on return its `prev` and `next` links are NULL. */
static inline void
_DEQUE_remove (struct election_member_s *m)
{
	EXTRA_ASSERT(m != NULL);
	EXTRA_ASSERT(m->step < STEP_MAX);
	struct deque_beacon_s *queue = &m->manager->members_by_state[m->step];
	EXTRA_ASSERT(queue->count > 0);

	struct election_member_s *before = m->prev;
	struct election_member_s *after = m->next;

	/* bridge the neighbours over the removed item */
	if (before != NULL)
		before->next = after;
	if (after != NULL)
		after->prev = before;

	/* fix the ends of the list if the item was at one of them */
	if (queue->front == m)
		queue->front = after;
	if (queue->back == m)
		queue->back = before;

	m->next = NULL;
	m->prev = NULL;
	queue->count -= 1;
}

/* Appends `m` at the back of the manager's list matching its current step
 * and increments the count of that list. `m` must not be linked anywhere
 * (both its `prev` and `next` links must be NULL). */
static inline void
_DEQUE_add (struct election_member_s *m)
{
	EXTRA_ASSERT(m != NULL);
	EXTRA_ASSERT(m->step < STEP_MAX);
	EXTRA_ASSERT(m->prev == NULL);
	EXTRA_ASSERT(m->next == NULL);
	struct deque_beacon_s *queue = &m->manager->members_by_state[m->step];

	struct election_member_s *tail = queue->back;
	if (tail != NULL) {
		tail->next = m;
		m->prev = tail;
	}
	if (queue->front == NULL)
		queue->front = m;
	queue->back = m;
	queue->count += 1;
}

/* --- Misc helpers --------------------------------------------------------- */

static inline gboolean
_is_over (const gint64 now, const gint64 last, const gint64 delay)
{
	return delay > 0 && last > 0 && last < OLDEST(now,delay);
}

/* Same as _is_over(), with the current monotonic time as `now`. */
#define _IS_OVER(L,D) _is_over(oio_ext_monotonic_time(), L, D)

/* Extracts the numeric suffix of a sequence node name, i.e. the decimal
 * number following the last '-' of `path`.
 *
 * @param path the node name, must not be NULL
 * @param pid  receives the ID on success, left untouched on failure
 * @return TRUE when a valid ID has been stored in `*pid`; FALSE when `path`
 *         has no '-', when no digit follows it, when trailing characters
 *         remain after the number, or when the number does not fit in a
 *         gint32.
 */
static gboolean
_extract_id(const char *path, gint32 *pid)
{
	const char *stripe = strrchr(path, '-');
	if (!stripe)
		return FALSE;

	/* @see https://zookeeper.apache.org/doc/r3.4.8/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming
	 * Zookeeper allows negative sequence number. So the last stripe is
	 * maybe the sign. */
	if (stripe > path && stripe[-1] == '-')
		stripe --;

	const char *digits = stripe + 1;
	gchar *end = NULL;

	/* errno must be cleared first: the conversion only sets it on error, so
	 * a stale ERANGE left by an earlier call would reject a valid number. */
	errno = 0;
	const gint64 id = g_ascii_strtoll(digits, &end, 10);
	if (errno == ERANGE)
		return FALSE;

	/* Nothing converted (empty suffix or lone sign), or trailing garbage */
	if (end == digits || *end != '\0')
		return FALSE;

	/* Refuse to silently truncate what does not fit in the output */
	if (id < G_MININT32 || id > G_MAXINT32)
		return FALSE;

	*pid = (gint32) id;
	return TRUE;
}

/* Three-way comparison of two gint32, as computed by the CMP macro. */
static inline int
gint32_cmp(gint32 i1, gint32 i2)
{
	const int order = CMP(i1,i2);
	return order;
}

/* Comparison callback ordering gint32 items with gint32_cmp(); `p1` and `p2`
 * point to the items to compare. */
static int
gint32_sort(gconstpointer p1, gconstpointer p2)
{
	const gint32 left = *(gint32*)p1;
	const gint32 right = *(gint32*)p2;
	return gint32_cmp(left, right);
}

static GArray *
nodev_to_int32v(const struct String_vector *sv, const char *prefix)
{
	GArray *array = g_array_sized_new(0, 0, sizeof(gint32), sv->count);

	for (int32_t i = 0; sv != NULL && i < sv->count; i++) {
		const char *s = sv->data[i];
		if (g_str_has_prefix(s, prefix)) {
			gint32 id = 0;
			if (_extract_id(s, &id))
				g_array_append_vals(array, &id, 1);
		}
	}

	if (array->len > 1)
		g_array_sort(array, gint32_sort);

	return array;
}

/* Public API --------------------------------------------------------------- */

void
election_manager_dump_delays(void)
{
	GRID_INFO("Election delays:");
	GRID_INFO("- get_status=%"G_GINT64_FORMAT"ms "
			"but nowait after %"G_GINT64_FORMAT"ms",
			oio_election_delay_wait / G_TIME_SPAN_MILLISECOND,
			oio_election_delay_nowait_pending / G_TIME_SPAN_MILLISECOND);
	GRID_INFO("- expire_SLAVE=%"G_GINT64_FORMAT"ms, "
			"expire_MASTER=%"G_GINT64_FORMAT"ms, "
			"expire_NONE=%"G_GINT64_FORMAT"ms",
			oio_election_delay_expire_SLAVE / G_TIME_SPAN_MILLISECOND,
			oio_election_delay_expire_MASTER / G_TIME_SPAN_MILLISECOND,
			oio_election_delay_expire_NONE / G_TIME_SPAN_MILLISECOND);
	GRID_INFO("- retry_failed=%"G_GINT64_FORMAT"ms",
			oio_election_delay_retry_FAILED / G_TIME_SPAN_MILLISECOND);
	GRID_INFO("- ping_final=%"G_GINT64_FORMAT"ms",
			oio_election_delay_ping_final / G_TIME_SPAN_MILLISECOND);
}

GError *
election_manager_create(struct replication_config_s *config,
		struct election_manager_s **result)
{
	EXTRA_ASSERT(result != NULL);
	EXTRA_ASSERT(config != NULL);

	*result = NULL;
	if (NULL == config->get_local_url || NULL == config->get_peers
			|| NULL == config->get_version || ELECTION_MODE_GROUP < config->mode)
		return NEWERROR(ERRCODE_PARAM, "Invalid configuration");

	struct election_manager_s *manager = g_malloc0(sizeof(*manager));
	manager->vtable = &VTABLE;
	manager->config = config;

	g_mutex_init(&manager->lock);

	manager->members_by_key =
		g_tree_new_full(metautils_strcmp3, NULL, NULL, NULL);

	manager->conditions =
		g_tree_new_full(metautils_strcmp3, NULL, g_free, _cond_clean);

	manager->completions =
		g_thread_pool_new((GFunc)_completion_router, manager, 8, FALSE, NULL);

	manager->tasks_getpeers =
		g_thread_pool_new((GFunc)_worker_getpeers, manager, 8, FALSE, NULL);

	manager->activity_trace =
		g_array_sized_new(FALSE, FALSE, sizeof(struct activity_trace_element_s), 32);

	*result = manager;
	return NULL;
}

/* Registers one more synchronization handle in the manager. The array of
 * handles grows by one slot; the handle itself is neither copied nor owned. */
void
election_manager_add_sync(struct election_manager_s *M,
		struct sqlx_sync_s *sync)
{
	EXTRA_ASSERT(M != NULL);
	EXTRA_ASSERT(sync != NULL);
	EXTRA_ASSERT(M->vtable == &VTABLE);

	if (M->sync_tab == NULL) {
		M->sync_nb = 0;
	} else {
		EXTRA_ASSERT(M->sync_nb > 0);
	}

	/* g_realloc() on a NULL pointer behaves as a plain allocation */
	M->sync_tab = g_realloc(M->sync_tab, (1 + M->sync_nb) * sizeof(void *));
	M->sync_tab[M->sync_nb++] = sync;
}

/* Binds the peering handle used by the manager. The handle is only
 * referenced; it stays owned by the caller. */
void
election_manager_set_peering (struct election_manager_s *manager,
		struct sqlx_peering_s *peering)
{
	EXTRA_ASSERT(peering != NULL);
	EXTRA_ASSERT(manager != NULL);
	EXTRA_ASSERT(manager->vtable == &VTABLE);

	manager->peering = peering;
}

/* Tells whether the base named `n` has at least one peer.
 *
 * @param result set to TRUE when the list returned by election_get_peers()
 *               starts with a non-empty string, to FALSE otherwise
 * @return NULL on success, or the error returned by election_get_peers()
 *         (then `*result` is FALSE)
 */
GError *
election_has_peers (struct election_manager_s *m, const struct sqlx_name_s *n,
		gboolean nocache, gboolean *result)
{
	EXTRA_ASSERT(result != NULL);

	gchar **peers = NULL;
	GError *err = election_get_peers (m, n, nocache, &peers);
	if (err != NULL) {
		EXTRA_ASSERT(peers == NULL);
		*result = FALSE;
		return err;
	}

	EXTRA_ASSERT(peers != NULL);
	*result = oio_str_is_set(*peers);
	g_strfreev(peers);
	return NULL;
}

/* Gets the peers of the base named `n` through the manager's implementation.
 * Without any manager, an empty NULL-terminated array is returned.
 *
 * @param peers receives the array of peers, or NULL before the
 *              implementation is called
 * @return NULL when there is no manager, else whatever the implementation
 *         returns
 */
GError *
election_get_peers (struct election_manager_s *m, const struct sqlx_name_s *n,
		gboolean nocache, gchar ***peers)
{
	EXTRA_ASSERT(peers != NULL);

	if (m == NULL) {
		/* a single zeroed slot is an empty NULL-terminated array */
		*peers = g_malloc0(sizeof(void*));
		return NULL;
	}

	*peers = NULL;
	return ((struct abstract_election_manager_s*)m)->vtable->
		election_get_peers(m,n,nocache,peers);
}

/* Returns the local URL as reported by the manager's implementation, or NULL
 * when there is no manager. */
const char *
election_manager_get_local (const struct election_manager_s *m)
{
	if (m != NULL)
		return ((struct abstract_election_manager_s*)m)->vtable->get_local(m);
	return NULL;
}

enum election_mode_e
election_manager_get_mode (const struct election_manager_s *m)
{
	if (!m)
		return ELECTION_MODE_NONE;
	return ((struct abstract_election_manager_s*)m)->vtable->get_mode(m);
}

static struct election_counts_s
_NOLOCK_count (struct election_manager_s *manager)
{
	struct election_counts_s count = {0};
	count.none = manager->members_by_state[STEP_NONE].count;
	count.pending += manager->members_by_state[STEP_CREATING].count;
	count.pending += manager->members_by_state[STEP_WATCHING].count;
	count.pending += manager->members_by_state[STEP_LISTING].count;
	count.pending += manager->members_by_state[STEP_ASKING].count;
	count.pending += manager->members_by_state[STEP_CHECKING_MASTER].count;
	count.pending += manager->members_by_state[STEP_CHECKING_SLAVES].count;
	count.pending += manager->members_by_state[STEP_DELAYED_CHECKING_MASTER].count;
	count.pending += manager->members_by_state[STEP_DELAYED_CHECKING_SLAVES].count;
	count.pending += manager->members_by_state[STEP_REFRESH_CHECKING_MASTER].count;
	count.pending += manager->members_by_state[STEP_REFRESH_CHECKING_SLAVES].count;
	count.pending += manager->members_by_state[STEP_SYNCING].count;
	count.pending += manager->members_by_state[STEP_LEAVING].count;
	count.pending += manager->members_by_state[STEP_LEAVING_FAILING].count;
	count.failed = manager->members_by_state[STEP_FAILED].count;
	count.slave = manager->members_by_state[STEP_SLAVE].count;
	count.master = manager->members_by_state[STEP_MASTER].count;
	count.total = count.none + count.pending + count.master + count.slave + count.failed;
	return count;
}

struct election_counts_s
election_manager_count(struct election_manager_s *manager)
{
	MANAGER_CHECK(manager);
	EXTRA_ASSERT (manager->vtable == &VTABLE);

	_manager_lock(manager);
	struct election_counts_s count = _NOLOCK_count (manager);
	_manager_unlock(manager);
	return count;
}

/* Implementation of `election_get_peers`: asks the `get_peers` hook of the
 * configuration for the peers of the base named `n`.
 *
 * @param result receives the array of peers on success, NULL on error. When
 *               the manager, its configuration or the hook is missing, an
 *               empty NULL-terminated array is returned.
 * @return NULL on success, or the error of the hook prefixed with the name
 *         of the base.
 */
static GError *
_election_get_peers(struct election_manager_s *manager,
		const struct sqlx_name_s *n, gboolean nocache, gchar ***result)
{
	SQLXNAME_CHECK(n);
	EXTRA_ASSERT(result != NULL);

	const gboolean has_hook =
		manager && manager->config && manager->config->get_peers;
	if (!has_hook) {
		*result = g_malloc0(sizeof(void*));
		return NULL;
	}

	gchar **peers = NULL;
	GError *err = manager->config->get_peers(manager->config->ctx, n, nocache, &peers);
	if (err != NULL) {
		EXTRA_ASSERT(peers == NULL);
		*result = NULL;
		g_prefix_error(&err, "get_peers(%s,%s): ", n->base, n->type);
		return err;
	}

	EXTRA_ASSERT(peers != NULL);
	*result = peers;
	return NULL;
}

/* Implementation of `clean`: releases the manager and everything it owns.
 *
 * Both thread pools are freed while waiting for their running tasks, then
 * all the members are destroyed whatever their reference count, then the
 * conditions. The array of synchronization handles is freed, but neither
 * the handles it points to, nor the peering, nor the configuration, which
 * the manager does not own. A NULL manager is ignored.
 */
static void
_manager_clean(struct election_manager_s *manager)
{
	if (!manager)
		return;

	struct election_counts_s count = _NOLOCK_count(manager);
	GRID_DEBUG("%d elections still alive at manager shutdown: %d masters, "
			"%d slaves, %d pending, %d failed, %d exited",
			count.total, count.master, count.slave, count.pending,
			count.failed, count.none);

	if (manager->activity_trace) {
		g_array_free(manager->activity_trace, TRUE);
		manager->activity_trace = NULL;
	}

	if (manager->completions) {
		g_thread_pool_free(manager->completions, FALSE, TRUE);
		manager->completions = NULL;
	}

	if (manager->tasks_getpeers) {
		g_thread_pool_free(manager->tasks_getpeers, FALSE, TRUE);
		manager->tasks_getpeers = NULL;
	}

	/* The array has been allocated by election_manager_add_sync() and
	 * nothing else releases it. Only the array belongs to the manager, the
	 * handles it references are left untouched. */
	g_free(manager->sync_tab);
	manager->sync_tab = NULL;
	manager->sync_nb = 0;

	/* The index by key has no destructor: the members are released below,
	 * through the per-step lists. */
	if (manager->members_by_key) {
		g_tree_destroy (manager->members_by_key);
		manager->members_by_key = NULL;
	}

	/* Ensure all the items are unlinked */
	for (int i=STEP_NONE; i<STEP_MAX ;++i) {
		struct deque_beacon_s *beacon = manager->members_by_state + i;
		while (beacon->front != NULL) {
			struct election_member_s *m = beacon->front;
			_DEQUE_remove(m);
			m->refcount = 0; /* ugly quirk that cope with an assert on refcount */
			member_destroy (m);
		}
		g_assert (beacon->count == 0);
	}

	/* Members only hold weak pointers to the conditions, so the latter are
	 * destroyed after the former. */
	if (manager->conditions) {
		g_tree_destroy(manager->conditions);
		manager->conditions = NULL;
	}

	g_mutex_clear(&manager->lock);

	g_free(manager);
}

const char *
_manager_get_local (const struct election_manager_s *manager)
{
	MANAGER_CHECK(manager);
	EXTRA_ASSERT (manager->vtable == &VTABLE);
	if (!manager->config || !manager->config->get_local_url)
		return NULL;
	return manager->config->get_local_url (manager->config->ctx);
}

enum election_mode_e
_manager_get_mode (const struct election_manager_s *manager)
{
	MANAGER_CHECK(manager);
	EXTRA_ASSERT (manager->vtable == &VTABLE);
	if (!manager->config || manager->config->mode <= ELECTION_MODE_NONE)
		return ELECTION_MODE_NONE;
	return manager->config->mode;
}

/* --- Member handling ----------------------------------------------------- */

/* Formats a one-line description of member `m` into the buffer `d` of `ds`
 * bytes: step (number and name), local and master IDs, master URL ("-" when
 * unset), reference count, pending PIPEFROM flag, GETVERS counters
 * (pending, errors, concurrent), key, then base name and type. */
static void
member_descr(const struct election_member_s *m, gchar *d, gsize ds)
{
	const char *step_name = _step2str(m->step);
	const char *master = "-";
	if (m->master_url)
		master = m->master_url;

	g_snprintf(d, ds,
			"%d/%s "
			"%"G_GINT32_FORMAT"/%"G_GINT32_FORMAT"/%s %u %u/%u/%u/%u [%s] [%s.%s]",
			m->step, step_name,
			m->local_id, m->master_id,
			master,
			m->refcount, m->pending_PIPEFROM,
			m->pending_GETVERS, m->errors_GETVERS, m->concurrent_GETVERS,
			m->key, m->inline_name.base, m->inline_name.type);
}

/* Logs, at level LVL (e.g. DEBUG, WARN), the string TAG followed by the
 * description of member M as built by member_descr(). The description is
 * only computed when the level is enabled.
 * The macro uses its M argument, and no longer captures a caller's variable
 * that happens to be named `m`. */
#define DUMP(LVL,TAG,M) do { \
	if (GRID_##LVL##_ENABLED()) { \
		gchar d[256]; \
		member_descr((M), d, sizeof(d)); \
		GRID_##LVL("%s %s", (TAG), d); \
	} \
} while (0)

/* member_trace() logs `tag` and the description of `m` at TRACE level. It
 * expands to nothing unless HAVE_EXTRA_DEBUG is defined. */
#ifdef HAVE_EXTRA_DEBUG
static void
member_trace(const char *tag, const struct election_member_s *m)
{
	DUMP(TRACE,tag,m);
}
#else
#define member_trace(...)
#endif

/* Logs `tag` and the description of `m` at DEBUG level. */
static void
member_debug(const char *tag, const struct election_member_s *m)
{
	DUMP(DEBUG,tag,m);
}

/* Logs `tag` and the description of `m` at WARN level. */
static void
member_warn(const char *tag, const struct election_member_s *m)
{
	DUMP(WARN,tag,m);
}

#ifdef HAVE_EXTRA_ASSERT
static gboolean
member_has_getvers (struct election_member_s *m)
{
	return m->count_GETVERS > 0
		|| m->pending_GETVERS > 0
		|| m->outdated_GETVERS > 0
		|| m->concurrent_GETVERS > 0
		|| m->errors_GETVERS > 0;
}

static gboolean
member_has_action(struct election_member_s *m)
{
	return m->pending_GETVERS > 0
		|| m->pending_PIPEFROM
		|| m->pending_ZK_CREATE
		|| m->pending_ZK_EXISTS
		|| m->pending_ZK_LIST
		|| m->pending_ZK_GET
		|| m->pending_ZK_DELETE;
}
#endif

/* Returns the local URL of the manager `m` belongs to, as given by
 * election_manager_get_local(). */
static const char*
member_get_url(struct election_member_s *m)
{
	const char *local_url = election_manager_get_local(MMANAGER(m));
	return local_url;
}

/* Takes one more reference on member `m`. There is no locking here. */
#define member_ref(m) do { \
	++ m->refcount; \
} while (0)

/* Frees the array of peers of `m`, if any, and resets the pointer. */
#define member_reset_peers(m) do { \
	if (m->peers) { \
		g_strfreev(m->peers); \
		m->peers = NULL; \
	} \
} while (0)

/* Drops one reference on member `m`. Nothing is destroyed here, even when
 * the count reaches zero. */
#define member_unref(m) do { \
	EXTRA_ASSERT (m->refcount > 0); \
	-- m->refcount; \
} while (0)

/* Returns the condition attached to `m`; it is not owned by the member. */
static GCond*
member_get_cond(struct election_member_s *m)
{
	GCond *attached = m->cond;
	return attached;
}

/* Returns the mutex protecting `m`, i.e. the lock of its manager. */
static GMutex*
member_get_lock(struct election_member_s *m)
{
	GMutex *shared = &(MMANAGER(m)->lock);
	return shared;
}

/* Locks the manager of member `m`: all the members of a manager share the
 * same lock. */
#define member_lock(m) do { \
	_manager_lock(m->manager); \
} while (0)

/* Unlocks the manager of member `m`. */
#define member_unlock(m) do { \
	_manager_unlock(m->manager); \
} while (0)

/* Signals the condition attached to member `m`. */
#define member_signal(m) do { \
	g_cond_signal(member_get_cond(m)); \
} while (0)

/* Replaces the master URL of `m` by a copy of `u` (NULL clears it). The
 * master ID must already be set. */
#define member_set_master_url(m,u) do { \
	EXTRA_ASSERT(BOOL(m->flag_master_id)); \
	oio_str_replace(&((m)->master_url), u); \
} while (0)

/* Forgets the local ID of `m`. */
#define member_reset_local(m) do { \
	m->local_id = 0; \
	m->flag_local_id = 0; \
} while (0)

/* Forgets the master of `m`: both its ID and its URL. */
#define member_reset_master(m) do { \
	m->master_id = 0; \
	m->flag_master_id = 0; \
	oio_str_replace(&((m)->master_url), NULL); \
} while (0)

/* Tells whether the local ID of `m` is set. */
#define member_has_local_id(m)  BOOL(m->flag_local_id)

/* Tells whether the master ID of `m` is set. */
#define member_has_master_id(m) BOOL(m->flag_master_id)

/* Sets the local ID of `m`, that must not be already set. */
#define member_set_local_id(m,id) do { \
	EXTRA_ASSERT(!member_has_local_id(m)); \
	m->local_id = id; \
	m->flag_local_id = 1; \
} while (0)

/* Sets the master ID of `m`. When a master ID was already set, the master
 * URL is cleared first. */
#define member_set_master_id(m,i64) do { \
	if (member_has_master_id(m)) \
		member_set_master_url(m, NULL); \
	m->master_id = (i64); \
	m->flag_master_id = 1; \
} while (0)

/* Zeroes the GETVERS counters of `m`, except `attempts_GETVERS`. */
#define member_reset_getvers(m) do { \
	m->pending_GETVERS = 0; \
	m->count_GETVERS = 0; \
	m->concurrent_GETVERS = 0; \
	m->outdated_GETVERS = 0; \
	m->errors_GETVERS = 0; \
} while (0)

/* Clears the `pending_*` flags of `m` (PIPEFROM and ZooKeeper operations). */
#define member_reset_pending(m) do { \
	m->pending_PIPEFROM = 0; \
	m->pending_ZK_CREATE = 0; \
	m->pending_ZK_EXISTS = 0; \
	m->pending_ZK_LIST = 0; \
	m->pending_ZK_GET = 0; \
	m->pending_ZK_DELETE = 0; \
} while (0)

/* Brings `m` back to a blank state: no pending action, no GETVERS counter,
 * no master and no local ID. */
static void
member_reset(struct election_member_s *m)
{
	/* The `requested_*` flags are deliberately left as is: they must
	 * survive, typically to a restart, e.g. to perform a final resync. */
	member_reset_pending(m);
	member_reset_getvers(m);
	member_reset_master(m);
	member_reset_local(m);
}

static void
member_set_status(struct election_member_s *m, const enum election_step_e post)
{
	const enum election_step_e pre = m->step;

	if (pre != post)
		m->last_status = oio_ext_monotonic_time();

	_DEQUE_remove (m);
	m->step = post;
	_DEQUE_add (m);

	/* send a signal to wake all the threads waiting for the election. They
	 * should receive a signal when they have an action to perform with it:
	 * either the status is FINAL and the threads can act, or we re-jumped
	 * to an inactive state and the threads would kickoff a new election. */
	if (STATUS_FINAL(post)) {
		member_debug("FINAL", m);
		member_signal(m);
	} else if (post == STEP_NONE) {
		member_signal(m);
	}
}

static void
member_log_event(struct election_member_s *member, enum election_step_e pre,
		enum event_type_e evt)
{
	struct logged_event_s *plog;
	plog = member->log + ((member->log_index++) % EVENTLOG_SIZE);
	plog->event = evt;
	plog->pre = pre;
	plog->post = member->step;
	plog->time = (oio_ext_real_time() / G_TIME_SPAN_MILLISECOND) % (1LL << 48);
}

/* member_log_completion() logs, at TRACE level, `tag` with the ZooKeeper
 * return code `zrc` (number and text) and the base name, type and key of
 * `m`. It expands to nothing unless HAVE_EXTRA_DEBUG is defined. */
#ifdef HAVE_EXTRA_DEBUG
static void
member_log_completion(const char *tag, int zrc, const struct election_member_s *m)
{
	GRID_TRACE("%s %d/%s [%s.%s] %s", tag, zrc, zerror(zrc),
			m->inline_name.base, m->inline_name.type, m->key);
}
#else
#define member_log_completion(...)
#endif

/* Writes into `d` (of `dlen` bytes) the node name of `m`: its key, a dash,
 * then its local ID on 10 zero-padded digits. Without local ID, only the key
 * and the dash are written.
 * @return `d` */
static const char *
member_fullpath(struct election_member_s *m, gchar *d, gsize dlen)
{
	if (!member_has_local_id(m)) {
		g_snprintf(d, dlen, "%s-", m->key);
		return d;
	}

	g_snprintf(d, dlen, "%s-%010"G_GINT32_FORMAT, m->key, m->local_id);
	return d;
}

/* Writes into `d` (of `dlen` bytes) the node name of the master of `m`: the
 * key, a dash, then the master ID on 10 zero-padded digits.
 * @return `d`, or NULL (with `d` untouched) when no master ID is set */
static const char *
member_masterpath(struct election_member_s *m, gchar *d, gsize dlen)
{
	if (member_has_master_id(m)) {
		g_snprintf(d, dlen, "%s-%010"G_GINT32_FORMAT, m->key, m->master_id);
		return d;
	}
	return NULL;
}

/* Frees `member` with its master URL and its peers. The condition is only
 * weakly referenced and is not released. No reference must remain on the
 * member. A NULL pointer is ignored. */
static void
member_destroy(struct election_member_s *member)
{
	if (member == NULL)
		return;

	EXTRA_ASSERT (member->refcount == 0);

	member_reset_peers(member);
	oio_str_clean (&member->master_url);
	member->cond = NULL;

	g_free(member);
}

static struct election_member_s *
_LOCKED_get_member (struct election_manager_s *M, const char *k)
{
	struct election_member_s *m = g_tree_lookup (M->members_by_key, k);
	if (m)
		member_ref (m);
	TRACE_EXECUTION(M);
	return m;
}

static GCond *
_manager_get_condition (struct election_manager_s *m, const char *k)
{
	GCond *cond = g_tree_lookup (m->conditions, k);
	if (!cond) {
		cond = g_malloc0 (sizeof(GCond));
		g_cond_init (cond);
		g_tree_replace (m->conditions, g_strdup(k), cond);
	}
	return cond;
}

static struct election_member_s *
_LOCKED_init_member(struct election_manager_s *manager,
		const struct sqlx_name_s *n, const char *key,
		gboolean autocreate)
{
	MANAGER_CHECK(manager);
	NAME_CHECK(n);

	struct election_member_s *member = _LOCKED_get_member (manager, key);
	if (!member && autocreate) {
		member = g_malloc0 (sizeof(*member));
		member->generation_id = oio_ext_rand_int();

		if (manager->sync_nb <= 0)
			member->sync = NULL;
		else if (manager->sync_nb == 1)
			member->sync = manager->sync_tab[0];
		else {
			const gsize len = strlen(key);
			guint16 id = 0;
			oio_str_hex2bin(key + len - 4, (guint8 *) &id, 4);
			member->sync = manager->sync_tab[id % manager->sync_nb];
		}

		member->manager = manager;
		member->last_status = oio_ext_monotonic_time ();
		g_strlcpy(member->key, key, sizeof(member->key));
		g_strlcpy(member->inline_name.base, n->base, sizeof(member->inline_name.base));
		g_strlcpy(member->inline_name.type, n->type, sizeof(member->inline_name.type));
		g_strlcpy(member->inline_name.ns, n->ns, sizeof(member->inline_name.ns));
		member->refcount = 2;
		member->cond = _manager_get_condition(manager, member->key);

		_DEQUE_add (member);
		g_tree_replace(manager->members_by_key, member->key, member);
	}

	TRACE_EXECUTION(manager);
	return member;
}

static struct election_member_s *
manager_get_member (struct election_manager_s *m, const char *k)
{
	_manager_lock(m);
	struct election_member_s *member = _LOCKED_get_member (m, k);
	_manager_unlock(m);
	return member;
}

static guint
manager_count_active(struct election_manager_s *manager)
{
	struct election_counts_s count = election_manager_count (manager);
	return count.pending + count.master + count.slave;
}

/* GTraverseFunc asking the member `v` to leave its election, unless it is
 * in the step NONE, LEAVING or LEAVING_FAILING.
 * @return always FALSE, so that the traversal continues */
static gboolean
_run_exit (gpointer k, gpointer v, gpointer i)
{
	(void) k, (void) i;
	struct election_member_s *m = v;

	switch (m->step) {
		case STEP_NONE:
		case STEP_LEAVING:
		case STEP_LEAVING_FAILING:
			/* out already, or on its way out */
			break;
		default:
			transition(m, EVT_LEAVE_REQ, NULL);
			break;
	}
	return FALSE;
}

gboolean
election_manager_is_operational(struct election_manager_s *manager)
{
	return (manager != NULL &&
			manager->vtable != NULL &&
			manager->peering != NULL &&
			manager->config != NULL &&
			manager->members_by_key != NULL);
}

/* Asks all the members to leave their election, then optionally waits for
 * the active elections to terminate.
 *
 * @param manager  the manager, that must be this file's implementation
 * @param duration how long to wait for the active elections, in the unit of
 *                 oio_ext_monotonic_time(). When not strictly positive,
 *                 there is no wait at all. The count of active elections is
 *                 polled every 500ms.
 * @param persist  when FALSE, the `exiting` flag of the manager is cleared
 *                 before returning
 */
void
election_manager_exit_all(struct election_manager_s *manager, gint64 duration,
		gboolean persist)
{
	GRID_INFO("Voluntarily exiting all the elections...");
	MANAGER_CHECK(manager);
	EXTRA_ASSERT (manager->vtable == &VTABLE);
	gint64 pivot = oio_ext_monotonic_time () + duration;

	/* Order the nodes to exit */
	_manager_lock(manager);
	manager->exiting = TRUE;
	g_tree_foreach (manager->members_by_key, _run_exit, NULL);
	_manager_unlock(manager);

	guint count = manager_count_active(manager);
	if (duration <= 0) {
		GRID_INFO("%u elections still active", count);
	} else {
		/* The count is tested before any wait: when nothing is active
		 * there is no reason to sleep nor to announce a wait. */
		while (count > 0) {
			GRID_INFO("Waiting for %u active elections", count);
			if (oio_ext_monotonic_time() > pivot) {
				GRID_WARN("TIMEOUT while waiting for active elections");
				break;
			}
			g_usleep(500 * G_TIME_SPAN_MILLISECOND);
			count = manager_count_active(manager);
		}
		if (count == 0)
			GRID_INFO("No more active elections");
	}

	if (!persist)
		manager->exiting = FALSE;
}

/* Appends to `gs` a JSON object describing the election member `m`:
 *  - "local":  local id (or null), local URL and current step name
 *  - "master": master id (or null) and master URL
 *  - "base":   base name, base type and election key
 *  - "#":      refcount and the pending PIPEFROM / GETVERS markers
 *  - "peers":  array of peer strings, or null when no peer is known
 *  - "log":    the most recent logged transitions, newest first
 * Nothing is locked here; the caller visible in this file holds the
 * manager lock around the call. */
static void
member_json (struct election_member_s *m, GString *gs)
{
	/* description */
	g_string_append_static (gs, "{\"local\":{");
	if (m->flag_local_id)
		OIO_JSON_append_int (gs, "id", m->local_id);
	else
		g_string_append_static (gs, "\"id\":null");
	g_string_append_c (gs, ',');
	OIO_JSON_append_str (gs, "url", member_get_url(m));
	g_string_append_c (gs, ',');
	OIO_JSON_append_str (gs, "state", _step2str(m->step));
	g_string_append_static (gs, "},\"master\":{");
	if (m->flag_master_id)
		OIO_JSON_append_int (gs, "id", m->master_id);
	else
		g_string_append (gs, "\"id\":null");
	g_string_append_c (gs, ',');
	OIO_JSON_append_str (gs, "url", m->master_url);
	g_string_append_static (gs, "},\"base\":{");
	OIO_JSON_append_str (gs, "name", m->inline_name.base);
	g_string_append_c (gs, ',');
	OIO_JSON_append_str (gs, "type", m->inline_name.type);
	g_string_append_c (gs, ',');
	OIO_JSON_append_str (gs, "zk", m->key);
	g_string_append_static (gs, "},\"#\":{");
	OIO_JSON_append_int (gs, "refcount", m->refcount);
	g_string_append_c (gs, ',');
	OIO_JSON_append_int (gs, "pipefrom", m->pending_PIPEFROM);
	g_string_append_c (gs, ',');
	OIO_JSON_append_int (gs, "getvers", m->pending_GETVERS);
	g_string_append_c (gs, '}');

	/* the peers */
	if (m->peers) {
		g_string_append_static(gs, ",\"peers\":[");
		for (gchar **p = m->peers; *p ;p++) {
			/* comma before every item but the first */
			if (p!=m->peers) g_string_append_c(gs, ',');
			oio_str_gstring_append_json_quote(gs, *p);
		}
		g_string_append_c (gs, ']');
	} else {
		g_string_append_static(gs, ",\"peers\":null");
	}

	/* then the livelog */
	g_string_append_static(gs, ",\"log\":[");
	/* Walk the circular log backward, starting at the slot just before
	 * `log_index`. `idx` is unsigned and is allowed to wrap around: the
	 * modulo below maps it back into [0, EVENTLOG_SIZE). */
	guint idx = m->log_index - 1;
	for (guint i=0; i<EVENTLOG_SIZE ;i++,idx--) {
		struct logged_event_s *plog = m->log + (idx % EVENTLOG_SIZE);
		/* a slot with both steps at zero ends the walk */
		if (!plog->pre && !plog->post)
			break;
		if (i!=0)
			g_string_append_c(gs, ',');
		/* each entry is the string "time:pre-step:event:post-step" */
		g_string_append_printf(gs, "\"%"G_GINT64_FORMAT":%s:%s:%s\"",
				(gint64)plog->time, _step2str(plog->pre),
				_evt2str(plog->event), _step2str(plog->post));
	}
	g_string_append_static (gs, "]}");
}

/* Appends to `out` a JSON description of the election of the base `n`.
 * When no member is known for that base, "{}" is appended if the manager's
 * election mode is ELECTION_MODE_NONE, and "null" otherwise. */
void
election_manager_whatabout (struct election_manager_s *m,
		const struct sqlx_name_s *n, GString *out)
{
	NAME_CHECK(n);
	MANAGER_CHECK(m);
	EXTRA_ASSERT (m->vtable == &VTABLE);
	EXTRA_ASSERT(out != NULL);

	gchar hashed[OIO_ELECTION_KEY_LIMIT_LENGTH];
	sqliterepo_hash_name(n, hashed, sizeof(hashed));

	_manager_lock(m);
	struct election_member_s *found = _LOCKED_get_member(m, hashed);
	if (found != NULL) {
		member_json (found, out);
		member_unref (found);
	} else if (election_manager_get_mode(m) == ELECTION_MODE_NONE) {
		g_string_append_static (out, "{}");
	} else {
		g_string_append_static (out, "null");
	}
	_manager_unlock (m);
}

/* --- Zookeeper callbacks ----------------------------------------------------
 * All of them are called from the zookeeper's thread.
 * We chose to set the election manager in a thread-local slot because ZK
 * contexts for callbacks currently (3.4.6) require that no memory is
 * allocated, especially because of a memory leak on discarded clone watchers.
 * We are forced to pass an integer cast into pointer so that watchers can use
 * them to recover the right election.
 * -------------------------------------------------------------------------- */

/* Hands the context `Ctx` over to the `completions` pool of the manager `M`
 * through metautils_gthreadpool_push(), tagged "ZK". Despite the name, the
 * macro never runs the completion inline. `M` is dereferenced and therefore
 * must not be NULL. */
#define completion_do_or_defer(M,Ctx) do { \
	metautils_gthreadpool_push("ZK", (M)->completions, (Ctx)); \
} while (0)

/* Completion of the deletion of a rogue master node: only logs the outcome,
 * with a severity depending on the ZooKeeper return code. */
static void
completion_DeleteRogueNode(int zrc, const void *d UNUSED)
{
	switch (zrc) {
		case ZNONODE:
			GRID_TRACE2("Rogue ZK node disappeared");
			return;
		case ZOK:
			GRID_TRACE("Rogue ZK node deleted");
			return;
		case ZSESSIONEXPIRED:
			/* the node will expire, don't flood with logs in this case */
			GRID_DEBUG("Rogue ZK node deletion error: %s", zerror(zrc));
			return;
		default:
			GRID_WARN("Rogue ZK node deletion error: %s", zerror(zrc));
			return;
	}
}

/* @private
 * Tag stored as the first field of every deferred context, so that
 * _completion_router() can tell which handler a queued pointer is for. */
enum deferred_action_type_e
{
	DAT_ASKING,   /* struct exec_later_ASKING_context_s */
	DAT_LISTING,  /* struct exec_later_LISTING_context_s */
	DAT_LEAVING,  /* struct exec_later_LEAVING_context_s */
	DAT_WATCHING, /* struct exec_later_WATCHING_context_s */
	DAT_CREATING, /* struct exec_later_CREATING_context_s */
	DAT_LEFT,     /* struct deferred_watcher_context_s (watch events) */
};

/* @private
 * Deferred context built by completion_CREATING() and consumed (then freed)
 * by deferred_completion_CREATING(). */
struct exec_later_CREATING_context_s
{
	/* Always DAT_CREATING. Must stay the first field: the router reads the
	 * tag through a cast of the context pointer. */
	enum deferred_action_type_e magic;
	int zrc; /* return code reported by ZooKeeper */
	struct election_member_s *member; /* the concerned member */
	gint32 local_id; /* id extracted from the created path, -1 by default */
};

static void
deferred_completion_CREATING(struct exec_later_CREATING_context_s *d)
{
	EXTRA_ASSERT(d != NULL);
	EXTRA_ASSERT(DAT_CREATING == d->magic);
	MEMBER_CHECK(d->member);

	member_lock(d->member);
	member_log_completion("CREATE", d->zrc, d->member);
	_thlocal_set_manager (d->member->manager);
	TRACE_EXECUTION(d->member->manager);

	if (d->zrc != ZOK) {
		transition_error(d->member, EVT_CREATE_KO, d->zrc);
	} else {
		transition(d->member, EVT_CREATE_OK, &d->local_id);
	}
	member_unref(d->member);
	member_unlock(d->member);

	g_free(d);
}

static void
completion_CREATING(int zrc, const char *path, const void *d)
{
	if (!d) return;

	struct exec_later_CREATING_context_s *ctx = g_malloc0(sizeof(*ctx));
	ctx->magic = DAT_CREATING;
	ctx->member = (struct election_member_s *) d;
	ctx->local_id = -1;
	ctx->zrc = zrc;
	if (path)
		_extract_id(path, &ctx->local_id);

	struct election_manager_s *M = ctx->member->manager;
	_thlocal_set_manager(M);
	completion_do_or_defer(M, ctx);
}

/* @private
 * Deferred context built by completion_WATCHING() and consumed (then freed)
 * by deferred_completion_WATCHING(). */
struct exec_later_WATCHING_context_s
{
	/* Always DAT_WATCHING. Must stay the first field: the router reads the
	 * tag through a cast of the context pointer. */
	enum deferred_action_type_e magic;
	int zrc; /* return code reported by ZooKeeper */
	struct election_member_s *member; /* the concerned member */
};

/* Deferred half of the WATCH (exists) completion: under the member lock,
 * feeds the state machine, then drops the reference carried by the context
 * and frees the context.
 * On ZNONODE two events are delivered in sequence: EVT_LEFT_SELF first,
 * then EVT_EXISTS_KO like for any other failure. */
static void
deferred_completion_WATCHING(struct exec_later_WATCHING_context_s *d)
{
	EXTRA_ASSERT(d != NULL);
	EXTRA_ASSERT(DAT_WATCHING == d->magic);
	MEMBER_CHECK(d->member);

	struct election_member_s *member = d->member;

	member_lock(member);
	member_log_completion("WATCH", d->zrc, member);
	if (d->zrc == ZOK) {
		transition(member, EVT_EXISTS_OK, NULL);
	} else {
		if (d->zrc == ZNONODE)
			transition(member, EVT_LEFT_SELF, NULL);
		transition_error(member, EVT_EXISTS_KO, d->zrc);
	}
	member_unref(member);
	member_unlock(member);

	g_free(d);
}

/* ZooKeeper completion of the "exists" call on our own node. Packs the
 * return code into a deferred context and queues it. A NULL `d` is
 * ignored. */
static void
completion_WATCHING(int zrc, const struct Stat *s UNUSED, const void *d)
{
	if (d == NULL)
		return;

	struct election_member_s *member = (struct election_member_s*) d;

	struct exec_later_WATCHING_context_s *job = g_malloc0(sizeof(*job));
	job->magic = DAT_WATCHING;
	job->member = member;
	job->zrc = zrc;

	completion_do_or_defer(member->manager, job);
}

/* @private
 * Deferred context built by completion_ASKING() and consumed (then freed)
 * by deferred_completion_ASKING(). */
struct exec_later_ASKING_context_s
{
	/* Always DAT_ASKING. Must stay the first field: the router reads the
	 * tag through a cast of the context pointer. */
	enum deferred_action_type_e magic;
	int zrc; /* return code reported by ZooKeeper */
	struct election_member_s *member; /* the concerned member */
	/* Content read from the master node, stored as a NUL-terminated string
	 * (the allocation is zeroed and reserves one extra byte). */
	gchar master[];
};

/* Deferred half of the ASK (get master node) completion. Under the member
 * lock, validates the URL read from the master node and feeds the state
 * machine:
 *  - ZooKeeper error                           -> EVT_MASTER_KO
 *  - empty or unusable URL                     -> EVT_MASTER_BAD
 *  - no peer known locally                     -> EVT_MASTER_BAD, and a
 *                                                 decache of the peers is
 *                                                 requested
 *  - URL is our own                            -> the rogue node is deleted,
 *                                                 then EVT_MASTER_BAD
 *  - URL not among the known peers             -> EVT_MASTER_BAD, and a
 *                                                 decache of the peers is
 *                                                 requested
 *  - otherwise                                 -> EVT_MASTER_OK with the URL
 * Finally drops the reference carried by the context and frees it. */
static void
deferred_completion_ASKING(struct exec_later_ASKING_context_s *d)
{
	EXTRA_ASSERT(d != NULL);
	EXTRA_ASSERT(DAT_ASKING == d->magic);

	MEMBER_CHECK(d->member);

	member_lock(d->member);
	member_log_completion("ASK", d->zrc, d->member);

	if (d->zrc != ZOK) {
		transition_error(d->member, EVT_MASTER_KO, d->zrc);
	} else {
		const char * const * peers = (const char * const *) d->member->peers;
		if (!d->master[0] || !metautils_url_valid_for_connect(d->master)) {
			transition(d->member, EVT_MASTER_BAD, NULL);
		} else if (!peers) {
			GRID_WARN("No peer known for [%s.%s]",
					d->member->inline_name.base, d->member->inline_name.type);
			d->member->requested_peers_decache = 1;
			transition(d->member, EVT_MASTER_BAD, NULL);
		} else {
			const char *myurl = member_get_url(d->member);
			if (strcmp(d->master, myurl) == 0) {
				/* The supposed master carries our ID (i.e. our URL),
				 * if we accept it as-is, we will create a loop on ourselves.
				 * We delete it and pretend there is no master. */
				gchar path[PATH_MAXLEN];
				int zrc2 = sqlx_sync_adelete(d->member->sync,
						member_masterpath(d->member, path, sizeof(path)), -1,
						completion_DeleteRogueNode, NULL);
				TRACE_EXECUTION(d->member->manager);

				if (zrc2 != ZOK) {
					GRID_WARN("Failed to delete Rogue ZK node %s: %s", path, zerror(zrc2));
				} else {
					GRID_WARN("Rogue ZK node being deleted %s", path);
				}
				TRACE_EXECUTION(d->member->manager);

				transition(d->member, EVT_MASTER_BAD, NULL);
			} else if (!oio_strv_has(peers, d->master)) {
				/* The master is an unknown peer. A reload of the peers is necessary */
				GString *tmp = g_string_sized_new(128);
				/* `peers` is known to be non-NULL in this branch, the test
				 * in the loop condition is only defensive */
				for (const char * const *p = peers; peers && *p ;++p)
					g_string_append_printf(tmp, " [%s]", *p);
				GRID_WARN("unknown master [%s] for [%s.%s], only%s", d->master,
						d->member->inline_name.base, d->member->inline_name.type,
						tmp->str);
				g_string_free(tmp, TRUE);
				d->member->requested_peers_decache = 1;
				transition(d->member, EVT_MASTER_BAD, NULL);
			} else {
				transition(d->member, EVT_MASTER_OK, d->master);
			}
		}
	}
	member_unref(d->member);
	member_unlock(d->member);
	g_free0 (d);
}

/* ZooKeeper completion of the "get" call on the master node. Copies the
 * node content (expected to be the URL of the master) into a deferred
 * context and queues it.
 *
 * @param zrc  return code reported by ZooKeeper
 * @param v    content of the node, not NUL-terminated, possibly NULL
 * @param vlen number of bytes in `v`
 * @param d    the election member registered with the request; a NULL
 *             value is ignored, as in the sibling completions
 */
static void
completion_ASKING(int zrc, const char *v, int vlen,
		const struct Stat *s UNUSED, const void *d)
{
	if (!d) return;

	/* Treat as "no content" anything we cannot safely copy: a missing
	 * buffer, a negative length (the ZooKeeper C client is presumed to
	 * report -1 for a node without data -- confirm against the client in
	 * use) or an oversized value that cannot be a valid URL. A negative
	 * length would otherwise shrink the allocation below and be turned
	 * into a huge size by memcpy(). */
	if (!v || vlen < 0 || vlen > 256)
		vlen = 0;

	/* zeroed allocation + 1 extra byte: `master` is always NUL-terminated */
	struct exec_later_ASKING_context_s *ctx = g_malloc0(sizeof(*ctx) + vlen + 1);
	ctx->magic = DAT_ASKING;
	ctx->zrc = zrc;
	ctx->member = (struct election_member_s*) d;
	if (vlen)
		memcpy(ctx->master, v, vlen);

	struct election_manager_s *M = ctx->member->manager;
	completion_do_or_defer(M, ctx);
}

/* @private
 * Deferred context built by completion_LISTING() and consumed (then freed)
 * by deferred_completion_LISTING(). */
struct exec_later_LISTING_context_s
{
	/* Always DAT_LISTING. Must stay the first field: the router reads the
	 * tag through a cast of the context pointer. */
	enum deferred_action_type_e magic;
	int zrc; /* ZooKeeper return code, ZNONODE when no sibling was found */
	struct election_member_s *member; /* the concerned member */
	gint32 master_id; /* first id of the listing, -1 when there is none */
};

/* Deferred half of the LIST completion: under the member lock, feeds the
 * state machine with EVT_LIST_OK (carrying the master id) or EVT_LIST_KO,
 * then drops the reference carried by the context and frees the context. */
static void
deferred_completion_LISTING (struct exec_later_LISTING_context_s *d)
{
	EXTRA_ASSERT(d != NULL);
	EXTRA_ASSERT(DAT_LISTING == d->magic);
	MEMBER_CHECK(d->member);

	struct election_member_s *member = d->member;

	member_lock(member);
	member_log_completion("LIST", d->zrc, member);
	if (d->zrc == ZOK) {
		transition(member, EVT_LIST_OK, &(d->master_id));
	} else {
		transition_error(member, EVT_LIST_KO, d->zrc);
	}
	member_unref(member);
	member_unlock(member);

	g_free(d);
}

static void
completion_LISTING(int zrc, const struct String_vector *sv, const void *d)
{
	if (!d) return;

	struct election_member_s *member = (struct election_member_s*) d;
	gboolean has_first = FALSE;
	gint32 first = -1;
	GArray *i32v = nodev_to_int32v(sv, member->key);
	if (i32v->len > 0) {
		first = g_array_index(i32v, gint32, 0);
		has_first = TRUE;
	}
	g_array_free(i32v, TRUE);

	struct exec_later_LISTING_context_s *ctx = g_malloc0(sizeof(*ctx));
	ctx->magic = DAT_LISTING;
	if (ZOK == (ctx->zrc = zrc))
		ctx->zrc = has_first ? ZOK : ZNONODE;
	ctx->member = member;
	ctx->master_id = first;

	struct election_manager_s *M = ctx->member->manager;
	completion_do_or_defer(M, ctx);
}

/* @private
 * Deferred context built by completion_LEAVING() and consumed (then freed)
 * by deferred_completion_LEAVING(). */
struct exec_later_LEAVING_context_s
{
	/* Always DAT_LEAVING. Must stay the first field: the router reads the
	 * tag through a cast of the context pointer. */
	enum deferred_action_type_e magic;
	int zrc; /* return code reported by ZooKeeper */
	struct election_member_s *member; /* the concerned member */
};

/* Deferred half of the LEAVE (delete) completion: under the member lock,
 * feeds the state machine with EVT_LEAVE_OK when the node was deleted or
 * was already absent (ZNONODE), and with EVT_LEAVE_KO otherwise. Then drops
 * the reference carried by the context and frees the context. */
static void
deferred_completion_LEAVING(struct exec_later_LEAVING_context_s *d)
{
	EXTRA_ASSERT(d != NULL);
	EXTRA_ASSERT(DAT_LEAVING == d->magic);
	MEMBER_CHECK(d->member);

	struct election_member_s *member = d->member;

	member_lock(member);
	member_log_completion("LEAVE", d->zrc, member);
	if (d->zrc == ZOK || d->zrc == ZNONODE) {
		transition(member, EVT_LEAVE_OK, NULL);
	} else {
		transition_error(member, EVT_LEAVE_KO, d->zrc);
	}
	member_unref(member);
	member_unlock(member);

	g_free(d);
}

static void
completion_LEAVING(int zrc, const void *d)
{
	if (!d) return;

	struct exec_later_LEAVING_context_s *ctx = g_malloc0(sizeof(*ctx));
	ctx->magic = DAT_LEAVING;
	ctx->zrc = zrc;
	ctx->member = (struct election_member_s*) d;

	struct election_manager_s *M = ctx->member->manager;
	completion_do_or_defer(M, ctx);
}

/* ------------------------------------------------------------------------- */

/* Finds the election member targeted by a watch event.
 *
 * The election key is the part of the last path component that precedes
 * the first '-'.
 *
 * @param M    the election manager, NULL is tolerated
 * @param path the path of the ZooKeeper node, NULL is tolerated
 * @param gen  the generation the watcher was registered with
 * @return the member when it exists and its generation_id equals `gen`:
 *         it is then returned referenced AND with the lock taken by
 *         _manager_lock() still held, the caller must release both
 *         (the callers in this file use member_unref() + member_unlock()).
 *         NULL otherwise, with no lock held and no reference kept.
 */
static struct election_member_s *
_find_member (struct election_manager_s *M, const char *path, guint gen)
{
	if (!M || !path) return NULL;

	const char *slash = strrchr(path, '/');
	if (!slash) return NULL;
	slash ++;

	const char *stripe = strchr(slash, '-');
	if (!stripe) return NULL;

	const size_t len = stripe - slash;

	gchar *key = alloca(1 + len);
	memcpy(key, slash, len);
	key[len] = 0;

	_manager_lock(M);
	struct election_member_s *member = _LOCKED_get_member(M, key);
	if (member) {
		if (member->generation_id == gen)
			return member;
		GRID_DEBUG("watcher: [%s] obsolete w=%u gen=%u",
				member->key, gen, member->generation_id);
		/* The lookup took a reference that nobody else will release */
		member_unref(member);
	} else {
		GRID_WARN("watcher: [%s] no election found", key);
	}
	_manager_unlock(M);
	return NULL;
}

/* @private
 * Deferred context built by watch_COMMON() and consumed (then freed) by
 * deferred_watch_COMMON(). */
struct deferred_watcher_context_s
{
	/* Always DAT_LEFT. Must stay the first field: the router reads the tag
	 * through a cast of the context pointer. */
	enum deferred_action_type_e magic;
	int type;  /* ZooKeeper event type (ZOO_*_EVENT) */
	int state; /* ZooKeeper connection state (ZOO_*_STATE) */
	guint gen; /* generation the watcher was registered with */
	enum event_type_e evt; /* event to raise upon a node deletion */
	/* Last component of the node path, leading '/' included,
	 * NUL-terminated; empty when no path was provided. */
	char path[];
};

/* Deferred half of the ZooKeeper watchers. Frees the context `d` before
 * returning.
 *
 *  - ZOO_SESSION_EVENT: the targeted member, if any, is reset and forced
 *    back to STEP_NONE (logged as EVT_DISCONNECTED).
 *  - ZOO_DELETED_EVENT: the event stored in the context (`d->evt`) is fed
 *    to the state machine of the targeted member, if any.
 *
 * In both cases _find_member() returns the member referenced and with the
 * lock held, so both are released here once the member has been handled.
 */
static void
deferred_watch_COMMON(struct deferred_watcher_context_s *d,
		struct election_manager_s *M)
{
	EXTRA_ASSERT(d != NULL);
	EXTRA_ASSERT(M != NULL);
	EXTRA_ASSERT(DAT_LEFT == d->magic);

	if (d->type == ZOO_SESSION_EVENT) {
		struct election_member_s *member = _find_member(M, d->path, d->gen);
		/* It happens, when a process has been paused, that d->path is empty,
		 * and thus we cannot find any specific election member. */
		if (member != NULL) {
			member_reset(member);
			member_log_change(member, EVT_DISCONNECTED,
					member_set_status(member, STEP_NONE));
			/* Give back the reference and the lock, as done for the
			 * deletion events. Keeping the lock would block every other
			 * user of the manager. */
			member_unref(member);
			member_unlock(member);
		}
		/* We cannot run all the election and reset everything, because we
		 * introduced a sharding of the elections across several ZK clusters
		 * and the problem concerns only one cluster */
	} else if (d->type == ZOO_DELETED_EVENT) {
		struct election_member_s *member = _find_member(M, d->path, d->gen);
		if (member != NULL) {
			transition(member, d->evt, NULL);
			member_unref(member);
			member_unlock(member);
		}
	}

	g_free(d);
}

/* Common body of the ZooKeeper watchers. Only node deletions and session
 * events reporting an expired session or an authentication failure are
 * considered; anything else is dropped. The interesting events are copied
 * (with the last component of `path`, leading '/' included) into a deferred
 * context queued on the manager found in the thread-local slot.
 * `d` carries the generation id of the member, cast into a pointer. */
static void
watch_COMMON(const int type, const int state,
		const char *path, void *d, const int evt)
{
	if (type == ZOO_SESSION_EVENT) {
		if (state != ZOO_EXPIRED_SESSION_STATE
				&& state != ZOO_AUTH_FAILED_STATE)
			return;
	} else if (type != ZOO_DELETED_EVENT) {
		return;
	}

	const char *tail = NULL;
	size_t len = 0;
	if (path != NULL) {
		tail = strrchr(path, '/');
		if (tail != NULL)
			len = strlen(tail);
	}

	/* zeroed allocation + 1 extra byte: the copied path is NUL-terminated */
	struct deferred_watcher_context_s *job = g_malloc0(sizeof(*job) + len + 1);
	job->magic = DAT_LEFT;
	job->type = type;
	job->state = state;
	job->gen = GPOINTER_TO_UINT(d);
	job->evt = evt;
	if (len > 0)
		memcpy(job->path, tail, len);

	struct election_manager_s *M = _thlocal_get_manager();
	completion_do_or_defer(M, job);
}

/* Watcher set on the node of the master: reports EVT_LEFT_MASTER. */
static void
watch_MASTER(zhandle_t *h UNUSED, int type, int state, const char *path, void *d)
{
	watch_COMMON(type, state, path, d, EVT_LEFT_MASTER);
}

/* Watcher set on our own node: reports EVT_LEFT_SELF. */
static void
watch_SELF(zhandle_t *h UNUSED, int type, int state, const char *path, void *d)
{
	watch_COMMON(type, state, path, d, EVT_LEFT_SELF);
}

/* Dispatches a queued deferred context to its handler, according to the tag
 * stored at the very beginning of the context. An unknown tag triggers
 * g_assert_not_reached(). */
static void
_completion_router(gpointer p, struct election_manager_s *M)
{
	const enum deferred_action_type_e kind =
		*((enum deferred_action_type_e*)p);

	switch (kind) {
		case DAT_CREATING:
			deferred_completion_CREATING(p);
			return;
		case DAT_ASKING:
			deferred_completion_ASKING(p);
			return;
		case DAT_LISTING:
			deferred_completion_LISTING(p);
			return;
		case DAT_LEAVING:
			deferred_completion_LEAVING(p);
			return;
		case DAT_WATCHING:
			deferred_completion_WATCHING(p);
			return;
		case DAT_LEFT:
			deferred_watch_COMMON(p, M);
			return;
	}
	g_assert_not_reached();
}

/* Worker resolving the peers of the member `m`. The name of the base and
 * the "decache" request are snapshotted under the member lock, the
 * resolution itself runs without the lock, then EVT_GETPEERS_DONE is raised
 * with the peers (or with NULL upon error or when none was found). The
 * reference held on `m` for this task is released. */
static void
_worker_getpeers(struct election_member_s *m, struct election_manager_s *M)
{
	struct sqlx_name_inline_s inline_name = {};
	gchar **peers = NULL;

	member_lock(m);
	memcpy(&inline_name, &m->inline_name, sizeof(struct sqlx_name_inline_s));
	const gboolean decache = BOOL(m->requested_peers_decache);
	m->requested_peers_decache = 0;
	member_unlock(m);

	NAME2CONST(n, inline_name);
	GError *err = election_get_peers(M, &n, decache, &peers);

	/* an error or an empty set are both reported as "no peers" */
	const gboolean usable = !err && peers && *peers;

	member_lock(m);
	if (usable) {
		transition(m, EVT_GETPEERS_DONE, peers);
	} else {
		transition(m, EVT_GETPEERS_DONE, NULL);
	}
	member_unref(m);
	TRACE_EXECUTION(m->manager);
	member_unlock(m);

	if (peers)
		g_strfreev(peers);
	g_clear_error(&err);
}

/* ------------------------------------------------------------------------- */

/* Emits a warning about `member`, tagged with the number and the name of
 * the unexpected event. */
static void
member_warn_abnormal_event(struct election_member_s *member, int evt)
{
	gchar label[64];

	g_snprintf(label, sizeof(label), "ABNORMAL %02d/%s", evt, _evt2str(evt));
	member_warn(label, member);
}

/* Logs, at WARN level, that the ZooKeeper `action` could not be started for
 * `member`, with the name of the base, the path of the node and the
 * ZooKeeper error. */
static void
member_warn_failed_action(struct election_member_s *member, int zrc,
		const char *action)
{
	gchar buf[PATH_MAXLEN];
	const char *fullpath = member_fullpath(member, buf, sizeof(buf));

	GRID_WARN("%s failed [%s.%s] [%s] : (%d) %s", action,
			member->inline_name.base, member->inline_name.type,
			fullpath,
			zrc, zerror(zrc));
}

/* ------------------------------------------------------------------------- */

/* Operation requested to _election_make():
 *  - ELOP_NONE:   only touch the access time of the member
 *  - ELOP_START:  touch the access time and raise EVT_NONE
 *  - ELOP_RESYNC: touch the access time and raise EVT_SYNC_REQ
 *  - ELOP_EXIT:   raise EVT_LEAVE_REQ, only if the member already exists */
enum election_op_e {
	ELOP_NONE, ELOP_START, ELOP_RESYNC, ELOP_EXIT
};

/* Records "now" as the last access time of the member. A member that is
 * MASTER or SLAVE is also removed from, then added back to, its deque. */
static void
_election_atime(struct election_member_s *m)
{
	m->last_atime = oio_ext_monotonic_time ();

	if (m->step == STEP_MASTER || m->step == STEP_SLAVE) {
		_DEQUE_remove(m);
		_DEQUE_add(m);
	}
}

/* Common implementation of the init / start / resync / exit operations on
 * the election of the base `n`.
 *
 * @param m          the election manager
 * @param n          the name of the base
 * @param op         the operation to perform (see enum election_op_e)
 * @param out_status optional, receives the step of the member after the
 *                   operation (STEP_NONE when no member is involved)
 * @param replicated optional, set to TRUE when the base has peers; it is
 *                   left to FALSE for ELOP_EXIT, which skips the check
 * @return NULL on success (including when the base has no peer, in which
 *         case nothing is done), or the prefixed error returned by
 *         election_has_peers()
 */
static GError *
_election_make(struct election_manager_s *m, const struct sqlx_name_s *n,
		enum election_op_e op,
		enum election_step_e *out_status, gboolean *replicated)
{
	MANAGER_CHECK(m);
	SQLXNAME_CHECK(n);

	if (out_status)
		*out_status = STEP_NONE;
	if (replicated)
		*replicated = FALSE;

	if (op != ELOP_EXIT) {
		/* Out of the critical section */
		gboolean peers_present = FALSE;
		GError *err = election_has_peers(m, n, FALSE, &peers_present);
		if (err != NULL) {
			g_prefix_error(&err, "Election error: ");
			return err;
		}
		if (!peers_present) {
			GRID_DEBUG("No peer for [%s][%s]", n->base, n->type);
			return NULL;
		} else {
			if (replicated)
				*replicated = TRUE;
		}
	}

	gchar key[OIO_ELECTION_KEY_LIMIT_LENGTH];
	sqliterepo_hash_name(n, key, sizeof(key));

	_manager_lock(m);
	/* The last argument is FALSE only for ELOP_EXIT, the only case below
	 * that checks `member` against NULL.
	 * NOTE(review): presumably an "autocreate" flag -- confirm against
	 * _LOCKED_init_member(). */
	struct election_member_s *member = _LOCKED_init_member(m, n, key, op != ELOP_EXIT);
	switch (op) {
		case ELOP_NONE:
			_election_atime(member);
			break;
		case ELOP_START:
			_election_atime(member);
			transition(member, EVT_NONE, NULL);
			break;
		case ELOP_RESYNC:
			_election_atime(member);
			transition(member, EVT_SYNC_REQ, NULL);
			break;
		case ELOP_EXIT:
			if (member)
				transition(member, EVT_LEAVE_REQ, NULL);
			break;
	}
	if (member) {
		if (out_status)
			*out_status = member->step;
		/* release the reference obtained with the member */
		member_unref(member);
	}
	_manager_unlock(m);

	return NULL;
}

/* Requests a resynchronisation of the base `n`: ELOP_RESYNC without any
 * output. */
static GError *
_election_trigger_RESYNC(struct election_manager_s *manager,
		const struct sqlx_name_s *n)
{
	GError *err = _election_make(manager, n, ELOP_RESYNC, NULL, NULL);
	return err;
}

/* Prepares the election of the base `n` without kicking it: ELOP_NONE,
 * forwarding both optional outputs. */
static GError *
_election_init(struct election_manager_s *manager, const struct sqlx_name_s *n,
		enum election_step_e *out_status, gboolean *replicated)
{
	GError *err = _election_make(manager, n, ELOP_NONE, out_status, replicated);
	return err;
}

/* Kicks the election of the base `n`: ELOP_START without any output. */
static GError *
_election_start(struct election_manager_s *manager, const struct sqlx_name_s *n)
{
	GError *err = _election_make(manager, n, ELOP_START, NULL, NULL);
	return err;
}

/* Asks the election of the base `n` to be left: ELOP_EXIT without any
 * output. */
static GError *
_election_exit(struct election_manager_s *manager, const struct sqlx_name_s *n)
{
	GError *err = _election_make(manager, n, ELOP_EXIT, NULL, NULL);
	return err;
}

/* Waits until the member `m` reaches a final step (STATUS_FINAL, i.e. a
 * step >= STEP_SLAVE), poking its state machine with EVT_NONE at each
 * iteration.
 * Must be called with the lock returned by member_get_lock(m) held: that
 * lock is handed to g_cond_wait_until(), which releases it while waiting
 * and re-acquires it before returning.
 *
 * @param m        the member to wait for
 * @param deadline absolute limit, on the clock of oio_ext_monotonic_time()
 * @return TRUE when a final step was reached, FALSE when the deadline has
 *         passed or, if oio_election_enable_nowait_pending is set, when the
 *         election has been unstable for longer than
 *         oio_election_delay_nowait_pending
 */
static gboolean
wait_for_final_status(struct election_member_s *m, const gint64 deadline)
{
	while (!STATUS_FINAL(m->step)) {

		const gint64 now = oio_ext_monotonic_time();

		/* compare internal timers to our fake'able clock */
		if (now > deadline) {
			GRID_WARN("TIMEOUT! (waiting for election status) [%s.%s] step=%d/%s",
					m->inline_name.base, m->inline_name.type, m->step, _step2str(m->step));
			return FALSE;
		}

		m->last_atime = now;
		transition(m, EVT_NONE, NULL);

		/* give up early on an election that entered an unstable state too
		 * long ago */
		if (oio_election_enable_nowait_pending &&
				m->when_unstable > 0 && m->when_unstable < OLDEST(
					now, oio_election_delay_nowait_pending)) {
			GRID_WARN("TIMEOUT! (election pending for too long) [%s.%s] step=%d/%s",
					m->inline_name.base, m->inline_name.type, m->step, _step2str(m->step));
			return FALSE;
		}

		GRID_TRACE("Still waiting for [%s.%s] step=%d/%s"
				" %"G_GINT64_FORMAT"/%"G_GINT64_FORMAT,
				m->inline_name.base, m->inline_name.type, m->step, _step2str(m->step),
				m->when_unstable / G_TIME_SPAN_SECOND, now / G_TIME_SPAN_SECOND);

		/* perform the real WAIT on the real clock. */
		TRACE_EXECUTION(m->manager);
		_manage_dump_activity(m->manager);
		g_cond_wait_until(member_get_cond(m), member_get_lock(m),
				g_get_monotonic_time() + oio_election_period_cond_wait);
		/* NOTE(review): presumably bookkeeping for the lock that has just
		 * been re-acquired -- confirm against _manager_save_locked() */
		_manager_save_locked(m->manager);
	}

	m->last_atime = oio_ext_monotonic_time ();
	return TRUE;
}

/* Waits (at most oio_election_delay_wait) for the election of the base `n`
 * to reach a final step, and reports the outcome.
 *
 * @param mgr        the election manager
 * @param n          the name of the base
 * @param master_url optional; when ELECTION_LOST is returned, receives a
 *                   copy of the URL of the master (possibly NULL), to be
 *                   freed by the caller with g_free()
 * @return ELECTION_LEADER when the member is MASTER, ELECTION_LOST when it
 *         is SLAVE, ELECTION_FAILED in any other case (timeout included)
 */
static enum election_status_e
_election_get_status(struct election_manager_s *mgr,
		const struct sqlx_name_s *n, gchar **master_url)
{
	int rc;
	gchar *url = NULL;

	MANAGER_CHECK(mgr);
	EXTRA_ASSERT(n != NULL);

	gchar key[OIO_ELECTION_KEY_LIMIT_LENGTH];
	sqliterepo_hash_name(n, key, sizeof(key));
	const gint64 deadline = oio_ext_monotonic_time () + oio_election_delay_wait;

	_manager_lock(mgr);
	struct election_member_s *m = _LOCKED_init_member(mgr, n, key, TRUE);

	if (!wait_for_final_status(m, deadline)) // TIMEOUT!
		rc = STEP_FAILED;
	else {
		rc = m->step;
		if (rc == STEP_SLAVE) {
			/* copy the URL while the lock is still held */
			if (m->master_url)
				url = g_strdup(m->master_url);
		}
	}

	member_unref(m);
	/* NOTE(review): presumably wakes up the other threads waiting on the
	 * same member -- confirm against member_signal() */
	if (rc == STEP_NONE || STATUS_FINAL(rc))
		member_signal(m);
	_manager_unlock(mgr);

	GRID_TRACE("STEP=%s/%d master=%s", _step2str(rc), rc, url);
	switch (rc) {
		case STEP_MASTER:
			return ELECTION_LEADER;
		case STEP_SLAVE:
			/* hand the copy over to the caller, or drop it */
			if (master_url)
				*master_url = url;
			else
				g_free(url);
			url = NULL;
			return ELECTION_LOST;
		default:
			return ELECTION_FAILED;
	}
}

/* ------------------------------------------------------------------------- */

/* Sends a USE request to each known peer of the member, unless one was
 * already sent less than one second ago. Always returns TRUE. */
static gboolean
defer_USE(struct election_member_s *member)
{
	const gint64 now = oio_ext_monotonic_time();
	const gint64 since_last = now - member->last_USE;

	/* Sometimes, defer_USE() is called after a check for a delay (based on
	 * last_USE), sometimes not. When there is already a check, the delay is
	 * ~ always longer than the following G_TIME_SPAN_SECOND, so this check
	 * is harmless.
	 * However, having a hard limit to a minimum of 1s between 2 USE for the
	 * same election is a good thing, IMO (jfs). */
	if (since_last < G_TIME_SPAN_SECOND) {
		member_trace("avoid:USE", member);
		return TRUE;
	}

	/* no peer, nobody to notify */
	if (!member->peers)
		return TRUE;

	member->last_USE = oio_ext_monotonic_time();
	for (gchar **peer = member->peers; *peer; peer++) {
		member->manager->deferred_peering_notify |= sqlx_peering__use(
				member->manager->peering, *peer, &member->inline_name);
		TRACE_EXECUTION(member->manager);
	}

	return TRUE;
}

/* Callback receiving the outcome of a GETVERS request sent to a peer.
 * Compares the remote version with the local one and feeds the state
 * machine of the concerned member with:
 *  - EVT_GETVERS_OK   when there is no error left after the comparison
 *  - EVT_GETVERS_OLD  on CODE_PIPEFROM (the local base missed a diff)
 *  - EVT_GETVERS_RACE on CODE_CONCURRENT
 *  - EVT_GETVERS_KO   on any other error
 *
 * @param enet    the error of the request, or NULL
 * @param manager the election manager
 * @param name    the name of the base
 * @param reqid   the id of the request, forwarded with the event
 * @param vremote the version of the peer; exactly one of `enet` and
 *                `vremote` is expected to be set
 */
static void
_result_GETVERS (GError *enet,
		struct election_manager_s *manager, const struct sqlx_name_s *name,
		guint reqid, GTree *vremote)
{
	GError *err = NULL;
	GTree *vlocal = NULL;

	EXTRA_ASSERT(manager != NULL);
	EXTRA_ASSERT(name != NULL);
	EXTRA_ASSERT((enet != NULL) ^ (vremote != NULL));

	if (enet) {
		/* work on a copy, `enet` is not owned by this function */
		err = g_error_copy(enet);
		GRID_DEBUG("GETVERS error [%s.%s]: (%d) %s",
				name->base, name->type, err->code, err->message);
	} else {
		err = manager->config->get_version (manager->config->ctx, name, &vlocal);
		EXTRA_ASSERT ((err != NULL) ^ (vlocal != NULL));
		if (err) {
			GRID_WARN("GETVERS error [%s.%s]: (%d) %s",
					name->base, name->type, err->code, err->message);
		}
	}

	if (!err) {
		gint64 worst = 0;
		err = version_validate_diff(vlocal, vremote, &worst);
		if (NULL != err) {
			/* an outdated remote is not an error for the local election */
			if (err->code == CODE_PIPETO) {
				GRID_DEBUG("Remote outdated : (%d) %s",
						err->code, err->message);
				g_clear_error(&err);
			}
		} else {
			if (worst < 0)
				err = NEWERROR(CODE_PIPEFROM, "One diff missed");
		}
	}

	gchar key[OIO_ELECTION_KEY_LIMIT_LENGTH];
	sqliterepo_hash_name(name, key, sizeof(key));
	struct election_member_s *member = manager_get_member(manager, key);
	if (!member) {
		GRID_WARN("GETVERS Election disappeared [%s]", key);
	} else {

		MEMBER_CHECK(member);

		member_lock(member);
		if (!err) {
			transition(member, EVT_GETVERS_OK, &reqid);
		} else if (err->code == CODE_PIPEFROM) {
			transition(member, EVT_GETVERS_OLD, &reqid);
		} else if (err->code == CODE_CONCURRENT) {
			transition(member, EVT_GETVERS_RACE, &reqid);
		} else {
			if (err->code == CODE_CONTAINER_NOTFOUND) {
				// We may have asked the wrong peer
				member->requested_peers_decache = 1;
			}
			transition(member, EVT_GETVERS_KO, &reqid);
		}
		/* release the reference obtained from manager_get_member() */
		member_unref(member);
		member_unlock(member);
	}

	if (err) g_clear_error(&err);
	if (vlocal) g_tree_destroy(vlocal);
}

/* Callback receiving the outcome of a PIPEFROM request. The outcome is
 * logged, then EVT_SYNC_OK is raised on the concerned member (if it still
 * exists) whatever the outcome. */
static void
_result_PIPEFROM (GError *e, struct election_manager_s *manager,
		const struct sqlx_name_s *n, guint reqid)
{
	gchar key[OIO_ELECTION_KEY_LIMIT_LENGTH];
	sqliterepo_hash_name(n, key, sizeof(key));

	if (e && !CODE_IS_OK(e->code)) {
		GRID_WARN("PIPEFROM failed [%s.%s] [%s]: (%d) %s",
				n->base, n->type, key, e->code, e->message);
	} else {
		GRID_DEBUG("PIPEFROM ok [%s.%s] [%s]",
				n->base, n->type, key);
	}

	struct election_member_s *member = manager_get_member (manager, key);
	if (!member)
		return;

	member_lock(member);
	/* We do the transition even if we undergo an error.
	 * This means we are not consistent but eventually consistent. */
	transition(member, EVT_SYNC_OK, &reqid);
	member_unref(member);
	member_unlock(member);
}

/* -------------------------------------------------------------------------- */

/* Moves the member to STEP_NONE and clears its "unstable" timestamp.
 * The member is expected to carry no pending action, no id and no master
 * URL. */
static void
member_action_to_NONE(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->local_id == 0);
	EXTRA_ASSERT(member->master_id == 0);
	EXTRA_ASSERT(member->master_url == NULL);

	member->when_unstable = 0;
	member_set_status(member, STEP_NONE);
}

/* Moves the member to STEP_PEERING: marks the election as unstable since
 * now, takes a reference on the member and (unless FAKE_GETPEERS is
 * defined) queues it on the "getpeers" pool of the manager. */
static void
member_action_to_PEERING(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->local_id == 0);
	EXTRA_ASSERT(member->master_id == 0);
	EXTRA_ASSERT(member->master_url == NULL);

	/* The only origin of the transition is NONE */
	member->when_unstable = oio_ext_monotonic_time();

	member_ref(member);
#ifndef FAKE_GETPEERS
	struct election_manager_s *mgr = MMANAGER(member);
	metautils_gthreadpool_push("getpeers", mgr->tasks_getpeers, member);
#endif
	member_set_status(member, STEP_PEERING);
}

/* Moves the member to STEP_FAILED. No action may be pending at that point
 * (this is asserted); the local and master information are reset first. */
static void
member_action_to_FAILED(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));

	member_reset_local(member);
	member_reset_master(member);

	/* Pretend a USE has just been sent, so that none is emitted as soon
	 * as the member joins the set of FAILED elections. */
	member->last_USE = oio_ext_monotonic_time ();
	member_set_status(member, STEP_FAILED);
}

/* Reaction to a ZooKeeper call that could not be started: raises
 * EVT_DISCONNECTED when `zrc` denotes a disconnection, otherwise moves the
 * member to FAILED. */
static void
member_fail_on_error(struct election_member_s *member, int zrc)
{
	if (_zoo_disconnected(zrc)) {
		transition(member, EVT_DISCONNECTED, NULL);
		return;
	}
	member_action_to_FAILED(member);
}

/* Shared implementation of the transitions toward the LEAVING steps:
 * starts the asynchronous deletion of our node, then moves the member to
 * the step given as `evt`. When the deletion cannot even be started, the
 * member is handled by member_fail_on_error() instead.
 * NOTE(review): `evt` is declared as an event type, but the callers visible
 * in this file pass a STEP_* value and it ends up in member_set_status();
 * the name and the type look like misnomers. */
static void
_common_action_to_LEAVE(struct election_member_s *member,
		enum event_type_e evt)
{
	EXTRA_ASSERT(!member_has_action(member));

	/* these requests are about to be honoured, forget them */
	member->requested_LEAVE = 0;
	member->requested_LEFT_SELF = 0;
	member->requested_LEFT_MASTER = 0;

	/* Many origins are possible and some of them are stable states:
	 * only set the marker when it is not already set. */
	if (member->when_unstable <= 0)
		member->when_unstable = oio_ext_monotonic_time();

	gchar path[PATH_MAXLEN];
	const int zrc = sqlx_sync_adelete(member->sync,
			member_fullpath(member, path, sizeof(path)), -1,
			completion_LEAVING, member);
	TRACE_EXECUTION(member->manager);

	if (unlikely(zrc != ZOK)) {
		member_fail_on_error(member, zrc);
		return;
	}

	member->pending_ZK_DELETE = 1;
	/* reference held on behalf of the pending completion */
	member_ref(member);
	member_reset_master(member);
	member_set_status(member, evt);
}

/* Starts leaving the election, toward STEP_LEAVING. */
static void
member_action_to_LEAVING(struct election_member_s *member)
{
	_common_action_to_LEAVE(member, STEP_LEAVING);
}

/* Starts leaving the election, toward STEP_LEAVING_FAILING. */
static void
member_action_to_LEAVING_FAILING(struct election_member_s *member)
{
	_common_action_to_LEAVE(member, STEP_LEAVING_FAILING);
}

/* Reaction to a ZooKeeper call that could not be started while our node
 * exists: raises EVT_DISCONNECTED when `zrc` denotes a disconnection,
 * otherwise starts leaving toward LEAVING_FAILING. */
static void
member_leave_on_error(struct election_member_s *member, int zrc)
{
	if (_zoo_disconnected(zrc)) {
		transition(member, EVT_DISCONNECTED, NULL);
		return;
	}
	member_action_to_LEAVING_FAILING(member);
}

/* Let's create the node in the ZK service */
static void
member_action_to_CREATING(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->local_id == 0);
	EXTRA_ASSERT(member->master_id == 0);
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(member->peers != NULL);

	member->requested_USE = 0;

	/* We come from either NONE or PEERING */
	if (member->when_unstable <= 0)
		member->when_unstable = oio_ext_monotonic_time();

	if (member->manager->exiting)
		return member_action_to_NONE(member);

	if (!defer_USE(member))
		return member_action_to_FAILED(member);

	const char *myurl = member_get_url(member);
	gchar path[PATH_MAXLEN];
	int zrc = sqlx_sync_acreate(member->sync,
			member_fullpath(member, path, sizeof(path)),
			myurl, strlen(myurl),
			ZOO_EPHEMERAL|ZOO_SEQUENCE,
			completion_CREATING, member);
	TRACE_EXECUTION(member->manager);

	if (unlikely(zrc != ZOK)) {
		member_warn_failed_action(member, zrc, "CREATE");
		return member_fail_on_error(member, zrc);
	}

	member->generation_id ++;
	member->pending_ZK_CREATE = 1;
	member_ref(member);
	return member_set_status(member, STEP_CREATING);
}

/* Entry point of a new election attempt: currently always (re)loads the
 * peers. */
static void
member_action_START(struct election_member_s *member)
{
	/* TODO(jfs): implement the obsolescence (time-based decay of cached
	 * items) of the peer, lazy reloading to maybe win some precious ms. */
	member_action_to_PEERING(member);
}

/* Starts an "exists" call on our own node, registering watch_SELF with the
 * current generation id, and moves the member to STEP_WATCHING. When the
 * call cannot be started, the failure is logged and handled by
 * member_leave_on_error(). */
static void
member_action_to_WATCHING(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));

	gchar path[PATH_MAXLEN];
	const int zrc = sqlx_sync_awexists(member->sync,
			member_fullpath(member, path, sizeof(path)),
			watch_SELF, GUINT_TO_POINTER(member->generation_id),
			completion_WATCHING, member);
	TRACE_EXECUTION(member->manager);

	if (unlikely(zrc != ZOK)) {
		member_warn_failed_action(member, zrc, "WATCH");
		member_leave_on_error(member, zrc);
		return;
	}

	member->pending_ZK_EXISTS = 1;
	/* reference held on behalf of the pending completion */
	member_ref(member);
	member_set_status(member, STEP_WATCHING);
}

/* Forgets the current master, starts the listing of the sibling nodes and
 * moves the member to STEP_LISTING. When the call cannot be started, the
 * failure is logged and handled by member_leave_on_error(). */
static void
member_action_to_LISTING(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(member_has_local_id(member));

	member->requested_LEFT_MASTER = 0;
	member_reset_master(member);

	gchar path[PATH_MAXLEN];
	const int zrc = sqlx_sync_awget_siblings(member->sync,
			member_fullpath(member, path, sizeof(path)),
			NULL, NULL, completion_LISTING, member);
	TRACE_EXECUTION(member->manager);

	if (unlikely(zrc != ZOK)) {
		member_warn_failed_action(member, zrc, "LIST");
		member_leave_on_error(member, zrc);
		return;
	}

	member->pending_ZK_LIST = 1;
	/* reference held on behalf of the pending completion */
	member_ref(member);
	member_set_status(member, STEP_LISTING);
}

/* Starts a "get" call on the node of the master, registering watch_MASTER
 * with the current generation id, and moves the member to STEP_ASKING.
 * When the call cannot be started, the failure is logged and handled by
 * member_leave_on_error(). */
static void
member_action_to_ASKING(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->local_id != member->master_id);
	EXTRA_ASSERT(member->master_url == NULL);

	gchar path[PATH_MAXLEN];
	const int zrc = sqlx_sync_awget(member->sync,
			member_masterpath(member, path, sizeof(path)),
			watch_MASTER, GUINT_TO_POINTER(member->generation_id),
			completion_ASKING, member);
	TRACE_EXECUTION(member->manager);

	if (unlikely(zrc != ZOK)) {
		member_warn_failed_action(member, zrc, "ASK");
		member_leave_on_error(member, zrc);
		return;
	}

	member->pending_ZK_GET = 1;
	/* reference held on behalf of the pending completion */
	member_ref(member);
	member_set_status(member, STEP_ASKING);
}

/* Asks the peering layer to pipe the base from the master (source) to the
 * local service (target), with _result_PIPEFROM as callback, and moves the
 * member to STEP_SYNCING. A reference is taken on the member beforehand. */
static void
member_action_to_SYNCING(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));

	member_ref(member);

	const char *target = member_get_url(member);
	const char *source = member->master_url;

	EXTRA_ASSERT(target != NULL);
	EXTRA_ASSERT(source != NULL);

	member->requested_PIPEFROM = 0;
	member->pending_PIPEFROM = 1;

	/* The only origin of the transition is SLAVE, note the entrance in an
	 * unstable state */
	member->when_unstable = oio_ext_monotonic_time();

	MEMBER_NAME(n, member);
	member->manager->deferred_peering_notify |= sqlx_peering__pipefrom(
			member->manager->peering, target, &n, source,
			member->manager, 0, _result_PIPEFROM);
	TRACE_EXECUTION(member->manager);

	member_set_status(member, STEP_SYNCING);
}

static void
member_action_to_REFRESH_CHECKING_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));

	member_ref(member);
#ifndef FAKE_GETPEERS
	struct election_manager_s *M = MMANAGER(member);
	metautils_gthreadpool_push("getpeers", M->tasks_getpeers, member);
#endif
	return member_set_status(member, STEP_REFRESH_CHECKING_MASTER);
}

/* Enters STEP_DELAYED_CHECKING_MASTER. No request is issued here: the
 * member just changes its status and waits for further events. */
static void
member_action_to_DELAYED_CHECKING_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	member_set_status(member, STEP_DELAYED_CHECKING_MASTER);
}

static void
member_action_to_REFRESH_CHECKING_SLAVES(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));

	member_ref(member);
#ifndef FAKE_GETPEERS
	struct election_manager_s *M = MMANAGER(member);
	metautils_gthreadpool_push("getpeers", M->tasks_getpeers, member);
#endif
	return member_set_status(member, STEP_REFRESH_CHECKING_SLAVES);
}

/* Enters STEP_DELAYED_CHECKING_SLAVES. No request is issued here: the
 * member just changes its status and waits for further events. */
static void
member_action_to_DELAYED_CHECKING_SLAVES(struct election_member_s *member)
{
	EXTRA_ASSERT(!member_has_action(member));
	member_set_status(member, STEP_DELAYED_CHECKING_SLAVES);
}

/* Enters STEP_CHECKING_MASTER: sends a single GETVERS request to the
 * master's URL; the answer is reported through _result_GETVERS. */
static void
member_action_to_CHECKING_MASTER(struct election_member_s *m)
{
	EXTRA_ASSERT(!member_has_action(m));
	EXTRA_ASSERT(!member_has_getvers(m));

	/* Only a transition coming from ASKING refills the attempts budget */
	if (m->step == STEP_ASKING)
		m->attempts_GETVERS = sqliterepo_getvers_attempts;

	/* Replies still expected from a previous round are about to be lost */
	if (m->pending_GETVERS > 0)
		member_warn("lost:GETVERS", m);

	/* Exactly one request is sent, and every tally restarts from zero */
	m->errors_GETVERS = 0;
	m->concurrent_GETVERS = 0;
	m->outdated_GETVERS = 0;
	m->pending_GETVERS = 1;
	m->count_GETVERS = 1;

	MEMBER_NAME(n, m);
	m->manager->deferred_peering_notify |= sqlx_peering__getvers(
			m->manager->peering, m->master_url, &n,
			m->manager, 0, _result_GETVERS);
	TRACE_EXECUTION(m->manager);

	member_set_status(m, STEP_CHECKING_MASTER);
}

/* Enters STEP_CHECKING_SLAVES: sends one GETVERS request to each peer; the
 * answers are reported one by one through _result_GETVERS.
 * NOTE(review): with an empty (but non-NULL) peers array no request is sent
 * and pending_GETVERS stays at 0 -- confirm callers never reach this
 * function in that situation. */
static void
member_action_to_CHECKING_SLAVES(struct election_member_s *m)
{
	EXTRA_ASSERT(!member_has_action(m));
	EXTRA_ASSERT(!member_has_getvers(m));
	EXTRA_ASSERT(member_has_local_id(m));
	EXTRA_ASSERT(member_has_master_id(m));
	EXTRA_ASSERT(m->master_id == m->local_id);
	EXTRA_ASSERT(m->master_url == NULL);
	EXTRA_ASSERT(m->peers != NULL);

	/* Coming from any other step refills the attempts budget */
	if (m->step != STEP_CHECKING_SLAVES)
		m->attempts_GETVERS = sqliterepo_getvers_attempts;

	/* Replies still expected from a previous round are about to be lost */
	if (m->pending_GETVERS > 0)
		member_warn("lost:GETVERS", m);

	/* One reply is expected per peer, every tally restarts from zero */
	const guint nb_peers = g_strv_length(m->peers);
	m->pending_GETVERS = nb_peers;
	m->count_GETVERS = nb_peers;
	m->errors_GETVERS = 0;
	m->outdated_GETVERS = 0;
	m->concurrent_GETVERS = 0;

	MEMBER_NAME(n, m);
	gchar **peer = m->peers;
	while (*peer) {
		m->manager->deferred_peering_notify |= sqlx_peering__getvers(
				m->manager->peering, *peer, &n, m->manager, 0, _result_GETVERS);
		TRACE_EXECUTION(m->manager);
		peer++;
	}

	member_set_status(m, STEP_CHECKING_SLAVES);
}

/* Enters STEP_MASTER and clears the "unstable since" timestamp. */
static void
member_action_to_MASTER(struct election_member_s *member)
{
	member->when_unstable = 0;
	member_set_status(member, STEP_MASTER);
}

/* Enters STEP_SLAVE and clears the "unstable since" timestamp. */
static void
member_action_to_SLAVE(struct election_member_s *member)
{
	member->when_unstable = 0;
	member_set_status(member, STEP_SLAVE);
}

/* Accounts for one GETVERS reply received in STEP_CHECKING_MASTER and, once
 * the last expected reply is in, decides the next step of the member.
 *
 * Signals recorded while the request was pending are honored in this order:
 *  1. a request to leave -> LEAVING
 *  2. our own node disappeared -> full restart of the election
 *  3. the master's node disappeared -> LISTING again
 *  4. the master reported a concurrent or outdated base -> SYNCING
 *  5. the GETVERS failed -> delayed retry while attempts remain,
 *     LEAVING_FAILING otherwise
 *  6. nothing special -> SLAVE
 *
 * The loss of our own node is checked BEFORE the master change and the
 * resync, as the other handlers of this FSM do: the flag is cleared below,
 * so any other path taken first would silently forget that the member has
 * no node anymore. */
static void
member_finish_CHECKING_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT (member->pending_GETVERS > 0);
	if ((--member->pending_GETVERS) > 0)
		return;
	EXTRA_ASSERT(!member_has_action(member));

	EXTRA_ASSERT(1 == member->count_GETVERS);

	/* Snapshot the tallies and the signals before they get reset */
	const guint16 outdated = member->outdated_GETVERS;
	const guint16 errors = member->errors_GETVERS;
	const guint16 concurrent = member->concurrent_GETVERS;

	EXTRA_ASSERT(concurrent + outdated + errors <= 1);

	const guint16 node_left = member->requested_LEFT_SELF;
	const guint16 master_change = member->requested_LEFT_MASTER;

	member_reset_getvers(member);

	member->requested_LEFT_SELF = 0;
	member->requested_LEFT_MASTER = 0;

	if (member->requested_LEAVE)
		return member_action_to_LEAVING(member);

	/* Our node vanished: whatever the master answered, restart cleanly */
	if (node_left) {
		member_reset_local(member);
		member_reset_master(member);
		return member_action_START(member);
	}

	if (master_change)
		return member_action_to_LISTING(member);

	if (concurrent)
		return member_action_to_SYNCING(member);
	if (outdated)
		return member_action_to_SYNCING(member);

	if (errors) {
		/* Let's retry if the GETVERS simply failed */
		if (member->attempts_GETVERS <= 0)
			return member_action_to_LEAVING_FAILING(member);
		/* We still have spare attempts, let's retry */
		member->attempts_GETVERS --;
		return member_action_to_DELAYED_CHECKING_MASTER(member);
	}

	return member_action_to_SLAVE(member);
}

/* Accounts for one GETVERS reply received in STEP_CHECKING_SLAVES and, once
 * the last expected reply is in, decides the next step of the member. */
static void
member_finish_CHECKING_SLAVES(struct election_member_s *member)
{
	EXTRA_ASSERT (member->pending_GETVERS > 0);
	member->pending_GETVERS --;
	if (member->pending_GETVERS > 0)
		return;
	EXTRA_ASSERT(!member_has_action(member));

	/* Snapshot the tallies before they get reset */
	const guint16 asked = member->count_GETVERS;
	const guint16 outdated = member->outdated_GETVERS;
	const guint16 errors = member->errors_GETVERS;
	const guint16 concurrent = member->concurrent_GETVERS;

	member_reset_getvers(member);

	/* Consume the signals recorded while the requests were pending */
	const guint8 self_left = member->requested_LEFT_SELF;
	member->requested_LEFT_SELF = 0;
	member->requested_LEFT_MASTER = 0;

	EXTRA_ASSERT(outdated + concurrent + errors <= asked);

	/* A leave was requested: no need to check any longer, the member
	 * will not become MASTER this time. */
	if (member->requested_LEAVE) {
		member_action_to_LEAVING(member);
		return;
	}

	/* Our own node disappeared. Master or not, a restart is necessary,
	 * so do it right now. */
	if (self_left) {
		member_reset_local(member);
		member_reset_master(member);
		member_action_START(member);
		return;
	}

	/* Either of these answers tells the local base is not an acceptable
	 * master in its current state. Leave, and let the base become SLAVE
	 * and then resync. */
	if (concurrent || outdated) {
		member_action_to_LEAVING(member);
		return;
	}

	/* Finally require a quorum of valid answers, counting ourselves */
	const guint16 nb_members = asked + 1;
	if (errors > 0 && (errors >= (nb_members + 1) / 2)) {
		if (member->attempts_GETVERS <= 0) {
			member_action_to_LEAVING_FAILING(member);
		} else {
			/* Spare attempts remain, retry after a delay */
			member->attempts_GETVERS --;
			member_action_to_DELAYED_CHECKING_SLAVES(member);
		}
		return;
	}

	member_action_to_MASTER(member);
}

#ifdef HAVE_EXTRA_ASSERT

/* Invariants of STEP_NONE: no local ID, no master ID, no master URL and
 * no pending action. */
static void
_member_assert_NONE(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_NONE);
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_action(member));
}

/* Invariants of STEP_PEERING: nothing is known yet (no local ID, no master
 * ID, no master URL), and neither a GETVERS, a PIPEFROM nor any ZK request
 * is pending. */
static void
_member_assert_PEERING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_PEERING);
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_CREATING: the peers are known, the IDs are not, and
 * the only pending request is the ZK CREATE. */
static void
_member_assert_CREATING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_CREATING);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(member->requested_LEFT_SELF == 0);  // makes no sense: no local ID is known at this step
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE != 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_WATCHING: peers and local ID are known, the master is
 * not, and the only pending request is the ZK EXISTS. */
static void
_member_assert_WATCHING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_WATCHING);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS != 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_LISTING: peers and local ID are known, the master is
 * not, and the only pending request is the ZK LIST. */
static void
_member_assert_LISTING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_LISTING);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST != 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_ASKING: peers, local ID and master ID are known, the
 * master URL is not, and the only pending request is the ZK GET. */
static void
_member_assert_ASKING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_ASKING);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET != 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_CHECKING_MASTER: the master is another member whose
 * URL is known, a GETVERS is pending, and no PIPEFROM nor ZK request is. */
static void
_member_assert_CHECKING_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_CHECKING_MASTER);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id != member->local_id);
	EXTRA_ASSERT(member->master_url != NULL);
	EXTRA_ASSERT(member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_CHECKING_SLAVES: the member is its own master (same
 * IDs, no master URL), a GETVERS is pending, and no PIPEFROM nor ZK
 * request is. */
static void
_member_assert_CHECKING_SLAVES(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_CHECKING_SLAVES);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id == member->local_id);
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_LEAVING: the local ID is still known, the master is
 * forgotten, and the only pending request is the ZK DELETE. */
static void
_member_assert_LEAVING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_LEAVING);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE != 0);
}

/* Invariants of STEP_LEAVING_FAILING: same as STEP_LEAVING, i.e. the local
 * ID is still known, the master is forgotten, and the only pending request
 * is the ZK DELETE. */
static inline void
_member_assert_LEAVING_FAILING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_LEAVING_FAILING);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE != 0);
}

/* Invariants of STEP_SYNCING: the master is another member whose URL is
 * known, and the only pending action is the PIPEFROM. */
static void
_member_assert_SYNCING(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_SYNCING);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id != member->local_id);
	EXTRA_ASSERT(member->master_url != NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM != 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
}

/* Invariants of STEP_DELAYED_CHECKING_MASTER: the master is another member
 * whose URL is known, nothing is pending, and no LEFT_MASTER / LEFT_SELF /
 * LEAVE signal is recorded. */
static void
_member_assert_DELAYED_CHECKING_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_DELAYED_CHECKING_MASTER);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id != member->local_id);
	EXTRA_ASSERT(member->master_url != NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);

	/* the reaction for this step handles these events immediately, so no
	 * signal flag is expected to be left set */
	EXTRA_ASSERT(member->requested_LEFT_MASTER == 0);
	EXTRA_ASSERT(member->requested_LEFT_SELF == 0);
	EXTRA_ASSERT(member->requested_LEAVE == 0);
}

/* Invariants of STEP_REFRESH_CHECKING_MASTER: the master is another member
 * whose URL is known, and no GETVERS, PIPEFROM nor ZK request is pending.
 * The signal flags are deliberately not checked. */
static void
_member_assert_REFRESH_CHECKING_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_REFRESH_CHECKING_MASTER);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id != member->local_id);
	EXTRA_ASSERT(member->master_url != NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
	/* a request is pending, signal flags are allowed */
}

/* Invariants of STEP_DELAYED_CHECKING_SLAVES: the member is its own master
 * (same IDs, no master URL), nothing is pending, and no LEFT_MASTER /
 * LEFT_SELF / LEAVE signal is recorded. */
static void
_member_assert_DELAYED_CHECKING_SLAVES(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_DELAYED_CHECKING_SLAVES);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id == member->local_id);
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);

	/* the reaction for this step handles these events immediately, so no
	 * signal flag is expected to be left set */
	EXTRA_ASSERT(member->requested_LEFT_MASTER == 0);
	EXTRA_ASSERT(member->requested_LEFT_SELF == 0);
	EXTRA_ASSERT(member->requested_LEAVE == 0);
}

/* Invariants of STEP_REFRESH_CHECKING_SLAVES: the member is its own master
 * (same IDs, no master URL), and no GETVERS, PIPEFROM nor ZK request is
 * pending. The signal flags are deliberately not checked. */
static void
_member_assert_REFRESH_CHECKING_SLAVES(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_REFRESH_CHECKING_SLAVES);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id == member->local_id);
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_getvers(member));
	EXTRA_ASSERT(member->pending_PIPEFROM == 0);
	EXTRA_ASSERT(member->pending_ZK_CREATE == 0);
	EXTRA_ASSERT(member->pending_ZK_EXISTS == 0);
	EXTRA_ASSERT(member->pending_ZK_LIST == 0);
	EXTRA_ASSERT(member->pending_ZK_GET == 0);
	EXTRA_ASSERT(member->pending_ZK_DELETE == 0);
	/* a request is pending, signal flags are allowed */
}

/* Invariants of STEP_SLAVE: the master is another member whose URL is
 * known, and no action is pending. */
static void
_member_assert_SLAVE(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_SLAVE);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id != member->local_id);
	EXTRA_ASSERT(member->master_url != NULL);
	EXTRA_ASSERT(!member_has_action(member));
}

/* Invariants of STEP_MASTER: the member is its own master (same IDs, no
 * master URL), and no action is pending. */
static void
_member_assert_MASTER(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_MASTER);
	EXTRA_ASSERT(member->peers != NULL);
	EXTRA_ASSERT(member_has_local_id(member));
	EXTRA_ASSERT(member_has_master_id(member));
	EXTRA_ASSERT(member->master_id == member->local_id);
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_action(member));
}

/* Invariants of STEP_FAILED: no local ID, no master ID, no master URL and
 * no pending action. */
static void
_member_assert_FAILED(struct election_member_s *member)
{
	EXTRA_ASSERT(member->step == STEP_FAILED);
	EXTRA_ASSERT(!member_has_local_id(member));
	EXTRA_ASSERT(!member_has_master_id(member));
	EXTRA_ASSERT(member->master_url == NULL);
	EXTRA_ASSERT(!member_has_action(member));
}

#else
/* HAVE_EXTRA_ASSERT is not defined: every per-step invariant check expands
 * to nothing, so the calls in the reaction functions cost nothing. */
#define _member_assert_NONE(...)
#define _member_assert_PEERING(...)
#define _member_assert_CREATING(...)
#define _member_assert_WATCHING(...)
#define _member_assert_LISTING(...)
#define _member_assert_ASKING(...)
#define _member_assert_CHECKING_MASTER(...)
#define _member_assert_CHECKING_SLAVES(...)
#define _member_assert_LEAVING(...)
#define _member_assert_LEAVING_FAILING(...)
#define _member_assert_SYNCING(...)
#define _member_assert_DELAYED_CHECKING_MASTER(...)
#define _member_assert_REFRESH_CHECKING_MASTER(...)
#define _member_assert_DELAYED_CHECKING_SLAVES(...)
#define _member_assert_REFRESH_CHECKING_SLAVES(...)
#define _member_assert_SLAVE(...)
#define _member_assert_MASTER(...)
#define _member_assert_FAILED(...)
#endif

/* Reaction of a member in STEP_NONE to the event `evt`. */
static void
_member_react_NONE(struct election_member_s *member, enum event_type_e evt)
{
	_member_assert_NONE (member);

	switch (evt) {
		case EVT_NONE:
			member->requested_USE = 0;
			/* Unless the manager is exiting, an election cycle starts
			 * right now. This point is considered the real start of the
			 * "unstable" phase of the election. */
			if (!member->manager->exiting)
				member_action_START(member);
			return;

		case EVT_SYNC_REQ:
			/* Remember that an election must be triggered, and that it
			 * must lead to a PRE-SLAVE state and then to a SYNCING */
			member->requested_PIPEFROM = 1;
			member->requested_USE = 1;
			return;

		/* Ignored interruptions. EVT_LEFT_SELF is not abnormal here:
		 * removing the node raises the watcher set on it.
		 * TODO insert a new state in the FSM to wait for this event. */
		case EVT_LEAVE_REQ:
		case EVT_LEFT_SELF:
		case EVT_LEFT_MASTER:
			return;

		/* Anything else is abnormal */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_PEERING to the event `evt`.
 * `peers` is only read for EVT_GETPEERS_DONE; it is duplicated, never kept. */
static void
_member_react_PEERING(struct election_member_s *member,
		enum event_type_e evt, gchar **peers)
{
	_member_assert_PEERING (member);

	switch (evt) {
		/* Nothing to do. For EVT_LEAVE_REQ: the member did not join yet,
		 * so there is nothing to leave. */
		case EVT_NONE:
		case EVT_LEAVE_REQ:
		case EVT_LEFT_SELF:
			return;

		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;

		/* Completion of the action */
		case EVT_GETPEERS_DONE:
			member_reset_peers(member);
			member->peers = g_strdupv(peers);
			TRACE_EXECUTION(member->manager);
			if (member->peers != NULL)
				member_action_to_CREATING(member);
			else
				member_action_to_FAILED(member);
			TRACE_EXECUTION(member->manager);
			return;

		/* Abnormal events, a lost master included */
		case EVT_LEFT_MASTER:
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_CREATING to the event `evt`.
 * `p_local_id` is only read for EVT_CREATE_OK, where it must not be NULL. */
static void
_member_react_CREATING(struct election_member_s *member,
		enum event_type_e evt, gint32 *p_local_id)
{
	_member_assert_CREATING (member);

	switch (evt) {
		/* Nothing to do */
		case EVT_NONE:
		case EVT_LEFT_SELF:
			return;

		/* Interruptions, recorded until the creation completes */
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;

		/* Completion of the action */
		case EVT_CREATE_KO:
			member->pending_ZK_CREATE = 0;
			member_action_to_FAILED(member);
			return;

		case EVT_CREATE_OK:
			EXTRA_ASSERT(p_local_id != NULL);
			member->pending_ZK_CREATE = 0;
			member_set_local_id(member, *p_local_id);
			if (member->requested_LEAVE) {
				/* a leave was requested in the meantime */
				member_action_to_LEAVING(member);
			} else {
				/* nominal flow */
				member_action_to_WATCHING(member);
			}
			return;

		/* Abnormal events, a lost master included */
		case EVT_LEFT_MASTER:
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_WATCHING to the event `evt`. */
static void
_member_react_WATCHING(struct election_member_s *member, enum event_type_e evt)
{
	_member_assert_WATCHING (member);

	switch (evt) {
		case EVT_NONE:
			return;

		/* Interruptions, recorded until the check completes */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;

		/* Completion of the action */
		case EVT_EXISTS_KO:
			member->pending_ZK_EXISTS = 0;
			member_action_to_LEAVING(member);
			return;

		case EVT_EXISTS_OK:
			member->pending_ZK_EXISTS = 0;
			if (member->requested_LEAVE) {
				/* a leave was requested in the meantime */
				member_action_to_LEAVING(member);
			} else if (member->requested_LEFT_SELF) {
				/* our node vanished in the meantime: restart */
				member_reset_local(member);
				member->requested_LEFT_SELF = 0;
				member->requested_LEFT_MASTER = 0;
				member_action_START(member);
			} else {
				/* nominal flow */
				member_action_to_LISTING(member);
			}
			return;

		/* Abnormal events, a lost master included */
		case EVT_LEFT_MASTER:
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_LISTING to the event `evt`.
 * `p_masterid` is only dereferenced in the nominal flow of EVT_LIST_OK. */
static void
_member_react_LISTING(struct election_member_s *member, enum event_type_e evt,
		gint32 *p_masterid)
{
	_member_assert_LISTING (member);

	switch (evt) {
		/* Nothing to do */
		case EVT_NONE:
		case EVT_LEFT_MASTER:
			return;

		/* Interruptions, recorded until the listing completes */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;

		/* Completion of the action */
		case EVT_LIST_KO:
			member->pending_ZK_LIST = 0;
			member_action_to_LEAVING_FAILING(member);
			return;

		case EVT_LIST_OK:
			member->pending_ZK_LIST = 0;
			if (member->requested_LEAVE) {
				/* a leave was requested in the meantime */
				member_action_to_LEAVING(member);
			} else if (member->requested_LEFT_SELF) {
				/* our node vanished in the meantime: restart */
				member_reset_local(member);
				member->requested_LEFT_SELF = 0;
				member->requested_LEFT_MASTER = 0;
				member_action_START(member);
			} else if (member->local_id == *p_masterid) {
				/* We are 1st, the probable future master */
				member_set_master_id(member, member->local_id);
				member_action_to_CHECKING_SLAVES(member);
			} else {
				/* We are in the tail, the probable future slave */
				member_set_master_id(member, *p_masterid);
				member_action_to_ASKING(member);
			}
			return;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_ASKING to the event `evt`.
 * `url` carries the master's URL for EVT_MASTER_OK and is expected to be
 * NULL for EVT_MASTER_KO and EVT_MASTER_BAD. */
static void
_member_react_ASKING(struct election_member_s *member, enum event_type_e evt,
		const char *url)
{
	_member_assert_ASKING (member);

	switch (evt) {
		case EVT_NONE:
			return;

		/* Interruptions, recorded until the request completes */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;
		case EVT_LEFT_MASTER:
			member->requested_LEFT_MASTER = 1;
			return;

		/* Completion of the action: both failures are handled alike.
		 * No need to manage a clean LEAVE, the member is about to leave
		 * and then fail. */
		case EVT_MASTER_KO:
		case EVT_MASTER_BAD:
			member->pending_ZK_GET = 0;
			EXTRA_ASSERT(url == NULL);
			member_action_to_LEAVING_FAILING(member);
			return;

		case EVT_MASTER_OK:
			member->pending_ZK_GET = 0;
			if (member->requested_LEAVE) {
				/* a leave was requested in the meantime */
				member_action_to_LEAVING(member);
			} else if (member->requested_LEFT_SELF) {
				/* our node vanished in the meantime: restart */
				member_reset_local(member);
				member_reset_master(member);
				member->requested_LEFT_SELF = 0;
				member->requested_LEFT_MASTER = 0;
				member_action_START(member);
			} else if (member->requested_LEFT_MASTER) {
				/* Strange situation: the content of the master's node
				 * arrives together with the information that the master's
				 * node has left. It is not even sure that the master that
				 * left is the last one that was monitored.
				 * This is for sure a transient state, services are leaving
				 * or elections are moving elsewhere. To be safe, do another
				 * whole loop and restart with a clean state. */
				member_action_to_LISTING(member);
			} else {
				/* nominal flow: let's become CHECKING_MASTER */
				member_set_master_url(member, url);
				member_action_to_CHECKING_MASTER(member);
			}
			return;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_CHECKING_MASTER to the event `evt`. */
static void
_member_react_CHECKING_MASTER(struct election_member_s *member,
		enum event_type_e evt)
{
	_member_assert_CHECKING_MASTER (member);

	switch (evt) {
		case EVT_NONE:
			return;

		/* Interruptions, recorded until the GETVERS completes */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;
		case EVT_LEFT_MASTER:
			member->requested_LEFT_MASTER = 1;
			return;

		/* Completion of the action.
		 * The action is nearly the same for every completion event. This
		 * is intentional: distinct events give a pretty transition log for
		 * the election FSM. */
		case EVT_GETVERS_OK:
			member_finish_CHECKING_MASTER(member);
			return;

		case EVT_GETVERS_OLD:
			member->outdated_GETVERS ++;
			member_finish_CHECKING_MASTER(member);
			return;

		case EVT_GETVERS_RACE:
			member->concurrent_GETVERS ++;
			member_finish_CHECKING_MASTER(member);
			return;

		case EVT_GETVERS_KO:
			member->errors_GETVERS ++;
			member_finish_CHECKING_MASTER(member);
			return;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_CHECKING_SLAVES to the event `evt`. */
static void
_member_react_CHECKING_SLAVES(struct election_member_s *member,
		enum event_type_e evt)
{
	_member_assert_CHECKING_SLAVES (member);

	switch (evt) {
		/* Nothing to do */
		case EVT_NONE:
		case EVT_SYNC_REQ:
		case EVT_LEFT_MASTER:
			return;

		/* Interruptions, recorded until all the GETVERS complete */
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;

		/* Completion of one GETVERS: tally the answer, then let the
		 * finisher decide whether it was the last one expected */
		case EVT_GETVERS_OK:
			member_finish_CHECKING_SLAVES(member);
			return;

		case EVT_GETVERS_OLD:
			member->outdated_GETVERS ++;
			member_finish_CHECKING_SLAVES(member);
			return;

		case EVT_GETVERS_RACE:
			member->concurrent_GETVERS ++;
			member_finish_CHECKING_SLAVES(member);
			return;

		case EVT_GETVERS_KO:
			member->errors_GETVERS ++;
			member_finish_CHECKING_SLAVES(member);
			return;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_LEAVING to the event `evt`. */
static void
_member_react_LEAVING(struct election_member_s *member,
		enum event_type_e evt)
{
	_member_assert_LEAVING (member);

	switch (evt) {
		case EVT_NONE:
			/* The base is wanted again, unless the manager is exiting */
			member->requested_USE = !member->manager->exiting;
			return;

		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			member->requested_USE = 1;
			return;

		/* Ignored interruptions: the member is already leaving */
		case EVT_LEAVE_REQ:
		case EVT_LEFT_SELF:
		case EVT_LEFT_MASTER:
			return;

		/* Completion of the action */
		case EVT_LEAVE_KO:
			/* The clean "leaving" action failed: try once more through
			 * LEAVING_FAILING, which ends with a failed election. The case
			 * of zrc==ZNONODE has already been managed and treated as a
			 * success. */
			member->pending_ZK_DELETE = 0;
			member_action_to_LEAVING_FAILING(member);
			return;

		case EVT_LEAVE_OK:
			member->pending_ZK_DELETE = 0;
			member_reset_local(member);
			member_reset_master(member);
			member_action_to_NONE(member);
			return;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_LEAVING_FAILING to the event `evt`.
 * Whatever the outcome of the deletion, the member ends up FAILED. */
static void
_member_react_LEAVING_FAILING(struct election_member_s *member,
		enum event_type_e evt)
{
	_member_assert_LEAVING_FAILING (member);

	switch (evt) {
		case EVT_NONE:
			member->requested_USE = 1;
			return;

		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			member->requested_USE = 1;
			return;

		/* Ignored interruptions: the member is already leaving */
		case EVT_LEAVE_REQ:
		case EVT_LEFT_SELF:
		case EVT_LEFT_MASTER:
			return;

		/* Completion of the action, successful or not */
		case EVT_LEAVE_KO:
		case EVT_LEAVE_OK:
			member->pending_ZK_DELETE = 0;
			member_reset_local(member);
			member_reset_master(member);
			member_action_to_FAILED(member);
			return;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			return;
	}
}

/* Reaction of a member in STEP_SYNCING to the event `evt`.
 *
 * Interruptions are only recorded while the PIPEFROM is pending; they are
 * honored when it completes (successfully or not), in this order: leave
 * request, loss of our own node, loss of the master's node. Without any
 * interruption the member becomes SLAVE. */
static void
_member_react_SYNCING(struct election_member_s *member, enum event_type_e evt)
{
	_member_assert_SYNCING (member);
	switch (evt) {
		case EVT_NONE:
			return;

			/* Interruptions */
		case EVT_SYNC_REQ:
			/* already currently managed */
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;
		case EVT_LEFT_MASTER:
			member->requested_LEFT_MASTER = 1;
			return;

			/* Actions */
		case EVT_SYNC_OK:
		case EVT_SYNC_KO:
			member->pending_PIPEFROM = 0;
			if (member->requested_LEAVE)
				return member_action_to_LEAVING(member);
			if (member->requested_LEFT_SELF) {
				/* Our node vanished: forget the local and master state
				 * and consume the signals before restarting, as every
				 * other step does. Restarting with the IDs and the master
				 * URL still set would break the invariants of the steps
				 * that follow (no local ID, no master URL). */
				member_reset_local(member);
				member_reset_master(member);
				member->requested_LEFT_SELF = 0;
				member->requested_LEFT_MASTER = 0;
				return member_action_START(member);
			}
			if (member->requested_LEFT_MASTER)
				return member_action_to_LISTING(member);
			return member_action_to_SLAVE(member);

			/* Abnormal events */
		default:
			return member_warn_abnormal_event(member, evt);
	}
}

static void
_member_react_DELAYED_CHECKING_MASTER(struct election_member_s *member, enum event_type_e evt)
{
	gint64 now, delay;
	_member_assert_DELAYED_CHECKING_MASTER(member);
	switch (evt) {
		case EVT_NONE:
			now = oio_ext_monotonic_time();
			delay = sqliterepo_getvers_delay;
			if (member->last_status < OLDEST(now, delay)) {
				if (member->attempts_GETVERS <= 0) {
					return member_action_to_LEAVING_FAILING(member);
				} else {
					member->attempts_GETVERS --;
					if (BOOL(member->requested_peers_decache)) {
						return member_action_to_REFRESH_CHECKING_MASTER(member);
					} else {
						return member_action_to_CHECKING_MASTER(member);
					}
				}
			}
			return;

			/* Interruptions */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 0;
			return member_action_to_LEAVING(member);
		case EVT_LEFT_SELF:
			member_reset_local(member);
			member_reset_master(member);
			member->requested_LEFT_SELF = 0;
			member->requested_LEFT_MASTER = 0;
			return member_action_to_CREATING(member);
		case EVT_LEFT_MASTER:
			return member_action_to_LISTING(member);

			/* Actions: none pending */

			/* Abnormal events */
		default:
			return member_warn_abnormal_event(member, evt);
	}
}

/* Reaction of a member in STEP_REFRESH_CHECKING_MASTER to the event `evt`.
 * `peers` is only read for EVT_GETPEERS_DONE; it is duplicated, never kept.
 *
 * Interruptions are only recorded while the refresh of the peers is
 * pending; they are honored when EVT_GETPEERS_DONE arrives. */
static void
_member_react_REFRESH_CHECKING_MASTER(struct election_member_s *member,
		enum event_type_e evt, gchar **peers)
{
	_member_assert_REFRESH_CHECKING_MASTER(member);
	switch (evt) {
		case EVT_NONE:
			return;

			/* Interruptions */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			return;
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			return;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			return;
		case EVT_LEFT_MASTER:
			member->requested_LEFT_MASTER = 1;
			return;

			/* Actions: completion of the refresh of the peers */
		case EVT_GETPEERS_DONE:
			member_reset_peers(member);
			member->peers = g_strdupv(peers);
			if (member->requested_LEAVE)
				return member_action_to_LEAVING(member);
			if (member->requested_LEFT_SELF) {
				member_reset_local(member);
				member_reset_master(member);
				member->requested_LEFT_SELF = 0;
				member->requested_LEFT_MASTER = 0;
				member->requested_LEAVE = 0;
				return member_action_START(member);
			}
			/* Without peers neither LISTING nor CHECKING_MASTER may be
			 * entered (both steps require a set of peers), so this check
			 * must come before the management of a lost master. */
			if (!member->peers)
				return member_action_to_LEAVING_FAILING(member);
			if (member->requested_LEFT_MASTER)
				return member_action_to_LISTING(member);
			return member_action_to_CHECKING_MASTER(member);

			/* Abnormal events */
		default:
			return member_warn_abnormal_event(member, evt);
	}
}

static void
_member_react_DELAYED_CHECKING_SLAVES(struct election_member_s *member, enum event_type_e evt)
{
	gint64 now, delay;
	_member_assert_DELAYED_CHECKING_SLAVES(member);
	switch (evt) {
		case EVT_NONE:
			now = oio_ext_monotonic_time();
			delay = sqliterepo_getvers_delay;
			if (member->last_status < OLDEST(now, delay)) {
				if (member->attempts_GETVERS <= 0) {
					return member_action_to_LEAVING_FAILING(member);
				} else {
					member->attempts_GETVERS --;
					if (BOOL(member->requested_peers_decache)) {
						return member_action_to_REFRESH_CHECKING_SLAVES(member);
					} else {
						return member_action_to_CHECKING_SLAVES(member);
					}
				}
			}
			return;

			/* Interruptions */
		case EVT_SYNC_REQ:
			return member_warn_abnormal_event(member, evt);
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 0;
			return member_action_to_LEAVING(member);
		case EVT_LEFT_SELF:
			member_reset_local(member);
			member_reset_master(member);
			member->requested_LEFT_SELF = 0;
			member->requested_LEFT_MASTER = 0;
			return member_action_to_CREATING(member);
		case EVT_LEFT_MASTER:
			return member_warn_abnormal_event(member, evt);

			/* Actions: none pending */

			/* Abnormal events */
		default:
			return member_warn_abnormal_event(member, evt);
	}
}

/* FSM handler for STEP_REFRESH_CHECKING_SLAVES.
 *
 * Leave requests and the loss of our own node are recorded as flags, then
 * honoured when EVT_GETPEERS_DONE arrives. Sync requests are silently
 * ignored, and the loss of a master is reported as abnormal.
 *
 * @param member the election the event is addressed to
 * @param evt the event to react to
 * @param peers the peers carried by EVT_GETPEERS_DONE (duplicated here),
 *        not read for the other events
 */
static void
_member_react_REFRESH_CHECKING_SLAVES(struct election_member_s *member,
		enum event_type_e evt, gchar **peers)
{
	_member_assert_REFRESH_CHECKING_SLAVES(member);

	switch (evt) {
		/* Events without any effect in this step */
		case EVT_NONE:
		case EVT_SYNC_REQ:
			break;

		/* Interruptions: remember them, act upon completion */
		case EVT_LEAVE_REQ:
			member->requested_LEAVE = 1;
			break;
		case EVT_LEFT_SELF:
			member->requested_LEFT_SELF = 1;
			break;

		/* Completion of the pending action */
		case EVT_GETPEERS_DONE:
			member_reset_peers(member);
			member->peers = g_strdupv(peers);

			if (member->requested_LEAVE) {
				member_action_to_LEAVING(member);
			} else if (member->requested_LEFT_SELF) {
				/* Our node vanished: forget everything and restart */
				member_reset_local(member);
				member_reset_master(member);
				member->requested_LEAVE = 0;
				member->requested_LEFT_SELF = 0;
				member->requested_LEFT_MASTER = 0;
				member_action_START(member);
			} else if (!member->peers) {
				member_action_to_LEAVING_FAILING(member);
			} else {
				member_action_to_CHECKING_SLAVES(member);
			}
			break;

		/* Abnormal events */
		case EVT_LEFT_MASTER:
		default:
			member_warn_abnormal_event(member, evt);
			break;
	}
}

/* FSM handler for the final state STEP_SLAVE.
 *
 * Design note: some transitions make the FSM leave this final state.
 * Those caused by explicit actions (expiration, request to leave) are not
 * considered as driven by the entropy of the platform, the others are.
 * When the entropy is not the cause, the unstability timestamp is not
 * altered, so that clients do not consider the election as pending for
 * too long.
 *
 * @param member the election the event is addressed to
 * @param evt the event to react to
 */
static void
_member_react_SLAVE(struct election_member_s *member, enum event_type_e evt)
{
	gint64 now;

	_member_assert_SLAVE (member);

	switch (evt) {
		/* Timer: leave when unused for oio_election_delay_expire_SLAVE */
		case EVT_NONE:
			now = oio_ext_monotonic_time ();
			if (_is_over(now, member->last_atime, oio_election_delay_expire_SLAVE))
				member_action_to_LEAVING(member);
			break;

		/* Interruptions */
		case EVT_SYNC_REQ:
			member_action_to_SYNCING(member);
			break;
		case EVT_LEAVE_REQ:
			member_action_to_LEAVING(member);
			break;
		case EVT_LEFT_MASTER:
			member_action_to_LISTING(member);
			break;
		case EVT_LEFT_SELF:
			/* Our node vanished: forget everything and restart */
			member_warn("LEFT (self)", member);
			member_reset_local(member);
			member_reset_master(member);
			member_action_START(member);
			break;

		/* No action should be pending, any other event is abnormal */
		default:
			member_warn_abnormal_event(member, evt);
			break;
	}
}

/* FSM handler for the final state STEP_MASTER.
 *
 * @param member the election the event is addressed to
 * @param evt the event to react to
 */
static void
_member_react_MASTER(struct election_member_s *member, enum event_type_e evt)
{
	gint64 now;

	_member_assert_MASTER (member);

	switch (evt) {
		/* Timer: leave when unused for oio_election_delay_expire_MASTER */
		case EVT_NONE:
			now = oio_ext_monotonic_time();
			if (_is_over(now, member->last_atime, oio_election_delay_expire_MASTER))
				member_action_to_LEAVING(member);
			break;

		/* Interruptions */
		case EVT_SYNC_REQ:
			/* Sync requests are silently ignored in this step */
			break;
		case EVT_LEAVE_REQ:
			member_action_to_LEAVING(member);
			break;
		case EVT_LEFT_SELF:
			/* Our node vanished: forget everything and restart */
			member_warn("LEFT (self)", member);
			member_reset_local(member);
			member_reset_master(member);
			member_action_START(member);
			break;

		/* No action should be pending. The loss of a master is reported
		 * like any other unexpected event. */
		case EVT_LEFT_MASTER:
		default:
			member_warn_abnormal_event(member, evt);
			break;
	}
}

/* FSM handler for STEP_FAILED.
 *
 * Once `last_status` is older than `oio_election_delay_retry_FAILED`, the
 * election is restarted if its use has been requested, otherwise it goes
 * back to the NONE step.
 *
 * @param member the election the event is addressed to
 * @param evt the event to react to
 */
static void
_member_react_FAILED(struct election_member_s *member, enum event_type_e evt)
{
	gint64 now;

	_member_assert_FAILED (member);

	switch (evt) {
		/* Timer */
		case EVT_NONE:
			now = oio_ext_monotonic_time();
			if (!_is_over(now, member->last_status, oio_election_delay_retry_FAILED))
				break;
			if (member->requested_USE)
				member_action_START(member);
			else
				member_action_to_NONE(member);
			break;

		/* Interruptions */
		case EVT_SYNC_REQ:
			member->requested_PIPEFROM = 1;
			break;
		case EVT_LEAVE_REQ:
			member_reset(member);
			member_action_to_NONE(member);
			break;

		/* Node losses have no effect in this step */
		case EVT_LEFT_SELF:
		case EVT_LEFT_MASTER:
			break;

		/* Abnormal events */
		default:
			member_warn_abnormal_event(member, evt);
			break;
	}
}

/* Computes the monotonic instant at which the timer of the member's
 * current step fires.
 *
 * @param m the election to inspect
 * @return the deadline, or 0 when the current step has no timer
 */
static gint64
_member_next_timeout(struct election_member_s *m)
{
	/* Both delayed checks wait for the same GETVERS delay */
	if (m->step == STEP_DELAYED_CHECKING_MASTER
			|| m->step == STEP_DELAYED_CHECKING_SLAVES)
		return m->last_status + sqliterepo_getvers_delay;

	/* The retry delay is counted from the last change of status */
	if (m->step == STEP_FAILED)
		return m->last_status + oio_election_delay_retry_FAILED;

	/* The expirations of final states are counted from the last access */
	if (m->step == STEP_SLAVE)
		return m->last_atime + oio_election_delay_expire_SLAVE;
	if (m->step == STEP_MASTER)
		return m->last_atime + oio_election_delay_expire_MASTER;

	return 0;
}

static void
_member_react (struct election_member_s *member,
		enum event_type_e evt,
		void *evt_arg)
{
	/* EVT_DISCONNECTED is sent when we lost the link with ZK. All the
	 * ephemeral nodes will be lost, soon. We can reset an elections FSM
	 * receiving this event, whatever its state. */
	if (evt == EVT_DISCONNECTED) {
		member_warn("DISCONNECTED", member);
		member_reset(member);
		return member_action_to_NONE(member);
	}

	if (GRID_TRACE_ENABLED()) {
		gchar tag[64];
		g_snprintf(tag, sizeof(tag), "evt:%d/%s", evt, _evt2str(evt));
		member_trace(tag, member);
	}

	switch (member->step) {
		case STEP_NONE:
			return _member_react_NONE(member, evt);
		case STEP_PEERING:
			return _member_react_PEERING(member, evt, evt_arg);
		case STEP_CREATING:
			return _member_react_CREATING(member, evt, evt_arg);
		case STEP_WATCHING:
			return _member_react_WATCHING(member, evt);
		case STEP_LISTING:
			return _member_react_LISTING(member, evt, evt_arg);
		case STEP_ASKING:
			return _member_react_ASKING(member, evt, evt_arg);
		case STEP_CHECKING_MASTER: /* PRELOST */
			return _member_react_CHECKING_MASTER(member, evt);
		case STEP_CHECKING_SLAVES: /* PRELEAD */
			return _member_react_CHECKING_SLAVES(member, evt);
		case STEP_SYNCING:
			return _member_react_SYNCING(member, evt);

		case STEP_DELAYED_CHECKING_MASTER:
			return _member_react_DELAYED_CHECKING_MASTER(member, evt);
		case STEP_REFRESH_CHECKING_MASTER:
			return _member_react_REFRESH_CHECKING_MASTER(member, evt, evt_arg);

		case STEP_DELAYED_CHECKING_SLAVES:
			return _member_react_DELAYED_CHECKING_SLAVES(member, evt);
		case STEP_REFRESH_CHECKING_SLAVES:
			return _member_react_REFRESH_CHECKING_SLAVES(member, evt, evt_arg);

		case STEP_LEAVING:
			return _member_react_LEAVING(member, evt);
		case STEP_LEAVING_FAILING:
			return _member_react_LEAVING_FAILING(member, evt);
		case STEP_FAILED:
			return _member_react_FAILED(member, evt);

		case STEP_SLAVE:
			return _member_react_SLAVE(member, evt);
		case STEP_MASTER:
			return _member_react_MASTER(member, evt);
	}

	g_assert_not_reached();
}

/* Feeds an event to the FSM of the given election, then immediately
 * restarts the election if it ended in STEP_NONE while its use is still
 * requested.
 *
 * The calls to _member_react() are passed as statements to the
 * member_log_change() macro, which is defined outside of this section
 * (presumably it logs the change of state caused by those statements).
 *
 * @param member the election the event is addressed to
 * @param evt the event to react to
 * @param evt_arg optional payload of the event, forwarded to _member_react()
 */
static void
transition(struct election_member_s *member, enum event_type_e evt,
		void *evt_arg)
{
	member_log_change(member, evt,
			_member_react(member, evt, evt_arg);
			TRACE_EXECUTION(member->manager));

	/* Re-kickoff the elections marked as to be restarted, but only if they
	 * have no activity and if the manager is not being exited. */
	if (member->step == STEP_NONE
			&& BOOL(member->requested_USE)
			&& !member->manager->exiting) {
		member_log_change(member, EVT_NONE,
			_member_react(member, EVT_NONE, NULL);
			TRACE_EXECUTION(member->manager));
	}
}

static void
transition_error(struct election_member_s *member,
		enum event_type_e evt, enum ZOO_ERRORS zrc)
{
	EXTRA_ASSERT(zrc != ZOK);
	if (_zoo_disconnected(zrc))
		return transition(member, EVT_DISCONNECTED, NULL);
	return transition(member, evt, NULL);
}

/* Builds a snapshot of the members currently linked in a queue, from the
 * front to the back. The members themselves are not copied, so the
 * snapshot can be walked while the queue is being altered.
 *
 * @param beacon the queue to snapshot
 * @return a new array holding the pointers to the members
 */
static GPtrArray *
_DEQUE_extract (struct deque_beacon_s *beacon)
{
	GPtrArray *snapshot = g_ptr_array_sized_new(beacon->count);
	struct election_member_s *cursor = beacon->front;

	while (cursor) {
		g_ptr_array_add(snapshot, cursor);
		cursor = cursor->next;
	}
	return snapshot;
}

/* Destroys the elections of a queue whose `last_status` is older than
 * `oio_election_delay_expire_NONE` and whose reference count is exactly 1.
 *
 * The caller (election_manager_play_exits) holds the manager's lock.
 *
 * @param M the manager owning the elections
 * @param beacon the queue to purge
 * @return the number of elections destroyed
 */
static guint
_play_exit_on_state(struct election_manager_s *M, struct deque_beacon_s *beacon)
{
	if (!beacon->front)
		return 0;

	const gint64 now = oio_ext_monotonic_time();
	guint purged = 0;

	/* Work on a snapshot, since the queue is altered along the way */
	GPtrArray *snapshot = _DEQUE_extract (beacon);
	for (guint idx = 0; idx < snapshot->len; idx++) {
		struct election_member_s *m = snapshot->pdata[idx];

		/* Only consider the elections idle for longer than acceptable,
		 * and with a single reference left. */
		if (_is_over(now, m->last_status, oio_election_delay_expire_NONE)
				&& m->refcount == 1) {
			purged ++;
			_DEQUE_remove (m);
			g_tree_remove (M->members_by_key, m->key);
			member_unref (m);
			member_destroy (m);
		}
	}
	g_ptr_array_free (snapshot, TRUE);

	return purged;
}

guint
election_manager_play_exits (struct election_manager_s *manager)
{
	guint count = 0;
	struct deque_beacon_s *beacon = manager->members_by_state + STEP_NONE;
	if (beacon->front) {
		_manager_lock(manager);
		count += _play_exit_on_state(manager, beacon);
		_manager_unlock(manager);
	}
	return count;
}

/* Sends EVT_NONE to the elections at the front of a queue, as long as
 * their timer has fired.
 *
 * The lock of the manager is taken and released around each election.
 * The loop ends as soon as the process is stopping, the queue is empty,
 * or the election at the front is not due yet.
 *
 * @param M the manager owning the queue
 * @param beacon the queue to walk
 * @return the number of events sent
 */
static guint
_send_NONE_to_step(struct election_manager_s *M, struct deque_beacon_s *beacon)
{
	guint fired = 0;
	gboolean stop = FALSE;

	while (grid_main_is_running() && beacon->front && !stop) {
		_manager_lock(M);

		/* The queue may have been emptied before we got the lock */
		struct election_member_s *head = beacon->front;

		/* A null deadline means the FSM is not waiting for a timer.
		 * !!! This is abnormal, the current timer function should not be
		 * called on such a queue. !!! */
		const gint64 deadline = head ? _member_next_timeout(head) : 0;

		if (!head || !deadline || oio_ext_monotonic_time() < deadline) {
			/* Nothing (more) to fire */
			stop = TRUE;
		} else {
			transition (head, EVT_NONE, NULL);
			fired ++;
		}

		_manager_unlock(M);
	}

	return fired;
}

/* Same as _send_NONE_to_step(), but the queue is designated by the step
 * it is dedicated to.
 *
 * @param M the manager owning the queues
 * @param step the step whose queue must be walked
 * @return the number of events sent
 */
static inline guint
_send_NONE_to_step2 (struct election_manager_s *M, enum election_step_e step)
{
	struct deque_beacon_s *queue = &M->members_by_state[step];
	return _send_NONE_to_step(M, queue);
}

/* Sends EVT_NONE to the elections in STEP_FAILED whose timer has fired.
 * Returns the number of events sent. */
guint
election_manager_play_timers_FAILED (struct election_manager_s *M)
{
	return _send_NONE_to_step2(M, STEP_FAILED);
}

/* Sends EVT_NONE to the elections in STEP_DELAYED_CHECKING_MASTER whose
 * timer has fired. Returns the number of events sent. */
guint
election_manager_play_timers_DELAYED_MASTER (struct election_manager_s *M)
{
	return _send_NONE_to_step2(M, STEP_DELAYED_CHECKING_MASTER);
}

/* Sends EVT_NONE to the elections in STEP_DELAYED_CHECKING_SLAVES whose
 * timer has fired. Returns the number of events sent. */
guint
election_manager_play_timers_DELAYED_SLAVE (struct election_manager_s *M)
{
	return _send_NONE_to_step2(M, STEP_DELAYED_CHECKING_SLAVES);
}

/* Sends EVT_NONE to the elections in STEP_MASTER whose timer has fired.
 * Returns the number of events sent. */
guint
election_manager_play_timers_MASTER (struct election_manager_s *M)
{
	return _send_NONE_to_step2(M, STEP_MASTER);
}

/* Sends EVT_NONE to the elections in STEP_SLAVE whose timer has fired.
 * Returns the number of events sent. */
guint
election_manager_play_timers_SLAVE (struct election_manager_s *M)
{
	return _send_NONE_to_step2(M, STEP_SLAVE);
}

/* Asks some MASTER elections to leave their position and to re-join
 * immediately after, when this process holds too many masters.
 *
 * The rebalancing is only triggered when the number of local masters
 * exceeds (nb_slave / ratio) plus a fixed bias of 64. The whole operation
 * is performed with the manager locked.
 *
 * @param M the manager owning the elections
 * @param ratio the divisor applied to the number of SLAVE elections to get
 *        the ideal number of MASTER elections; 0 disables the rebalancing
 * @param max the upper bound on the number of elections asked to leave
 * @param inactivity unused
 * @return the number of elections that have been asked to leave
 */
guint
election_manager_balance_masters(struct election_manager_s *M,
		guint ratio, guint max, gint64 inactivity UNUSED)
{
	/* A null ratio would cause a division by zero below */
	if (ratio == 0)
		return 0;

	guint count = 0;

	_manager_lock(M);

	const guint bias = 64;
	const guint nb_master = M->members_by_state[STEP_MASTER].count;
	const guint nb_slave = M->members_by_state[STEP_SLAVE].count;
	const guint ideal = nb_slave / ratio;

	if (nb_master > 0 && nb_master > ideal + bias) {
		max = MIN(max, nb_master);
		/* NOTE(review): the cap is `ideal`, not the excess of masters
		 * (nb_master - ideal), so nothing is rebalanced when `ideal` is 0.
		 * Confirm this is intended. */
		max = MIN(max, ideal);
		struct election_member_s *current = M->members_by_state[STEP_MASTER].front;
		while (max-- > 0 && current) {
			/* The transition may unlink `current` from the MASTER queue,
			 * so its successor is saved first. */
			struct election_member_s *next = current->next;
			/* Tell the first base to leave its MASTER position but to re-join
			 * immediately after. */
			current->requested_USE = 1;
			transition(current, EVT_LEAVE_REQ, NULL);
			count ++;
			current = next;
		}
	}
	_manager_unlock(M);

	return count;
}

gboolean
election_manager_configured(const struct election_manager_s *m)
{
	return m != NULL
		&& m->sync_tab != NULL
		&& m->peering != NULL
		&& (ELECTION_MODE_NONE != election_manager_get_mode (m));
}

