From: Alexandre DERUMIER <aderumier@odiso.com>
To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown
Date: Fri, 25 Sep 2020 18:29:37 +0200 (CEST)
Message-ID: <2049133658.1264587.1601051377789.JavaMail.zimbra@odiso.com>
In-Reply-To: <1601037918.lwca57m6tz.astroid@nora.none>
Here is a new hang:
http://odisoweb1.odiso.net/test4/
This time on corosync start.
node1:
-----
start corosync: 17:22:02

node2:
-----
/etc/pve locked: 17:22:07
Something new: when doing a coredump or bt-full on pmxcfs on node1,
this unlocked /etc/pve on the other nodes.
/etc/pve unlocked (with coredump or bt-full): 17:57:40
----- Original Message -----
From: "Fabian Grünbichler" <f.gruenbichler@proxmox.com>
To: "Proxmox VE development discussion" <pve-devel@lists.proxmox.com>
Sent: Friday, 25 September 2020 14:51:30
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown
On September 25, 2020 11:46 am, Alexandre DERUMIER wrote:
>>>I will rebuild once more modifying the send code a bit to log a lot more
>>>details when sending state messages, it would be great if you could
>>>repeat with that as we are still unable to reproduce the issue.
>
> ok, no problem, I'm able to reproduce it easily, I'll do a new test when you
> send the new version.
>
> (and thanks again for debugging this, because it's really beyond my competence)
same procedure as last time, same place, new checksums:
6b5e2defe543a874e0f7a883e40b279c997438dde158566c4c93c11ea531aef924ed2e4eb2506b5064e49ec9bdd4ebe7acd0b9278e9286eac527b0b15a43d8d7 pve-cluster_6.1-8_amd64.deb
d57ddc08824055826ee15c9c255690d9140e43f8d5164949108f0dc483a2d181b2bda76f0e7f47202699a062342c0cf0bba8f2ae0f7c5411af9967ef051050a0 pve-cluster-dbgsym_6.1-8_amd64.deb
I found one (unfortunately unrelated) bug in our error handling, so
there's that at least ;)
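
(That unrelated bug is the SYNC_START send check in the last hunk of the diff below:
dfsm_send_state_message_full() returns a corosync cs_error_t, and CS_OK is not 0, so
negating the return value never flags a failed send. A minimal standalone sketch of the
wrong versus the corrected check — not the pmxcfs code itself, just assuming cs_error_t
values as defined in corosync's corotypes.h:

#include <stdio.h>

/* cs_error_t values mirroring corosync's corotypes.h -- note CS_OK is 1, not 0 */
typedef enum {
    CS_OK            = 1,
    CS_ERR_LIBRARY   = 2,
    CS_ERR_TRY_AGAIN = 6,
} cs_error_t;

/* hypothetical stand-in for dfsm_send_state_message_full(), forced to fail */
static cs_error_t send_sync_start(void)
{
    return CS_ERR_TRY_AGAIN;
}

int main(void)
{
    /* old check: !result is only true for 0, which no cs_error_t value ever is,
     * so a failed send was silently ignored */
    if (!send_sync_start())
        printf("old check: failure detected\n");
    else
        printf("old check: failure NOT detected\n");

    /* corrected check, as in the patch: compare explicitly against CS_OK */
    if (send_sync_start() != CS_OK)
        printf("new check: failure detected\n");

    return 0;
}

The hunk at the end of the diff simply swaps the negation for an explicit != CS_OK
comparison.)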
diff --git a/data/src/dfsm.c b/data/src/dfsm.c
index 529c7f9..f3397a0 100644
--- a/data/src/dfsm.c
+++ b/data/src/dfsm.c
@@ -162,8 +162,8 @@ static void
dfsm_send_sync_message_abort(dfsm_t *dfsm)
{
g_return_if_fail(dfsm != NULL);
-
g_mutex_lock (&dfsm->sync_mutex);
+ cfs_dom_debug(dfsm->log_domain, "dfsm_send_sync_message_abort - %" PRIu64" / %" PRIu64, dfsm->msgcount_rcvd, dfsm->msgcount);
dfsm->msgcount_rcvd = dfsm->msgcount;
g_cond_broadcast (&dfsm->sync_cond);
g_mutex_unlock (&dfsm->sync_mutex);
@@ -181,6 +181,7 @@ dfsm_record_local_result(
g_mutex_lock (&dfsm->sync_mutex);
dfsm_result_t *rp = (dfsm_result_t *)g_hash_table_lookup(dfsm->results, &msg_count);
+ cfs_dom_debug(dfsm->log_domain, "dfsm_record_local_result - %" PRIu64": %d", msg_count, msg_result);
if (rp) {
rp->result = msg_result;
rp->processed = processed;
@@ -224,6 +225,48 @@ loop:
return result;
}
+static cs_error_t
+dfsm_send_message_full_debug_state(
+ dfsm_t *dfsm,
+ struct iovec *iov,
+ unsigned int len,
+ int retry)
+{
+ g_return_val_if_fail(dfsm != NULL, CS_ERR_INVALID_PARAM);
+ g_return_val_if_fail(!len || iov != NULL, CS_ERR_INVALID_PARAM);
+
+ struct timespec tvreq = { .tv_sec = 0, .tv_nsec = 100000000 };
+ cs_error_t result;
+ int retries = 0;
+ cfs_dom_message(dfsm->log_domain, "send state message debug");
+ cfs_dom_message(dfsm->log_domain, "cpg_handle %" PRIu64, dfsm->cpg_handle);
+ for (int i = 0; i < len; i++)
+ cfs_dom_message(dfsm->log_domain, "iov[%d] len %zd", i, iov[i].iov_len);
+loop:
+ cfs_dom_message(dfsm->log_domain, "send state message loop body");
+
+ result = cpg_mcast_joined(dfsm->cpg_handle, CPG_TYPE_AGREED, iov, len);
+
+ cfs_dom_message(dfsm->log_domain, "send state message result: %d", result);
+ if (retry && result == CS_ERR_TRY_AGAIN) {
+ nanosleep(&tvreq, NULL);
+ ++retries;
+ if ((retries % 10) == 0)
+ cfs_dom_message(dfsm->log_domain, "cpg_send_message retry %d", retries);
+ if (retries < 100)
+ goto loop;
+ }
+
+ if (retries)
+ cfs_dom_message(dfsm->log_domain, "cpg_send_message retried %d times", retries);
+
+ if (result != CS_OK &&
+ (!retry || result != CS_ERR_TRY_AGAIN))
+ cfs_dom_critical(dfsm->log_domain, "cpg_send_message failed: %d", result);
+
+ return result;
+}
+
static cs_error_t
dfsm_send_state_message_full(
dfsm_t *dfsm,
@@ -235,6 +278,8 @@ dfsm_send_state_message_full(
g_return_val_if_fail(DFSM_VALID_STATE_MESSAGE(type), CS_ERR_INVALID_PARAM);
g_return_val_if_fail(!len || iov != NULL, CS_ERR_INVALID_PARAM);
+ cfs_dom_debug(dfsm->log_domain, "dfsm_send_state_message_full: type %d len %d", type, len);
+
dfsm_message_state_header_t header;
header.base.type = type;
header.base.subtype = 0;
@@ -252,7 +297,7 @@ dfsm_send_state_message_full(
for (int i = 0; i < len; i++)
real_iov[i + 1] = iov[i];
- return dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
+ return type == DFSM_MESSAGE_STATE ? dfsm_send_message_full_debug_state(dfsm, real_iov, len + 1, 1) : dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
}
cs_error_t
@@ -317,6 +362,7 @@ dfsm_send_message_sync(
for (int i = 0; i < len; i++)
real_iov[i + 1] = iov[i];
+ cfs_dom_debug(dfsm->log_domain, "dfsm_send_messag_sync: type NORMAL, count %" PRIu64 ", msgtype %d, len %d", msgcount, msgtype, len);
cs_error_t result = dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
g_mutex_unlock (&dfsm->sync_mutex);
@@ -335,10 +381,12 @@ dfsm_send_message_sync(
if (rp) {
g_mutex_lock (&dfsm->sync_mutex);
- while (dfsm->msgcount_rcvd < msgcount)
+ while (dfsm->msgcount_rcvd < msgcount) {
+ cfs_dom_debug(dfsm->log_domain, "dfsm_send_message_sync: waiting for received messages %" PRIu64 " / %" PRIu64, dfsm->msgcount_rcvd, msgcount);
g_cond_wait (&dfsm->sync_cond, &dfsm->sync_mutex);
+ }
+ cfs_dom_debug(dfsm->log_domain, "dfsm_send_message_sync: done waiting for received messages!");
-
g_hash_table_remove(dfsm->results, &rp->msgcount);
g_mutex_unlock (&dfsm->sync_mutex);
@@ -685,9 +733,13 @@ dfsm_cpg_deliver_callback(
return;
}
+ cfs_dom_debug(dfsm->log_domain, "received message's header type is %d", base_header->type);
+
if (base_header->type == DFSM_MESSAGE_NORMAL) {
dfsm_message_normal_header_t *header = (dfsm_message_normal_header_t *)msg;
+ cfs_dom_debug(dfsm->log_domain, "received normal message (type = %d, subtype = %d, %zd bytes)",
+ base_header->type, base_header->subtype, msg_len);
if (msg_len < sizeof(dfsm_message_normal_header_t)) {
cfs_dom_critical(dfsm->log_domain, "received short message (type = %d, subtype = %d, %zd bytes)",
@@ -704,6 +756,8 @@ dfsm_cpg_deliver_callback(
} else {
int msg_res = -1;
+ cfs_dom_debug(dfsm->log_domain, "deliver message %" PRIu64 " (subtype = %d, length = %zd)",
+ header->count, base_header->subtype, msg_len);
int res = dfsm->dfsm_callbacks->dfsm_deliver_fn(
dfsm, dfsm->data, &msg_res, nodeid, pid, base_header->subtype,
base_header->time, (uint8_t *)msg + sizeof(dfsm_message_normal_header_t),
@@ -724,6 +778,8 @@ dfsm_cpg_deliver_callback(
*/
dfsm_message_state_header_t *header = (dfsm_message_state_header_t *)msg;
+ cfs_dom_debug(dfsm->log_domain, "received state message (type = %d, subtype = %d, %zd bytes), mode is %d",
+ base_header->type, base_header->subtype, msg_len, mode);
if (msg_len < sizeof(dfsm_message_state_header_t)) {
cfs_dom_critical(dfsm->log_domain, "received short state message (type = %d, subtype = %d, %zd bytes)",
@@ -744,6 +800,7 @@ dfsm_cpg_deliver_callback(
if (mode == DFSM_MODE_SYNCED) {
if (base_header->type == DFSM_MESSAGE_UPDATE_COMPLETE) {
+ cfs_dom_debug(dfsm->log_domain, "received update complete message");
for (int i = 0; i < dfsm->sync_info->node_count; i++)
dfsm->sync_info->nodes[i].synced = 1;
@@ -754,6 +811,7 @@ dfsm_cpg_deliver_callback(
return;
} else if (base_header->type == DFSM_MESSAGE_VERIFY_REQUEST) {
+ cfs_dom_debug(dfsm->log_domain, "received verify request message");
if (msg_len != sizeof(dfsm->csum_counter)) {
cfs_dom_critical(dfsm->log_domain, "cpg received verify request with wrong length (%zd bytes) form node %d/%d", msg_len, nodeid, pid);
@@ -823,7 +881,6 @@ dfsm_cpg_deliver_callback(
} else if (mode == DFSM_MODE_START_SYNC) {
if (base_header->type == DFSM_MESSAGE_SYNC_START) {
-
if (nodeid != dfsm->lowest_nodeid) {
cfs_dom_critical(dfsm->log_domain, "ignore sync request from wrong member %d/%d",
nodeid, pid);
@@ -861,6 +918,7 @@ dfsm_cpg_deliver_callback(
return;
} else if (base_header->type == DFSM_MESSAGE_STATE) {
+ cfs_dom_debug(dfsm->log_domain, "received state message for %d/%d", nodeid, pid);
dfsm_node_info_t *ni;
@@ -906,6 +964,8 @@ dfsm_cpg_deliver_callback(
goto leave;
}
+ } else {
+ cfs_dom_debug(dfsm->log_domain, "haven't received all states, waiting for more");
}
return;
@@ -914,6 +974,7 @@ dfsm_cpg_deliver_callback(
} else if (mode == DFSM_MODE_UPDATE) {
if (base_header->type == DFSM_MESSAGE_UPDATE) {
+ cfs_dom_debug(dfsm->log_domain, "received update message");
int res = dfsm->dfsm_callbacks->dfsm_process_update_fn(
dfsm, dfsm->data, dfsm->sync_info, nodeid, pid, msg, msg_len);
@@ -925,6 +986,7 @@ dfsm_cpg_deliver_callback(
} else if (base_header->type == DFSM_MESSAGE_UPDATE_COMPLETE) {
+ cfs_dom_debug(dfsm->log_domain, "received update complete message");
int res = dfsm->dfsm_callbacks->dfsm_commit_fn(dfsm, dfsm->data, dfsm->sync_info);
@@ -1104,6 +1166,7 @@ dfsm_cpg_confchg_callback(
size_t joined_list_entries)
{
cs_error_t result;
+ cfs_debug("dfsm_cpg_confchg_callback called");
dfsm_t *dfsm = NULL;
result = cpg_context_get(handle, (gpointer *)&dfsm);
@@ -1190,7 +1253,7 @@ dfsm_cpg_confchg_callback(
dfsm_set_mode(dfsm, DFSM_MODE_START_SYNC);
if (lowest_nodeid == dfsm->nodeid) {
- if (!dfsm_send_state_message_full(dfsm, DFSM_MESSAGE_SYNC_START, NULL, 0)) {
+ if (dfsm_send_state_message_full(dfsm, DFSM_MESSAGE_SYNC_START, NULL, 0) != CS_OK) {
cfs_dom_critical(dfsm->log_domain, "failed to send SYNC_START message");
goto leave;
}
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel