Fix racy rebuilding when the winning writer fails
Consider the following scenario:
1. Thread 1 rebuilds in-memory to state A, and successfully updates
the state in ReviewDb to A.
2. Thread 1 attempts the ref update but fails.
3. Thread 2 rebuilds in-memory and gets A again, which throws
AbortUpdateException because the state in ReviewDb is already A.
Now we are in a state where step (3) repeats indefinitely until there
is a write to the change that triggers a NoteDbChangeState update in
ReviewDb. At that point it will skip the update of the NoteDb ref,
since it's out of date, but the *next* read will go back to step (1)
and have another shot at rebuilding it.
Fix this by checking the actual state of the refs after an
AbortUpdateException, and retrying the ref update if it doesn't match.
This has the slight downside that if N threads are trying to rebuild
the change at once, likely N-1 or N of them will still fail. However,
there's not much we can do in this case: there is no way to tell it to
rebuild only if we think there isn't going to be contention on the
ref. Because we return the staged results and ignore errors coming from
ChangeRebuilder#execute, this shouldn't affect correctness, just
performance for the requests doing the rebuild.
Change-Id: I6243f94d3e99535d1c1abbea0a1b3f075557d93a
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/notedb/ChangeRebuilderImpl.java b/gerrit-server/src/main/java/com/google/gerrit/server/notedb/ChangeRebuilderImpl.java
index 30c6d10..8244628 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/notedb/ChangeRebuilderImpl.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/notedb/ChangeRebuilderImpl.java
@@ -239,25 +239,32 @@
return change;
}
});
- if (!migration.failChangeWrites()) {
- manager.execute();
- } else {
- // Don't even attempt to execute if read-only, it would fail anyway. But
- // do throw an exception to the caller so they know to use the staged
- // results instead of reading from the repo.
- throw new OrmException(NoteDbUpdateManager.CHANGES_READ_ONLY);
- }
- } catch (AbortUpdateException e) {
- // Drop this rebuild; another thread completed it. It's ok to not execute
- // the update in this case, since the object referenced in the Result was
- // flushed to the repo by whatever thread won the race.
} catch (ConflictingUpdateException e) {
// Rethrow as an OrmException so the caller knows to use staged results.
// Strictly speaking they are not completely up to date, but result we
// send to the caller is the same as if this rebuild had executed before
// the other thread.
throw new OrmException(e.getMessage());
+ } catch (AbortUpdateException e) {
+ if (NoteDbChangeState.parse(changeId, newNoteDbState).isUpToDate(
+ manager.getChangeRepo().cmds.getRepoRefCache(),
+ manager.getAllUsersRepo().cmds.getRepoRefCache())) {
+ // If the state in ReviewDb matches NoteDb at this point, it means
+ // another thread successfully completed this rebuild. It's ok to not
+ // execute the update in this case, since the object referenced in the
+ // Result was flushed to the repo by whatever thread won the race.
+ return r;
+ }
+ // If the state doesn't match, that means another thread attempted this
+ // rebuild, but failed. Fall through and try to update the ref again.
}
+ if (migration.failChangeWrites()) {
+ // Don't even attempt to execute if read-only, it would fail anyway. But
+ // do throw an exception to the caller so they know to use the staged
+ // results instead of reading from the repo.
+ throw new OrmException(NoteDbUpdateManager.CHANGES_READ_ONLY);
+ }
+ manager.execute();
return r;
}
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/notedb/NoteDbChangeState.java b/gerrit-server/src/main/java/com/google/gerrit/server/notedb/NoteDbChangeState.java
index d960bda..4a7a781 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/notedb/NoteDbChangeState.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/notedb/NoteDbChangeState.java
@@ -204,6 +204,19 @@
return id.get().equals(draftIds.get(accountId));
}
+ boolean isUpToDate(RefCache changeRepoRefs, RefCache draftsRepoRefs)
+ throws IOException {
+ if (!isChangeUpToDate(changeRepoRefs)) {
+ return false;
+ }
+ for (Account.Id accountId : draftIds.keySet()) {
+ if (!areDraftsUpToDate(draftsRepoRefs, accountId)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
@VisibleForTesting
Change.Id getChangeId() {
return changeId;