Merge changes I8a340da1,I3358e14c * changes: BatchUpdate: Add a metric for executeChangeOps latency BatchUpdate: Remove slow update exception logs
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/git/BatchUpdate.java b/gerrit-server/src/main/java/com/google/gerrit/server/git/BatchUpdate.java index 7457da5..8a41ebc 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/git/BatchUpdate.java +++ b/gerrit-server/src/main/java/com/google/gerrit/server/git/BatchUpdate.java
@@ -18,10 +18,9 @@ import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static java.util.Comparator.comparing; -import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; -import static java.util.concurrent.TimeUnit.SECONDS; +import com.google.common.base.Stopwatch; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ListMultimap; @@ -35,6 +34,11 @@ import com.google.gerrit.extensions.restapi.ResourceConflictException; import com.google.gerrit.extensions.restapi.ResourceNotFoundException; import com.google.gerrit.extensions.restapi.RestApiException; +import com.google.gerrit.metrics.Description; +import com.google.gerrit.metrics.Description.Units; +import com.google.gerrit.metrics.Field; +import com.google.gerrit.metrics.MetricMaker; +import com.google.gerrit.metrics.Timer1; import com.google.gerrit.reviewdb.client.Account; import com.google.gerrit.reviewdb.client.Change; import com.google.gerrit.reviewdb.client.PatchSet; @@ -46,8 +50,6 @@ import com.google.gerrit.server.GerritPersonIdent; import com.google.gerrit.server.IdentifiedUser; import com.google.gerrit.server.config.AllUsersName; -import com.google.gerrit.server.config.ConfigUtil; -import com.google.gerrit.server.config.GerritServerConfig; import com.google.gerrit.server.extensions.events.GitReferenceUpdated; import com.google.gerrit.server.index.change.ChangeIndexer; import com.google.gerrit.server.notedb.ChangeNotes; @@ -63,11 +65,12 @@ import com.google.gwtorm.server.OrmConcurrencyException; import com.google.gwtorm.server.OrmException; import com.google.gwtorm.server.SchemaFactory; +import com.google.inject.Inject; +import com.google.inject.Singleton; import com.google.inject.assistedinject.Assisted; import com.google.inject.assistedinject.AssistedInject; import org.eclipse.jgit.lib.BatchRefUpdate; -import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectInserter; import org.eclipse.jgit.lib.ObjectReader; @@ -347,6 +350,22 @@ } } + @Singleton + private static class Metrics { + final Timer1<Boolean> executeChangeOpsLatency; + + @Inject + Metrics(MetricMaker metricMaker) { + executeChangeOpsLatency = metricMaker.newTimer( + "batch_update/execute_change_ops", + new Description( + "BatchUpdate change update latency, excluding reindexing") + .setCumulative() + .setUnit(Units.MILLISECONDS), + Field.ofBoolean("success")); + } + } + private static Order getOrder(Collection<BatchUpdate> updates) { Order o = null; for (BatchUpdate u : updates) { @@ -407,13 +426,15 @@ } listener.afterRefUpdates(); for (BatchUpdate u : updates) { - u.executeChangeOps(updateChangesInParallel, dryrun); + u.reindexChanges( + u.executeChangeOps(updateChangesInParallel, dryrun)); } listener.afterUpdateChanges(); break; case DB_BEFORE_REPO: for (BatchUpdate u : updates) { - u.executeChangeOps(updateChangesInParallel, dryrun); + u.reindexChanges( + u.executeChangeOps(updateChangesInParallel, dryrun)); } listener.afterUpdateChanges(); for (BatchUpdate u : updates) { @@ -482,12 +503,12 @@ private final GitReferenceUpdated gitRefUpdated; private final GitRepositoryManager repoManager; private final ListeningExecutorService changeUpdateExector; + private final Metrics metrics; private final NoteDbUpdateManager.Factory updateManagerFactory; private final NotesMigration notesMigration; private final ReviewDb db; private final SchemaFactory<ReviewDb> schemaFactory; - private final long logThresholdNanos; private final Project.NameKey project; private final CurrentUser user; private final Timestamp when; @@ -512,7 +533,6 @@ @AssistedInject BatchUpdate( - @GerritServerConfig Config cfg, AllUsersName allUsers, ChangeControl.GenericFactory changeControlFactory, ChangeIndexer indexer, @@ -522,6 +542,7 @@ @GerritPersonIdent PersonIdent serverIdent, GitReferenceUpdated gitRefUpdated, GitRepositoryManager repoManager, + Metrics metrics, NoteDbUpdateManager.Factory updateManagerFactory, NotesMigration notesMigration, SchemaFactory<ReviewDb> schemaFactory, @@ -536,15 +557,11 @@ this.changeUpdateFactory = changeUpdateFactory; this.gitRefUpdated = gitRefUpdated; this.indexer = indexer; + this.metrics = metrics; this.notesMigration = notesMigration; this.repoManager = repoManager; this.schemaFactory = schemaFactory; this.updateManagerFactory = updateManagerFactory; - - this.logThresholdNanos = MILLISECONDS.toNanos( - ConfigUtil.getTimeUnit( - cfg, "change", null, "updateDebugLogThreshold", - SECONDS.toMillis(2), MILLISECONDS)); this.db = db; this.project = project; this.user = user; @@ -710,63 +727,71 @@ } } - private void executeChangeOps(boolean parallel, + private List<ChangeTask> executeChangeOps(boolean parallel, boolean dryrun) throws UpdateException, RestApiException { - logDebug("Executing change ops (parallel? {})", parallel); - ListeningExecutorService executor = parallel - ? changeUpdateExector - : MoreExecutors.newDirectExecutorService(); - - List<ChangeTask> tasks = new ArrayList<>(ops.keySet().size()); + List<ChangeTask> tasks; + boolean success = false; + Stopwatch sw = Stopwatch.createStarted(); try { - if (notesMigration.commitChangeWrites() && repo != null) { - // A NoteDb change may have been rebuilt since the repo was originally - // opened, so make sure we see that. - logDebug("Preemptively scanning for repo changes"); - repo.scanForRepoChanges(); - } - if (!ops.isEmpty() && notesMigration.failChangeWrites()) { - // Fail fast before attempting any writes if changes are read-only, as - // this is a programmer error. - logDebug("Failing early due to read-only Changes table"); - throw new OrmException(NoteDbUpdateManager.CHANGES_READ_ONLY); - } - List<ListenableFuture<?>> futures = new ArrayList<>(ops.keySet().size()); - for (Map.Entry<Change.Id, Collection<Op>> e : ops.asMap().entrySet()) { - ChangeTask task = - new ChangeTask(e.getKey(), e.getValue(), Thread.currentThread(), - dryrun); - tasks.add(task); - if (!parallel) { - logDebug("Direct execution of task for ops: {}", ops); - } - futures.add(executor.submit(task)); - } - if (parallel) { - logDebug("Waiting on futures for {} ops spanning {} changes", - ops.size(), ops.keySet().size()); - } - // TODO(dborowitz): Timing is wrong for non-parallel updates. - long startNanos = System.nanoTime(); - Futures.allAsList(futures).get(); - maybeLogSlowUpdate(startNanos, "change"); + logDebug("Executing change ops (parallel? {})", parallel); + ListeningExecutorService executor = parallel + ? changeUpdateExector + : MoreExecutors.newDirectExecutorService(); - if (notesMigration.commitChangeWrites()) { - startNanos = System.nanoTime(); - if (!dryrun) { - executeNoteDbUpdates(tasks); + tasks = new ArrayList<>(ops.keySet().size()); + try { + if (notesMigration.commitChangeWrites() && repo != null) { + // A NoteDb change may have been rebuilt since the repo was originally + // opened, so make sure we see that. + logDebug("Preemptively scanning for repo changes"); + repo.scanForRepoChanges(); } - maybeLogSlowUpdate(startNanos, "NoteDb"); + if (!ops.isEmpty() && notesMigration.failChangeWrites()) { + // Fail fast before attempting any writes if changes are read-only, as + // this is a programmer error. + logDebug("Failing early due to read-only Changes table"); + throw new OrmException(NoteDbUpdateManager.CHANGES_READ_ONLY); + } + List<ListenableFuture<?>> futures = + new ArrayList<>(ops.keySet().size()); + for (Map.Entry<Change.Id, Collection<Op>> e : ops.asMap().entrySet()) { + ChangeTask task = + new ChangeTask(e.getKey(), e.getValue(), Thread.currentThread(), + dryrun); + tasks.add(task); + if (!parallel) { + logDebug("Direct execution of task for ops: {}", ops); + } + futures.add(executor.submit(task)); + } + if (parallel) { + logDebug("Waiting on futures for {} ops spanning {} changes", + ops.size(), ops.keySet().size()); + } + Futures.allAsList(futures).get(); + + if (notesMigration.commitChangeWrites()) { + if (!dryrun) { + executeNoteDbUpdates(tasks); + } + } + success = true; + } catch (ExecutionException | InterruptedException e) { + Throwables.propagateIfInstanceOf(e.getCause(), UpdateException.class); + Throwables.propagateIfInstanceOf(e.getCause(), RestApiException.class); + throw new UpdateException(e); + } catch (OrmException | IOException e) { + throw new UpdateException(e); } - } catch (ExecutionException | InterruptedException e) { - Throwables.propagateIfInstanceOf(e.getCause(), UpdateException.class); - Throwables.propagateIfInstanceOf(e.getCause(), RestApiException.class); - throw new UpdateException(e); - } catch (OrmException | IOException e) { - throw new UpdateException(e); + } finally { + metrics.executeChangeOpsLatency.record( + success, sw.elapsed(NANOSECONDS), NANOSECONDS); } + return tasks; + } + private void reindexChanges(List<ChangeTask> tasks) { // Reindex changes. for (ChangeTask task : tasks) { if (task.deleted) { @@ -777,26 +802,6 @@ } } - private static class SlowUpdateException extends Exception { - private static final long serialVersionUID = 1L; - - private SlowUpdateException(String fmt, Object... args) { - super(String.format(fmt, args)); - } - } - - private void maybeLogSlowUpdate(long startNanos, String desc) { - long elapsedNanos = System.nanoTime() - startNanos; - if (!log.isDebugEnabled() || elapsedNanos <= logThresholdNanos) { - return; - } - // Always log even without RequestId. - log.debug("Slow " + desc + " update", - new SlowUpdateException( - "Slow %s update (%d ms) to %s for %s", - desc, NANOSECONDS.toMillis(elapsedNanos), project, ops.keySet())); - } - private void executeNoteDbUpdates(List<ChangeTask> tasks) { // Aggregate together all NoteDb ref updates from the ops we executed, // possibly in parallel. Each task had its own NoteDbUpdateManager instance