// blob: e9349c4ea8be20dc0a8321904a141a90fd47c419 [file] [log] [blame]
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.index.change;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.util.concurrent.Futures.successfulAsList;
import static com.google.common.util.concurrent.Futures.transform;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static com.google.gerrit.server.git.QueueProvider.QueueType.BATCH;
import com.google.common.base.Stopwatch;
import com.google.common.flogger.FluentLogger;
import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.google.gerrit.entities.Change;
import com.google.gerrit.entities.Project;
import com.google.gerrit.entities.RefNames;
import com.google.gerrit.index.SiteIndexer;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.MultiProgressMonitor;
import com.google.gerrit.server.git.MultiProgressMonitor.Task;
import com.google.gerrit.server.index.IndexExecutor;
import com.google.gerrit.server.index.OnlineReindexMode;
import com.google.gerrit.server.notedb.ChangeNotes;
import com.google.gerrit.server.notedb.ChangeNotes.Factory.ChangeNotesResult;
import com.google.gerrit.server.project.ProjectCache;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.inject.Inject;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.eclipse.jgit.errors.RepositoryNotFoundException;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.TextProgressMonitor;
/**
* Implementation that can index all changes on a host or within a project. Used by Gerrit's
* initialization and upgrade programs as well as by REST API endpoints that offer this
* functionality.
*/
public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, ChangeIndex> {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
// Divisor used by indexAll(ChangeIndex) to decide into how many slices a project is split:
// slices = 1 + estimatedChanges / PROJECT_SLICE_MAX_REFS.
private static final int PROJECT_SLICE_MAX_REFS = 1000;
// Creates the ChangeData instance handed to the indexer for every scanned change.
private final ChangeData.Factory changeDataFactory;
// Opens repositories, both for estimating project sizes and for scanning changes.
private final GitRepositoryManager repoManager;
// Runs the per-slice indexing tasks; also passed to every ChangeIndexer created by this class.
private final ListeningExecutorService executor;
// Creates the ChangeIndexer used by each submitted slice task.
private final ChangeIndexer.Factory indexerFactory;
// Scans a repository for the change notes of the changes to index.
private final ChangeNotes.Factory notesFactory;
// Source of the project names that indexAll(ChangeIndex) iterates over.
private final ProjectCache projectCache;
/**
 * Creates the indexer. All collaborators are supplied by injection and stored unchanged.
 *
 * @param changeDataFactory creates the {@link ChangeData} passed to the indexer for each change
 * @param repoManager opens the repositories of the projects to index
 * @param executor batch index executor on which the indexing tasks are run
 * @param indexerFactory creates the {@link ChangeIndexer} used by each indexing task
 * @param notesFactory scans repositories for change notes
 * @param projectCache provides the names of all projects
 */
@Inject
AllChangesIndexer(
    ChangeData.Factory changeDataFactory,
    GitRepositoryManager repoManager,
    @IndexExecutor(BATCH) ListeningExecutorService executor,
    ChangeIndexer.Factory indexerFactory,
    ChangeNotes.Factory notesFactory,
    ProjectCache projectCache) {
  // Project enumeration and repository access.
  this.projectCache = projectCache;
  this.repoManager = repoManager;
  this.notesFactory = notesFactory;

  // Indexing machinery.
  this.executor = executor;
  this.indexerFactory = indexerFactory;
  this.changeDataFactory = changeDataFactory;
}
/**
 * Immutable description of one unit of indexing work: slice number {@code slice} out of
 * {@code slices} slices of the project {@code name}.
 */
private static class ProjectSlice {
  private final Project.NameKey name;
  private final int slice;
  private final int slices;

  /**
   * @param projectName project the slice belongs to
   * @param sliceIndex number of this slice; callers in this file use {@code 0..sliceCount-1}
   * @param sliceCount total number of slices the project was split into
   */
  ProjectSlice(Project.NameKey projectName, int sliceIndex, int sliceCount) {
    name = projectName;
    slice = sliceIndex;
    slices = sliceCount;
  }

  /** Returns the total number of slices of the project. */
  public int getSlices() {
    return slices;
  }

  /** Returns the number of this slice. */
  public int getSlice() {
    return slice;
  }

  /** Returns the project this slice belongs to. */
  public Project.NameKey getName() {
    return name;
  }
}
/**
 * Indexes the changes of all projects returned by the project cache.
 *
 * <p>First every project is opened to estimate its number of changes. Based on that estimate the
 * project is split into one or more slices, the sum of the estimates is recorded as total work,
 * and the shuffled slices are handed to {@link #indexAll(ChangeIndex, List)}.
 *
 * @param index the index to populate
 * @return the result of the indexing run; if more than half of the projects cannot be opened,
 *     collection is aborted and an unsuccessful result with zero done and zero failed changes is
 *     returned
 */
@Override
public Result indexAll(ChangeIndex index) {
  ProgressMonitor pm = new TextProgressMonitor();
  pm.beginTask("Collecting projects", ProgressMonitor.UNKNOWN);
  List<ProjectSlice> projectSlices = new ArrayList<>();
  int changeCount = 0;
  Stopwatch sw = Stopwatch.createStarted();
  int projectsFailed = 0;
  // Take a single snapshot of the project list, so that the abort threshold below is computed
  // from the very same set of projects that is iterated, and the cache is queried only once.
  List<Project.NameKey> projects = new ArrayList<>(projectCache.all());
  int maxTolerableFailures = projects.size() / 2;
  for (Project.NameKey name : projects) {
    try (Repository repo = repoManager.openRepository(name)) {
      // The simplest approach to distribute indexing would be to let each thread grab a project
      // and index it fully. But if a site has one big project and 100s of small projects, then
      // in the beginning all CPUs would be busy reindexing projects. But soon enough all small
      // projects have been reindexed, and only the thread that reindexes the big project is
      // still working. The other threads would idle. Reindexing the big project on a single
      // thread becomes the critical path. Bringing in more CPUs would not speed up things.
      //
      // To avoid such situations, we split big repos into smaller parts and let
      // the thread pool index these smaller parts. This splitting introduces an overhead in the
      // workload setup and there might be additional slow-downs from multiple threads
      // concurrently working on different parts of the same project. But for Wikimedia's Gerrit,
      // which had 2 big projects, many middle sized ones, and lots of smaller ones, the
      // splitting of repos into smaller parts reduced indexing time from 1.5 hours to 55 minutes
      // in 2020.
      int size = estimateSize(repo);
      changeCount += size;
      int slices = 1 + size / PROJECT_SLICE_MAX_REFS;
      if (slices > 1) {
        verboseWriter.println("Submitting " + name + " for indexing in " + slices + " slices");
      }
      for (int slice = 0; slice < slices; slice++) {
        projectSlices.add(new ProjectSlice(name, slice, slices));
      }
    } catch (IOException e) {
      logger.atSevere().withCause(e).log("Error collecting project %s", name);
      projectsFailed++;
      if (projectsFailed > maxTolerableFailures) {
        logger.atSevere().log("Over 50%% of the projects could not be collected: aborted");
        // Close the progress task that was begun above before bailing out.
        pm.endTask();
        return Result.create(sw, false, 0, 0);
      }
    }
    // Progress is advanced for every project, including the ones that failed to open.
    pm.update(1);
  }
  pm.endTask();
  setTotalWork(changeCount);
  // projectSlices are currently grouped by projects. First all slices for project1, followed
  // by all slices for project2, and so on. As workers pick tasks sequentially, multiple threads
  // would typically work concurrently on different slices of the same project. While this is not
  // a big issue, shuffling the list beforehand helps with ungrouping the project slices, so
  // different slices are less likely to be worked on concurrently.
  // This shuffling gave a 6% runtime reduction for Wikimedia's Gerrit in 2020.
  Collections.shuffle(projectSlices);
  return indexAll(index, projectSlices);
}
/**
 * Estimates the number of changes in a repository by counting the distinct change IDs that can
 * be parsed from the names of the refs below {@link RefNames#REFS_CHANGES}.
 *
 * <p>The number is only an estimate: patch set refs may exist for changes whose metadata was
 * never successfully stored. That is acceptable, because callers in this class only use it to
 * decide how many slices a project is split into and as the total amount of expected work.
 *
 * @param repo repository to inspect
 * @return the number of distinct change IDs, capped at {@link Integer#MAX_VALUE}
 * @throws IOException if the refs cannot be read
 */
private int estimateSize(Repository repo) throws IOException {
  return Ints.saturatedCast(
      repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES).stream()
          .map(ref -> Change.Id.fromRef(ref.getName()))
          // Refs from which no change ID could be parsed are ignored.
          .filter(changeId -> changeId != null)
          .distinct()
          .count());
}
/**
 * Submits one indexing task per project slice to the executor, waits until all of them have
 * completed and summarizes the outcome.
 *
 * @param index the index to populate
 * @param projectSlices the slices to index, in submission order
 * @return a result carrying the elapsed time, the success flag and the numbers of indexed and
 *     failed changes; the run is reported as unsuccessful if waiting for the tasks failed, if
 *     the error listener cleared the flag, or if more than 10% of the changes failed
 */
private SiteIndexer.Result indexAll(ChangeIndex index, List<ProjectSlice> projectSlices) {
  Stopwatch sw = Stopwatch.createStarted();
  MultiProgressMonitor mpm = new MultiProgressMonitor(progressOut, "Reindexing changes");
  Task projTask = mpm.beginSubTask("project-slices", projectSlices.size());
  checkState(totalWork >= 0);
  Task doneTask = mpm.beginSubTask(null, totalWork);
  Task failedTask = mpm.beginSubTask("failed", MultiProgressMonitor.UNKNOWN);

  AtomicBoolean ok = new AtomicBoolean(true);
  List<ListenableFuture<?>> futures = new ArrayList<>(projectSlices.size());
  for (ProjectSlice work : projectSlices) {
    // Every slice gets its own ChangeIndexer instance.
    Callable<Void> sliceTask =
        reindexProject(
            indexerFactory.create(executor, index),
            work.getName(),
            work.getSlice(),
            work.getSlices(),
            doneTask,
            failedTask);
    ListenableFuture<?> pending = executor.submit(sliceTask);
    addErrorListener(
        pending,
        "project " + work.getName() + " (" + work.getSlice() + "/" + work.getSlices() + ")",
        projTask,
        ok);
    futures.add(pending);
  }

  try {
    // successfulAsList completes once every slice task has completed, whether it succeeded or
    // not; only then is the progress monitor ended.
    mpm.waitFor(
        transform(
            successfulAsList(futures),
            ignored -> {
              mpm.end();
              return null;
            },
            directExecutor()));
  } catch (UncheckedExecutionException e) {
    logger.atSevere().withCause(e).log("Error in batch indexer");
    ok.set(false);
  }

  // If too many changes failed, maybe there was a bug in the indexer. Don't trust the results.
  // When nothing was counted at all, nTotal is 0 and pctFailed is NaN, which does not compare
  // greater than the threshold, so an empty run is not flagged here.
  int nFailed = failedTask.getCount();
  int nDone = doneTask.getCount();
  int nTotal = nFailed + nDone;
  double pctFailed = ((double) nFailed) / nTotal * 100;
  boolean tooManyFailures = pctFailed > 10;
  if (tooManyFailures) {
    logger.atSevere().log(
        "Failed %s/%s changes (%s%%); not marking new index as ready",
        nFailed, nTotal, Math.round(pctFailed));
    ok.set(false);
  }
  return Result.create(sw, ok.get(), nDone, nFailed);
}
/**
 * Creates a task that indexes all changes of a project as one single slice.
 *
 * @param indexer indexer used to index each change
 * @param project project whose changes are indexed
 * @param done progress task that is advanced for every indexed change
 * @param failed progress task that is advanced for every change that could not be indexed
 * @return the task; nothing is indexed until it is called
 */
public Callable<Void> reindexProject(
    ChangeIndexer indexer, Project.NameKey project, Task done, Task failed) {
  // Slice 0 of 1 covers the whole project.
  final int onlySlice = 0;
  final int sliceCount = 1;
  return reindexProject(indexer, project, onlySlice, sliceCount, done, failed);
}
/**
 * Creates a task that indexes one slice of a project. A change belongs to the slice when its
 * numeric ID modulo {@code slices} equals {@code slice}.
 *
 * @param indexer indexer used to index each change
 * @param project project whose changes are indexed
 * @param slice number of the slice to index
 * @param slices total number of slices; used as the modulo divisor, so it must not be zero
 * @param done progress task that is advanced for every indexed change
 * @param failed progress task that is advanced for every change that could not be indexed
 * @return the task; nothing is indexed until it is called
 */
public Callable<Void> reindexProject(
    ChangeIndexer indexer,
    Project.NameKey project,
    int slice,
    int slices,
    Task done,
    Task failed) {
  Callable<Void> sliceTask = new ProjectIndexer(indexer, project, slice, slices, done, failed);
  return sliceTask;
}
private class ProjectIndexer implements Callable<Void> {
private final ChangeIndexer indexer;
private final Project.NameKey project;
private final int slice;
private final int slices;
private final ProgressMonitor done;
private final ProgressMonitor failed;
private ProjectIndexer(
ChangeIndexer indexer,
Project.NameKey project,
int slice,
int slices,
ProgressMonitor done,
ProgressMonitor failed) {
this.indexer = indexer;
this.project = project;
this.slice = slice;
this.slices = slices;
this.done = done;
this.failed = failed;
}
@Override
public Void call() throws Exception {
try (Repository repo = repoManager.openRepository(project)) {
OnlineReindexMode.begin();
// Order of scanning changes is undefined. This is ok if we assume that packfile locality is
// not important for indexing, since sites should have a fully populated DiffSummary cache.
// It does mean that reindexing after invalidating the DiffSummary cache will be expensive,
// but the goal is to invalidate that cache as infrequently as we possibly can. And besides,
// we don't have concrete proof that improving packfile locality would help.
notesFactory.scan(repo, project, id -> (id.get() % slices) == slice).forEach(r -> index(r));
} catch (RepositoryNotFoundException rnfe) {
logger.atSevere().log(rnfe.getMessage());
} finally {
OnlineReindexMode.end();
}
return null;
}
private void index(ChangeNotesResult r) {
if (r.error().isPresent()) {
fail("Failed to read change " + r.id() + " for indexing", true, r.error().get());
return;
}
try {
indexer.index(changeDataFactory.create(r.notes()));
done.update(1);
verboseWriter.format(
"Reindexed change %d (project: %s)\n", r.id().get(), r.notes().getProjectName().get());
} catch (RejectedExecutionException e) {
// Server shutdown, don't spam the logs.
failSilently();
} catch (Exception e) {
fail("Failed to index change " + r.id(), true, e);
}
}
private void fail(String error, boolean failed, Exception e) {
if (failed) {
this.failed.update(1);
}
logger.atWarning().withCause(e).log(error);
verboseWriter.println(error);
}
private void failSilently() {
this.failed.update(1);
}
@Override
public String toString() {
return "Index all changes of project " + project.get();
}
}
}