Fetch plugin data concurrently to improve performance

Add a -t/--threads option to configure the number of threads used for
fetching data from the Gerrit REST API concurrently. By default, 2
threads are used. With this default I could create the plugin page in
less than a minute.

Using more threads may lead to HTTP 429 (Too Many Requests) errors
when the REST API rate limits are exceeded. Your mileage may vary
depending on the rate limits and on the recent history of requests
you have executed.

Change-Id: Id59e22c75baaeff8cf122094ad91cb6552ee67a6
diff --git a/tools/plugins.py b/tools/plugins.py
index 88cc6b2..501404b 100644
--- a/tools/plugins.py
+++ b/tools/plugins.py
@@ -7,8 +7,10 @@
 import sys
 import time
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from enum import Enum, IntEnum
+from functools import partial
 from operator import attrgetter
 from typing import List
@@ -136,6 +138,14 @@
             help="use anonymous access, i.e. no credentials",
+        parser.add_argument(
+            "-t",
+            "--threads",
+            dest="threads",
+            default=2,
+            type=int,
+            help="number of threads fetching data from Gerrit concurrently",
+        )
         return parser.parse_args()
@@ -180,66 +190,75 @@
                 return "Deprecated"
     def __init__(self):
-        auth = self._authenticate(self._parse_options())
+        self.options = self._parse_options()
+        auth = self._authenticate()
         self.api = GerritRestAPI(url=GERRIT, auth=auth)
         self.plugins = list()
         self.maintainers = defaultdict(list)
-        self._fetch_plugin_data()
+        self._create_plugins()
         self.plugins = sorted(self.plugins, key=attrgetter("state", "empty"))
     def __iter__(self):
         return iter(self.plugins)
-    def _fetch_plugin_data(self):
-        """Fetch plugin data from Gerrit"""
+    def _create_plugin(self, plugin_list: dict, builds, p):
+        """Create a plugin by fetching its data from Gerrit"""
+        name = p[len("plugins/") :]
+        plugin = plugin_list[p]
+        if plugin["state"] == "ACTIVE":
+            state = PluginState.ACTIVE
+            changes = self._get_recent_changes_count(p)
+            branches = self._get_branch_results(plugin["id"], name, builds)
+        else:
+            state = PluginState.READ_ONLY
+            changes = 0
+            branches = [Branch.missing(branch) for branch in BRANCHES]
+        description = (
+            plugin["description"].split("\n")[0].rstrip(r"\.")
+            if "description" in plugin
+            else ""
+        )
+        parent, owner_group_ids = self._get_meta_data(name)
+        maintainers, maintainers_csv = self._get_owner_names(
+            parent, name, owner_group_ids
+        )
+        plugin = Plugin(
+            name=name,
+            parent=parent,
+            state=state,
+            owner_group_ids=owner_group_ids,
+            owner_names=maintainers_csv,
+            empty=self._is_project_empty(p),
+            description=description,
+            all_changes_count=self._get_all_changes_count(p),
+            recent_changes_count=changes,
+            branches=branches,
+        )
+        return plugin, maintainers
+    def _create_plugins(self):
+        """Create plugins by fetching plugin data from Gerrit"""
         plugin_list = self.api.get("/projects/?p=plugins%2f&d")
         builds = requests.get(
-        for p in tqdm(plugin_list):
-            name = p[len("plugins/") :]
-            plugin = plugin_list[p]
-            if plugin["state"] == "ACTIVE":
-                state = PluginState.ACTIVE
-                changes = self._get_recent_changes_count(p)
-                branches = self._get_branch_results(plugin["id"], name, builds)
-            else:
-                state = PluginState.READ_ONLY
-                changes = 0
-                branches = [Branch.missing(branch) for branch in BRANCHES]
-            description = (
-                plugin["description"].split("\n")[0].rstrip(r"\.")
-                if "description" in plugin
-                else ""
+        creator = partial(self._create_plugin, plugin_list, builds)
+        with ThreadPoolExecutor(max_workers=self.options.threads) as executor:
+            results = list(
+                tqdm(executor.map(creator, plugin_list), total=len(plugin_list))
+            for (plugin, maintainers) in results:
+                self.plugins.append(plugin)
+                for m in maintainers:
+                    self.maintainers[m].append(plugin.name)
-            parent, owner_group_ids = self._get_meta_data(name)
-            maintainers, maintainers_csv = self._get_owner_names(
-                parent, name, owner_group_ids
-            )
-            self.plugins.append(
-                Plugin(
-                    name=name,
-                    parent=parent,
-                    state=state,
-                    owner_group_ids=owner_group_ids,
-                    owner_names=maintainers_csv,
-                    empty=self._is_project_empty(p),
-                    description=description,
-                    all_changes_count=self._get_all_changes_count(p),
-                    recent_changes_count=changes,
-                    branches=branches,
-                )
-            )
-            for m in maintainers:
-                self.maintainers[m].append(name)
-    def _authenticate(self, options):
-        if options.netrc:
+    def _authenticate(self):
+        if self.options.netrc:
             return HTTPBasicAuthFromNetrc(url=GERRIT)
-        elif options.anonymous:
+        elif self.options.anonymous:
             return Anonymous()
             return self._authenticate_interactive()