zoekt-mirror-github: add arguments to filter repos on topics criteria

Added `topic` and `exclude_topic` options to the zoekt-mirror-github command.
Each option can be given more than once to build a list of topics.
GitHub repositories are now filtered according to the topics they have.

`topic` option will include only the repositories having at least one topic in the list.
`exclude_topic` will exclude the repositories having at least one topic in the list.
The two options can be combined:
the `exclude_topic` filter is applied to the repositories that passed the `topic` filter.

Change-Id: I8dceb805cd1aa5455a03c7d3cc425795a614f1f0
diff --git a/cmd/zoekt-indexserver/config.go b/cmd/zoekt-indexserver/config.go
index edea3dc..849572b 100644
--- a/cmd/zoekt-indexserver/config.go
+++ b/cmd/zoekt-indexserver/config.go
@@ -45,6 +45,8 @@
 	GitLabURL              string
 	OnlyPublic             bool
 	GerritApiURL           string
+	Topics                 []string
+	ExcludeTopics          []string
 }
 
 func randomize(entries []ConfigEntry) []ConfigEntry {
@@ -179,6 +181,12 @@
 			if c.CredentialPath != "" {
 				cmd.Args = append(cmd.Args, "-token", c.CredentialPath)
 			}
+			for _, topic := range c.Topics {
+				cmd.Args = append(cmd.Args, "-topic", topic)
+			}
+			for _, topic := range c.ExcludeTopics {
+				cmd.Args = append(cmd.Args, "-exclude_topic", topic)
+			}
 		} else if c.GitilesURL != "" {
 			cmd = exec.Command("zoekt-mirror-gitiles",
 				"-dest", repoDir, "-name", c.Name)
diff --git a/cmd/zoekt-mirror-github/main.go b/cmd/zoekt-mirror-github/main.go
index 7a2078b..ba1a0b8 100644
--- a/cmd/zoekt-mirror-github/main.go
+++ b/cmd/zoekt-mirror-github/main.go
@@ -36,6 +36,22 @@
 	"github.com/google/zoekt/gitindex"
 )
 
+type topicsFlag []string
+
+func (f *topicsFlag) String() string {
+	return strings.Join(*f, ",")
+}
+
+func (f *topicsFlag) Set(value string) error {
+	*f = append(*f, value)
+	return nil
+}
+
+type reposFilters struct {
+	topics        []string
+	excludeTopics []string
+}
+
 func main() {
 	dest := flag.String("dest", "", "destination directory")
 	githubURL := flag.String("url", "", "GitHub Enterprise url. If not set github.com will be used as the host.")
@@ -48,6 +64,11 @@
 	deleteRepos := flag.Bool("delete", false, "delete missing repos")
 	namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
 	excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
+	topics := topicsFlag{}
+	flag.Var(&topics, "topic", "only clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
+	excludeTopics := topicsFlag{}
+	flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
+
 	flag.Parse()
 
 	if *dest == "" {
@@ -106,15 +127,19 @@
 		}
 	}
 
+	reposFilters := reposFilters{
+		topics:        topics,
+		excludeTopics: excludeTopics,
+	}
 	var repos []*github.Repository
 	var err error
 	if *org != "" {
-		repos, err = getOrgRepos(client, *org)
+		repos, err = getOrgRepos(client, *org, reposFilters)
 	} else if *user != "" {
-		repos, err = getUserRepos(client, *user)
+		repos, err = getUserRepos(client, *user, reposFilters)
 	} else {
 		log.Printf("no user or org specified, cloning all repos.")
-		repos, err = getUserRepos(client, "")
+		repos, err = getUserRepos(client, "", reposFilters)
 	}
 
 	if err != nil {
@@ -185,7 +210,30 @@
 	return nil
 }
 
-func getOrgRepos(client *github.Client, org string) ([]*github.Repository, error) {
+func hasIntersection(s1, s2 []string) bool {
+	hash := make(map[string]bool)
+	for _, e := range s1 {
+		hash[e] = true
+	}
+	for _, e := range s2 {
+		if hash[e] {
+			return true
+		}
+	}
+	return false
+}
+
+func filterByTopic(repos []*github.Repository, include []string, exclude []string) (filteredRepos []*github.Repository) {
+	for _, repo := range repos {
+		if (len(include) == 0 || hasIntersection(include, repo.Topics)) &&
+			!hasIntersection(exclude, repo.Topics) {
+			filteredRepos = append(filteredRepos, repo)
+		}
+	}
+	return
+}
+
+func getOrgRepos(client *github.Client, org string, reposFilters reposFilters) ([]*github.Repository, error) {
 	var allRepos []*github.Repository
 	opt := &github.RepositoryListByOrgOptions{}
 	for {
@@ -198,6 +246,7 @@
 		}
 
 		opt.Page = resp.NextPage
+		repos = filterByTopic(repos, reposFilters.topics, reposFilters.excludeTopics)
 		allRepos = append(allRepos, repos...)
 		if resp.NextPage == 0 {
 			break
@@ -206,7 +255,7 @@
 	return allRepos, nil
 }
 
-func getUserRepos(client *github.Client, user string) ([]*github.Repository, error) {
+func getUserRepos(client *github.Client, user string, reposFilters reposFilters) ([]*github.Repository, error) {
 	var allRepos []*github.Repository
 	opt := &github.RepositoryListOptions{}
 	for {
@@ -219,6 +268,7 @@
 		}
 
 		opt.Page = resp.NextPage
+		repos = filterByTopic(repos, reposFilters.topics, reposFilters.excludeTopics)
 		allRepos = append(allRepos, repos...)
 		if resp.NextPage == 0 {
 			break