// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This binary fetches all repos of a Gitiles host. It also does double
// duty for other "simple" web hosts, such as cgit.
package main
import (
"flag"
"fmt"
"log"
"net/url"
"os"
"path/filepath"
"github.com/google/zoekt/gitindex"
)
// crawlTarget describes one repository discovered on a crawled host.
type crawlTarget struct {
	// cloneURL is handed to gitindex.CloneRepo as the URL to clone from.
	cloneURL string

	// webURL and webURLType are recorded in the clone's configuration under
	// the "zoekt.web-url" and "zoekt.web-url-type" keys respectively.
	webURL, webURLType string
}
// hostCrawler lists the repositories available on a host.
//
// main calls it with the parsed root URL and the Include method of the
// name filter (presumably a predicate deciding whether a repo name should
// be kept; confirm against gitindex). The returned map is keyed by a name
// that main appends to the host and path to build the full repository name.
type hostCrawler func(root *url.URL, include func(name string) bool) (map[string]*crawlTarget, error)
// main mirrors the repositories listed by a Gitiles or cgit host.
//
// The host URL is taken from the first positional argument and the target
// directory from --dest. Every repository reported by the selected crawler
// is passed to gitindex.CloneRepo, and each non-empty string that call
// returns is printed on its own line. Any failure ends the process through
// log.Fatal / log.Fatalf.
func main() {
	destDir := flag.String("dest", "", "destination directory")
	includeRE := flag.String("name", "", "only clone repos whose name matches the regexp.")
	excludeRE := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
	kind := flag.String("type", "gitiles", "which webserver to crawl. Choices: gitiles, cgit")
	flag.Parse()

	if flag.NArg() < 1 {
		log.Fatal("must provide URL argument.")
	}

	// Dispatch table from the --type value to the function that lists repos.
	crawlers := map[string]hostCrawler{
		"gitiles": getGitilesRepos,
		"cgit":    getCGitRepos,
	}
	crawl, known := crawlers[*kind]
	if !known {
		log.Fatalf("unknown host type %q", *kind)
	}

	root, err := url.Parse(flag.Arg(0))
	if err != nil {
		log.Fatalf("url.Parse(): %v", err)
	}

	if *destDir == "" {
		log.Fatal("must set --dest")
	}

	// Create the per-host directory up front, before any cloning starts.
	hostDir := filepath.Join(*destDir, root.Host, root.Path)
	if err := os.MkdirAll(hostDir, 0o755); err != nil {
		log.Fatal(err)
	}

	filter, err := gitindex.NewFilter(*includeRE, *excludeRE)
	if err != nil {
		log.Fatal(err)
	}

	targets, err := crawl(root, filter.Include)
	if err != nil {
		log.Fatal(err)
	}

	for repoName, tgt := range targets {
		// The URL path stays in the name. For git.savannah.gnu.org that
		// yields an ugly "CGit" component, but a single host may serve
		// several distinct CGit pages, so dropping it could make names
		// collide.
		fullName := filepath.Join(root.Host, root.Path, repoName)

		cloned, err := gitindex.CloneRepo(*destDir, fullName, tgt.cloneURL, map[string]string{
			"zoekt.web-url":      tgt.webURL,
			"zoekt.web-url-type": tgt.webURLType,
			"zoekt.name":         fullName,
		})
		if err != nil {
			log.Fatal(err)
		}
		if cloned == "" {
			// Nothing to report for this repository.
			continue
		}
		fmt.Println(cloned)
	}
}