Delete gerrit/zoekt.
The source code is hosted on https://github.com/sourcegraph/zoekt
Change-Id: Icb6e8dd6d9479fe8f78b6868e90f28e508bd2e2b
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 7cd8594..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,6 +0,0 @@
-*~
-cmd/zoekt-index/zoekt-index
-cmd/zoekt-webserver/zoekt-webserver
-cmd/zoekt-mirror-github/zoekt-mirror-github
-cmd/zoekt-server/zoekt-server
-cmd/zoekt-git-index/zoekt-git-index
diff --git a/CONTRIBUTING b/CONTRIBUTING
deleted file mode 100644
index 61b75c4..0000000
--- a/CONTRIBUTING
+++ /dev/null
@@ -1,4 +0,0 @@
-**NOTICE:**
-[github.com/sourcegraph/zoekt](https://github.com/sourcegraph/zoekt) is the
-active main repository for Zoekt development. Please contribute pull requests
-to that repository.
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 261eeb9..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/README.md b/README.md
index 6050602..3ab4d09 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,3 @@
-
- "Zoekt, en gij zult spinazie eten" - Jan Eertink
-
- ("seek, and ye shall eat spinach" - My primary school teacher)
-
This is a fast text search engine, intended for use with source
code. (Pronunciation: roughly as you would pronounce "zooked" in English)
diff --git a/all.bash b/all.bash
deleted file mode 100755
index 4b5ae45..0000000
--- a/all.bash
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-set -eux
-go test github.com/google/zoekt/...
-go install github.com/google/zoekt/cmd/...
diff --git a/api.go b/api.go
deleted file mode 100644
index 2bd8a88..0000000
--- a/api.go
+++ /dev/null
@@ -1,312 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "context"
- "fmt"
- "time"
-
- "github.com/google/zoekt/query"
-)
-
-// FileMatch contains all the matches within a file.
-type FileMatch struct {
- // Ranking; the higher, the better.
- Score float64 // TODO - hide this field?
-
- // For debugging. Needs DebugScore set, but public so tests in
- // other packages can print some diagnostics.
- Debug string
-
- FileName string
-
- // Repository is the globally unique name of the repo of the
- // match
- Repository string
- Branches []string
- LineMatches []LineMatch
-
- // Only set if requested
- Content []byte
-
- // Checksum of the content.
- Checksum []byte
-
- // Detected language of the result.
- Language string
-
- // SubRepositoryName is the globally unique name of the repo,
- // if it came from a subrepository
- SubRepositoryName string
-
- // SubRepositoryPath holds the prefix where the subrepository
- // was mounted.
- SubRepositoryPath string
-
- // Commit SHA1 (hex) of the (sub)repo holding the file.
- Version string
-}
-
-// LineMatch holds the matches within a single line in a file.
-type LineMatch struct {
- // The line in which a match was found.
- Line []byte
- LineStart int
- LineEnd int
- LineNumber int
-
- // If set, this was a match on the filename.
- FileName bool
-
- // The higher the better. Only ranks the quality of the match
- // within the file, does not take rank of file into account
- Score float64
- LineFragments []LineFragmentMatch
-}
-
-// LineFragmentMatch a segment of matching text within a line.
-type LineFragmentMatch struct {
- // Offset within the line, in bytes.
- LineOffset int
-
- // Offset from file start, in bytes.
- Offset uint32
-
- // Number bytes that match.
- MatchLength int
-}
-
-// Stats contains interesting numbers on the search
-type Stats struct {
- // Amount of I/O for reading contents.
- ContentBytesLoaded int64
-
- // Amount of I/O for reading from index.
- IndexBytesLoaded int64
-
- // Number of search shards that had a crash.
- Crashes int
-
- // Wall clock time for this search
- Duration time.Duration
-
- // Number of files containing a match.
- FileCount int
-
- // Number of files in shards that we considered.
- ShardFilesConsidered int
-
- // Files that we evaluated. Equivalent to files for which all
- // atom matches (including negations) evaluated to true.
- FilesConsidered int
-
- // Files for which we loaded file content to verify substring matches
- FilesLoaded int
-
- // Candidate files whose contents weren't examined because we
- // gathered enough matches.
- FilesSkipped int
-
- // Shards that we did not process because a query was canceled.
- ShardsSkipped int
-
- // Number of non-overlapping matches
- MatchCount int
-
- // Number of candidate matches as a result of searching ngrams.
- NgramMatches int
-
- // Wall clock time for queued search.
- Wait time.Duration
-
- // Number of times regexp was called on files that we evaluated.
- RegexpsConsidered int
-}
-
-func (s *Stats) Add(o Stats) {
- s.ContentBytesLoaded += o.ContentBytesLoaded
- s.IndexBytesLoaded += o.IndexBytesLoaded
- s.Crashes += o.Crashes
- s.FileCount += o.FileCount
- s.FilesConsidered += o.FilesConsidered
- s.FilesLoaded += o.FilesLoaded
- s.FilesSkipped += o.FilesSkipped
- s.MatchCount += o.MatchCount
- s.NgramMatches += o.NgramMatches
- s.ShardFilesConsidered += o.ShardFilesConsidered
- s.ShardsSkipped += o.ShardsSkipped
-}
-
-// SearchResult contains search matches and extra data
-type SearchResult struct {
- Stats
- Files []FileMatch
-
- // RepoURLs holds a repo => template string map.
- RepoURLs map[string]string
-
- // FragmentNames holds a repo => template string map, for
- // the line number fragment.
- LineFragments map[string]string
-}
-
-// RepositoryBranch describes an indexed branch, which is a name
-// combined with a version.
-type RepositoryBranch struct {
- Name string
- Version string
-}
-
-// Repository holds repository metadata.
-type Repository struct {
- // The repository name
- Name string
-
- // The repository URL.
- URL string
-
- // The physical source where this repo came from, eg. full
- // path to the zip filename or git repository directory. This
- // will not be exposed in the UI, but can be used to detect
- // orphaned index shards.
- Source string
-
- // The branches indexed in this repo.
- Branches []RepositoryBranch
-
- // Nil if this is not the super project.
- SubRepoMap map[string]*Repository
-
- // URL template to link to the commit of a branch
- CommitURLTemplate string
-
- // The repository URL for getting to a file. Has access to
- // {{Branch}}, {{Path}}
- FileURLTemplate string
-
- // The URL fragment to add to a file URL for line numbers. has
- // access to {{LineNumber}}. The fragment should include the
- // separator, generally '#' or ';'.
- LineFragmentTemplate string
-
- // All zoekt.* configuration settings.
- RawConfig map[string]string
-
- // Importance of the repository, bigger is more important
- Rank uint16
-
- // IndexOptions is a hash of the options used to create the index for the
- // repo.
- IndexOptions string
-}
-
-// IndexMetadata holds metadata stored in the index file. It contains
-// data generated by the core indexing library.
-type IndexMetadata struct {
- IndexFormatVersion int
- IndexFeatureVersion int
- IndexMinReaderVersion int
- IndexTime time.Time
- PlainASCII bool
- LanguageMap map[string]byte
- ZoektVersion string
-}
-
-// Statistics of a (collection of) repositories.
-type RepoStats struct {
- // Repos is used for aggregrating the number of repositories.
- Repos int
-
- // Shards is the total number of search shards.
- Shards int
-
- // Documents holds the number of documents or files.
- Documents int
-
- // IndexBytes is the amount of RAM used for index overhead.
- IndexBytes int64
-
- // ContentBytes is the amount of RAM used for raw content.
- ContentBytes int64
-}
-
-func (s *RepoStats) Add(o *RepoStats) {
- // can't update Repos, since one repo may have multiple
- // shards.
- s.Shards += o.Shards
- s.IndexBytes += o.IndexBytes
- s.Documents += o.Documents
- s.ContentBytes += o.ContentBytes
-}
-
-type RepoListEntry struct {
- Repository Repository
- IndexMetadata IndexMetadata
- Stats RepoStats
-}
-
-// RepoList holds a set of Repository metadata.
-type RepoList struct {
- Repos []*RepoListEntry
- Crashes int
-}
-
-type Searcher interface {
- Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error)
-
- // List lists repositories. The query `q` can only contain
- // query.Repo atoms.
- List(ctx context.Context, q query.Q) (*RepoList, error)
- Close()
-
- // Describe the searcher for debug messages.
- String() string
-}
-
-type SearchOptions struct {
- // Return an upper-bound estimate of eligible documents in
- // stats.ShardFilesConsidered.
- EstimateDocCount bool
-
- // Return the whole file.
- Whole bool
-
- // Maximum number of matches: skip all processing an index
- // shard after we found this many non-overlapping matches.
- ShardMaxMatchCount int
-
- // Maximum number of matches: stop looking for more matches
- // once we have this many matches across shards.
- TotalMaxMatchCount int
-
- // Maximum number of important matches: skip processing
- // shard after we found this many important matches.
- ShardMaxImportantMatch int
-
- // Maximum number of important matches across shards.
- TotalMaxImportantMatch int
-
- // Abort the search after this much time has passed.
- MaxWallTime time.Duration
-
- // Trim the number of results after collating and sorting the
- // results
- MaxDocDisplayCount int
-}
-
-func (s *SearchOptions) String() string {
- return fmt.Sprintf("%#v", s)
-}
diff --git a/bits.go b/bits.go
deleted file mode 100644
index 62f6110..0000000
--- a/bits.go
+++ /dev/null
@@ -1,268 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "encoding/binary"
- "unicode"
- "unicode/utf8"
-)
-
-func generateCaseNgrams(g ngram) []ngram {
- asRunes := ngramToRunes(g)
-
- variants := make([]ngram, 0, 8)
- cur := asRunes
- for {
- for i := 0; i < 3; i++ {
- next := unicode.SimpleFold(cur[i])
- cur[i] = next
- if next != asRunes[i] {
- break
- }
- }
-
- variants = append(variants, runesToNGram(cur))
- if cur == asRunes {
- break
- }
- }
-
- return variants
-}
-
-func toLower(in []byte) []byte {
- out := make([]byte, 0, len(in))
- var buf [4]byte
- for _, c := range string(in) {
- i := utf8.EncodeRune(buf[:], unicode.ToLower(c))
- out = append(out, buf[:i]...)
- }
- return out
-}
-
-// compare 'lower' and 'mixed', where lower is the needle. 'mixed' may
-// be larger than 'lower'. Returns whether there was a match, and if
-// yes, the byte size of the match.
-func caseFoldingEqualsRunes(lower, mixed []byte) (int, bool) {
- matchTotal := 0
- for len(lower) > 0 && len(mixed) > 0 {
- lr, lsz := utf8.DecodeRune(lower)
- lower = lower[lsz:]
-
- mr, msz := utf8.DecodeRune(mixed)
- mixed = mixed[msz:]
- matchTotal += msz
-
- if lr != unicode.ToLower(mr) {
- return 0, false
- }
- }
-
- return matchTotal, len(lower) == 0
-}
-
-type ngram uint64
-
-func runesToNGram(b [ngramSize]rune) ngram {
- return ngram(uint64(b[0])<<42 | uint64(b[1])<<21 | uint64(b[2]))
-}
-
-func bytesToNGram(b []byte) ngram {
- return runesToNGram([ngramSize]rune{rune(b[0]), rune(b[1]), rune(b[2])})
-}
-
-func stringToNGram(s string) ngram {
- return bytesToNGram([]byte(s))
-}
-
-func ngramToBytes(n ngram) []byte {
- rs := ngramToRunes(n)
- return []byte{byte(rs[0]), byte(rs[1]), byte(rs[2])}
-}
-
-const runeMask = 1<<21 - 1
-
-func ngramToRunes(n ngram) [ngramSize]rune {
- return [ngramSize]rune{rune((n >> 42) & runeMask), rune((n >> 21) & runeMask), rune(n & runeMask)}
-}
-
-func (n ngram) String() string {
- rs := ngramToRunes(n)
- return string(rs[:])
-}
-
-type runeNgramOff struct {
- ngram ngram
- byteSize uint32 // size of ngram
- byteOff uint32
- runeOff uint32
-}
-
-func splitNGrams(str []byte) []runeNgramOff {
- var runeGram [3]rune
- var off [3]uint32
- var runeCount int
-
- result := make([]runeNgramOff, 0, len(str))
- var i uint32
-
- chars := -1
- for len(str) > 0 {
- chars++
- r, sz := utf8.DecodeRune(str)
- str = str[sz:]
- runeGram[0] = runeGram[1]
- off[0] = off[1]
- runeGram[1] = runeGram[2]
- off[1] = off[2]
- runeGram[2] = r
- off[2] = uint32(i)
- i += uint32(sz)
- runeCount++
- if runeCount < ngramSize {
- continue
- }
-
- ng := runesToNGram(runeGram)
- result = append(result, runeNgramOff{
- ngram: ng,
- byteSize: i - off[0],
- byteOff: off[0],
- runeOff: uint32(chars),
- })
- }
- return result
-}
-
-const (
- _classChar = 0
- _classDigit = iota
- _classPunct = iota
- _classOther = iota
- _classSpace = iota
-)
-
-func byteClass(c byte) int {
- if (c >= 'a' && c <= 'z') || c >= 'A' && c <= 'Z' {
- return _classChar
- }
- if c >= '0' && c <= '9' {
- return _classDigit
- }
-
- switch c {
- case ' ', '\n':
- return _classSpace
- case '.', ',', ';', '"', '\'':
- return _classPunct
- default:
- return _classOther
- }
-}
-
-func marshalDocSections(secs []DocumentSection) []byte {
- ints := make([]uint32, 0, len(secs)*2)
- for _, s := range secs {
- ints = append(ints, uint32(s.Start), uint32(s.End))
- }
-
- return toSizedDeltas(ints)
-}
-
-func unmarshalDocSections(in []byte, buf []DocumentSection) (secs []DocumentSection) {
- // TODO - ints is unnecessary garbage here.
- ints := fromSizedDeltas(in, nil)
- if cap(buf) >= len(ints)/2 {
- buf = buf[:0]
- } else {
- buf = make([]DocumentSection, 0, len(ints)/2)
- }
-
- for len(ints) > 0 {
- buf = append(buf, DocumentSection{ints[0], ints[1]})
- ints = ints[2:]
- }
- return buf
-}
-
-type ngramSlice []ngram
-
-func (p ngramSlice) Len() int { return len(p) }
-
-func (p ngramSlice) Less(i, j int) bool {
- return p[i] < p[j]
-}
-
-func (p ngramSlice) Swap(i, j int) {
- p[i], p[j] = p[j], p[i]
-}
-
-func toSizedDeltas(offsets []uint32) []byte {
- var enc [8]byte
-
- deltas := make([]byte, 0, len(offsets)*2)
-
- m := binary.PutUvarint(enc[:], uint64(len(offsets)))
- deltas = append(deltas, enc[:m]...)
-
- var last uint32
- for _, p := range offsets {
- delta := p - last
- last = p
-
- m := binary.PutUvarint(enc[:], uint64(delta))
- deltas = append(deltas, enc[:m]...)
- }
- return deltas
-}
-
-func fromSizedDeltas(data []byte, ps []uint32) []uint32 {
- sz, m := binary.Uvarint(data)
- data = data[m:]
-
- if cap(ps) < int(sz) {
- ps = make([]uint32, 0, sz)
- } else {
- ps = ps[:0]
- }
-
- var last uint32
- for len(data) > 0 {
- delta, m := binary.Uvarint(data)
- offset := last + uint32(delta)
- last = offset
- data = data[m:]
- ps = append(ps, offset)
- }
- return ps
-}
-
-func fromDeltas(data []byte, buf []uint32) []uint32 {
- buf = buf[:0]
- if cap(buf) < len(data)/2 {
- buf = make([]uint32, 0, len(data)/2)
- }
-
- var last uint32
- for len(data) > 0 {
- delta, m := binary.Uvarint(data)
- offset := last + uint32(delta)
- last = offset
- data = data[m:]
- buf = append(buf, offset)
- }
- return buf
-}
diff --git a/bits_test.go b/bits_test.go
deleted file mode 100644
index 41710a9..0000000
--- a/bits_test.go
+++ /dev/null
@@ -1,198 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "encoding/binary"
- "log"
- "math/rand"
- "reflect"
- "sort"
- "testing"
- "testing/quick"
-
- "github.com/google/go-cmp/cmp"
-)
-
-var _ = log.Println
-
-func TestNgram(t *testing.T) {
- in := "abc"
- n := stringToNGram(in)
- if n.String() != "abc" {
- t.Errorf("got %q, want %q", n, "abc")
- }
-
- f := func(b ngramRunes) bool {
- n := runesToNGram(b)
- got := ngramRunes(ngramToRunes(n))
- if !reflect.DeepEqual(b, got) {
- t.Log(cmp.Diff(b, got))
- return false
- }
- return true
- }
- if err := quick.Check(f, nil); err != nil {
- t.Error(err)
- }
-}
-
-type ngramRunes [ngramSize]rune
-
-func (ngramRunes) Generate(rand *rand.Rand, size int) reflect.Value {
- // Same implementation used by testing/quick to generate strings. But we
- // force it to ngramSize runes.
- var b ngramRunes
- for i := range b {
- b[i] = rune(rand.Intn(0x10ffff))
- }
- return reflect.ValueOf(b)
-}
-
-func TestDocSection(t *testing.T) {
- in := []DocumentSection{{1, 2}, {3, 4}}
- serialized := marshalDocSections(in)
- roundtrip := unmarshalDocSections(serialized, nil)
- if !reflect.DeepEqual(in, roundtrip) {
- t.Errorf("got %v, want %v", roundtrip, in)
- }
-}
-
-func TestGenerateCaseNgrams(t *testing.T) {
- ng := stringToNGram("aB1")
- gotNG := generateCaseNgrams(ng)
-
- got := map[string]bool{}
- for _, n := range gotNG {
- got[string(ngramToBytes(n))] = true
- }
-
- want := map[string]bool{
- "aB1": true,
- "AB1": true,
- "ab1": true,
- "Ab1": true,
- }
-
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %v, want %v", got, want)
- }
-}
-
-func TestNextFileIndex(t *testing.T) {
- for _, tc := range []struct {
- off, curFile uint32
- ends []uint32
- want uint32
- }{
- {maxUInt32, 0, []uint32{34}, 1},
- {9, 0, []uint32{34}, 0},
- {450, 0, []uint32{100, 200, 300, 400, 500, 600}, 4},
- } {
- got := nextFileIndex(tc.off, tc.curFile, tc.ends)
- if got != tc.want {
- t.Errorf("%v: got %d, want %d", tc, got, tc.want)
- }
- }
-}
-
-func TestSizedDeltas(t *testing.T) {
- encode := func(nums []uint32) []byte {
- return toSizedDeltas(nums)
- }
- decode := func(data []byte) []uint32 {
- if len(data) == 0 {
- return nil
- }
- return fromSizedDeltas(data, nil)
- }
- testIncreasingIntCoder(t, encode, decode)
-}
-
-func TestFromDeltas(t *testing.T) {
- decode := func(data []byte) []uint32 {
- if len(data) == 0 {
- return nil
- }
- return fromDeltas(data, nil)
- }
- testIncreasingIntCoder(t, toDeltas, decode)
-}
-
-func TestCompressedPostingIterator(t *testing.T) {
- decode := func(data []byte) []uint32 {
- if len(data) == 0 {
- return nil
- }
-
- var nums []uint32
- i := newCompressedPostingIterator(data, stringToNGram("abc"))
- for i.first() != maxUInt32 {
- nums = append(nums, i.first())
- i.next(i.first())
- }
- return nums
- }
- testIncreasingIntCoder(t, toDeltas, decode)
-}
-
-func toDeltas(offsets []uint32) []byte {
- var enc [8]byte
-
- deltas := make([]byte, 0, len(offsets)*2)
-
- var last uint32
- for _, p := range offsets {
- delta := p - last
- last = p
-
- m := binary.PutUvarint(enc[:], uint64(delta))
- deltas = append(deltas, enc[:m]...)
- }
- return deltas
-}
-
-func testIncreasingIntCoder(t *testing.T, encode func([]uint32) []byte, decode func([]byte) []uint32) {
- f := func(nums []uint32) bool {
- nums = sortedUnique(nums)
- b := encode(nums)
- got := decode(b)
- if len(nums) == len(got) && len(nums) == 0 {
- return true
- }
- if !reflect.DeepEqual(got, nums) {
- t.Log(cmp.Diff(nums, got))
- return false
- }
- return true
- }
- if err := quick.Check(f, nil); err != nil {
- t.Error(err)
- }
-}
-
-func sortedUnique(nums []uint32) []uint32 {
- if len(nums) == 0 {
- return nums
- }
- sort.Slice(nums, func(i, j int) bool { return nums[i] < nums[j] })
- filtered := nums[:1]
- for _, n := range nums[1:] {
- if filtered[len(filtered)-1] != n {
- filtered = append(filtered, n)
- }
- }
- return filtered
-}
diff --git a/build-deploy.sh b/build-deploy.sh
deleted file mode 100644
index 7fb442f..0000000
--- a/build-deploy.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-# this script packages up all the binaries, and a script (deploy.sh)
-# to twiddle with the server and the binaries
-
-set -ex
-
-# Put the date first so we can sort.
-if [[ -z "$VERSION" ]]; then
- VERSION=$(date --iso-8601=minutes | tr -d ':' | sed 's|\+.*$||')
- if [[ -d .git ]]; then
- VERSION=${VERSION}-$(git show --pretty=format:%h -q)
- fi
-fi
-
-set -u
-
-out=zoekt-${VERSION}
-mkdir -p ${out}
-
-for d in $(find cmd -maxdepth 1 -type d)
-do
- go build -tags netgo -ldflags "-X github.com/google/zoekt.Version=$VERSION" -o ${out}/$(basename $d) github.com/google/zoekt/$d
-done
-
-cat <<EOF > ${out}/deploy.sh
-#!/bin/bash
-
-echo "Set the following in the environment."
-echo ""
-echo ' export PATH="'$PWD'/bin:$PATH'
-echo ""
-
-set -eux
-
-# Allow sandbox to create NS's
-sudo sh -c 'echo 1 > /proc/sys/kernel/unprivileged_userns_clone'
-
-# we mmap the entire index, but typically only want the file contents.
-sudo sh -c 'echo 1 >/proc/sys/vm/overcommit_memory'
-
-# allow bind to 80 and 443
-sudo setcap 'cap_net_bind_service=+ep' bin/zoekt-webserver
-
-EOF
-
-chmod 755 ${out}/*
-
-tar --owner=root --group=root -czf zoekt-deploy-${VERSION}.tar.gz ${out}/*
-
-rm -rf ${out}
diff --git a/build/builder.go b/build/builder.go
deleted file mode 100644
index 5461828..0000000
--- a/build/builder.go
+++ /dev/null
@@ -1,567 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// package build implements a more convenient interface for building
-// zoekt indices.
-package build
-
-import (
- "crypto/sha1"
- "flag"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "net/url"
- "os"
- "os/exec"
- "path/filepath"
- "reflect"
- "regexp"
- "runtime"
- "runtime/pprof"
- "sort"
- "strings"
- "sync"
-
- "github.com/bmatcuk/doublestar"
- "github.com/google/zoekt"
- "github.com/google/zoekt/ctags"
-)
-
-var DefaultDir = filepath.Join(os.Getenv("HOME"), ".zoekt")
-
-// Branch describes a single branch version.
-type Branch struct {
- Name string
- Version string
-}
-
-// Options sets options for the index building.
-type Options struct {
- // IndexDir is a directory that holds *.zoekt index files.
- IndexDir string
-
- // SizeMax is the maximum file size
- SizeMax int
-
- // Parallelism is the maximum number of shards to index in parallel
- Parallelism int
-
- // ShardMax sets the maximum corpus size for a single shard
- ShardMax int
-
- // TrigramMax sets the maximum number of distinct trigrams per document.
- TrigramMax int
-
- // RepositoryDescription holds names and URLs for the repository.
- RepositoryDescription zoekt.Repository
-
- // SubRepositories is a path => sub repository map.
- SubRepositories map[string]*zoekt.Repository
-
- // Path to exuberant ctags binary to run
- CTags string
-
- // If set, ctags must succeed.
- CTagsMustSucceed bool
-
- // Write memory profiles to this file.
- MemProfile string
-
- // LargeFiles is a slice of glob patterns, including ** for any number
- // of directories, where matching file paths should be indexed
- // regardless of their size. The full pattern syntax is here:
- // https://github.com/bmatcuk/doublestar/tree/v1#patterns.
- LargeFiles []string
-}
-
-// HashOptions creates a hash of the options that affect an index.
-func (o *Options) HashOptions() string {
- hasher := sha1.New()
-
- hasher.Write([]byte(o.CTags))
- hasher.Write([]byte(fmt.Sprintf("%t", o.CTagsMustSucceed)))
- hasher.Write([]byte(fmt.Sprintf("%d", o.SizeMax)))
- hasher.Write([]byte(fmt.Sprintf("%q", o.LargeFiles)))
-
- return fmt.Sprintf("%x", hasher.Sum(nil))
-}
-
-type largeFilesFlag struct{ *Options }
-
-func (f largeFilesFlag) String() string {
- // From flag.Value documentation:
- //
- // The flag package may call the String method with a zero-valued receiver,
- // such as a nil pointer.
- if f.Options == nil {
- return ""
- }
- s := append([]string{""}, f.LargeFiles...)
- return strings.Join(s, "-large_file ")
-}
-
-func (f largeFilesFlag) Set(value string) error {
- f.LargeFiles = append(f.LargeFiles, value)
- return nil
-}
-
-// Flags adds flags for build options to fs.
-func (o *Options) Flags(fs *flag.FlagSet) {
- x := *o
- x.SetDefaults()
- fs.IntVar(&o.SizeMax, "file_limit", x.SizeMax, "maximum file size")
- fs.IntVar(&o.TrigramMax, "max_trigram_count", x.TrigramMax, "maximum number of trigrams per document")
- fs.IntVar(&o.ShardMax, "shard_limit", x.ShardMax, "maximum corpus size for a shard")
- fs.IntVar(&o.Parallelism, "parallelism", x.Parallelism, "maximum number of parallel indexing processes.")
- fs.StringVar(&o.IndexDir, "index", x.IndexDir, "directory for search indices")
- fs.BoolVar(&o.CTagsMustSucceed, "require_ctags", x.CTagsMustSucceed, "If set, ctags calls must succeed.")
- fs.Var(largeFilesFlag{o}, "large_file", "A glob pattern where matching files are to be index regardless of their size. You can add multiple patterns by setting this more than once.")
-}
-
-// Builder manages (parallel) creation of uniformly sized shards. The
-// builder buffers up documents until it collects enough documents and
-// then builds a shard and writes.
-type Builder struct {
- opts Options
- throttle chan int
-
- nextShardNum int
- todo []*zoekt.Document
- size int
-
- parser ctags.Parser
-
- building sync.WaitGroup
-
- errMu sync.Mutex
- buildError error
-
- // temp name => final name for finished shards. We only rename
- // them once all shards succeed to avoid Frankstein corpuses.
- finishedShards map[string]string
-}
-
-type finishedShard struct {
- temp, final string
-}
-
-// SetDefaults sets reasonable default options.
-func (o *Options) SetDefaults() {
- if o.CTags == "" {
- ctags, err := exec.LookPath("universal-ctags")
- if err == nil {
- o.CTags = ctags
- }
- }
-
- if o.CTags == "" {
- ctags, err := exec.LookPath("ctags-exuberant")
- if err == nil {
- o.CTags = ctags
- }
- }
- if o.Parallelism == 0 {
- o.Parallelism = 4
- }
- if o.SizeMax == 0 {
- o.SizeMax = 2 << 20
- }
- if o.ShardMax == 0 {
- o.ShardMax = 100 << 20
- }
- if o.TrigramMax == 0 {
- o.TrigramMax = 20000
- }
-
- if o.RepositoryDescription.Name == "" && o.RepositoryDescription.URL != "" {
- parsed, _ := url.Parse(o.RepositoryDescription.URL)
- if parsed != nil {
- o.RepositoryDescription.Name = filepath.Join(parsed.Host, parsed.Path)
- }
- }
-}
-
-func hashString(s string) string {
- h := sha1.New()
- io.WriteString(h, s)
- return fmt.Sprintf("%x", h.Sum(nil))
-}
-
-// ShardName returns the name the given index shard.
-func (o *Options) shardName(n int) string {
- abs := url.QueryEscape(o.RepositoryDescription.Name)
- if len(abs) > 200 {
- abs = abs[:200] + hashString(abs)[:8]
- }
- return filepath.Join(o.IndexDir,
- fmt.Sprintf("%s_v%d.%05d.zoekt", abs, zoekt.IndexFormatVersion, n))
-}
-
-// IncrementalSkipIndexing returns true if the index present on disk matches
-// the build options.
-func (o *Options) IncrementalSkipIndexing() bool {
- fn := o.shardName(0)
-
- f, err := os.Open(fn)
- if err != nil {
- return false
- }
-
- iFile, err := zoekt.NewIndexFile(f)
- if err != nil {
- return false
- }
- defer iFile.Close()
-
- repo, index, err := zoekt.ReadMetadata(iFile)
- if err != nil {
- return false
- }
-
- if index.IndexFeatureVersion != zoekt.FeatureVersion {
- return false
- }
-
- if repo.IndexOptions != o.HashOptions() {
- return false
- }
-
- return reflect.DeepEqual(repo.Branches, o.RepositoryDescription.Branches)
-}
-
-// IgnoreSizeMax determines whether the max size should be ignored.
-func (o *Options) IgnoreSizeMax(name string) bool {
- for _, pattern := range o.LargeFiles {
- pattern = strings.TrimSpace(pattern)
- m, _ := doublestar.PathMatch(pattern, name)
- if m {
- return true
- }
- }
-
- return false
-}
-
-// NewBuilder creates a new Builder instance.
-func NewBuilder(opts Options) (*Builder, error) {
- opts.SetDefaults()
- if opts.RepositoryDescription.Name == "" {
- return nil, fmt.Errorf("builder: must set Name")
- }
-
- b := &Builder{
- opts: opts,
- throttle: make(chan int, opts.Parallelism),
- finishedShards: map[string]string{},
- }
-
- if b.opts.CTags == "" && b.opts.CTagsMustSucceed {
- return nil, fmt.Errorf("ctags binary not found, but CTagsMustSucceed set")
- }
-
- if strings.Contains(opts.CTags, "universal-ctags") {
- parser, err := ctags.NewParser(opts.CTags)
- if err != nil && opts.CTagsMustSucceed {
- return nil, fmt.Errorf("ctags.NewParser: %v", err)
- }
-
- b.parser = parser
- }
- if _, err := b.newShardBuilder(); err != nil {
- return nil, err
- }
-
- return b, nil
-}
-
-// AddFile is a convenience wrapper for the Add method
-func (b *Builder) AddFile(name string, content []byte) error {
- return b.Add(zoekt.Document{Name: name, Content: content})
-}
-
-func (b *Builder) Add(doc zoekt.Document) error {
- // We could pass the document on to the shardbuilder, but if
- // we pass through a part of the source tree with binary/large
- // files, the corresponding shard would be mostly empty, so
- // insert a reason here too.
- if len(doc.Content) > b.opts.SizeMax && !b.opts.IgnoreSizeMax(doc.Name) {
- doc.SkipReason = fmt.Sprintf("document size %d larger than limit %d", len(doc.Content), b.opts.SizeMax)
- } else if err := zoekt.CheckText(doc.Content, b.opts.TrigramMax); err != nil {
- doc.SkipReason = err.Error()
- doc.Language = "binary"
- }
-
- b.todo = append(b.todo, &doc)
- b.size += len(doc.Name) + len(doc.Content)
- if b.size > b.opts.ShardMax {
- return b.flush()
- }
-
- return nil
-}
-
-// Finish creates a last shard from the buffered documents, and clears
-// stale shards from previous runs. This should always be called, also
-// in failure cases, to ensure cleanup.
-func (b *Builder) Finish() error {
- b.flush()
- b.building.Wait()
-
- if b.buildError != nil {
- for tmp := range b.finishedShards {
- os.Remove(tmp)
- }
- b.finishedShards = map[string]string{}
- return b.buildError
- }
-
- for tmp, final := range b.finishedShards {
- if err := os.Rename(tmp, final); err != nil {
- b.buildError = err
- }
- }
- b.finishedShards = map[string]string{}
-
- if b.nextShardNum > 0 {
- b.deleteRemainingShards()
- }
- return b.buildError
-}
-
-func (b *Builder) deleteRemainingShards() {
- for {
- shard := b.nextShardNum
- b.nextShardNum++
- name := b.opts.shardName(shard)
- if err := os.Remove(name); os.IsNotExist(err) {
- break
- }
- }
-}
-
-func (b *Builder) flush() error {
- todo := b.todo
- b.todo = nil
- b.size = 0
- b.errMu.Lock()
- defer b.errMu.Unlock()
- if b.buildError != nil {
- return b.buildError
- }
-
- hasShard := b.nextShardNum > 0
- if len(todo) == 0 && hasShard {
- return nil
- }
-
- shard := b.nextShardNum
- b.nextShardNum++
-
- if b.opts.Parallelism > 1 {
- b.building.Add(1)
- go func() {
- b.throttle <- 1
- done, err := b.buildShard(todo, shard)
- <-b.throttle
-
- b.errMu.Lock()
- defer b.errMu.Unlock()
- if err != nil && b.buildError == nil {
- b.buildError = err
- }
- if err == nil {
- b.finishedShards[done.temp] = done.final
- }
- b.building.Done()
- }()
- } else {
- // No goroutines when we're not parallel. This
- // simplifies memory profiling.
- done, err := b.buildShard(todo, shard)
- b.buildError = err
- if err == nil {
- b.finishedShards[done.temp] = done.final
- }
- if b.opts.MemProfile != "" {
- // drop memory, and profile.
- todo = nil
- b.writeMemProfile(b.opts.MemProfile)
- }
-
- return b.buildError
- }
-
- return nil
-}
-
-var profileNumber int
-
-func (b *Builder) writeMemProfile(name string) {
- nm := fmt.Sprintf("%s.%d", name, profileNumber)
- profileNumber++
- f, err := os.Create(nm)
- if err != nil {
- log.Fatal("could not create memory profile: ", err)
- }
- runtime.GC() // get up-to-date statistics
- if err := pprof.WriteHeapProfile(f); err != nil {
- log.Fatal("could not write memory profile: ", err)
- }
- f.Close()
- log.Printf("wrote mem profile %q", nm)
-}
-
-// map [0,inf) to [0,1) monotonically
-func squashRange(j int) float64 {
- x := float64(j)
- return x / (1 + x)
-}
-
-var testRe = regexp.MustCompile("test")
-
-type rankedDoc struct {
- *zoekt.Document
- rank []float64
-}
-
-func rank(d *zoekt.Document, origIdx int) []float64 {
- test := 0.0
- if testRe.MatchString(d.Name) {
- test = 1.0
- }
-
- // Smaller is earlier (=better).
- return []float64{
- // Prefer docs that are not tests
- test,
-
- // With many symbols
- 1.0 - squashRange(len(d.Symbols)),
-
- // With short content
- squashRange(len(d.Content)),
-
- // With short names
- squashRange(len(d.Name)),
-
- // That is present is as many branches as possible
- 1.0 - squashRange(len(d.Branches)),
-
- // Preserve original ordering.
- squashRange(origIdx),
- }
-}
-
-func sortDocuments(todo []*zoekt.Document) {
- rs := make([]rankedDoc, 0, len(todo))
- for i, t := range todo {
- rd := rankedDoc{t, rank(t, i)}
- rs = append(rs, rd)
- }
- sort.Slice(rs, func(i, j int) bool {
- r1 := rs[i].rank
- r2 := rs[j].rank
- for i := range r1 {
- if r1[i] < r2[i] {
- return true
- }
- if r1[i] > r2[i] {
- return false
- }
- }
-
- return false
- })
- for i := range todo {
- todo[i] = rs[i].Document
- }
-}
-
-func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) {
- if b.opts.CTags != "" {
- err := ctagsAddSymbols(todo, b.parser, b.opts.CTags)
- if b.opts.CTagsMustSucceed && err != nil {
- return nil, err
- }
- if err != nil {
- log.Printf("ignoring %s error: %v", b.opts.CTags, err)
- }
- }
-
- name := b.opts.shardName(nextShardNum)
-
- shardBuilder, err := b.newShardBuilder()
- if err != nil {
- return nil, err
- }
- sortDocuments(todo)
- for _, t := range todo {
- if err := shardBuilder.Add(*t); err != nil {
- return nil, err
- }
- }
-
- return b.writeShard(name, shardBuilder)
-}
-
-func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) {
- desc := b.opts.RepositoryDescription
- desc.SubRepoMap = b.opts.SubRepositories
- desc.IndexOptions = b.opts.HashOptions()
-
- shardBuilder, err := zoekt.NewIndexBuilder(&desc)
- if err != nil {
- return nil, err
- }
- return shardBuilder, nil
-}
-
-func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard, error) {
- dir := filepath.Dir(fn)
- if err := os.MkdirAll(dir, 0o700); err != nil {
- return nil, err
- }
-
- f, err := ioutil.TempFile(dir, filepath.Base(fn)+".*.tmp")
- if err != nil {
- return nil, err
- }
- if runtime.GOOS != "windows" {
- if err := f.Chmod(0o666 &^ umask); err != nil {
- return nil, err
- }
- }
-
- defer f.Close()
- if err := ib.Write(f); err != nil {
- return nil, err
- }
- fi, err := f.Stat()
- if err != nil {
- return nil, err
- }
- if err := f.Close(); err != nil {
- return nil, err
- }
-
- log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(),
- float64(fi.Size())/float64(ib.ContentSize()+1))
-
- return &finishedShard{f.Name(), fn}, nil
-}
-
-// umask holds the Umask of the current process
-var umask os.FileMode
diff --git a/build/builder_test.go b/build/builder_test.go
deleted file mode 100644
index 133b39a..0000000
--- a/build/builder_test.go
+++ /dev/null
@@ -1,51 +0,0 @@
-package build
-
-import (
- "flag"
- "testing"
-
- "github.com/google/go-cmp/cmp"
-)
-
-func TestFlags(t *testing.T) {
- cases := []struct {
- args []string
- want Options
- }{{
- // Defaults
- args: []string{},
- want: Options{},
- }, {
- args: []string{"-index", "/tmp"},
- want: Options{
- IndexDir: "/tmp",
- },
- }, {
- // single large file pattern
- args: []string{"-large_file", "*.md"},
- want: Options{
- LargeFiles: []string{"*.md"},
- },
- }, {
- // multiple large file pattern
- args: []string{"-large_file", "*.md", "-large_file", "*.yaml"},
- want: Options{
- LargeFiles: []string{"*.md", "*.yaml"},
- },
- }}
-
- for _, c := range cases {
- c.want.SetDefaults()
- // depends on $PATH setting.
- c.want.CTags = ""
-
- got := Options{}
- fs := flag.NewFlagSet("", flag.ContinueOnError)
- got.Flags(fs)
- if err := fs.Parse(c.args); err != nil {
- t.Errorf("failed to parse args %v: %v", c.args, err)
- } else if !cmp.Equal(got, c.want) {
- t.Errorf("mismatch for %v (-want +got):\n%s", c.args, cmp.Diff(c.want, got))
- }
- }
-}
diff --git a/build/builder_unix.go b/build/builder_unix.go
deleted file mode 100644
index fde31d5..0000000
--- a/build/builder_unix.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// +build !windows
-
-package build
-
-import (
- "os"
- "syscall"
-)
-
-func init() {
- umask = os.FileMode(syscall.Umask(0))
- syscall.Umask(int(umask))
-}
diff --git a/build/ctags.go b/build/ctags.go
deleted file mode 100644
index 9c2ac5b..0000000
--- a/build/ctags.go
+++ /dev/null
@@ -1,278 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package build
-
-import (
- "bytes"
- "fmt"
- "io/ioutil"
- "os"
- "os/exec"
- "path/filepath"
- "strings"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/ctags"
-)
-
-func runCTags(bin string, inputs map[string][]byte) ([]*ctags.Entry, error) {
- const debug = false
- if len(inputs) == 0 {
- return nil, nil
- }
- dir, err := ioutil.TempDir("", "ctags-input")
- if err != nil {
- return nil, err
- }
- if !debug {
- defer os.RemoveAll(dir)
- }
-
- // --sort shells out to sort(1).
- args := []string{bin, "-n", "-f", "-", "--sort=no"}
-
- fileCount := 0
- for n, c := range inputs {
- if len(c) == 0 {
- continue
- }
-
- full := filepath.Join(dir, n)
- if err := os.MkdirAll(filepath.Dir(full), 0o700); err != nil {
- return nil, err
- }
- err := ioutil.WriteFile(full, c, 0o600)
- if err != nil {
- return nil, err
- }
- args = append(args, n)
- fileCount++
- }
- if fileCount == 0 {
- return nil, nil
- }
-
- cmd := exec.Command(args[0], args[1:]...)
- cmd.Dir = dir
-
- var errBuf, outBuf bytes.Buffer
- cmd.Stderr = &errBuf
- cmd.Stdout = &outBuf
-
- if err := cmd.Start(); err != nil {
- return nil, err
- }
-
- errChan := make(chan error, 1)
- go func() {
- err := cmd.Wait()
- errChan <- err
- }()
- timeout := time.After(5 * time.Second)
- select {
- case <-timeout:
- cmd.Process.Kill()
- return nil, fmt.Errorf("timeout executing ctags")
- case err := <-errChan:
- if err != nil {
- return nil, fmt.Errorf("exec(%s): %v, stderr: %s", cmd.Args, err, errBuf.String())
- }
- }
-
- var entries []*ctags.Entry
- for _, l := range bytes.Split(outBuf.Bytes(), []byte{'\n'}) {
- if len(l) == 0 {
- continue
- }
- e, err := ctags.Parse(string(l))
- if err != nil {
- return nil, err
- }
-
- if len(e.Sym) == 1 {
- continue
- }
- entries = append(entries, e)
- }
- return entries, nil
-}
-
-func runCTagsChunked(bin string, in map[string][]byte) ([]*ctags.Entry, error) {
- var res []*ctags.Entry
-
- cur := map[string][]byte{}
- sz := 0
- for k, v := range in {
- cur[k] = v
- sz += len(k)
-
- // 100k seems reasonable.
- if sz > (100 << 10) {
- r, err := runCTags(bin, cur)
- if err != nil {
- return nil, err
- }
- res = append(res, r...)
-
- cur = map[string][]byte{}
- sz = 0
- }
- }
- r, err := runCTags(bin, cur)
- if err != nil {
- return nil, err
- }
- res = append(res, r...)
- return res, nil
-}
-
-func ctagsAddSymbolsParser(todo []*zoekt.Document, parser ctags.Parser) error {
- for _, doc := range todo {
- if doc.Symbols != nil {
- continue
- }
-
- es, err := parser.Parse(doc.Name, doc.Content)
- if err != nil {
- return err
- }
- if len(es) == 0 {
- continue
- }
- doc.Language = strings.ToLower(es[0].Language)
-
- symOffsets, err := tagsToSections(doc.Content, es)
- if err != nil {
- return fmt.Errorf("%s: %v", doc.Name, err)
- }
- doc.Symbols = symOffsets
- }
-
- return nil
-}
-
-func ctagsAddSymbols(todo []*zoekt.Document, parser ctags.Parser, bin string) error {
- if parser != nil {
- return ctagsAddSymbolsParser(todo, parser)
- }
-
- pathIndices := map[string]int{}
- contents := map[string][]byte{}
- for i, t := range todo {
- if t.Symbols != nil {
- continue
- }
-
- _, ok := pathIndices[t.Name]
- if ok {
- continue
- }
-
- pathIndices[t.Name] = i
- contents[t.Name] = t.Content
- }
-
- var err error
- var entries []*ctags.Entry
- entries, err = runCTagsChunked(bin, contents)
- if err != nil {
- return err
- }
-
- fileTags := map[string][]*ctags.Entry{}
- for _, e := range entries {
- fileTags[e.Path] = append(fileTags[e.Path], e)
- }
-
- for k, tags := range fileTags {
- symOffsets, err := tagsToSections(contents[k], tags)
- if err != nil {
- return fmt.Errorf("%s: %v", k, err)
- }
- todo[pathIndices[k]].Symbols = symOffsets
- if len(tags) > 0 {
- todo[pathIndices[k]].Language = strings.ToLower(tags[0].Language)
- }
- }
- return nil
-}
-
-func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, error) {
- nls := newLinesIndices(content)
- nls = append(nls, uint32(len(content)))
- var symOffsets []zoekt.DocumentSection
- var lastEnd uint32
- var lastLine int
- var lastIntraEnd int
- for _, t := range tags {
- if t.Line <= 0 {
- // Observed this with a .JS file.
- continue
- }
- lineIdx := t.Line - 1
- if lineIdx >= len(nls) {
- return nil, fmt.Errorf("linenum for entry out of range %v", t)
- }
-
- lineOff := uint32(0)
- if lineIdx > 0 {
- lineOff = nls[lineIdx-1] + 1
- }
-
- end := nls[lineIdx]
- line := content[lineOff:end]
- if lastLine == lineIdx {
- line = line[lastIntraEnd:]
- } else {
- lastIntraEnd = 0
- }
-
- intraOff := lastIntraEnd + bytes.Index(line, []byte(t.Sym))
- if intraOff < 0 {
- // for Go code, this is very common, since
- // ctags barfs on multi-line declarations
- continue
- }
- start := lineOff + uint32(intraOff)
- if start < lastEnd {
- // This can happen if we have multiple tags on the same line.
- // Give up.
- continue
- }
-
- endSym := start + uint32(len(t.Sym))
-
- symOffsets = append(symOffsets, zoekt.DocumentSection{
- Start: start,
- End: endSym,
- })
- lastEnd = endSym
- lastLine = lineIdx
- lastIntraEnd = intraOff + len(t.Sym)
- }
-
- return symOffsets, nil
-}
-
-func newLinesIndices(in []byte) []uint32 {
- out := make([]uint32, 0, len(in)/30)
- for i, c := range in {
- if c == '\n' {
- out = append(out, uint32(i))
- }
- }
- return out
-}
diff --git a/build/ctags_test.go b/build/ctags_test.go
deleted file mode 100644
index c853b2e..0000000
--- a/build/ctags_test.go
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package build
-
-import (
- "reflect"
- "testing"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/ctags"
-)
-
-func TestTagsToSections(t *testing.T) {
- c := []byte("package foo\nfunc bar(j int) {}\n//bla")
- // ----------01234567890 1234567890123456789 012345
-
- tags := []*ctags.Entry{
- {
- Sym: "bar",
- Line: 2,
- },
- }
-
- secs, err := tagsToSections(c, tags)
- if err != nil {
- t.Fatal("tagsToSections", err)
- }
-
- if len(secs) != 1 || secs[0].Start != 17 || secs[0].End != 20 {
- t.Fatalf("got %#v, want 1 section (17,20)", secs)
- }
-}
-
-func TestTagsToSectionsMultiple(t *testing.T) {
- c := []byte("class Foob { int x; int b; }")
- // ----------012345678901234567890123456789
-
- tags := []*ctags.Entry{
- {
- Sym: "x",
- Line: 1,
- },
- {
- Sym: "b",
- Line: 1,
- },
- }
-
- got, err := tagsToSections(c, tags)
- if err != nil {
- t.Fatal("tagsToSections", err)
- }
-
- want := []zoekt.DocumentSection{
- {Start: 17, End: 18},
- {Start: 24, End: 25},
- }
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %v, want %v", got, want)
- }
-}
-
-func TestTagsToSectionsEOF(t *testing.T) {
- c := []byte("package foo\nfunc bar(j int) {}")
- // ----------01234567890 1234567890123456789 012345
-
- tags := []*ctags.Entry{
- {
- Sym: "bar",
- Line: 2,
- },
- }
-
- secs, err := tagsToSections(c, tags)
- if err != nil {
- t.Fatal("tagsToSections", err)
- }
-
- if len(secs) != 1 || secs[0].Start != 17 || secs[0].End != 20 {
- t.Fatalf("got %#v, want 1 section (17,20)", secs)
- }
-}
diff --git a/build/e2e_test.go b/build/e2e_test.go
deleted file mode 100644
index d72cda4..0000000
--- a/build/e2e_test.go
+++ /dev/null
@@ -1,491 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package build
-
-import (
- "fmt"
- "io/ioutil"
- "log"
- "os"
- "path/filepath"
- "reflect"
- "strings"
- "testing"
- "time"
-
- "golang.org/x/net/context"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/query"
- "github.com/google/zoekt/shards"
-)
-
-func TestBasic(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
-
- opts := Options{
- IndexDir: dir,
- ShardMax: 1024,
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- },
- Parallelism: 2,
- SizeMax: 1 << 20,
- }
-
- b, err := NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
-
- for i := 0; i < 4; i++ {
- s := fmt.Sprintf("%d", i)
- b.AddFile("F"+s, []byte(strings.Repeat(s, 1000)))
- }
-
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
-
- fs, _ := filepath.Glob(dir + "/*")
- if len(fs) <= 1 {
- t.Fatalf("want multiple shards, got %v", fs)
- }
-
- ss, err := shards.NewDirectorySearcher(dir)
- if err != nil {
- t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
- }
-
- q, err := query.Parse("111")
- if err != nil {
- t.Fatalf("Parse(111): %v", err)
- }
-
- var sOpts zoekt.SearchOptions
- ctx := context.Background()
- result, err := ss.Search(ctx, q, &sOpts)
- if err != nil {
- t.Fatalf("Search(%v): %v", q, err)
- }
-
- if len(result.Files) != 1 || result.Files[0].FileName != "F1" {
- t.Errorf("got %v, want 1 file.", result.Files)
- }
- defer ss.Close()
-}
-
-func TestLargeFileOption(t *testing.T) {
- dir, err := ioutil.TempDir("", "large_files_test")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- sizeMax := 1000
- opts := Options{
- IndexDir: dir,
- LargeFiles: []string{"F0", "F2"},
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- },
- SizeMax: sizeMax,
- }
-
- b, err := NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
-
- for i := 0; i < 4; i++ {
- s := fmt.Sprintf("%d", i)
- b.AddFile("F"+s, []byte(strings.Repeat("a", sizeMax+1)))
- }
-
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
-
- ss, err := shards.NewDirectorySearcher(dir)
- if err != nil {
- t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
- }
-
- q, err := query.Parse("aaa")
- if err != nil {
- t.Fatalf("Parse(aaa): %v", err)
- }
-
- var sOpts zoekt.SearchOptions
- ctx := context.Background()
- result, err := ss.Search(ctx, q, &sOpts)
- if err != nil {
- t.Fatalf("Search(%v): %v", q, err)
- }
-
- if len(result.Files) != 2 {
- t.Errorf("got %v files, want 2 files.", len(result.Files))
- }
- defer ss.Close()
-}
-
-func TestUpdate(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- opts := Options{
- IndexDir: dir,
- ShardMax: 1024,
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- FileURLTemplate: "url",
- },
- Parallelism: 2,
- SizeMax: 1 << 20,
- }
-
- if b, err := NewBuilder(opts); err != nil {
- t.Fatalf("NewBuilder: %v", err)
- } else {
- b.AddFile("F", []byte("hoi"))
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
- }
- ss, err := shards.NewDirectorySearcher(dir)
- if err != nil {
- t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
- }
-
- ctx := context.Background()
- repos, err := ss.List(ctx, &query.Repo{Pattern: "repo"})
- if err != nil {
- t.Fatalf("List: %v", err)
- }
-
- if len(repos.Repos) != 1 {
- t.Errorf("List(repo): got %v, want 1 repo", repos.Repos)
- }
-
- fs, err := filepath.Glob(filepath.Join(dir, "*"))
- if err != nil {
- t.Fatalf("glob: %v", err)
- }
-
- opts.RepositoryDescription = zoekt.Repository{
- Name: "repo2",
- FileURLTemplate: "url2",
- }
-
- if b, err := NewBuilder(opts); err != nil {
- t.Fatalf("NewBuilder: %v", err)
- } else {
- b.AddFile("F", []byte("hoi"))
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
- }
-
- // This is ugly, and potentially flaky, but there is no
- // observable synchronization for the Sharded searcher, so
- // this is the best we can do.
- time.Sleep(100 * time.Millisecond)
-
- ctx = context.Background()
- if repos, err = ss.List(ctx, &query.Repo{Pattern: "repo"}); err != nil {
- t.Fatalf("List: %v", err)
- } else if len(repos.Repos) != 2 {
- t.Errorf("List(repo): got %v, want 2 repos", repos.Repos)
- }
-
- for _, fn := range fs {
- log.Printf("removing %s", fn)
- if err := os.Remove(fn); err != nil {
- t.Fatalf("Remove(%s): %v", fn, err)
- }
- }
-
- time.Sleep(100 * time.Millisecond)
-
- ctx = context.Background()
- if repos, err = ss.List(ctx, &query.Repo{Pattern: "repo"}); err != nil {
- t.Fatalf("List: %v", err)
- } else if len(repos.Repos) != 1 {
- var ss []string
- for _, r := range repos.Repos {
- ss = append(ss, r.Repository.Name)
- }
- t.Errorf("List(repo): got %v, want 1 repo", ss)
- }
-}
-
-func TestDeleteOldShards(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- opts := Options{
- IndexDir: dir,
- ShardMax: 1024,
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- FileURLTemplate: "url",
- },
- SizeMax: 1 << 20,
- }
- opts.SetDefaults()
-
- b, err := NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
- for i := 0; i < 4; i++ {
- s := fmt.Sprintf("%d\n", i)
- b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2)))
- }
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
-
- glob := filepath.Join(dir, "*")
- fs, err := filepath.Glob(glob)
- if err != nil {
- t.Fatalf("Glob(%s): %v", glob, err)
- } else if len(fs) != 4 {
- t.Fatalf("Glob(%s): got %v, want 4 shards", glob, fs)
- }
-
- if fi, err := os.Lstat(fs[0]); err != nil {
- t.Fatalf("Lstat: %v", err)
- } else if fi.Mode()&0o666 == 0o600 {
- // This fails spuriously if your umask is very restrictive.
- t.Errorf("got mode %o, should respect umask.", fi.Mode())
- }
-
- // Do again, without sharding.
- opts.ShardMax = 1 << 20
- b, err = NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
- for i := 0; i < 4; i++ {
- s := fmt.Sprintf("%d\n", i)
- b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2)))
- }
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
-
- fs, err = filepath.Glob(glob)
- if err != nil {
- t.Fatalf("Glob(%s): %v", glob, err)
- } else if len(fs) != 1 {
- t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
- }
-
- // Again, but don't index anything; should leave old shards intact.
- b, err = NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
-
- fs, err = filepath.Glob(glob)
- if err != nil {
- t.Fatalf("Glob(%s): %v", glob, err)
- } else if len(fs) != 1 {
- t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
- }
-}
-
-func TestPartialSuccess(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- opts := Options{
- IndexDir: dir,
- ShardMax: 1024,
- SizeMax: 1 << 20,
- Parallelism: 1,
- }
- opts.RepositoryDescription.Name = "repo"
- opts.SetDefaults()
-
- b, err := NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
-
- for i := 0; i < 4; i++ {
- nm := fmt.Sprintf("F%d", i)
-
- // no error checking: the 2nd call will fail
- b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
- if i == 1 {
- // force writes to fail.
- if err := os.Chmod(dir, 0o555); err != nil {
- t.Fatalf("chmod(%s): %s", dir, err)
- }
- }
- }
-
- if err := os.Chmod(dir, 0o755); err != nil {
- t.Fatalf("chmod(%s, writable): %s", dir, err)
- }
-
- // No error checking.
- b.Finish()
-
- // Finish cleans up temporary files.
- if fs, err := filepath.Glob(dir + "/*"); err != nil {
- t.Errorf("glob(%s): %v", dir, err)
- } else if len(fs) != 0 {
- t.Errorf("got shards %v, want []", fs)
- }
-}
-
-type filerankCase struct {
- name string
- docs []*zoekt.Document
- want []int
-}
-
-func testFileRankAspect(t *testing.T, c filerankCase) {
- var want []*zoekt.Document
- for _, j := range c.want {
- want = append(want, c.docs[j])
- }
-
- got := make([]*zoekt.Document, len(c.docs))
- copy(got, c.docs)
- sortDocuments(got)
-
- print := func(ds []*zoekt.Document) string {
- r := ""
- for _, d := range ds {
- r += fmt.Sprintf("%v, ", d)
- }
- return r
- }
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got docs [%v], want [%v]", print(got), print(want))
- }
-}
-
-func TestFileRank(t *testing.T) {
- for _, c := range []filerankCase{{
- name: "filename",
- docs: []*zoekt.Document{
- {
- Name: "longlonglong",
- Content: []byte("bla"),
- },
- {
- Name: "short",
- Content: []byte("bla"),
- },
- },
- want: []int{1, 0},
- }, {
- name: "test",
- docs: []*zoekt.Document{
- {
- Name: "test",
- Content: []byte("bla"),
- },
- {
- Name: "longlonglong",
- Content: []byte("bla"),
- },
- },
- want: []int{1, 0},
- }, {
- name: "content",
- docs: []*zoekt.Document{
- {
- Content: []byte("bla"),
- },
- {
- Content: []byte("blablablabla"),
- },
- {
- Content: []byte("blabla"),
- },
- },
- want: []int{0, 2, 1},
- }} {
- t.Run(c.name, func(t *testing.T) {
- testFileRankAspect(t, c)
- })
- }
-}
-
-func TestEmptyContent(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- opts := Options{
- IndexDir: dir,
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- },
- }
- opts.SetDefaults()
-
- b, err := NewBuilder(opts)
- if err != nil {
- t.Fatalf("NewBuilder: %v", err)
- }
- if err := b.Finish(); err != nil {
- t.Errorf("Finish: %v", err)
- }
-
- fs, _ := filepath.Glob(dir + "/*")
- if len(fs) != 1 {
- t.Fatalf("want a shard, got %v", fs)
- }
-
- ss, err := shards.NewDirectorySearcher(dir)
- if err != nil {
- t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
- }
- defer ss.Close()
-
- ctx := context.Background()
- result, err := ss.List(ctx, &query.Const{Value: true})
- if err != nil {
- t.Fatalf("List: %v", err)
- }
-
- if len(result.Repos) != 1 || result.Repos[0].Repository.Name != "repo" {
- t.Errorf("got %+v, want 1 repo.", result.Repos)
- }
-}
diff --git a/cmd/flags.go b/cmd/flags.go
deleted file mode 100644
index e0b9005..0000000
--- a/cmd/flags.go
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2019 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package cmd
-
-import (
- "flag"
- "fmt"
- "os"
- "path/filepath"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
-)
-
-var (
- version = flag.Bool("version", false, "Print version number")
- opts = &build.Options{}
-)
-
-func init() {
- opts.Flags(flag.CommandLine)
-}
-
-func OptionsFromFlags() *build.Options {
- if *version {
- name := filepath.Base(os.Args[0])
- fmt.Printf("%s version %q\n", name, zoekt.Version)
- os.Exit(0)
- }
-
- opts.SetDefaults()
- return opts
-}
diff --git a/cmd/zoekt-archive-index/archive.go b/cmd/zoekt-archive-index/archive.go
deleted file mode 100644
index c1afe5b..0000000
--- a/cmd/zoekt-archive-index/archive.go
+++ /dev/null
@@ -1,188 +0,0 @@
-package main
-
-import (
- "archive/tar"
- "archive/zip"
- "bytes"
- "compress/gzip"
- "fmt"
- "io"
- "io/ioutil"
- "net/http"
- "net/url"
- "os"
- "strings"
-)
-
-type Archive interface {
- Next() (*File, error)
- Close() error
-}
-
-type File struct {
- io.ReadCloser
- Name string
- Size int64
-}
-
-type tarArchive struct {
- io.Closer
- tr *tar.Reader
-}
-
-func (a *tarArchive) Next() (*File, error) {
- for {
- hdr, err := a.tr.Next()
- if err != nil {
- return nil, err
- }
-
- // We only care about files
- if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
- continue
- }
-
- return &File{
- ReadCloser: ioutil.NopCloser(a.tr),
- Name: hdr.Name,
- Size: hdr.Size,
- }, nil
- }
-}
-
-type zipArchive struct {
- io.Closer
- files []*zip.File
-}
-
-func (a *zipArchive) Next() (*File, error) {
- if len(a.files) == 0 {
- return nil, io.EOF
- }
-
- f := a.files[0]
- a.files = a.files[1:]
-
- r, err := f.Open()
- if err != nil {
- return nil, err
- }
-
- return &File{
- ReadCloser: r,
- Name: f.Name,
- Size: int64(f.UncompressedSize64),
- }, nil
-}
-
-func newZipArchive(r io.Reader, closer io.Closer) (*zipArchive, error) {
- f, ok := r.(interface {
- io.ReaderAt
- Stat() (os.FileInfo, error)
- })
- if !ok {
- return nil, fmt.Errorf("streaming zip files not supported")
- }
-
- fi, err := f.Stat()
- if err != nil {
- return nil, err
- }
-
- zr, err := zip.NewReader(f, fi.Size())
- if err != nil {
- return nil, err
- }
-
- // Filter out non files
- files := zr.File[:0]
- for _, f := range zr.File {
- if f.Mode().IsRegular() {
- files = append(files, f)
- }
- }
-
- return &zipArchive{
- Closer: closer,
- files: files,
- }, nil
-}
-
-func detectContentType(r io.Reader) (string, io.Reader, error) {
- var buf [512]byte
- n, err := io.ReadFull(r, buf[:])
- if err != nil && err != io.ErrUnexpectedEOF {
- return "", nil, err
- }
-
- ct := http.DetectContentType(buf[:n])
-
- // If we are a seeker, we can just undo our read
- if s, ok := r.(io.Seeker); ok {
- _, err := s.Seek(int64(-n), io.SeekCurrent)
- return ct, r, err
- }
-
- // Otherwise return a new reader which merges in the read bytes
- return ct, io.MultiReader(bytes.NewReader(buf[:n]), r), nil
-}
-
-func openReader(u string) (io.ReadCloser, error) {
- if strings.HasPrefix(u, "https://") || strings.HasPrefix(u, "http://") {
- resp, err := http.Get(u)
- if err != nil {
- return nil, err
- }
- if resp.StatusCode < 200 || resp.StatusCode >= 300 {
- b, err := ioutil.ReadAll(io.LimitReader(resp.Body, 1024))
- _ = resp.Body.Close()
- if err != nil {
- return nil, err
- }
- return nil, &url.Error{
- Op: "Get",
- URL: u,
- Err: fmt.Errorf("%s: %s", resp.Status, string(b)),
- }
- }
- return resp.Body, nil
- } else if u == "-" {
- return ioutil.NopCloser(os.Stdin), nil
- }
-
- return os.Open(u)
-}
-
-// openArchive opens the tar at the URL or filepath u. Also supported is tgz
-// files over http.
-func openArchive(u string) (ar Archive, err error) {
- readCloser, err := openReader(u)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- _ = readCloser.Close()
- }
- }()
-
- ct, r, err := detectContentType(readCloser)
- if err != nil {
- return nil, err
- }
- switch ct {
- case "application/x-gzip":
- r, err = gzip.NewReader(r)
- if err != nil {
- return nil, err
- }
-
- case "application/zip":
- return newZipArchive(r, readCloser)
- }
-
- return &tarArchive{
- Closer: readCloser,
- tr: tar.NewReader(r),
- }, nil
-}
diff --git a/cmd/zoekt-archive-index/e2e_test.go b/cmd/zoekt-archive-index/e2e_test.go
deleted file mode 100644
index daaf556..0000000
--- a/cmd/zoekt-archive-index/e2e_test.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package main
-
-import (
- "archive/tar"
- "archive/zip"
- "compress/gzip"
- "context"
- "errors"
- "flag"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "os"
- "strings"
- "testing"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/query"
- "github.com/google/zoekt/shards"
-)
-
-func TestMain(m *testing.M) {
- flag.Parse()
- if !testing.Verbose() {
- log.SetOutput(ioutil.Discard)
- }
- os.Exit(m.Run())
-}
-
-func writeArchive(w io.Writer, format string, files map[string]string) (err error) {
- if format == "zip" {
- zw := zip.NewWriter(w)
- for name, body := range files {
- f, err := zw.Create(name)
- if err != nil {
- return err
- }
- if _, err := f.Write([]byte(body)); err != nil {
- return err
- }
- }
- return zw.Close()
- }
-
- if format == "tgz" {
- gw := gzip.NewWriter(w)
- defer func() {
- err2 := gw.Close()
- if err == nil {
- err = err2
- }
- }()
- w = gw
- format = "tar"
- }
-
- if format != "tar" {
- return errors.New("expected tar")
- }
-
- tw := tar.NewWriter(w)
-
- for name, body := range files {
- hdr := &tar.Header{
- Name: name,
- Mode: 0o600,
- Size: int64(len(body)),
- }
- if err := tw.WriteHeader(hdr); err != nil {
- return err
- }
- if _, err := tw.Write([]byte(body)); err != nil {
- return err
- }
- }
- if err := tw.Close(); err != nil {
- return err
- }
-
- return nil
-}
-
-// TestIndexArg tests zoekt-archive-index by creating an archive and then
-// indexing and executing searches and checking we get expected results.
-// Additionally, we test that the index is properly updated with the
-// -incremental=true option changing the options between indexes and ensuring
-// the results change as expected.
-func TestIndexIncrementally(t *testing.T) {
- for _, format := range []string{"tar", "tgz", "zip"} {
- t.Run(format, func(t *testing.T) {
- testIndexIncrementally(t, format)
- })
- }
-}
-
-func testIndexIncrementally(t *testing.T, format string) {
- indexdir, err := ioutil.TempDir("", "TestIndexArg-index")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(indexdir)
- archive, err := ioutil.TempFile("", "TestIndexArg-archive")
- if err != nil {
- t.Fatalf("TempFile: %v", err)
- }
- defer os.Remove(archive.Name())
-
- fileSize := 1000
-
- files := map[string]string{}
- for i := 0; i < 4; i++ {
- s := fmt.Sprintf("%d", i)
- files["F"+s] = strings.Repeat("a", fileSize)
- }
-
- err = writeArchive(archive, format, files)
- if err != nil {
- t.Fatalf("unable to create archive %v", err)
- }
- archive.Close()
-
- // tests contain options used to build an index and the expected number of
- // files in the result set based on the options.
- tests := []struct {
- largeFiles []string
- wantNumFiles int
- }{
- {
- largeFiles: []string{},
- wantNumFiles: 0,
- },
- {
- largeFiles: []string{"F0", "F2"},
- wantNumFiles: 2,
- },
- }
-
- for _, test := range tests {
- largeFiles, wantNumFiles := test.largeFiles, test.wantNumFiles
-
- bopts := build.Options{
- SizeMax: fileSize - 1,
- IndexDir: indexdir,
- LargeFiles: largeFiles,
- }
- opts := Options{
- Incremental: true,
- Archive: archive.Name(),
- Name: "repo",
- Branch: "master",
- Commit: "cccccccccccccccccccccccccccccccccccccccc",
- Strip: 0,
- }
-
- if err := do(opts, bopts); err != nil {
- t.Fatalf("error creating index: %v", err)
- }
-
- ss, err := shards.NewDirectorySearcher(indexdir)
- if err != nil {
- t.Fatalf("NewDirectorySearcher(%s): %v", indexdir, err)
- }
- defer ss.Close()
-
- q, err := query.Parse("aaa")
- if err != nil {
- t.Fatalf("Parse(aaa): %v", err)
- }
-
- var sOpts zoekt.SearchOptions
- result, err := ss.Search(context.Background(), q, &sOpts)
- if err != nil {
- t.Fatalf("Search(%v): %v", q, err)
- }
-
- if len(result.Files) != wantNumFiles {
- t.Errorf("got %v, want %d files.", result.Files, wantNumFiles)
- }
- }
-}
diff --git a/cmd/zoekt-archive-index/main.go b/cmd/zoekt-archive-index/main.go
deleted file mode 100644
index 8754b4e..0000000
--- a/cmd/zoekt-archive-index/main.go
+++ /dev/null
@@ -1,232 +0,0 @@
-// Command zoekt-archive-index indexes an archive.
-//
-// Example via github.com:
-//
-// zoekt-archive-index -incremental -commit b57cb1605fd11ba2ecfa7f68992b4b9cc791934d -name github.com/gorilla/mux -strip_components 1 https://codeload.github.com/gorilla/mux/legacy.tar.gz/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d
-//
-// zoekt-archive-index -branch master https://github.com/gorilla/mux/commit/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d
-package main
-
-import (
- "errors"
- "flag"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "net/url"
- "strings"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/cmd"
- "github.com/google/zoekt/gitindex"
- "go.uber.org/automaxprocs/maxprocs"
-)
-
-// stripComponents removes the specified number of leading path
-// elements. Pathnames with fewer elements will return the empty string.
-func stripComponents(path string, count int) string {
- for i := 0; path != "" && i < count; i++ {
- i := strings.Index(path, "/")
- if i < 0 {
- return ""
- }
- path = path[i+1:]
- }
- return path
-}
-
-// isGitOID checks if the revision is a git OID SHA string.
-//
-// Note: This doesn't mean the SHA exists in a repository, nor does it mean it
-// isn't a ref. Git allows 40-char hexadecimal strings to be references.
-func isGitOID(s string) bool {
- if len(s) != 40 {
- return false
- }
- for _, r := range s {
- if !(('0' <= r && r <= '9') ||
- ('a' <= r && r <= 'f') ||
- ('A' <= r && r <= 'F')) {
- return false
- }
- }
- return true
-}
-
-type Options struct {
- Incremental bool
-
- Archive string
- Name string
- RepoURL string
- Branch string
- Commit string
- Strip int
-}
-
-func (o *Options) SetDefaults() {
- // We guess based on the archive URL.
- u, _ := url.Parse(o.Archive)
- if u == nil {
- return
- }
-
- setRef := func(ref string) {
- if isGitOID(ref) && o.Commit == "" {
- o.Commit = ref
- }
- if !isGitOID(ref) && o.Branch == "" {
- o.Branch = ref
- }
- }
-
- switch u.Host {
- case "github.com", "codeload.github.com":
- // https://github.com/octokit/octokit.rb/commit/3d21ec53a331a6f037a91c368710b99387d012c1
- // https://github.com/octokit/octokit.rb/blob/master/README.md
- // https://github.com/octokit/octokit.rb/tree/master/lib
- // https://codeload.github.com/octokit/octokit.rb/legacy.tar.gz/master
- parts := strings.Split(u.Path, "/")
- if len(parts) > 2 && o.Name == "" {
- o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2])
- o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2])
- }
- if len(parts) > 4 {
- setRef(parts[4])
- if u.Host == "github.com" {
- o.Archive = fmt.Sprintf("https://codeload.github.com/%s/%s/legacy.tar.gz/%s", parts[1], parts[2], parts[4])
- }
- }
- o.Strip = 1
- case "api.github.com":
- // https://api.github.com/repos/octokit/octokit.rb/tarball/master
- parts := strings.Split(u.Path, "/")
- if len(parts) > 2 && o.Name == "" {
- o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2])
- o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2])
- }
- if len(parts) > 5 {
- setRef(parts[5])
- }
- o.Strip = 1
- }
-}
-
-func do(opts Options, bopts build.Options) error {
- opts.SetDefaults()
-
- if opts.Name == "" && opts.RepoURL == "" {
- return errors.New("-name or -url required")
- }
- if opts.Branch == "" {
- return errors.New("-branch required")
- }
-
- if opts.Name != "" {
- bopts.RepositoryDescription.Name = opts.Name
- }
- if opts.RepoURL != "" {
- u, err := url.Parse(opts.RepoURL)
- if err != nil {
- return err
- }
- if err := gitindex.SetTemplatesFromOrigin(&bopts.RepositoryDescription, u); err != nil {
- return err
- }
- }
- bopts.SetDefaults()
- bopts.RepositoryDescription.Branches = []zoekt.RepositoryBranch{{Name: opts.Branch, Version: opts.Commit}}
- brs := []string{opts.Branch}
-
- if opts.Incremental && bopts.IncrementalSkipIndexing() {
- return nil
- }
-
- a, err := openArchive(opts.Archive)
- if err != nil {
- return err
- }
- defer a.Close()
-
- bopts.RepositoryDescription.Source = opts.Archive
- builder, err := build.NewBuilder(bopts)
- if err != nil {
- return err
- }
-
- add := func(f *File) error {
- defer f.Close()
-
- contents, err := ioutil.ReadAll(f)
- if err != nil {
- return err
- }
-
- name := stripComponents(f.Name, opts.Strip)
- if name == "" {
- return nil
- }
-
- return builder.Add(zoekt.Document{
- Name: name,
- Content: contents,
- Branches: brs,
- })
- }
-
- for {
- f, err := a.Next()
- if err == io.EOF {
- break
- }
- if err != nil {
- return err
- }
-
- if err := add(f); err != nil {
- return err
- }
- }
-
- return builder.Finish()
-}
-
-func main() {
- var (
- incremental = flag.Bool("incremental", true, "only index changed repositories")
-
- name = flag.String("name", "", "The repository name for the archive")
- urlRaw = flag.String("url", "", "The repository URL for the archive")
- branch = flag.String("branch", "", "The branch name for the archive")
- commit = flag.String("commit", "", "The commit sha for the archive. If incremental this will avoid updating shards already at commit")
- strip = flag.Int("strip_components", 0, "Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped.")
- )
- flag.Parse()
-
- // Tune GOMAXPROCS to match Linux container CPU quota.
- maxprocs.Set()
-
- log.SetFlags(log.LstdFlags | log.Lshortfile)
-
- if len(flag.Args()) != 1 {
- log.Fatal("expected argument for archive location")
- }
- archive := flag.Args()[0]
- bopts := cmd.OptionsFromFlags()
- opts := Options{
- Incremental: *incremental,
-
- Archive: archive,
- Name: *name,
- RepoURL: *urlRaw,
- Branch: *branch,
- Commit: *commit,
- Strip: *strip,
- }
-
- if err := do(opts, *bopts); err != nil {
- log.Fatal(err)
- }
-}
diff --git a/cmd/zoekt-git-clone/main.go b/cmd/zoekt-git-clone/main.go
deleted file mode 100644
index eb85488..0000000
--- a/cmd/zoekt-git-clone/main.go
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This binary fetches all repos of a user or organization and clones
-// them. It is strongly recommended to get a personal API token from
-// https://github.com/settings/tokens, save the token in a file, and
-// point the --token option to it.
-package main
-
-import (
- "flag"
- "fmt"
- "log"
- "net/url"
- "os"
- "path/filepath"
- "strings"
-
- "github.com/google/zoekt/gitindex"
-)
-
-func main() {
- dest := flag.String("dest", "", "destination directory")
- flag.Parse()
-
- if *dest == "" {
- log.Fatal("must set --dest")
- }
- if len(flag.Args()) == 0 {
- log.Fatal("must provide URL")
- }
- u, err := url.Parse(flag.Arg(0))
- if err != nil {
- log.Fatalf("url.Parse: %v", err)
- }
-
- name := filepath.Join(u.Host, u.Path)
- name = strings.TrimSuffix(name, ".git")
-
- destDir := filepath.Dir(filepath.Join(*dest, name))
- if err := os.MkdirAll(destDir, 0o755); err != nil {
- log.Fatal(err)
- }
-
- config := map[string]string{
- "zoekt.name": name,
- }
-
- destRepo, err := gitindex.CloneRepo(destDir, filepath.Base(name), u.String(), config)
- if err != nil {
- log.Fatalf("CloneRepo: %v", err)
- }
- if destRepo != "" {
- fmt.Println(destRepo)
- }
-}
diff --git a/cmd/zoekt-git-index/main.go b/cmd/zoekt-git-index/main.go
deleted file mode 100644
index 50997e5..0000000
--- a/cmd/zoekt-git-index/main.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "flag"
- "log"
- "os"
- "path/filepath"
- "strings"
-
- "github.com/google/zoekt/cmd"
- "github.com/google/zoekt/gitindex"
- "go.uber.org/automaxprocs/maxprocs"
-)
-
-func main() {
- allowMissing := flag.Bool("allow_missing_branches", false, "allow missing branches.")
- submodules := flag.Bool("submodules", true, "if set to false, do not recurse into submodules")
- branchesStr := flag.String("branches", "HEAD", "git branches to index.")
- branchPrefix := flag.String("prefix", "refs/heads/", "prefix for branch names")
-
- incremental := flag.Bool("incremental", true, "only index changed repositories")
- repoCacheDir := flag.String("repo_cache", "", "directory holding bare git repos, named by URL. "+
- "this is used to find repositories for submodules. "+
- "It also affects name if the indexed repository is under this directory.")
- flag.Parse()
-
- // Tune GOMAXPROCS to match Linux container CPU quota.
- maxprocs.Set()
-
- if *repoCacheDir != "" {
- dir, err := filepath.Abs(*repoCacheDir)
- if err != nil {
- log.Fatalf("Abs: %v", err)
- }
- *repoCacheDir = dir
- }
- opts := cmd.OptionsFromFlags()
-
- var branches []string
- if *branchesStr != "" {
- branches = strings.Split(*branchesStr, ",")
- }
-
- gitRepos := map[string]string{}
- for _, repoDir := range flag.Args() {
- repoDir, err := filepath.Abs(repoDir)
- if err != nil {
- log.Fatal(err)
- }
- repoDir = filepath.Clean(repoDir)
-
- name := strings.TrimSuffix(repoDir, "/.git")
- if *repoCacheDir != "" && strings.HasPrefix(name, *repoCacheDir) {
- name = strings.TrimPrefix(name, *repoCacheDir+"/")
- name = strings.TrimSuffix(name, ".git")
- } else {
- name = strings.TrimSuffix(filepath.Base(name), ".git")
- }
- gitRepos[repoDir] = name
- }
-
- exitStatus := 0
- for dir, name := range gitRepos {
- opts.RepositoryDescription.Name = name
- gitOpts := gitindex.Options{
- BranchPrefix: *branchPrefix,
- Incremental: *incremental,
- Submodules: *submodules,
- RepoCacheDir: *repoCacheDir,
- AllowMissingBranch: *allowMissing,
- BuildOptions: *opts,
- Branches: branches,
- RepoDir: dir,
- }
-
- if err := gitindex.IndexGitRepo(gitOpts); err != nil {
- log.Printf("indexGitRepo(%s): %v", dir, err)
- exitStatus = 1
- }
- }
- os.Exit(exitStatus)
-}
diff --git a/cmd/zoekt-hg-index/main.go b/cmd/zoekt-hg-index/main.go
deleted file mode 100644
index dd782a6..0000000
--- a/cmd/zoekt-hg-index/main.go
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright 2020 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// zoekt-hg-index provides bare-bones Mercurial indexing
-package main
-
-import (
- "flag"
- "fmt"
- "log"
- "path/filepath"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/cmd"
-
- "go.uber.org/automaxprocs/maxprocs"
- "humungus.tedunangst.com/r/gerc"
-)
-
-func main() {
- revisionStr := flag.String("revision", "", "hg revision to index")
- flag.Parse()
- maxprocs.Set()
- opts := cmd.OptionsFromFlags()
-
- if len(flag.Args()) < 1 {
- log.Fatal("hg repo directory argument missing")
- }
- dir, err := filepath.Abs(flag.Arg(0))
- if err != nil {
- log.Fatal(err)
- }
- opts.RepositoryDescription.Name = dir
-
- if err := indexHg(dir, *revisionStr, opts); err != nil {
- log.Fatal(err)
- }
-}
-
-func indexHg(dir, rev string, opts *build.Options) error {
- r, err := gerc.Open(dir)
- if err != nil {
- log.Fatal(err)
- }
- defer r.Close()
-
- builder, err := build.NewBuilder(*opts)
- if err != nil {
- return err
- }
- defer builder.Finish()
-
- mfs, err := r.GetFiles(gerc.FilesArgs{
- Revision: rev,
- })
- if err != nil {
- return fmt.Errorf("GetFiles %v", err)
- }
-
- for _, mf := range mfs {
- fd := gerc.FileDataArgs{
- Filename: mf.Name,
- Revision: rev,
- }
- content, err := r.GetFileData(fd)
- if err != nil {
- return fmt.Errorf("GetFileData %v", err)
- }
- if err := builder.Add(zoekt.Document{
- Name: mf.Name,
- Content: content,
- }); err != nil {
- return err
- }
- }
- return builder.Finish()
-}
diff --git a/cmd/zoekt-index/main.go b/cmd/zoekt-index/main.go
deleted file mode 100644
index a103f21..0000000
--- a/cmd/zoekt-index/main.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "os"
- "path/filepath"
- "runtime/pprof"
- "strings"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/cmd"
- "go.uber.org/automaxprocs/maxprocs"
-)
-
-type fileInfo struct {
- name string
- size int64
-}
-
-type fileAggregator struct {
- ignoreDirs map[string]struct{}
- sizeMax int64
- sink chan fileInfo
-}
-
-func (a *fileAggregator) add(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- if info.IsDir() {
- base := filepath.Base(path)
- if _, ok := a.ignoreDirs[base]; ok {
- return filepath.SkipDir
- }
- }
-
- if info.Mode().IsRegular() {
- a.sink <- fileInfo{path, info.Size()}
- }
- return nil
-}
-
-func main() {
- cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file")
- ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.")
- flag.Parse()
-
- // Tune GOMAXPROCS to match Linux container CPU quota.
- maxprocs.Set()
-
- opts := cmd.OptionsFromFlags()
- if *cpuProfile != "" {
- f, err := os.Create(*cpuProfile)
- if err != nil {
- log.Fatal(err)
- }
- pprof.StartCPUProfile(f)
- defer pprof.StopCPUProfile()
- }
-
- ignoreDirMap := map[string]struct{}{}
- if *ignoreDirs != "" {
- dirs := strings.Split(*ignoreDirs, ",")
- for _, d := range dirs {
- d = strings.TrimSpace(d)
- if d != "" {
- ignoreDirMap[d] = struct{}{}
- }
- }
- }
- for _, arg := range flag.Args() {
- opts.RepositoryDescription.Source = arg
- if err := indexArg(arg, *opts, ignoreDirMap); err != nil {
- log.Fatal(err)
- }
- }
-}
-
-func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error {
- dir, err := filepath.Abs(filepath.Clean(arg))
- if err != nil {
- return err
- }
-
- opts.RepositoryDescription.Name = filepath.Base(dir)
- builder, err := build.NewBuilder(opts)
- if err != nil {
- return err
- }
- defer builder.Finish()
-
- comm := make(chan fileInfo, 100)
- agg := fileAggregator{
- ignoreDirs: ignore,
- sink: comm,
- sizeMax: int64(opts.SizeMax),
- }
-
- go func() {
- if err := filepath.Walk(dir, agg.add); err != nil {
- log.Fatal(err)
- }
- close(comm)
- }()
-
- for f := range comm {
- displayName := strings.TrimPrefix(f.name, dir+"/")
- if f.size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(displayName) {
- builder.Add(zoekt.Document{
- Name: displayName,
- SkipReason: fmt.Sprintf("document size %d larger than limit %d", f.size, opts.SizeMax),
- })
- continue
- }
- content, err := ioutil.ReadFile(f.name)
- if err != nil {
- return err
- }
-
- builder.AddFile(displayName, content)
- }
-
- return builder.Finish()
-}
diff --git a/cmd/zoekt-indexserver/config.go b/cmd/zoekt-indexserver/config.go
deleted file mode 100644
index d213774..0000000
--- a/cmd/zoekt-indexserver/config.go
+++ /dev/null
@@ -1,268 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "bytes"
- "encoding/json"
- "io/ioutil"
- "log"
- "math/rand"
- "net/http"
- "net/url"
- "os"
- "os/exec"
- "path/filepath"
- "time"
-
- "github.com/fsnotify/fsnotify"
-)
-
-type ConfigEntry struct {
- GithubUser string
- GithubOrg string
- BitBucketServerProject string
- GitHubURL string
- GitilesURL string
- CGitURL string
- BitBucketServerURL string
- DisableTLS bool
- CredentialPath string
- ProjectType string
- Name string
- Exclude string
- GitLabURL string
- OnlyPublic bool
- GerritApiURL string
- Topics []string
- ExcludeTopics []string
-}
-
-func randomize(entries []ConfigEntry) []ConfigEntry {
- perm := rand.Perm(len(entries))
-
- var shuffled []ConfigEntry
- for _, i := range perm {
- shuffled = append(shuffled, entries[i])
- }
-
- return shuffled
-}
-
-func isHTTP(u string) bool {
- asURL, err := url.Parse(u)
- return err == nil && (asURL.Scheme == "http" || asURL.Scheme == "https")
-}
-
-func readConfigURL(u string) ([]ConfigEntry, error) {
- var body []byte
- var readErr error
-
- if isHTTP(u) {
- rep, err := http.Get(u)
- if err != nil {
- return nil, err
- }
- defer rep.Body.Close()
-
- body, readErr = ioutil.ReadAll(rep.Body)
- } else {
- body, readErr = ioutil.ReadFile(u)
- }
-
- if readErr != nil {
- return nil, readErr
- }
-
- var result []ConfigEntry
- if err := json.Unmarshal(body, &result); err != nil {
- return nil, err
- }
- return result, nil
-}
-
-func watchFile(path string) (<-chan struct{}, error) {
- watcher, err := fsnotify.NewWatcher()
- if err != nil {
- return nil, err
- }
-
- if err := watcher.Add(filepath.Dir(path)); err != nil {
- return nil, err
- }
-
- out := make(chan struct{}, 1)
- go func() {
- var last time.Time
- for {
- select {
- case <-watcher.Events:
- fi, err := os.Stat(path)
- if err == nil && fi.ModTime() != last {
- out <- struct{}{}
- last = fi.ModTime()
- }
- case err := <-watcher.Errors:
- if err != nil {
- log.Printf("watcher error: %v", err)
- }
- }
- }
- }()
- return out, nil
-}
-
-func periodicMirrorFile(repoDir string, opts *Options, pendingRepos chan<- string) {
- ticker := time.NewTicker(opts.mirrorInterval)
-
- var watcher <-chan struct{}
- if !isHTTP(opts.mirrorConfigFile) {
- var err error
- watcher, err = watchFile(opts.mirrorConfigFile)
- if err != nil {
- log.Printf("watchFile(%q): %v", opts.mirrorConfigFile, err)
- }
- }
-
- var lastCfg []ConfigEntry
- for {
- cfg, err := readConfigURL(opts.mirrorConfigFile)
- if err != nil {
- log.Printf("readConfig(%s): %v", opts.mirrorConfigFile, err)
- } else {
- lastCfg = cfg
- }
-
- executeMirror(lastCfg, repoDir, pendingRepos)
-
- select {
- case <-watcher:
- log.Printf("mirror config %s changed", opts.mirrorConfigFile)
- case <-ticker.C:
- }
- }
-}
-
-func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string) {
- // Randomize the ordering in which we query
- // things. This is to ensure that quota limits don't
- // always hit the last one in the list.
- cfg = randomize(cfg)
- for _, c := range cfg {
- var cmd *exec.Cmd
- if c.GitHubURL != "" || c.GithubUser != "" || c.GithubOrg != "" {
- cmd = exec.Command("zoekt-mirror-github",
- "-dest", repoDir, "-delete")
- if c.GitHubURL != "" {
- cmd.Args = append(cmd.Args, "-url", c.GitHubURL)
- }
- if c.GithubUser != "" {
- cmd.Args = append(cmd.Args, "-user", c.GithubUser)
- } else if c.GithubOrg != "" {
- cmd.Args = append(cmd.Args, "-org", c.GithubOrg)
- }
- if c.Name != "" {
- cmd.Args = append(cmd.Args, "-name", c.Name)
- }
- if c.Exclude != "" {
- cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
- }
- if c.CredentialPath != "" {
- cmd.Args = append(cmd.Args, "-token", c.CredentialPath)
- }
- for _, topic := range c.Topics {
- cmd.Args = append(cmd.Args, "-topic", topic)
- }
- for _, topic := range c.ExcludeTopics {
- cmd.Args = append(cmd.Args, "-exclude_topic", topic)
- }
- } else if c.GitilesURL != "" {
- cmd = exec.Command("zoekt-mirror-gitiles",
- "-dest", repoDir, "-name", c.Name)
- if c.Exclude != "" {
- cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
- }
- cmd.Args = append(cmd.Args, c.GitilesURL)
- } else if c.CGitURL != "" {
- cmd = exec.Command("zoekt-mirror-gitiles",
- "-type", "cgit",
- "-dest", repoDir, "-name", c.Name)
- if c.Exclude != "" {
- cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
- }
- cmd.Args = append(cmd.Args, c.CGitURL)
- } else if c.BitBucketServerURL != "" {
- cmd = exec.Command("zoekt-mirror-bitbucket-server",
- "-dest", repoDir, "-url", c.BitBucketServerURL, "-delete")
- if c.BitBucketServerProject != "" {
- cmd.Args = append(cmd.Args, "-project", c.BitBucketServerProject)
- }
- if c.DisableTLS {
- cmd.Args = append(cmd.Args, "-disable-tls")
- }
- if c.ProjectType != "" {
- cmd.Args = append(cmd.Args, "-type", c.ProjectType)
- }
- if c.Name != "" {
- cmd.Args = append(cmd.Args, "-name", c.Name)
- }
- if c.Exclude != "" {
- cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
- }
- if c.CredentialPath != "" {
- cmd.Args = append(cmd.Args, "-credentials", c.CredentialPath)
- }
- } else if c.GitLabURL != "" {
- cmd = exec.Command("zoekt-mirror-gitlab",
- "-dest", repoDir, "-url", c.GitLabURL)
- if c.Name != "" {
- cmd.Args = append(cmd.Args, "-name", c.Name)
- }
- if c.Exclude != "" {
- cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
- }
- if c.OnlyPublic {
- cmd.Args = append(cmd.Args, "-public")
- }
- if c.CredentialPath != "" {
- cmd.Args = append(cmd.Args, "-token", c.CredentialPath)
- }
- } else if c.GerritApiURL != "" {
- cmd = exec.Command("zoekt-mirror-gerrit",
- "-dest", repoDir)
- if c.CredentialPath != "" {
- cmd.Args = append(cmd.Args, "-http-credentials", c.CredentialPath)
- }
- if c.Name != "" {
- cmd.Args = append(cmd.Args, "-name", c.Name)
- }
- if c.Exclude != "" {
- cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
- }
- cmd.Args = append(cmd.Args, c.GerritApiURL)
- }
-
- stdout, _ := loggedRun(cmd)
-
- for _, fn := range bytes.Split(stdout, []byte{'\n'}) {
- if len(fn) == 0 {
- continue
- }
-
- pendingRepos <- string(fn)
- }
-
- }
-}
diff --git a/cmd/zoekt-indexserver/main.go b/cmd/zoekt-indexserver/main.go
deleted file mode 100644
index 8392bb8..0000000
--- a/cmd/zoekt-indexserver/main.go
+++ /dev/null
@@ -1,298 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This program manages a zoekt indexing deployment:
-// * recycling logs
-// * periodically fetching new data.
-// * periodically reindexing all git repos.
-
-package main
-
-import (
- "bytes"
- "context"
- "flag"
- "fmt"
- "log"
- "math"
- "os"
- "os/exec"
- "path/filepath"
- "runtime"
- "strings"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/gitindex"
-)
-
-const day = time.Hour * 24
-
-func loggedRun(cmd *exec.Cmd) (out, err []byte) {
- outBuf := &bytes.Buffer{}
- errBuf := &bytes.Buffer{}
- cmd.Stdout = outBuf
- cmd.Stderr = errBuf
-
- log.Printf("run %v", cmd.Args)
- if err := cmd.Run(); err != nil {
- log.Printf("command %s failed: %v\nOUT: %s\nERR: %s",
- cmd.Args, err, outBuf.String(), errBuf.String())
- }
-
- return outBuf.Bytes(), errBuf.Bytes()
-}
-
-type Options struct {
- cpuFraction float64
- cpuCount int
- fetchInterval time.Duration
- mirrorInterval time.Duration
- indexFlagsStr string
- indexFlags []string
- mirrorConfigFile string
- maxLogAge time.Duration
- indexTimeout time.Duration
-}
-
-func (o *Options) validate() {
- if o.cpuFraction <= 0.0 || o.cpuFraction > 1.0 {
- log.Fatal("cpu_fraction must be between 0.0 and 1.0")
- }
-
- o.cpuCount = int(math.Trunc(float64(runtime.GOMAXPROCS(0)) * o.cpuFraction))
- if o.cpuCount < 1 {
- o.cpuCount = 1
- }
- if o.indexFlagsStr != "" {
- o.indexFlags = strings.Split(o.indexFlagsStr, " ")
- }
-}
-
-func (o *Options) defineFlags() {
- flag.DurationVar(&o.indexTimeout, "index_timeout", time.Hour, "kill index job after this much time")
- flag.DurationVar(&o.maxLogAge, "max_log_age", 3*day, "recycle index logs after this much time")
- flag.DurationVar(&o.fetchInterval, "fetch_interval", time.Hour, "run fetches this often")
- flag.StringVar(&o.mirrorConfigFile, "mirror_config",
- "", "JSON file holding mirror configuration.")
-
- flag.DurationVar(&o.mirrorInterval, "mirror_duration", 24*time.Hour, "find and clone new repos at this frequency.")
- flag.Float64Var(&o.cpuFraction, "cpu_fraction", 0.25,
- "use this fraction of the cores for indexing.")
- flag.StringVar(&o.indexFlagsStr, "git_index_flags", "", "space separated list of flags passed through to zoekt-git-index (e.g. -git_index_flags='-symbols=false -submodules=false'")
-}
-
-// periodicFetch runs git-fetch every once in a while. Results are
-// posted on pendingRepos.
-func periodicFetch(repoDir, indexDir string, opts *Options, pendingRepos chan<- string) {
- t := time.NewTicker(opts.fetchInterval)
- for {
- repos, err := gitindex.FindGitRepos(repoDir)
- if err != nil {
- log.Println(err)
- continue
- }
- if len(repos) == 0 {
- log.Printf("no repos found under %s", repoDir)
- }
-
- // TODO: Randomize to make sure quota throttling hits everyone.
-
- later := map[string]struct{}{}
- for _, dir := range repos {
- if ok := fetchGitRepo(dir); !ok {
- later[dir] = struct{}{}
- } else {
- pendingRepos <- dir
- }
- }
-
- for r := range later {
- pendingRepos <- r
- }
-
- <-t.C
- }
-}
-
-// fetchGitRepo runs git-fetch, and returns true if there was an
-// update.
-func fetchGitRepo(dir string) bool {
- cmd := exec.Command("git", "--git-dir", dir, "fetch", "origin")
- outBuf := &bytes.Buffer{}
- errBuf := &bytes.Buffer{}
-
- // Prevent prompting
- cmd.Stdin = &bytes.Buffer{}
- cmd.Stderr = errBuf
- cmd.Stdout = outBuf
- if err := cmd.Run(); err != nil {
- log.Printf("command %s failed: %v\nOUT: %s\nERR: %s",
- cmd.Args, err, outBuf.String(), errBuf.String())
- } else {
- return len(outBuf.Bytes()) != 0
- }
- return false
-}
-
-// indexPendingRepos consumes the directories on the repos channel and
-// indexes them, sequentially.
-func indexPendingRepos(indexDir, repoDir string, opts *Options, repos <-chan string) {
- for dir := range repos {
- indexPendingRepo(dir, indexDir, repoDir, opts)
-
- // Failures (eg. timeout) will leave temp files
- // around. We have to clean them, or they will fill up the indexing volume.
- if failures, err := filepath.Glob(filepath.Join(indexDir, "*.tmp")); err != nil {
- log.Printf("Glob: %v", err)
- } else {
- for _, f := range failures {
- os.Remove(f)
- }
- }
- }
-}
-
-func indexPendingRepo(dir, indexDir, repoDir string, opts *Options) {
- ctx, cancel := context.WithTimeout(context.Background(), opts.indexTimeout)
- defer cancel()
- args := []string{
- "-require_ctags",
- fmt.Sprintf("-parallelism=%d", opts.cpuCount),
- "-repo_cache", repoDir,
- "-index", indexDir,
- "-incremental",
- }
- args = append(args, opts.indexFlags...)
- args = append(args, dir)
- cmd := exec.CommandContext(ctx, "zoekt-git-index", args...)
- loggedRun(cmd)
-}
-
-// deleteLogs deletes old logs.
-func deleteLogs(logDir string, maxAge time.Duration) {
- fs, err := filepath.Glob(filepath.Join(logDir, "*"))
- if err != nil {
- log.Fatalf("filepath.Glob(%s): %v", logDir, err)
- }
-
- threshold := time.Now().Add(-maxAge)
- for _, fn := range fs {
- if fi, err := os.Lstat(fn); err == nil && fi.ModTime().Before(threshold) {
- os.Remove(fn)
- }
- }
-}
-
-func deleteLogsLoop(logDir string, maxAge time.Duration) {
- tick := time.NewTicker(maxAge / 100)
- for {
- deleteLogs(logDir, maxAge)
- <-tick.C
- }
-}
-
-// Delete the shard if its corresponding git repo can't be found.
-func deleteIfOrphan(repoDir string, fn string) error {
- f, err := os.Open(fn)
- if err != nil {
- return nil
- }
- defer f.Close()
-
- ifile, err := zoekt.NewIndexFile(f)
- if err != nil {
- return nil
- }
- defer ifile.Close()
-
- repo, _, err := zoekt.ReadMetadata(ifile)
- if err != nil {
- return nil
- }
-
- _, err = os.Stat(repo.Source)
- if os.IsNotExist(err) {
- log.Printf("deleting orphan shard %s; source %q not found", fn, repo.Source)
- return os.Remove(fn)
- }
-
- return err
-}
-
-func deleteOrphanIndexes(indexDir, repoDir string, watchInterval time.Duration) {
- t := time.NewTicker(watchInterval)
-
- expr := indexDir + "/*"
- for {
- fs, err := filepath.Glob(expr)
- if err != nil {
- log.Printf("Glob(%q): %v", expr, err)
- }
-
- for _, f := range fs {
- if err := deleteIfOrphan(repoDir, f); err != nil {
- log.Printf("deleteIfOrphan(%q): %v", f, err)
- }
- }
- <-t.C
- }
-}
-
-func main() {
- var opts Options
- opts.defineFlags()
- dataDir := flag.String("data_dir",
- filepath.Join(os.Getenv("HOME"), "zoekt-serving"), "directory holding all data.")
- indexDir := flag.String("index_dir", "", "directory holding index shards. Defaults to $data_dir/index/")
- flag.Parse()
- opts.validate()
-
- if *dataDir == "" {
- log.Fatal("must set --data_dir")
- }
-
- // Automatically prepend our own path at the front, to minimize
- // required configuration.
- if l, err := os.Readlink("/proc/self/exe"); err == nil {
- os.Setenv("PATH", filepath.Dir(l)+":"+os.Getenv("PATH"))
- }
-
- logDir := filepath.Join(*dataDir, "logs")
- if *indexDir == "" {
- *indexDir = filepath.Join(*dataDir, "index")
- }
- repoDir := filepath.Join(*dataDir, "repos")
- for _, s := range []string{logDir, *indexDir, repoDir} {
- if _, err := os.Stat(s); err == nil {
- continue
- }
-
- if err := os.MkdirAll(s, 0o755); err != nil {
- log.Fatalf("MkdirAll %s: %v", s, err)
- }
- }
-
- _, err := readConfigURL(opts.mirrorConfigFile)
- if err != nil {
- log.Fatalf("readConfigURL(%s): %v", opts.mirrorConfigFile, err)
- }
-
- pendingRepos := make(chan string, 10)
- go periodicMirrorFile(repoDir, &opts, pendingRepos)
- go deleteLogsLoop(logDir, opts.maxLogAge)
- go deleteOrphanIndexes(*indexDir, repoDir, opts.fetchInterval)
- go indexPendingRepos(*indexDir, repoDir, &opts, pendingRepos)
- periodicFetch(repoDir, *indexDir, &opts, pendingRepos)
-}
diff --git a/cmd/zoekt-mirror-bitbucket-server/main.go b/cmd/zoekt-mirror-bitbucket-server/main.go
deleted file mode 100644
index 9cfb204..0000000
--- a/cmd/zoekt-mirror-bitbucket-server/main.go
+++ /dev/null
@@ -1,270 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This binary fetches all repos of a project, and of a specific type, in case
-// these are specified, and clones them. By default it fetches and clones all
-// existing repos.
-package main
-
-import (
- "context"
- "crypto/tls"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "net/http"
- "net/url"
- "os"
- "path/filepath"
- "strings"
- "time"
-
- "github.com/gfleury/go-bitbucket-v1"
-
- "github.com/google/zoekt/gitindex"
-)
-
-func main() {
- dest := flag.String("dest", "", "destination directory")
- serverUrl := flag.String("url", "", "BitBucket Server url")
- disableTLS := flag.Bool("disable-tls", false, "disables TLS verification")
- credentialsFile := flag.String("credentials", ".bitbucket-credentials", "file holding BitBucket Server credentials")
- project := flag.String("project", "", "project to mirror")
- deleteRepos := flag.Bool("delete", false, "delete missing repos")
- namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
- excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
- projectType := flag.String("type", "", "only clone repos whose type matches the given string. "+
- "Type can be either NORMAl or PERSONAL. Clones projects of both types if not set.")
- flag.Parse()
-
- if *serverUrl == "" {
- log.Fatal("must set --url")
- }
-
- rootURL, err := url.Parse(*serverUrl)
- if err != nil {
- log.Fatalf("url.Parse(): %v", err)
- }
-
- if *dest == "" {
- log.Fatal("must set --dest")
- }
-
- if *projectType != "" && !IsValidProjectType(*projectType) {
- log.Fatal("type should be either NORMAL or PERSONAL")
- }
-
- destDir := filepath.Join(*dest, rootURL.Host)
- if err := os.MkdirAll(destDir, 0o755); err != nil {
- log.Fatal(err)
- }
-
- username := ""
- password := ""
- if *credentialsFile == "" {
- log.Fatal("must set --credentials")
- } else {
- content, err := ioutil.ReadFile(*credentialsFile)
- if err != nil {
- log.Fatal(err)
- }
- credentials := strings.Fields(string(content))
- username, password = credentials[0], credentials[1]
- }
-
- basicAuth := bitbucketv1.BasicAuth{UserName: username, Password: password}
- ctx, cancel := context.WithTimeout(context.Background(), 120000*time.Millisecond)
- ctx = context.WithValue(ctx, bitbucketv1.ContextBasicAuth, basicAuth)
- defer cancel()
-
- apiPath, err := url.Parse("/rest")
- if err != nil {
- log.Fatal(err)
- }
-
- apiBaseURL := rootURL.ResolveReference(apiPath).String()
-
- var config *bitbucketv1.Configuration
- if *disableTLS {
- tr := &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
- }
- httpClient := &http.Client{
- Transport: tr,
- }
- httpClientConfig := func(configs *bitbucketv1.Configuration) {
- configs.HTTPClient = httpClient
- }
- config = bitbucketv1.NewConfiguration(apiBaseURL, httpClientConfig)
- } else {
- config = bitbucketv1.NewConfiguration(apiBaseURL)
- }
- client := bitbucketv1.NewAPIClient(ctx, config)
-
- var repos []bitbucketv1.Repository
-
- if *project != "" {
- repos, err = getProjectRepos(*client, *project)
- } else {
- repos, err = getAllRepos(*client)
- }
-
- if err != nil {
- log.Fatal(err)
- }
-
- filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
- if err != nil {
- log.Fatal(err)
- }
-
- trimmed := repos[:0]
- for _, r := range repos {
- if filter.Include(r.Slug) && (*projectType == "" || r.Project.Type == *projectType) {
- trimmed = append(trimmed, r)
- }
- }
- repos = trimmed
-
- if err := cloneRepos(destDir, rootURL.Host, repos, password); err != nil {
- log.Fatalf("cloneRepos: %v", err)
- }
-
- if *deleteRepos {
- if err := deleteStaleRepos(*dest, filter, repos); err != nil {
- log.Fatalf("deleteStaleRepos: %v", err)
- }
- }
-}
-
-func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []bitbucketv1.Repository) error {
- var baseURL string
- if len(repos) > 0 {
- baseURL = repos[0].Links.Self[0].Href
- } else {
- return nil
- }
- u, err := url.Parse(baseURL)
- if err != nil {
- return err
- }
- u.Path = ""
-
- names := map[string]struct{}{}
- for _, r := range repos {
- names[filepath.Join(u.Host, r.Project.Key, r.Slug+".git")] = struct{}{}
- }
-
- if err := gitindex.DeleteRepos(destDir, u, names, filter); err != nil {
- log.Fatalf("deleteRepos: %v", err)
- }
- return nil
-}
-
-func IsValidProjectType(projectType string) bool {
- switch projectType {
- case "NORMAL", "PERSONAL":
- return true
- }
- return false
-}
-
-func getAllRepos(client bitbucketv1.APIClient) ([]bitbucketv1.Repository, error) {
- var allRepos []bitbucketv1.Repository
- opts := map[string]interface{}{
- "limit": 1000,
- "start": 0,
- }
-
- for {
- resp, err := client.DefaultApi.GetRepositories_19(opts)
- if err != nil {
- return nil, err
- }
-
- repos, err := bitbucketv1.GetRepositoriesResponse(resp)
- if err != nil {
- return nil, err
- }
-
- if len(repos) == 0 {
- break
- }
-
- opts["start"] = opts["start"].(int) + opts["limit"].(int)
-
- allRepos = append(allRepos, repos...)
- }
- return allRepos, nil
-}
-
-func getProjectRepos(client bitbucketv1.APIClient, projectName string) ([]bitbucketv1.Repository, error) {
- var allRepos []bitbucketv1.Repository
- opts := map[string]interface{}{
- "limit": 1000,
- "start": 0,
- }
-
- for {
- resp, err := client.DefaultApi.GetRepositoriesWithOptions(projectName, opts)
- if err != nil {
- return nil, err
- }
-
- repos, err := bitbucketv1.GetRepositoriesResponse(resp)
- if err != nil {
- return nil, err
- }
-
- if len(repos) == 0 {
- break
- }
-
- opts["start"] = opts["start"].(int) + opts["limit"].(int)
-
- allRepos = append(allRepos, repos...)
- }
- return allRepos, nil
-}
-
-func cloneRepos(destDir string, host string, repos []bitbucketv1.Repository, password string) error {
- for _, r := range repos {
- fullName := filepath.Join(r.Project.Key, r.Slug)
- config := map[string]string{
- "zoekt.web-url-type": "bitbucket-server",
- "zoekt.web-url": r.Links.Self[0].Href,
- "zoekt.name": filepath.Join(host, fullName),
- }
-
- httpsCloneUrl := ""
- for _, cloneUrl := range r.Links.Clone {
- // In fact, this is an https url, i.e. there's no separate Name for https.
- if cloneUrl.Name == "http" {
- s := strings.Split(cloneUrl.Href, "@")
- httpsCloneUrl = s[0] + ":" + password + "@" + s[1]
- }
- }
-
- if httpsCloneUrl != "" {
- dest, err := gitindex.CloneRepo(destDir, fullName, httpsCloneUrl, config)
- if err != nil {
- return err
- }
- if dest != "" {
- fmt.Println(dest)
- }
- }
- }
-
- return nil
-}
diff --git a/cmd/zoekt-mirror-gerrit/main.go b/cmd/zoekt-mirror-gerrit/main.go
deleted file mode 100644
index b186de7..0000000
--- a/cmd/zoekt-mirror-gerrit/main.go
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This binary fetches all repos of a Gerrit host.
-
-package main
-
-import (
- "bytes"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "net/http"
- "net/url"
- "path/filepath"
- "strconv"
- "strings"
-
- gerrit "github.com/andygrunwald/go-gerrit"
- "github.com/google/zoekt/gitindex"
-)
-
-type loggingRT struct {
- http.RoundTripper
-}
-
-type closeBuffer struct {
- *bytes.Buffer
-}
-
-func (b *closeBuffer) Close() error { return nil }
-
-const debug = false
-
-func (rt *loggingRT) RoundTrip(req *http.Request) (rep *http.Response, err error) {
- if debug {
- log.Println("Req: ", req)
- }
- rep, err = rt.RoundTripper.RoundTrip(req)
- if debug {
- log.Println("Rep: ", rep, err)
- }
- if err == nil {
- body, _ := ioutil.ReadAll(rep.Body)
-
- rep.Body.Close()
- if debug {
- log.Println("body: ", string(body))
- }
- rep.Body = &closeBuffer{bytes.NewBuffer(body)}
- }
- return rep, err
-}
-
-func newLoggingClient() *http.Client {
- return &http.Client{
- Transport: &loggingRT{
- RoundTripper: http.DefaultTransport,
- },
- }
-}
-
-func main() {
- dest := flag.String("dest", "", "destination directory")
- namePattern := flag.String("name", "", "only clone repos whose name matches the regexp.")
- excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
- httpCrendentialsPath := flag.String("http-credentials", "", "path to a file containing http credentials stored like 'user:password'.")
- flag.Parse()
-
- if len(flag.Args()) < 1 {
- log.Fatal("must provide URL argument.")
- }
-
- rootURL, err := url.Parse(flag.Arg(0))
- if err != nil {
- log.Fatalf("url.Parse(): %v", err)
- }
-
- if *httpCrendentialsPath != "" {
- creds, err := ioutil.ReadFile(*httpCrendentialsPath)
- if err != nil {
- log.Print("Cannot read gerrit http credentials, going Anonymous")
- } else {
- splitCreds := strings.Split(strings.TrimSpace(string(creds)), ":")
- rootURL.User = url.UserPassword(splitCreds[0], splitCreds[1])
- }
- }
-
- if *dest == "" {
- log.Fatal("must set --dest")
- }
-
- filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
- if err != nil {
- log.Fatal(err)
- }
-
- client, err := gerrit.NewClient(rootURL.String(), newLoggingClient())
- if err != nil {
- log.Fatalf("NewClient(%s): %v", rootURL, err)
- }
-
- info, _, err := client.Config.GetServerInfo()
- if err != nil {
- log.Fatalf("GetServerInfo: %v", err)
- }
-
- var projectURL string
- for _, s := range []string{"http", "anonymous http"} {
- projectURL = info.Download.Schemes[s].URL
- }
- if projectURL == "" {
- log.Fatalf("project URL is empty, got Schemes %#v", info.Download.Schemes)
- }
-
- projects := make(map[string]gerrit.ProjectInfo)
- skip := "0"
- for {
- page, _, err := client.Projects.ListProjects(&gerrit.ProjectOptions{Skip: skip})
- if err != nil {
- log.Fatalf("ListProjects: %v", err)
- }
-
- if len(*page) == 0 {
- break
- }
- for k, v := range *page {
- projects[k] = v
- }
- skip = strconv.Itoa(len(projects))
- }
-
- for k, v := range projects {
- if !filter.Include(k) {
- continue
- }
-
- cloneURL, err := url.Parse(strings.Replace(projectURL, "${project}", k, -1))
- if err != nil {
- log.Fatalf("url.Parse: %v", err)
- }
-
- name := filepath.Join(cloneURL.Host, cloneURL.Path)
- config := map[string]string{
- "zoekt.name": name,
- "zoekt.gerrit-project": k,
- "zoekt.gerrit-host": rootURL.String(),
- }
-
- for _, wl := range v.WebLinks {
- // default gerrit gitiles config is named browse, and does not include
- // root domain name in it. Cheating.
- switch wl.Name {
- case "browse":
- config["zoekt.web-url"] = fmt.Sprintf("%s://%s%s", rootURL.Scheme,
- rootURL.Host, wl.URL)
- config["zoekt.web-url-type"] = "gitiles"
- default:
- config["zoekt.web-url"] = wl.URL
- config["zoekt.web-url-type"] = wl.Name
- }
- }
-
- if dest, err := gitindex.CloneRepo(*dest, name, cloneURL.String(), config); err != nil {
- log.Fatalf("CloneRepo: %v", err)
- } else {
- fmt.Println(dest)
- }
- }
-}
diff --git a/cmd/zoekt-mirror-github/main.go b/cmd/zoekt-mirror-github/main.go
deleted file mode 100644
index 1b0d8f8..0000000
--- a/cmd/zoekt-mirror-github/main.go
+++ /dev/null
@@ -1,314 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This binary fetches all repos of a user or organization and clones
-// them. It is strongly recommended to get a personal API token from
-// https://github.com/settings/tokens, save the token in a file, and
-// point the --token option to it.
-package main
-
-import (
- "context"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "net/url"
- "os"
- "path/filepath"
- "strconv"
- "strings"
-
- "github.com/google/go-github/v27/github"
- "golang.org/x/oauth2"
-
- "github.com/google/zoekt/gitindex"
-)
-
-type topicsFlag []string
-
-func (f *topicsFlag) String() string {
- return strings.Join(*f, ",")
-}
-
-func (f *topicsFlag) Set(value string) error {
- *f = append(*f, value)
- return nil
-}
-
-type reposFilters struct {
- topics []string
- excludeTopics []string
-}
-
-func main() {
- dest := flag.String("dest", "", "destination directory")
- githubURL := flag.String("url", "", "GitHub Enterprise url. If not set github.com will be used as the host.")
- org := flag.String("org", "", "organization to mirror")
- user := flag.String("user", "", "user to mirror")
- token := flag.String("token",
- filepath.Join(os.Getenv("HOME"), ".github-token"),
- "file holding API token.")
- forks := flag.Bool("forks", false, "also mirror forks.")
- deleteRepos := flag.Bool("delete", false, "delete missing repos")
- namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
- excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
- topics := topicsFlag{}
- flag.Var(&topics, "topic", "only clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
- excludeTopics := topicsFlag{}
- flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
-
- flag.Parse()
-
- if *dest == "" {
- log.Fatal("must set --dest")
- }
- if *githubURL == "" && *org == "" && *user == "" {
- log.Fatal("must set either --org or --user when github.com is used as host")
- }
-
- var host string
- var apiBaseURL string
- var client *github.Client
- if *githubURL != "" {
- rootURL, err := url.Parse(*githubURL)
- if err != nil {
- log.Fatal(err)
- }
- host = rootURL.Host
- apiPath, err := url.Parse("/api/v3/")
- if err != nil {
- log.Fatal(err)
- }
- apiBaseURL = rootURL.ResolveReference(apiPath).String()
- client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, nil)
- if err != nil {
- log.Fatal(err)
- }
- } else {
- host = "github.com"
- apiBaseURL = "https://github.com/"
- client = github.NewClient(nil)
- }
- destDir := filepath.Join(*dest, host)
- if err := os.MkdirAll(destDir, 0o755); err != nil {
- log.Fatal(err)
- }
-
- if *token != "" {
- content, err := ioutil.ReadFile(*token)
- if err != nil {
- log.Fatal(err)
- }
-
- ts := oauth2.StaticTokenSource(
- &oauth2.Token{
- AccessToken: strings.TrimSpace(string(content)),
- })
- tc := oauth2.NewClient(context.Background(), ts)
- if *githubURL != "" {
- client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, tc)
- if err != nil {
- log.Fatal(err)
- }
- } else {
- client = github.NewClient(tc)
- }
- }
-
- reposFilters := reposFilters{
- topics: topics,
- excludeTopics: excludeTopics,
- }
- var repos []*github.Repository
- var err error
- if *org != "" {
- repos, err = getOrgRepos(client, *org, reposFilters)
- } else if *user != "" {
- repos, err = getUserRepos(client, *user, reposFilters)
- } else {
- log.Printf("no user or org specified, cloning all repos.")
- repos, err = getUserRepos(client, "", reposFilters)
- }
-
- if err != nil {
- log.Fatal(err)
- }
-
- if !*forks {
- trimmed := repos[:0]
- for _, r := range repos {
- if r.Fork == nil || !*r.Fork {
- trimmed = append(trimmed, r)
- }
- }
- repos = trimmed
- }
-
- filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
- if err != nil {
- log.Fatal(err)
- }
-
- {
- trimmed := repos[:0]
- for _, r := range repos {
- if filter.Include(*r.Name) {
- trimmed = append(trimmed, r)
- }
- }
- repos = trimmed
- }
-
- if err := cloneRepos(destDir, repos); err != nil {
- log.Fatalf("cloneRepos: %v", err)
- }
-
- if *deleteRepos {
- if err := deleteStaleRepos(*dest, filter, repos, *org+*user); err != nil {
- log.Fatalf("deleteStaleRepos: %v", err)
- }
- }
-}
-
-func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []*github.Repository, user string) error {
- var baseURL string
- if len(repos) > 0 {
- baseURL = *repos[0].HTMLURL
- } else {
- return nil
- }
- u, err := url.Parse(baseURL)
- if err != nil {
- return err
- }
- u.Path = user
-
- names := map[string]struct{}{}
- for _, r := range repos {
- u, err := url.Parse(*r.HTMLURL)
- if err != nil {
- return err
- }
-
- names[filepath.Join(u.Host, u.Path+".git")] = struct{}{}
- }
- if err := gitindex.DeleteRepos(destDir, u, names, filter); err != nil {
- log.Fatalf("deleteRepos: %v", err)
- }
- return nil
-}
-
-func hasIntersection(s1, s2 []string) bool {
- hash := make(map[string]bool)
- for _, e := range s1 {
- hash[e] = true
- }
- for _, e := range s2 {
- if hash[e] {
- return true
- }
- }
- return false
-}
-
-func filterByTopic(repos []*github.Repository, include []string, exclude []string) (filteredRepos []*github.Repository) {
- for _, repo := range repos {
- if (len(include) == 0 || hasIntersection(include, repo.Topics)) &&
- !hasIntersection(exclude, repo.Topics) {
- filteredRepos = append(filteredRepos, repo)
- }
- }
- return
-}
-
-func getOrgRepos(client *github.Client, org string, reposFilters reposFilters) ([]*github.Repository, error) {
- var allRepos []*github.Repository
- opt := &github.RepositoryListByOrgOptions{}
- for {
- repos, resp, err := client.Repositories.ListByOrg(context.Background(), org, opt)
- if err != nil {
- return nil, err
- }
- if len(repos) == 0 {
- break
- }
-
- opt.Page = resp.NextPage
- repos = filterByTopic(repos, reposFilters.topics, reposFilters.excludeTopics)
- allRepos = append(allRepos, repos...)
- if resp.NextPage == 0 {
- break
- }
- }
- return allRepos, nil
-}
-
-func getUserRepos(client *github.Client, user string, reposFilters reposFilters) ([]*github.Repository, error) {
- var allRepos []*github.Repository
- opt := &github.RepositoryListOptions{}
- for {
- repos, resp, err := client.Repositories.List(context.Background(), user, opt)
- if err != nil {
- return nil, err
- }
- if len(repos) == 0 {
- break
- }
-
- opt.Page = resp.NextPage
- repos = filterByTopic(repos, reposFilters.topics, reposFilters.excludeTopics)
- allRepos = append(allRepos, repos...)
- if resp.NextPage == 0 {
- break
- }
- }
- return allRepos, nil
-}
-
-func itoa(p *int) string {
- if p != nil {
- return strconv.Itoa(*p)
- }
- return ""
-}
-
-func cloneRepos(destDir string, repos []*github.Repository) error {
- for _, r := range repos {
- host, err := url.Parse(*r.HTMLURL)
- if err != nil {
- return err
- }
- config := map[string]string{
- "zoekt.web-url-type": "github",
- "zoekt.web-url": *r.HTMLURL,
- "zoekt.name": filepath.Join(host.Hostname(), *r.FullName),
-
- "zoekt.github-stars": itoa(r.StargazersCount),
- "zoekt.github-watchers": itoa(r.WatchersCount),
- "zoekt.github-subscribers": itoa(r.SubscribersCount),
- "zoekt.github-forks": itoa(r.ForksCount),
- }
- dest, err := gitindex.CloneRepo(destDir, *r.FullName, *r.CloneURL, config)
- if err != nil {
- return err
- }
- if dest != "" {
- fmt.Println(dest)
- }
-
- }
-
- return nil
-}
diff --git a/cmd/zoekt-mirror-gitiles/cgit.go b/cmd/zoekt-mirror-gitiles/cgit.go
deleted file mode 100644
index 7517cf8..0000000
--- a/cmd/zoekt-mirror-gitiles/cgit.go
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "bytes"
- "fmt"
- "io/ioutil"
- "log"
- "net/http"
- "net/url"
- "regexp"
- "strings"
-)
-
-// I will go to programmer hell for trying to parse HTML with
-// regexps. Why doesn't CGit have a JSON interface?
-var cgitRepoEntryRE = regexp.MustCompile(
- `class='sublevel-repo'><a title='([^'"]*)' href='([^']*)'>`)
-
-func normalizedGet(u *url.URL) ([]byte, error) {
- rep, err := http.Get(u.String())
- if err != nil {
- return nil, err
- }
- defer rep.Body.Close()
- if rep.StatusCode != 200 {
- return nil, fmt.Errorf("status %s", rep.Status)
- }
-
- c, err := ioutil.ReadAll(rep.Body)
- if err != nil {
- return nil, err
- }
-
- c = bytes.Replace(c, []byte{'\n'}, []byte{' '}, -1)
- return c, nil
-}
-
-// getCGitRepos finds repo names from the CGit index page hosted at
-// URL `u`.
-func getCGitRepos(u *url.URL, filter func(string) bool) (map[string]*crawlTarget, error) {
- c, err := normalizedGet(u)
- if err != nil {
- return nil, err
- }
-
- pages := map[string]*crawlTarget{}
- for _, m := range cgitRepoEntryRE.FindAllSubmatch(c, -1) {
- nm := strings.TrimSuffix(string(m[1]), ".git")
-
- if !filter(nm) {
- continue
- }
-
- relUrl := string(m[2])
-
- u, err := u.Parse(relUrl)
- if err != nil {
- log.Printf("ignoring u.Parse(%q): %v", relUrl, err)
- continue
- }
- pages[nm] = &crawlTarget{
- webURL: u.String(),
- webURLType: "cgit",
- }
- }
-
- // TODO - parallel?
- for _, target := range pages {
- u, _ := url.Parse(target.webURL)
- c, err := cgitCloneURL(u)
- if err != nil {
- log.Printf("ignoring cgitCloneURL(%s): %v", u, c)
- continue
- }
-
- target.cloneURL = c.String()
- }
- return pages, nil
-}
-
-// We'll take the first URL we get. This may put the git:// URL (which
-// is insecure) at the top, but individual machines (such as
-// git.savannah.gnu) probably would rather receive git:// traffic
-// which is more efficient.
-
-// TODO - do something like `Clone.*<a.*href=` to get the first
-// URL. Older versions don't say vcs-git.
-var cloneURLRe = regexp.MustCompile(
- `rel=["']vcs-git["'] *href=["']([^"']*)["']`)
-
-func cgitCloneURL(u *url.URL) (*url.URL, error) {
- c, err := normalizedGet(u)
- if err != nil {
- return nil, err
- }
-
- m := cloneURLRe.FindSubmatch(c)
- cl, err := url.Parse(string(m[1]))
- if err != nil {
- return nil, err
- }
-
- return cl, nil
-}
diff --git a/cmd/zoekt-mirror-gitiles/gitiles.go b/cmd/zoekt-mirror-gitiles/gitiles.go
deleted file mode 100644
index 9510d10..0000000
--- a/cmd/zoekt-mirror-gitiles/gitiles.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "bytes"
- "encoding/json"
- "io/ioutil"
- "net/http"
- "net/url"
- "path"
-)
-
-type Project struct {
- Name string
- CloneURL string `json:"clone_url"`
-}
-
-func getGitilesRepos(root *url.URL, filter func(string) bool) (map[string]*crawlTarget, error) {
- jsRoot := *root
- jsRoot.RawQuery = "format=JSON"
- resp, err := http.Get(jsRoot.String())
- if err != nil {
- return nil, err
- }
- defer resp.Body.Close()
-
- content, err := ioutil.ReadAll(resp.Body)
- if err != nil {
- return nil, err
- }
-
- const xssTag = ")]}'\n"
- content = bytes.TrimPrefix(content, []byte(xssTag))
-
- m := map[string]*Project{}
- if err := json.Unmarshal(content, &m); err != nil {
- return nil, err
- }
-
- result := map[string]*crawlTarget{}
- for k, v := range m {
- if k == "All-Users" || k == "All-Projects" {
- continue
- }
- if !filter(k) {
- continue
- }
- web := *root
- web.Path = path.Join(web.Path, v.Name)
- result[k] = &crawlTarget{
- cloneURL: v.CloneURL,
- webURL: web.String(),
- webURLType: "gitiles",
- }
- }
- return result, nil
-}
diff --git a/cmd/zoekt-mirror-gitiles/main.go b/cmd/zoekt-mirror-gitiles/main.go
deleted file mode 100644
index c4210a9..0000000
--- a/cmd/zoekt-mirror-gitiles/main.go
+++ /dev/null
@@ -1,102 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This binary fetches all repos of a Gitiles host. It does double
-// duty for other "simple" web hosts
-package main
-
-import (
- "flag"
- "fmt"
- "log"
- "net/url"
- "os"
- "path/filepath"
-
- "github.com/google/zoekt/gitindex"
-)
-
-type crawlTarget struct {
- cloneURL string
- webURL string
- webURLType string
-}
-
-type hostCrawler func(*url.URL, func(string) bool) (map[string]*crawlTarget, error)
-
-func main() {
- dest := flag.String("dest", "", "destination directory")
- namePattern := flag.String("name", "", "only clone repos whose name matches the regexp.")
- excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
- hostType := flag.String("type", "gitiles", "which webserver to crawl. Choices: gitiles, cgit")
- flag.Parse()
-
- if len(flag.Args()) < 1 {
- log.Fatal("must provide URL argument.")
- }
-
- var crawler hostCrawler
- switch *hostType {
- case "gitiles":
- crawler = getGitilesRepos
- case "cgit":
- crawler = getCGitRepos
- default:
- log.Fatalf("unknown host type %q", *hostType)
- }
-
- rootURL, err := url.Parse(flag.Arg(0))
- if err != nil {
- log.Fatalf("url.Parse(): %v", err)
- }
-
- if *dest == "" {
- log.Fatal("must set --dest")
- }
-
- if err := os.MkdirAll(filepath.Join(*dest, rootURL.Host, rootURL.Path), 0o755); err != nil {
- log.Fatal(err)
- }
-
- filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
- if err != nil {
- log.Fatal(err)
- }
-
- repos, err := crawler(rootURL, filter.Include)
- if err != nil {
- log.Fatal(err)
- }
-
- for nm, target := range repos {
- // For git.savannah.gnu.org, this puts an ugly "CGit"
- // path component into the name. However, it's
- // possible that there are multiple, different CGit pages
- // on the host, so we have to keep it.
- fullName := filepath.Join(rootURL.Host, rootURL.Path, nm)
- config := map[string]string{
- "zoekt.web-url": target.webURL,
- "zoekt.web-url-type": target.webURLType,
- "zoekt.name": fullName,
- }
-
- dest, err := gitindex.CloneRepo(*dest, fullName, target.cloneURL, config)
- if err != nil {
- log.Fatal(err)
- }
- if dest != "" {
- fmt.Println(dest)
- }
- }
-}
diff --git a/cmd/zoekt-mirror-gitlab/main.go b/cmd/zoekt-mirror-gitlab/main.go
deleted file mode 100644
index e119e87..0000000
--- a/cmd/zoekt-mirror-gitlab/main.go
+++ /dev/null
@@ -1,189 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This binary fetches all repos for a user from gitlab.
-//
-// It is recommended to use a gitlab personal access token:
-// https://docs.gitlab.com/ce/user/profile/personal_access_tokens.html. This
-// token should be stored in a file and the --token option should be used.
-// In addition, the token should be present in the ~/.netrc of the user running
-// the mirror command. For example, the ~/.netrc may look like:
-//
-// machine gitlab.com
-// login oauth
-// password <personal access token>
-//
-package main
-
-import (
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "net/url"
- "os"
- "path/filepath"
- "strconv"
- "strings"
-
- "github.com/google/zoekt/gitindex"
- gitlab "github.com/xanzy/go-gitlab"
-)
-
-func main() {
- dest := flag.String("dest", "", "destination directory")
- gitlabURL := flag.String("url", "https://gitlab.com/api/v4/", "Gitlab URL. If not set https://gitlab.com/api/v4/ will be used")
- token := flag.String("token",
- filepath.Join(os.Getenv("HOME"), ".gitlab-token"),
- "file holding API token.")
- isMember := flag.Bool("membership", false, "only mirror repos this user is a member of ")
- isPublic := flag.Bool("public", false, "only mirror public repos")
- deleteRepos := flag.Bool("delete", false, "delete missing repos")
- namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
- excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
- flag.Parse()
-
- if *dest == "" {
- log.Fatal("must set --dest")
- }
-
- var host string
- rootURL, err := url.Parse(*gitlabURL)
- if err != nil {
- log.Fatal(err)
- }
- host = rootURL.Host
-
- destDir := filepath.Join(*dest, host)
- if err := os.MkdirAll(destDir, 0o755); err != nil {
- log.Fatal(err)
- }
-
- content, err := ioutil.ReadFile(*token)
- if err != nil {
- log.Fatal(err)
- }
- apiToken := strings.TrimSpace(string(content))
-
- client := gitlab.NewClient(nil, apiToken)
- client.SetBaseURL(*gitlabURL)
-
- opt := &gitlab.ListProjectsOptions{
- ListOptions: gitlab.ListOptions{
- PerPage: 10,
- Page: 1,
- },
- Membership: isMember,
- }
- if *isPublic {
- opt.Visibility = gitlab.Visibility(gitlab.PublicVisibility)
- }
-
- var gitlabProjects []*gitlab.Project
- for {
- projects, resp, err := client.Projects.ListProjects(opt)
- if err != nil {
- log.Fatal(err)
- }
-
- for _, project := range projects {
-
- // Skip projects without a default branch - these should be projects
- // where the repository isn't enabled
- if project.DefaultBranch == "" {
- continue
- }
-
- gitlabProjects = append(gitlabProjects, project)
- }
-
- if resp.CurrentPage >= resp.TotalPages {
- break
- }
-
- opt.Page = resp.NextPage
- }
-
- filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
- if err != nil {
- log.Fatal(err)
- }
-
- {
- trimmed := gitlabProjects[:0]
- for _, p := range gitlabProjects {
- if filter.Include(p.NameWithNamespace) {
- trimmed = append(trimmed, p)
- }
- }
- gitlabProjects = trimmed
- }
-
- fetchProjects(destDir, apiToken, gitlabProjects)
-
- if *deleteRepos {
- if err := deleteStaleProjects(*dest, filter, gitlabProjects); err != nil {
- log.Fatalf("deleteStaleProjects: %v", err)
- }
- }
-}
-
-func deleteStaleProjects(destDir string, filter *gitindex.Filter, projects []*gitlab.Project) error {
- u, err := url.Parse(projects[0].HTTPURLToRepo)
- u.Path = ""
- if err != nil {
- return err
- }
-
- names := map[string]struct{}{}
- for _, p := range projects {
- u, err := url.Parse(p.HTTPURLToRepo)
- if err != nil {
- return err
- }
-
- names[filepath.Join(u.Host, u.Path)] = struct{}{}
- }
-
- if err := gitindex.DeleteRepos(destDir, u, names, filter); err != nil {
- log.Fatalf("deleteRepos: %v", err)
- }
- return nil
-}
-
-func fetchProjects(destDir, token string, projects []*gitlab.Project) {
- for _, p := range projects {
- u, err := url.Parse(p.HTTPURLToRepo)
- if err != nil {
- log.Printf("Unable to parse project URL: %v", err)
- continue
- }
- config := map[string]string{
- "zoekt.web-url-type": "gitlab",
- "zoekt.web-url": p.WebURL,
- "zoekt.name": filepath.Join(u.Hostname(), p.PathWithNamespace),
-
- "zoekt.gitlab-stars": strconv.Itoa(p.StarCount),
- "zoekt.gitlab-forks": strconv.Itoa(p.ForksCount),
- }
-
- cloneURL := p.HTTPURLToRepo
- dest, err := gitindex.CloneRepo(destDir, p.PathWithNamespace, cloneURL, config)
- if err != nil {
- log.Printf("cloneRepos: %v", err)
- continue
- }
- if dest != "" {
- fmt.Println(dest)
- }
- }
-}
diff --git a/cmd/zoekt-repo-index/main.go b/cmd/zoekt-repo-index/main.go
deleted file mode 100644
index 06e8e20..0000000
--- a/cmd/zoekt-repo-index/main.go
+++ /dev/null
@@ -1,381 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-/* zoekt-repo-index indexes a repo-based repository. The constituent
-git repositories should already have been downloaded to the
---repo_cache directory, eg.
-
- go install github.com/google/zoekt/cmd/zoekt-repo-index &&
-
- zoekt-repo-index -base_url https://gfiber.googlesource.com/ \
- -manifest_repo_url https://gfiber.googlesource.com/manifests \
- -manifest_rev_prefix=refs/heads/ \
- -rev_prefix="refs/remotes/" \
- -repo_cache ~/zoekt-serving/repos/ \
- -shard_limit 50000000 \
- master:default_unrestricted.xml
-*/
-package main
-
-import (
- "crypto/sha1"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "net/url"
- "path"
- "path/filepath"
- "sort"
- "strings"
-
- "github.com/google/slothfs/manifest"
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/gitindex"
- "go.uber.org/automaxprocs/maxprocs"
-
- git "github.com/go-git/go-git/v5"
- "github.com/go-git/go-git/v5/plumbing"
-)
-
-var _ = log.Println
-
-type fileKey struct {
- SubRepoPath string
- Path string
- ID plumbing.Hash
-}
-
-func (k *fileKey) FullPath() string {
- return filepath.Join(k.SubRepoPath, k.Path)
-}
-
-type branchFile struct {
- branch, file string
- mf *manifest.Manifest
- manifestPath string
-}
-
-func parseBranches(manifestRepoURL, revPrefix string, cache *gitindex.RepoCache, args []string) ([]branchFile, error) {
- var branches []branchFile
- if manifestRepoURL != "" {
- u, err := url.Parse(manifestRepoURL)
- if err != nil {
- return nil, err
- }
-
- repo, err := cache.Open(u)
- if err != nil {
- return nil, err
- }
-
- for _, f := range args {
- fs := strings.SplitN(f, ":", 2)
- if len(fs) != 2 {
- return nil, fmt.Errorf("cannot parse %q as BRANCH:FILE", f)
- }
- mf, err := getManifest(repo, revPrefix+fs[0], fs[1])
- if err != nil {
- return nil, fmt.Errorf("manifest %s:%s: %v", fs[0], fs[1], err)
- }
-
- branches = append(branches, branchFile{
- branch: fs[0],
- file: fs[1],
- mf: mf,
- manifestPath: cache.Path(u),
- })
- }
- } else {
- if len(args) == 0 {
- return nil, fmt.Errorf("must give XML file argument")
- }
- for _, f := range args {
- mf, err := manifest.ParseFile(f)
- if err != nil {
- return nil, err
- }
-
- branches = append(branches, branchFile{
- branch: "HEAD",
- file: filepath.Base(f),
- mf: mf,
- manifestPath: f,
- })
- }
- }
- return branches, nil
-}
-
-func main() {
- sizeMax := flag.Int("file_limit", 128<<10, "maximum file size")
- shardLimit := flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard")
- parallelism := flag.Int("parallelism", 1, "maximum number of parallel indexing processes")
-
- revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references")
- baseURLStr := flag.String("base_url", "", "base url to interpret repository names")
- repoCacheDir := flag.String("repo_cache", "", "root for repository cache")
- indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files")
- manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. Provide the BRANCH:XML-FILE as further command-line arguments")
- manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository")
- repoName := flag.String("name", "", "set repository name")
- repoURL := flag.String("url", "", "set repository URL")
- maxSubProjects := flag.Int("max_sub_projects", 0, "trim number of projects in manifest, for debugging.")
- incremental := flag.Bool("incremental", true, "only index if the repository has changed.")
- flag.Parse()
-
- // Tune GOMAXPROCS to match Linux container CPU quota.
- maxprocs.Set()
-
- if *repoCacheDir == "" {
- log.Fatal("must set --repo_cache")
- }
- repoCache := gitindex.NewRepoCache(*repoCacheDir)
-
- if u, err := url.Parse(*baseURLStr); err != nil {
- log.Fatalf("Parse(%q): %v", u, err)
- } else if *repoName == "" {
- *repoName = filepath.Join(u.Host, u.Path)
- }
-
- opts := build.Options{
- Parallelism: *parallelism,
- SizeMax: *sizeMax,
- ShardMax: *shardLimit,
- IndexDir: *indexDir,
- RepositoryDescription: zoekt.Repository{
- Name: *repoName,
- URL: *repoURL,
- },
- }
- opts.SetDefaults()
- baseURL, err := url.Parse(*baseURLStr)
- if err != nil {
- log.Fatalf("Parse baseURL %q: %v", *baseURLStr, err)
- }
-
- branches, err := parseBranches(*manifestRepoURL, *manifestRevPrefix, repoCache, flag.Args())
- if err != nil {
- log.Fatalf("parseBranches(%s, %s): %v", *manifestRepoURL, *manifestRevPrefix, err)
- }
- if len(branches) == 0 {
- log.Fatal("must specify at least one branch")
- }
- if *maxSubProjects > 0 {
- for _, b := range branches {
- if *maxSubProjects < len(b.mf.Project) {
- b.mf.Project = b.mf.Project[:*maxSubProjects]
- }
- }
- }
-
- perBranch := map[string]map[fileKey]gitindex.BlobLocation{}
- opts.SubRepositories = map[string]*zoekt.Repository{}
-
- // branch => repo => version
- versionMap := map[string]map[string]plumbing.Hash{}
- for _, br := range branches {
- br.mf.Filter()
- files, versions, err := iterateManifest(br.mf, *baseURL, *revPrefix, repoCache)
- if err != nil {
- log.Fatalf("iterateManifest: %v", err)
- }
-
- perBranch[br.branch] = files
- for key, loc := range files {
- _, ok := opts.SubRepositories[key.SubRepoPath]
- if ok {
- // This can be incorrect: if the layout of manifests
- // changes across branches, then the same file could
- // be in different subRepos. We'll pretend this is not
- // a problem.
- continue
- }
-
- desc := &zoekt.Repository{}
- if err := gitindex.SetTemplatesFromOrigin(desc, loc.URL); err != nil {
- log.Fatalf("SetTemplatesFromOrigin(%s): %v", loc.URL, err)
- }
-
- opts.SubRepositories[key.SubRepoPath] = desc
- }
- versionMap[br.branch] = versions
- }
-
- for _, br := range branches {
- var paths []string
- for p := range opts.SubRepositories {
- paths = append(paths, p)
- }
- sort.Strings(paths)
-
- // Compute a version of the aggregate. This version
- // has nothing to do with git, but will let us do
- // incrementality correctly.
- hasher := sha1.New()
- for _, p := range paths {
- repo := opts.SubRepositories[p]
- id := versionMap[br.branch][p]
-
- // it is possible that 'id' is zero, if this
- // branch of the manifest doesn't have this
- // particular subrepository.
- hasher.Write([]byte(p))
- hasher.Write([]byte(id.String()))
- repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{
- Name: br.branch,
- Version: id.String(),
- })
- }
-
- opts.RepositoryDescription.Branches = append(opts.RepositoryDescription.Branches, zoekt.RepositoryBranch{
- Name: br.branch,
- Version: fmt.Sprintf("%x", hasher.Sum(nil)),
- })
- }
-
- // key => branch
- all := map[fileKey][]string{}
- for br, files := range perBranch {
- for k := range files {
- all[k] = append(all[k], br)
- }
- }
-
- if *incremental && opts.IncrementalSkipIndexing() {
- return
- }
-
- builder, err := build.NewBuilder(opts)
- if err != nil {
- log.Fatal(err)
- }
- for k, branches := range all {
- loc := perBranch[branches[0]][k]
- data, err := loc.Blob(&k.ID)
- if err != nil {
- log.Fatal(err)
- }
-
- doc := zoekt.Document{
- Name: k.FullPath(),
- Content: data,
- SubRepositoryPath: k.SubRepoPath,
- }
-
- doc.Branches = append(doc.Branches, branches...)
- if err := builder.Add(doc); err != nil {
- log.Printf("Add(%s): %v", doc.Name, err)
- break
- }
- }
- if err := builder.Finish(); err != nil {
- log.Fatalf("Finish: %v", err)
- }
-}
-
-// getManifest parses the manifest XML at the given branch/path inside a Git repository.
-func getManifest(repo *git.Repository, branch, path string) (*manifest.Manifest, error) {
- ref, err := repo.Reference(plumbing.ReferenceName("refs/heads/"+branch), true)
- if err != nil {
- return nil, err
- }
-
- commit, err := repo.CommitObject(ref.Hash())
- if err != nil {
- return nil, err
- }
-
- tree, err := repo.TreeObject(commit.TreeHash)
- if err != nil {
- return nil, err
- }
-
- entry, err := tree.FindEntry(path)
- if err != nil {
- return nil, err
- }
-
- blob, err := repo.BlobObject(entry.Hash)
- if err != nil {
- return nil, err
- }
- r, err := blob.Reader()
- if err != nil {
- return nil, err
- }
- defer r.Close()
-
- content, _ := ioutil.ReadAll(r)
- return manifest.Parse(content)
-}
-
-// iterateManifest constructs a complete tree from the given Manifest.
-func iterateManifest(mf *manifest.Manifest,
- baseURL url.URL, revPrefix string,
- cache *gitindex.RepoCache) (map[fileKey]gitindex.BlobLocation, map[string]plumbing.Hash, error) {
- allFiles := map[fileKey]gitindex.BlobLocation{}
- allVersions := map[string]plumbing.Hash{}
- for _, p := range mf.Project {
- rev := mf.ProjectRevision(&p)
-
- projURL := baseURL
- projURL.Path = path.Join(projURL.Path, p.Name)
-
- topRepo, err := cache.Open(&projURL)
- if err != nil {
- return nil, nil, err
- }
-
- ref, err := topRepo.Reference(plumbing.ReferenceName(revPrefix+rev), true)
- if err != nil {
- return nil, nil, err
- }
-
- commit, err := topRepo.CommitObject(ref.Hash())
- if err != nil {
- return nil, nil, err
- }
- if err != nil {
- return nil, nil, err
- }
-
- allVersions[p.GetPath()] = commit.Hash
-
- tree, err := commit.Tree()
- if err != nil {
- return nil, nil, err
- }
-
- files, versions, err := gitindex.TreeToFiles(topRepo, tree, projURL.String(), cache)
- if err != nil {
- return nil, nil, err
- }
-
- for key, repo := range files {
- allFiles[fileKey{
- SubRepoPath: filepath.Join(p.GetPath(), key.SubRepoPath),
- Path: key.Path,
- ID: key.ID,
- }] = repo
- }
-
- for path, version := range versions {
- allVersions[filepath.Join(p.GetPath(), path)] = version
- }
- }
-
- return allFiles, allVersions, nil
-}
diff --git a/cmd/zoekt-test/main.go b/cmd/zoekt-test/main.go
deleted file mode 100644
index 2d131d1..0000000
--- a/cmd/zoekt-test/main.go
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// zoekt-test compares the search engine results with raw substring search
-package main
-
-import (
- "bufio"
- "bytes"
- "context"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "os"
- "path/filepath"
- "reflect"
- "sort"
- "strings"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/query"
- "github.com/google/zoekt/shards"
-)
-
-func readTree(dir string) (map[string][]byte, error) {
- var fns []string
-
- add := func(path string, info os.FileInfo, err error) error {
- if !info.Mode().IsRegular() {
- return nil
- }
-
- fns = append(fns, path)
- return nil
- }
- if err := filepath.Walk(dir, add); err != nil {
- return nil, err
- }
-
- res := map[string][]byte{}
- for _, n := range fns {
- c, err := ioutil.ReadFile(n)
- if err != nil {
- return nil, err
- }
-
- strip := strings.TrimPrefix(n, dir+"/")
- res[strip] = c
- }
- return res, nil
-}
-
-func compare(dir, patfile string, caseSensitive bool) error {
- indexDir, err := ioutil.TempDir("", "")
- if err != nil {
- return err
- }
- defer os.RemoveAll(indexDir)
-
- var opts build.Options
- opts.SetDefaults()
- opts.IndexDir = indexDir
-
- fileContents, err := readTree(dir)
- if err != nil {
- return err
- }
- if len(fileContents) == 0 {
- return fmt.Errorf("no contents")
- }
-
- builder, err := build.NewBuilder(opts)
- if err != nil {
- return err
- }
- for k, v := range fileContents {
- builder.AddFile(k, v)
- }
- if err := builder.Finish(); err != nil {
- return err
- }
-
- if !caseSensitive {
- for k, v := range fileContents {
- fileContents[k] = toLower(v)
- }
- }
-
- f, err := os.Open(patfile)
- if err != nil {
- return err
- }
- searcher, err := shards.NewDirectorySearcher(indexDir)
- if err != nil {
- return err
- }
-
- scanner := bufio.NewScanner(f)
- for scanner.Scan() {
- t := scanner.Text()
- if len(t) < 3 {
- continue
- }
- q := &query.Substring{
- Pattern: t,
- CaseSensitive: caseSensitive,
- }
-
- zFiles := map[string]struct{}{}
- rFiles := map[string]struct{}{}
-
- // search engine results
- var opts zoekt.SearchOptions
- res, err := searcher.Search(context.Background(), q, &opts)
- if err != nil {
- return err
- }
-
- for _, f := range res.Files {
- zFiles[f.FileName] = struct{}{}
- }
-
- // raw search
- needle := []byte(t)
- if !caseSensitive {
- needle = toLower(needle)
- }
-
- for k, v := range fileContents {
- if bytes.Contains(v, needle) {
- rFiles[k] = struct{}{}
- }
- }
-
- if !reflect.DeepEqual(zFiles, rFiles) {
- var add, del []string
- for k := range zFiles {
- if _, ok := rFiles[k]; !ok {
- del = append(del, k)
- }
- }
- for k := range rFiles {
- if _, ok := zFiles[k]; !ok {
- add = append(add, k)
- }
- }
- sort.Strings(add)
- sort.Strings(del)
- log.Printf("pattern %q, add %v, del %v", t, add, del)
- }
- }
- return nil
-}
-
-func main() {
- repo := flag.String("repo", "", "repository to search")
- caseSensitive := flag.Bool("case", false, "case sensitive")
- flag.Parse()
-
- if len(flag.Args()) == 0 {
- fmt.Fprintf(os.Stderr, "pattern file is missing.\n")
- flag.Usage()
- os.Exit(2)
- }
- input := flag.Arg(0)
-
- if err := compare(*repo, input, *caseSensitive); err != nil {
- log.Fatal(err)
- }
-}
-
-func toLower(in []byte) []byte {
- out := make([]byte, len(in))
- for i, c := range in {
- if c >= 'A' && c <= 'Z' {
- c = c - 'A' + 'a'
- }
- out[i] = c
- }
- return out
-}
diff --git a/cmd/zoekt-webserver/main.go b/cmd/zoekt-webserver/main.go
deleted file mode 100644
index 0cb4771..0000000
--- a/cmd/zoekt-webserver/main.go
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "context"
- "crypto/tls"
- "flag"
- "fmt"
- "html/template"
- "io/ioutil"
- "log"
- "net/http"
- "net/http/pprof"
- "os"
- "path/filepath"
- "strings"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/shards"
- "github.com/google/zoekt/web"
- "github.com/prometheus/client_golang/prometheus/promhttp"
- "go.uber.org/automaxprocs/maxprocs"
- "golang.org/x/net/trace"
-)
-
-const logFormat = "2006-01-02T15-04-05.999999999Z07"
-
-func divertLogs(dir string, interval time.Duration) {
- t := time.NewTicker(interval)
- var last *os.File
- for {
- nm := filepath.Join(dir, fmt.Sprintf("zoekt-webserver.%s.%d.log", time.Now().Format(logFormat), os.Getpid()))
- fmt.Fprintf(os.Stderr, "writing logs to %s\n", nm)
-
- f, err := os.Create(nm)
- if err != nil {
- // There is not much we can do now.
- fmt.Fprintf(os.Stderr, "can't create output file %s: %v\n", nm, err)
- os.Exit(2)
- }
-
- log.SetOutput(f)
- last.Close()
-
- last = f
-
- <-t.C
- }
-}
-
-const templateExtension = ".html.tpl"
-
-func loadTemplates(tpl *template.Template, dir string) error {
- fs, err := filepath.Glob(dir + "/*" + templateExtension)
- if err != nil {
- log.Fatalf("Glob: %v", err)
- }
-
- log.Printf("loading templates: %v", fs)
- for _, fn := range fs {
- content, err := ioutil.ReadFile(fn)
- if err != nil {
- return err
- }
-
- base := filepath.Base(fn)
- base = strings.TrimSuffix(base, templateExtension)
- if _, err := tpl.New(base).Parse(string(content)); err != nil {
- return fmt.Errorf("template.Parse(%s): %v", fn, err)
- }
- }
- return nil
-}
-
-func writeTemplates(dir string) error {
- if dir == "" {
- return fmt.Errorf("must set --template_dir")
- }
-
- for k, v := range web.TemplateText {
- nm := filepath.Join(dir, k+templateExtension)
- if err := ioutil.WriteFile(nm, []byte(v), 0o644); err != nil {
- return err
- }
- }
- return nil
-}
-
-func main() {
- logDir := flag.String("log_dir", "", "log to this directory rather than stderr.")
- logRefresh := flag.Duration("log_refresh", 24*time.Hour, "if using --log_dir, start writing a new file this often.")
-
- listen := flag.String("listen", ":6070", "listen on this address.")
- index := flag.String("index", build.DefaultDir, "set index directory to use")
- html := flag.Bool("html", true, "enable HTML interface")
- print := flag.Bool("print", false, "enable local result URLs")
- enablePprof := flag.Bool("pprof", false, "set to enable remote profiling.")
- sslCert := flag.String("ssl_cert", "", "set path to SSL .pem holding certificate.")
- sslKey := flag.String("ssl_key", "", "set path to SSL .pem holding key.")
- hostCustomization := flag.String(
- "host_customization", "",
- "specify host customization, as HOST1=QUERY,HOST2=QUERY")
-
- templateDir := flag.String("template_dir", "", "set directory from which to load custom .html.tpl template files")
- dumpTemplates := flag.Bool("dump_templates", false, "dump templates into --template_dir and exit.")
- version := flag.Bool("version", false, "Print version number")
- flag.Parse()
-
- if *version {
- fmt.Printf("zoekt-webserver version %q\n", zoekt.Version)
- os.Exit(0)
- }
-
- if *dumpTemplates {
- if err := writeTemplates(*templateDir); err != nil {
- log.Fatal(err)
- }
- os.Exit(0)
- }
-
- if *logDir != "" {
- if fi, err := os.Lstat(*logDir); err != nil || !fi.IsDir() {
- log.Fatalf("%s is not a directory", *logDir)
- }
- // We could do fdup acrobatics to also redirect
- // stderr, but it is simpler and more portable for the
- // caller to divert stderr output if necessary.
- go divertLogs(*logDir, *logRefresh)
- }
-
- // Tune GOMAXPROCS to match Linux container CPU quota.
- maxprocs.Set()
-
- if err := os.MkdirAll(*index, 0o755); err != nil {
- log.Fatal(err)
- }
-
- searcher, err := shards.NewDirectorySearcher(*index)
- if err != nil {
- log.Fatal(err)
- }
-
- s := &web.Server{
- Searcher: searcher,
- Top: web.Top,
- Version: zoekt.Version,
- }
-
- if *templateDir != "" {
- if err := loadTemplates(s.Top, *templateDir); err != nil {
- log.Fatalf("loadTemplates: %v", err)
- }
- }
-
- s.Print = *print
- s.HTML = *html
-
- if *hostCustomization != "" {
- s.HostCustomQueries = map[string]string{}
- for _, h := range strings.SplitN(*hostCustomization, ",", -1) {
- if len(h) == 0 {
- continue
- }
- fields := strings.SplitN(h, "=", 2)
- if len(fields) < 2 {
- log.Fatalf("invalid host_customization %q", h)
- }
-
- s.HostCustomQueries[fields[0]] = fields[1]
- }
- }
-
- handler, err := web.NewMux(s)
- if err != nil {
- log.Fatal(err)
- }
-
- handler.Handle("/metrics", promhttp.Handler())
-
- if *enablePprof {
- handler.HandleFunc("/debug/pprof/", pprof.Index)
- handler.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
- handler.HandleFunc("/debug/pprof/profile", pprof.Profile)
- handler.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
- handler.HandleFunc("/debug/pprof/trace", pprof.Trace)
- handler.HandleFunc("/debug/requests/", trace.Traces)
- handler.HandleFunc("/debug/events/", trace.Events)
- }
-
- watchdogAddr := "http://" + *listen
- if *sslCert != "" || *sslKey != "" {
- watchdogAddr = "https://" + *listen
- }
- go watchdog(30*time.Second, watchdogAddr)
-
- if *sslCert != "" || *sslKey != "" {
- log.Printf("serving HTTPS on %s", *listen)
- err = http.ListenAndServeTLS(*listen, *sslCert, *sslKey, handler)
- } else {
- log.Printf("serving HTTP on %s", *listen)
- err = http.ListenAndServe(*listen, handler)
- }
- log.Printf("ListenAndServe: %v", err)
-}
-
-func watchdogOnce(ctx context.Context, client *http.Client, addr string) error {
- ctx, cancel := context.WithDeadline(ctx, time.Now().Add(5*time.Second))
- defer cancel()
-
- req, err := http.NewRequest("GET", addr, nil)
- if err != nil {
- return err
- }
-
- req = req.WithContext(ctx)
-
- resp, err := client.Do(req)
- if err != nil {
- return err
- }
-
- if resp.StatusCode != http.StatusOK {
- return fmt.Errorf("watchdog: status %v", resp.StatusCode)
- }
- return nil
-}
-
-func watchdog(dt time.Duration, addr string) {
- tr := &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
- }
- client := &http.Client{
- Transport: tr,
- }
- tick := time.NewTicker(dt)
-
- errCount := 0
- for range tick.C {
- err := watchdogOnce(context.Background(), client, addr)
- if err != nil {
- errCount++
- } else {
- errCount = 0
- }
- if errCount == 3 {
- log.Panicf("watchdog: %v", err)
- }
- }
-}
diff --git a/cmd/zoekt/main.go b/cmd/zoekt/main.go
deleted file mode 100644
index 035e8fa..0000000
--- a/cmd/zoekt/main.go
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
- "context"
- "flag"
- "fmt"
- "log"
- "os"
- "path/filepath"
- "runtime/pprof"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/query"
- "github.com/google/zoekt/shards"
-)
-
-func displayMatches(files []zoekt.FileMatch, pat string, withRepo bool, list bool) {
- for _, f := range files {
- r := ""
- if withRepo {
- r = f.Repository + "/"
- }
- if list {
- fmt.Printf("%s%s\n", r, f.FileName)
- continue
- }
-
- for _, m := range f.LineMatches {
- fmt.Printf("%s%s:%d:%s\n", r, f.FileName, m.LineNumber, m.Line)
- }
- }
-}
-
-func loadShard(fn string, verbose bool) (zoekt.Searcher, error) {
- f, err := os.Open(fn)
- if err != nil {
- return nil, err
- }
-
- iFile, err := zoekt.NewIndexFile(f)
- if err != nil {
- return nil, err
- }
-
- s, err := zoekt.NewSearcher(iFile)
- if err != nil {
- iFile.Close()
- return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
- }
-
- if verbose {
- repo, index, err := zoekt.ReadMetadata(iFile)
- if err != nil {
- iFile.Close()
- return nil, fmt.Errorf("ReadMetadata(%s): %v", fn, err)
- }
- log.Printf("repo metadata: %#v", repo)
- log.Printf("index metadata: %#v", index)
- }
-
- return s, nil
-}
-
-func main() {
- shard := flag.String("shard", "", "search in a specific shard")
- index := flag.String("index_dir",
- filepath.Join(os.Getenv("HOME"), ".zoekt"), "search for index files in `directory`")
- cpuProfile := flag.String("cpu_profile", "", "write cpu profile to `file`")
- profileTime := flag.Duration("profile_time", time.Second, "run this long to gather stats.")
- verbose := flag.Bool("v", false, "print some background data")
- withRepo := flag.Bool("r", false, "print the repo before the file name")
- list := flag.Bool("l", false, "print matching filenames only")
-
- flag.Usage = func() {
- name := os.Args[0]
- fmt.Fprintf(os.Stderr, "Usage:\n\n %s [option] QUERY\n"+
- "for example\n\n %s 'byte file:java -file:test'\n\n", name, name)
- flag.PrintDefaults()
- fmt.Fprintf(os.Stderr, "\n")
- }
- flag.Parse()
-
- if len(flag.Args()) == 0 {
- fmt.Fprintf(os.Stderr, "Pattern is missing.\n")
- flag.Usage()
- os.Exit(2)
- }
- pat := flag.Arg(0)
-
- var searcher zoekt.Searcher
- var err error
- if *shard != "" {
- searcher, err = loadShard(*shard, *verbose)
- } else {
- searcher, err = shards.NewDirectorySearcher(*index)
- }
-
- if err != nil {
- log.Fatal(err)
- }
-
- query, err := query.Parse(pat)
- if err != nil {
- log.Fatal(err)
- }
- if *verbose {
- log.Println("query:", query)
- }
-
- var sOpts zoekt.SearchOptions
- sres, err := searcher.Search(context.Background(), query, &sOpts)
- if *cpuProfile != "" {
- // If profiling, do it another time so we measure with
- // warm caches.
- f, err := os.Create(*cpuProfile)
- if err != nil {
- log.Fatal(err)
- }
- defer f.Close()
- if *verbose {
- log.Println("Displaying matches...")
- }
-
- t := time.Now()
- pprof.StartCPUProfile(f)
- for {
- sres, _ = searcher.Search(context.Background(), query, &sOpts)
- if time.Since(t) > *profileTime {
- break
- }
- }
- pprof.StopCPUProfile()
- }
-
- if err != nil {
- log.Fatal(err)
- }
-
- displayMatches(sres.Files, pat, *withRepo, *list)
- if *verbose {
- log.Printf("stats: %#v", sres.Stats)
- }
-}
diff --git a/contentprovider.go b/contentprovider.go
deleted file mode 100644
index 62fd149..0000000
--- a/contentprovider.go
+++ /dev/null
@@ -1,310 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "bytes"
- "log"
- "sort"
- "unicode/utf8"
-)
-
-var _ = log.Println
-
-// contentProvider is an abstraction to treat matches for names and
-// content with the same code.
-type contentProvider struct {
- id *indexData
- stats *Stats
-
- // mutable
- err error
- idx uint32
- _data []byte
- _nl []uint32
- _nlBuf []uint32
- _sects []DocumentSection
- _sectBuf []DocumentSection
- fileSize uint32
-}
-
-// setDocument skips to the given document.
-func (p *contentProvider) setDocument(docID uint32) {
- fileStart := p.id.boundaries[docID]
-
- p.idx = docID
- p.fileSize = p.id.boundaries[docID+1] - fileStart
-
- p._nl = nil
- p._sects = nil
- p._data = nil
-}
-
-func (p *contentProvider) docSections() []DocumentSection {
- if p._sects == nil {
- var sz uint32
- p._sects, sz, p.err = p.id.readDocSections(p.idx, p._sectBuf)
- p.stats.ContentBytesLoaded += int64(sz)
- p._sectBuf = p._sects
- }
- return p._sects
-}
-
-func (p *contentProvider) newlines() []uint32 {
- if p._nl == nil {
- var sz uint32
- p._nl, sz, p.err = p.id.readNewlines(p.idx, p._nlBuf)
- p._nlBuf = p._nl
- p.stats.ContentBytesLoaded += int64(sz)
- }
- return p._nl
-}
-
-func (p *contentProvider) data(fileName bool) []byte {
- if fileName {
- return p.id.fileNameContent[p.id.fileNameIndex[p.idx]:p.id.fileNameIndex[p.idx+1]]
- }
-
- if p._data == nil {
- p._data, p.err = p.id.readContents(p.idx)
- p.stats.FilesLoaded++
- p.stats.ContentBytesLoaded += int64(len(p._data))
- }
- return p._data
-}
-
-// Find offset in bytes (relative to corpus start) for an offset in
-// runes (relative to document start). If filename is set, the corpus
-// is the set of filenames, with the document being the name itself.
-func (p *contentProvider) findOffset(filename bool, r uint32) uint32 {
- if p.id.metaData.PlainASCII {
- return r
- }
-
- sample := p.id.runeOffsets
- runeEnds := p.id.fileEndRunes
- fileStartByte := p.id.boundaries[p.idx]
- if filename {
- sample = p.id.fileNameRuneOffsets
- runeEnds = p.id.fileNameEndRunes
- fileStartByte = p.id.fileNameIndex[p.idx]
- }
-
- absR := r
- if p.idx > 0 {
- absR += runeEnds[p.idx-1]
- }
-
- byteOff := sample[absR/runeOffsetFrequency]
- left := absR % runeOffsetFrequency
-
- var data []byte
-
- if filename {
- data = p.id.fileNameContent[byteOff:]
- } else {
- data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency)
- if p.err != nil {
- return 0
- }
- }
- for left > 0 {
- _, sz := utf8.DecodeRune(data)
- byteOff += uint32(sz)
- data = data[sz:]
- left--
- }
-
- byteOff -= fileStartByte
- return byteOff
-}
-
-func (p *contentProvider) fillMatches(ms []*candidateMatch) []LineMatch {
- var result []LineMatch
- if ms[0].fileName {
- // There is only "line" in a filename.
- res := LineMatch{
- Line: p.id.fileName(p.idx),
- FileName: true,
- }
-
- for _, m := range ms {
- res.LineFragments = append(res.LineFragments, LineFragmentMatch{
- LineOffset: int(m.byteOffset),
- MatchLength: int(m.byteMatchSz),
- Offset: m.byteOffset,
- })
-
- result = []LineMatch{res}
- }
- } else {
- ms = breakMatchesOnNewlines(ms, p.data(false))
- result = p.fillContentMatches(ms)
- }
-
- sects := p.docSections()
- for i, m := range result {
- result[i].Score = matchScore(sects, &m)
- }
-
- return result
-}
-
-func (p *contentProvider) fillContentMatches(ms []*candidateMatch) []LineMatch {
- var result []LineMatch
- for len(ms) > 0 {
- m := ms[0]
- num, lineStart, lineEnd := m.line(p.newlines(), p.fileSize)
-
- var lineCands []*candidateMatch
-
- endMatch := m.byteOffset + m.byteMatchSz
-
- for len(ms) > 0 {
- m := ms[0]
- if int(m.byteOffset) <= lineEnd {
- endMatch = m.byteOffset + m.byteMatchSz
- lineCands = append(lineCands, m)
- ms = ms[1:]
- } else {
- break
- }
- }
-
- if len(lineCands) == 0 {
- log.Panicf(
- "%s %v infinite loop: num %d start,end %d,%d, offset %d",
- p.id.fileName(p.idx), p.id.metaData,
- num, lineStart, lineEnd,
- m.byteOffset)
- }
-
- data := p.data(false)
-
- // Due to merging matches, we may have a match that
- // crosses a line boundary. Prevent confusion by
- // taking lines until we pass the last match
- for lineEnd < len(data) && endMatch > uint32(lineEnd) {
- next := bytes.IndexByte(data[lineEnd+1:], '\n')
- if next == -1 {
- lineEnd = len(data)
- } else {
- // TODO(hanwen): test that checks "+1" part here.
- lineEnd += next + 1
- }
- }
-
- finalMatch := LineMatch{
- LineStart: lineStart,
- LineEnd: lineEnd,
- LineNumber: num,
- }
- finalMatch.Line = data[lineStart:lineEnd]
-
- for _, m := range lineCands {
- fragment := LineFragmentMatch{
- Offset: m.byteOffset,
- LineOffset: int(m.byteOffset) - lineStart,
- MatchLength: int(m.byteMatchSz),
- }
- finalMatch.LineFragments = append(finalMatch.LineFragments, fragment)
- }
- result = append(result, finalMatch)
- }
- return result
-}
-
-const (
- // TODO - how to scale this relative to rank?
- scorePartialWordMatch = 50.0
- scoreWordMatch = 500.0
- scoreImportantThreshold = 2000.0
- scorePartialSymbol = 4000.0
- scoreSymbol = 7000.0
- scoreFactorAtomMatch = 400.0
- scoreShardRankFactor = 20.0
- scoreFileOrderFactor = 10.0
- scoreLineOrderFactor = 1.0
-)
-
-func findSection(secs []DocumentSection, off, sz uint32) *DocumentSection {
- j := sort.Search(len(secs), func(i int) bool {
- return secs[i].End >= off+sz
- })
-
- if j == len(secs) {
- return nil
- }
-
- if secs[j].Start <= off && off+sz <= secs[j].End {
- return &secs[j]
- }
- return nil
-}
-
-func matchScore(secs []DocumentSection, m *LineMatch) float64 {
- var maxScore float64
- for _, f := range m.LineFragments {
- startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset]))
-
- end := int(f.LineOffset) + f.MatchLength
- endBoundary := end > 0 && (end == len(m.Line) || byteClass(m.Line[end-1]) != byteClass(m.Line[end]))
-
- score := 0.0
- if startBoundary && endBoundary {
- score = scoreWordMatch
- } else if startBoundary || endBoundary {
- score = scorePartialWordMatch
- }
-
- sec := findSection(secs, f.Offset, uint32(f.MatchLength))
- if sec != nil {
- startMatch := sec.Start == f.Offset
- endMatch := sec.End == f.Offset+uint32(f.MatchLength)
- if startMatch && endMatch {
- score += scoreSymbol
- } else if startMatch || endMatch {
- score += (scoreSymbol + scorePartialSymbol) / 2
- } else {
- score += scorePartialSymbol
- }
- }
- if score > maxScore {
- maxScore = score
- }
- }
- return maxScore
-}
-
-type matchScoreSlice []LineMatch
-
-func (m matchScoreSlice) Len() int { return len(m) }
-func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
-func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
-
-type fileMatchSlice []FileMatch
-
-func (m fileMatchSlice) Len() int { return len(m) }
-func (m fileMatchSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
-func (m fileMatchSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
-
-func sortMatchesByScore(ms []LineMatch) {
- sort.Sort(matchScoreSlice(ms))
-}
-
-// Sort a slice of results.
-func SortFilesByScore(ms []FileMatch) {
- sort.Sort(fileMatchSlice(ms))
-}
diff --git a/ctags/json.go b/ctags/json.go
deleted file mode 100644
index 980b54b..0000000
--- a/ctags/json.go
+++ /dev/null
@@ -1,265 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ctags
-
-import (
- "bufio"
- "bytes"
- "encoding/json"
- "fmt"
- "io"
- "log"
- "os"
- "os/exec"
- "runtime"
- "strings"
- "sync"
-)
-
-const debug = false
-
-type ctagsProcess struct {
- cmd *exec.Cmd
- in io.WriteCloser
- out *scanner
- outPipe io.ReadCloser
-}
-
-func newProcess(bin string) (*ctagsProcess, error) {
- opt := "default"
- if runtime.GOOS == "linux" {
- opt = "sandbox"
- }
-
- cmd := exec.Command(bin, "--_interactive="+opt, "--fields=*")
- in, err := cmd.StdinPipe()
- if err != nil {
- return nil, err
- }
-
- out, err := cmd.StdoutPipe()
- if err != nil {
- in.Close()
- return nil, err
- }
- cmd.Stderr = os.Stderr
- proc := ctagsProcess{
- cmd: cmd,
- in: in,
- out: &scanner{r: bufio.NewReaderSize(out, 4096)},
- outPipe: out,
- }
-
- if err := cmd.Start(); err != nil {
- return nil, err
- }
-
- var init reply
- if err := proc.read(&init); err != nil {
- return nil, err
- }
-
- return &proc, nil
-}
-
-func (p *ctagsProcess) Close() {
- p.cmd.Process.Kill()
- p.outPipe.Close()
- p.in.Close()
-}
-
-func (p *ctagsProcess) read(rep *reply) error {
- if !p.out.Scan() {
- // Some errors do not kill the parser. We would deadlock if we waited
- // for the process to exit.
- err := p.out.Err()
- p.Close()
- return err
- }
- if debug {
- log.Printf("read %q", p.out.Bytes())
- }
-
- // See https://github.com/universal-ctags/ctags/issues/1493
- if bytes.Equal([]byte("(null)"), p.out.Bytes()) {
- return nil
- }
-
- err := json.Unmarshal(p.out.Bytes(), rep)
- if err != nil {
- return fmt.Errorf("unmarshal(%q): %v", p.out.Bytes(), err)
- }
- return nil
-}
-
-func (p *ctagsProcess) post(req *request, content []byte) error {
- body, err := json.Marshal(req)
- if err != nil {
- return err
- }
- body = append(body, '\n')
- if debug {
- log.Printf("post %q", body)
- }
-
- if _, err = p.in.Write(body); err != nil {
- return err
- }
- _, err = p.in.Write(content)
- if debug {
- log.Println(string(content))
- }
- return err
-}
-
-type request struct {
- Command string `json:"command"`
- Filename string `json:"filename"`
- Size int `json:"size"`
-}
-
-type reply struct {
- // Init
- Typ string `json:"_type"`
- Name string `json:"name"`
- Version string `json:"version"`
-
- // completed
- Command string `json:"command"`
-
- // Ignore pattern: we don't use it and universal-ctags
- // sometimes generates 'false' as value.
- Path string `json:"path"`
- Language string `json:"language"`
- Line int `json:"line"`
- Kind string `json:"kind"`
- End int `json:"end"`
- Scope string `json:"scope"`
- ScopeKind string `json:"scopeKind"`
- Access string `json:"access"`
- Signature string `json:"signature"`
-}
-
-func (p *ctagsProcess) Parse(name string, content []byte) ([]*Entry, error) {
- req := request{
- Command: "generate-tags",
- Size: len(content),
- Filename: name,
- }
-
- if err := p.post(&req, content); err != nil {
- return nil, err
- }
-
- var es []*Entry
- for {
- var rep reply
- if err := p.read(&rep); err != nil {
- return nil, err
- }
- if rep.Typ == "completed" {
- break
- }
-
- e := Entry{
- Sym: rep.Name,
- Path: rep.Path,
- Line: rep.Line,
- Kind: rep.Kind,
- Language: rep.Language,
- }
-
- es = append(es, &e)
- }
-
- return es, nil
-}
-
-// scanner is like bufio.Scanner but skips long lines instead of returning
-// bufio.ErrTooLong.
-//
-// Additionally it will skip empty lines.
-type scanner struct {
- r *bufio.Reader
- line []byte
- err error
-}
-
-func (s *scanner) Scan() bool {
- if s.err != nil {
- return false
- }
-
- var (
- err error
- line []byte
- )
-
- for err == nil && len(line) == 0 {
- line, err = s.r.ReadSlice('\n')
- for err == bufio.ErrBufferFull {
- // make line empty so we ignore it
- line = nil
- _, err = s.r.ReadSlice('\n')
- }
- line = bytes.TrimSuffix(line, []byte{'\n'})
- line = bytes.TrimSuffix(line, []byte{'\r'})
- }
-
- s.line, s.err = line, err
- return len(line) > 0
-}
-
-func (s *scanner) Bytes() []byte {
- return s.line
-}
-
-func (s *scanner) Err() error {
- if s.err == io.EOF {
- return nil
- }
- return s.err
-}
-
-type Parser interface {
- Parse(name string, content []byte) ([]*Entry, error)
-}
-
-type lockedParser struct {
- p Parser
- l sync.Mutex
-}
-
-func (lp *lockedParser) Parse(name string, content []byte) ([]*Entry, error) {
- lp.l.Lock()
- defer lp.l.Unlock()
- return lp.p.Parse(name, content)
-}
-
-// NewParser creates a parser that is implemented by the given
-// universal-ctags binary. The parser is safe for concurrent use.
-func NewParser(bin string) (Parser, error) {
- if strings.Contains(bin, "universal-ctags") {
- // todo: restart, parallelization.
- proc, err := newProcess(bin)
- if err != nil {
- return nil, err
- }
- return &lockedParser{p: proc}, nil
- }
-
- log.Fatal("not implemented")
- return nil, nil
-}
diff --git a/ctags/json_test.go b/ctags/json_test.go
deleted file mode 100644
index c6f1d3c..0000000
--- a/ctags/json_test.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ctags
-
-import (
- "bufio"
- "os/exec"
- "reflect"
- "strings"
- "testing"
-
- "github.com/google/go-cmp/cmp"
-)
-
-func TestJSON(t *testing.T) {
- if _, err := exec.LookPath("universal-ctags"); err != nil {
- t.Skip(err)
- }
-
- p, err := newProcess("universal-ctags")
- if err != nil {
- t.Fatal("newProcess", err)
- }
-
- defer p.Close()
-
- java := `
-package io.zoekt;
-import java.util.concurrent.Future;
-class Back implements Future extends Frob {
- public static int BLA = 1;
- public int member;
- public Back() {
- member = 2;
- }
- public int method() {
- member++;
- }
-}
-`
- name := "io/zoekt/Back.java"
- got, err := p.Parse(name, []byte(java))
- if err != nil {
- t.Errorf("Process: %v", err)
- }
-
- want := []*Entry{
- {
- Sym: "io.zoekt",
- Kind: "package",
- Language: "Java",
- Path: "io/zoekt/Back.java",
- Line: 2,
- },
- {
- Sym: "Back",
- Path: "io/zoekt/Back.java",
- Line: 4,
- Language: "Java",
- Kind: "class",
- },
-
- {
- Sym: "BLA",
- Path: "io/zoekt/Back.java",
- Line: 5,
- Kind: "field",
- Language: "Java",
- },
- {
- Sym: "member",
- Path: "io/zoekt/Back.java",
- Line: 6,
- Language: "Java",
- Kind: "field",
- },
- {
- Sym: "Back",
- Path: "io/zoekt/Back.java",
- Language: "Java",
- Line: 7,
- Kind: "method",
- },
- {
- Sym: "method",
- Language: "Java",
- Path: "io/zoekt/Back.java",
- Line: 10,
- Kind: "method",
- },
- }
-
- for i := range want {
- if !reflect.DeepEqual(got[i], want[i]) {
- t.Fatalf("got %#v, want %#v", got[i], want[i])
- }
- }
-}
-
-func TestScanner(t *testing.T) {
- size := 20
-
- input := strings.Join([]string{
- "aaaaaaaaa",
- strings.Repeat("B", 3*size+3),
- strings.Repeat("C", size) + strings.Repeat("D", size+1),
- "",
- strings.Repeat("e", size-1),
- "f\r",
- "gg",
- }, "\n")
- want := []string{
- "aaaaaaaaa",
- strings.Repeat("e", size-1),
- "f",
- "gg",
- }
-
- var got []string
- r := &scanner{r: bufio.NewReaderSize(strings.NewReader(input), size)}
- for r.Scan() {
- got = append(got, string(r.Bytes()))
- }
- if err := r.Err(); err != nil {
- t.Fatal(err)
- }
-
- if !cmp.Equal(got, want) {
- t.Errorf("mismatch (-want +got):\n%s", cmp.Diff(want, got))
- }
-}
diff --git a/ctags/parse.go b/ctags/parse.go
deleted file mode 100644
index d199428..0000000
--- a/ctags/parse.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ctags
-
-import (
- "fmt"
- "strconv"
- "strings"
-)
-
-type Entry struct {
- Sym string
- Path string
- Line int
- Kind string
- Language string
- Parent string
- ParentType string
-
- FileLimited bool
-}
-
-// Parse parses a single line of exuberant "ctags -n" output.
-func Parse(in string) (*Entry, error) {
- fields := strings.Split(in, "\t")
- e := Entry{}
-
- if len(fields) < 3 {
- return nil, fmt.Errorf("too few fields: %q", in)
- }
-
- e.Sym = fields[0]
- e.Path = fields[1]
-
- lstr := fields[2]
- if len(lstr) < 2 {
- return nil, fmt.Errorf("got %q for linenum field", lstr)
- }
-
- l, err := strconv.ParseInt(lstr[:len(lstr)-2], 10, 64)
- if err != nil {
- return nil, err
- }
- e.Line = int(l)
- e.Kind = fields[3]
-
-field:
- for _, f := range fields[3:] {
- if string(f) == "file:" {
- e.FileLimited = true
- }
- for _, p := range []string{"class", "enum"} {
- if strings.HasPrefix(f, p+":") {
- e.Parent = strings.TrimPrefix(f, p+":")
- e.ParentType = p
- continue field
- }
- }
- }
- return &e, nil
-}
diff --git a/ctags/parse_test.go b/ctags/parse_test.go
deleted file mode 100644
index 30f41a1..0000000
--- a/ctags/parse_test.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ctags
-
-import (
- "reflect"
- "testing"
-)
-
-func TestParse(t *testing.T) {
- type testcase struct {
- in string
- out *Entry
- }
- cases := []testcase{
- {
- `ABBREV_SHA ./gitiles-servlet/src/main/java/com/google/gitiles/CommitData.java 59;" e enum:CommitData.Field file:`,
- &Entry{
- Sym: "ABBREV_SHA",
- Path: "./gitiles-servlet/src/main/java/com/google/gitiles/CommitData.java",
- Line: 59,
- Kind: "e",
- Parent: "CommitData.Field",
- ParentType: "enum",
- FileLimited: true,
- },
- },
- {
- `ACCESS_ATTRIBUTE ./gitiles-servlet/src/main/java/com/google/gitiles/CommitData.java 55;" f class:BaseServlet file:`,
- &Entry{
- Sym: "ACCESS_ATTRIBUTE",
- Path: "./gitiles-servlet/src/main/java/com/google/gitiles/CommitData.java",
- Line: 55,
- Kind: "f",
- Parent: "BaseServlet",
- ParentType: "class",
- FileLimited: true,
- },
- },
- }
- for _, c := range cases {
- e, err := Parse(c.in)
- if err != nil && c.out != nil {
- t.Errorf("Parse(%s): %v", c.in, err)
- } else if !reflect.DeepEqual(c.out, e) {
- t.Errorf("Parse(%s): got %#v, want %#v", c.in, e, c.out)
- }
- }
-}
diff --git a/doc/ctags.md b/doc/ctags.md
deleted file mode 100644
index 36b4014..0000000
--- a/doc/ctags.md
+++ /dev/null
@@ -1,41 +0,0 @@
-
-CTAGS
-=====
-
-Ctags generates indices of symbol definitions in source files. It
-started its life as part of the BSD Unix, but there are several more
-modern flavors. Zoekt supports both [exuberant
-ctags](http://ctags.sourceforge.net/) and
-[universal-ctags](https://github.com/universal-ctags).
-
-It is strongly recommended to use Universal Ctags, [version
-`db3d9a6`](https://github.com/universal-ctags/ctags/commit/4ff09da9b0a36a9e75c92f4be05d476b35b672cd)
-or newer, running on the Linux platform.
-
-From this version on, universal ctags will be called using seccomp,
-which guarantees that security problems in ctags cannot escalate to
-access to the indexing machine.
-
-Ubuntu, Debian and Arch provide universal ctags with seccomp support
-compiled in. Zoekt expects the `universal-ctags` binary to be on
-`$PATH`. Note: only Ubuntu names the binary `universal-ctags`, while
-most distributions name it `ctags`.
-
-Use the following invocation to compile and install universal-ctags:
-
-```
-sudo apt-get install
- pkg-config autoconf \
- libseccomp-dev libseccomp \
- libjansson-dev libjansson
-
-./autogen.sh
-LDFLAGS=-static ./configure --enable-json --enable-seccomp
-make -j4
-
-# create tarball
-NAME=ctags-$(date --iso-8601=minutes | tr -d ':' | sed 's|\+.*$||')-$(git show --pretty=format:%h -q)
-mkdir ${NAME}
-cp ctags ${NAME}/universal-ctags
-tar zcf ${NAME}.tar.gz ${NAME}/
-```
diff --git a/doc/design.md b/doc/design.md
deleted file mode 100644
index df3bfea..0000000
--- a/doc/design.md
+++ /dev/null
@@ -1,328 +0,0 @@
-
-
-OBJECTIVE
-=========
-
-Provide full text code search for git based corpuses.
-
-Goals:
-
-* sub-50ms results on large codebases, such as Android (~2G text) or
- Chrome
-
-* works well on a single standard Linux machine, with stable storage on SSD
-
-* search multiple repositories and multiple branches.
-
-* provide rich query language, with boolean operators
-
-* integrate with Gerrit/Gitiles code review/browsing system
-
-
-SEARCHING AND INDEXING
-======================
-
-
-Positional trigrams
--------------------
-
-We build an index of ngrams (n=3), where we store the offset of each
-ngram's occurrence within a file. For example, if the corpus is "banana"
-then we generate the index
-
- "ban": 0
- "ana": 1,3
- "nan": 2
-
-If we are searching for a string (eg. "The quick brown fox"), then we
-look for two trigrams (eg. "The" and "fox"), and check that they are
-found at the right distance apart.
-
-Regular expressions are handled by extracting normal strings from the regular
-expressions. For example, to search for
-
- (Path|PathFragment).*=.*/usr/local
-
-we look for
-
- (AND (OR substr:"Path" substr:"PathFragment") substr:"/usr/local")
-
-and any documents thus found would be searched for the regular
-expression.
-
-Compared to indexing 3-grams on a per-file basis, as described
-[here](https://swtch.com/~rsc/regexp/regexp4.html), there are some advantages:
-
-* for each substring, we only have to intersect just a couple of posting-lists:
- one for the beginning, and one for the end.
-
-* Since we touch few posting lists per query, they can be stored on
- slower media, such as SSD.
-
-* we can select any pair of trigrams from the pattern for which the
- number of matches is minimal. For example, we could search for "qui"
- rather than "the".
-
-There are some downsides compared to trigrams:
-
-* The index is large. Empirically, it is about 3x the corpus size, composed of
- 2x (offsets), and 1x (original content). However, since we have to look at
- just a limited number of ngrams, we don't have to keep the index in memory.
-
-Compared to [suffix
-arrays](https://blog.nelhage.com/2015/02/regular-expression-search-with-suffix-arrays/),
-there are the following advantages:
-
-* The index construction is straightforward, and can easily be made
- incremental.
-
-* Since the posting lists for a trigram can be stored on SSD,
- searching with positional trigrams only requires 1.2x corpus size of
- RAM.
-
-* All the matches are returned in document order. This makes it
- straightforward to process compound boolean queries with AND and OR.
-
-Downsides compared to suffix array:
-
-* there is no way to transform regular expressions into index ranges into
- the suffix array.
-
-
-Case sensitivity
-----------------
-
-Code usually is searched without regard for case. In this case, when
-we are looking for "abc", we look for occurrences of all the different
-case variants, ie. {"abc", "Abc", "aBc", "ABc", ... }, and then
-compare the candidate matches without regard for case.
-
-
-UTF-8
------
-
-UTF-8 is the defacto encoding for unicode. Zoekt assumes that files
-are UTF-8 encoded. Characters have differing widths in UTF-8, so we
-use rune offsets in the trigram index, and convert those back to bytes
-with a lookup table: every 100 runes, we store the rune-index to
-byte-index mapping. For corpuses that are completely ASCII (fairly
-normal for source code), we short-circuit this lookup.
-
-
-Branches
---------
-
-Each file blob in the index has a bitmask, representing the branches
-in which the content is found, eg:
-
- branches: [master=1, staging=2, stable=4]
- file "x.java", branch mask=3
- file "x.java", branch mask=4
-
-in this case, the index holds two versions of "x.java", the one
-present in "master" and "staging", and the one in the "stable" branch.
-
-With this technique, we can index many similar branches of a
-repository with little space overhead.
-
-
-Index format
-------------
-
-The index is organized in shards, where each shard is a file, laid out
-such that it can be mmap'd efficiently.
-
-Each shard contains data for one code repository. The basic data in an
-index shard are the following
-
- * file contents
- * filenames
- * the content posting lists (varint encoded)
- * the filename posting lists (varint encoded)
- * branch masks
- * metadata (repository name, index format version, etc.)
-
-In practice, the shard size is about 3x the corpus (size).
-
-The format uses uint32 for all offsets, so the total size of a shard
-should be below 4G. Given the size of the posting data, this caps
-content size per shard at 1G.
-
-Currently, within a shard, a single goroutine searches all documents,
-so the shard size determines the amount of parallelism, and large
-repositories should be split across multiple shards to achieve good
-performance.
-
-The metadata section contains a version number (which by convention is
-also part of the file name of the shard). This provides a smooth
-upgrade path across format versions: generate shards in the new
-format, kill old search service, start new search service, delete old
-shards.
-
-
-Ranking
--------
-
-In absense of advanced signals (e.g. pagerank on symbol references),
-ranking options are limited: the following signals could be used for
-ranking
-
- * number of atoms matched
- * closeness to matches for other atoms
- * quality of match: does match boundary coincide with a word boundary?
- * file latest update time
- * filename lengh
- * tokenizer ranking: does a match fall comment or string literal?
- * symbol ranking: it the match a symbol definition?
-
-For the latter, it is necessary to find symbol definitions and other
-sections within files on indexing. Several (imperfect) programs to do
-this already exist, eg. `ctags`.
-
-
-Query language
---------------
-
-Queries are stored as expression trees, using the following data
-structure:
-
- Query:
- Atom
- | AND QueryList
- | OR QueryList
- | NOT Query
- ;
-
- Atom:
- ConstQuery
- | SubStringQuery
- | RegexpQuery
- | RepoQuery
- | BranchQuery
- ;
-
-Both SubStringQuery and RegexpQuery can apply to either file or
-contents, and can optionally be case-insensitive.
-
-ConstQuery (match everything, or match nothing) is a useful construct
-for partial evaluation of a query: for each index shard through which
-we search, we partially evaluate the query, eg. when the query is
-
- and[substr:"needle" repo:"zoekt"]
-
-then we can rewrite the query to FALSE if we are looking at a shard
-for repository "bazel", skipping the entire shard.
-
-Each query must have at least one positive atom. Negations can only
-serve to prune results generated by positive atoms.
-
-
-Query parsing
--------------
-
-Strings in the input language are considered regular expressions
-but literal regular expressions are simplified to Substring queries,
-
- a.*b => regexp:"a.*b"
- a\.b => substring:"a.b"
-
-leading modifiers select different types of atoms, eg.
-
- file:java => Substring_file:"java"
- branch:master => Repo:"master"
-
-parentheses inside a string (possibly with escaped spaces) are
-interpreted as regular expressions, otherwise they are used for grouping
-
- (abc def) => and[substring:"abc" substring:"def"]
- (abc\ def) => regexp:"(abc def)"
-
-there is an implicit "AND" between elements of a parenthesized list.
-There is an "OR" operator, which has lower priority than the implicit
-"AND":
-
- ppp qqq or rrr sss => or[and[substring:"ppp" substring:"qqq"] and[substring:"rrr" substring:"sss"]]
-
-
-GERRIT/GITILES INTEGRATION
-==========================
-
-Gerrit is a popular system for code review on open source
-projects. Its sister project Gitiles provides a browsing experience.
-
-Any code search integration with Gerrit should be made available in
-Gitiles. Gerrit/Gitiles has a complex ACL system, so a codesearch
-solution for Gerrit/Gitiles should respect these ACLs.
-
-Since Gitiles knows the identity of the logged-in user, it can
-construct search queries that respect ACLs, and even filter results
-afterwards if necessary. In such a setup, only Gitiles is allowed to
-talk to the search service, so it should be protected from general
-access, e.g. by requiring authentication.
-
-A codesearch implementation for Gitiles would change Gitiles to show a
-search box on pages relating to a repository. When searching, Gitiles
-would also render the search results. The process is as follows:
-
- * On receiving a query, Gitiles finds the list of branches visible to the user
- * Gitiles sends the raw query, along with branches and repository to the search service
- * The search service parses the query, and embeds it as follows
-
- (AND original-query repo:REPO (OR "branch:visible-1" "branch:visible-2" .. ))
-
- * The search service returns the search results, leaving it to
- gitiles to render them. Gitiles can apply any further filtering
- as necessary.
-
-
-SERVICE MANAGEMENT
-==================
-
-The above details how indexing and searching works. A fully fledged
-system also crawls repositories and (re)indexes them. Since the system
-is designed to run on a single machine, we provide a service
-management tool, with the following responsibilities:
-
- * Poll git hosting sites (eg. github.com, googlesource.com), to fetch new updates
- * Reindex any changed repositories
- * Run the webserver; and restart if it goes down for any reason
- * Delete old webserver logs
-
-
-Security
---------
-
-This section assumes that 'zoekt' is used as a public facing
-webserver, indexing publicly available data, serving on HTTPS without
-authentication.
-
-Since the UI is unauthenticated, there are no authentication secrets to steal.
-
-Since the code is public, there is no sensitive code to steal.
-
-This leaves us with the following senstive data:
-
- * Credentials for accesssing git repostitories (eg. github access token)
- * TLS server certificates
- * Query logs
-
-The system handles the following untrusted data:
-
- * code in git repositories
- * search queries
-
-Since 'zoekt' itself is written in Go, it does not have memory
-security problems: at worst, a bug in the query parser would lead to a
-crash.
-
-The code to index is handled by `ctags` for symbol detection. The
-security risk this poses is mitigated by using a seccomp based
-sandboxing.
-
-
-Privacy
--------
-
-Webserver logs can contain privacy sensitive data (such as IP
-addresses and search queries). For this reason, the service management
-tool deletes them after a configurable period of time.
diff --git a/doc/faq.md b/doc/faq.md
deleted file mode 100644
index c8d81f0..0000000
--- a/doc/faq.md
+++ /dev/null
@@ -1,161 +0,0 @@
-# Frequently asked questions
-
-## Why codesearch?
-
-Software engineering is more about reading than writing code, and part
-of this process is finding the code that you should read. If you are
-working on a large project, then finding source code through
-navigation quickly becomes inefficient.
-
-Search engines let you find interesting code much faster than browsing
-code, in much the same way that search engines speed up finding things
-on the internet.
-
-## Can you give an example?
-
-I had to implement SSH hashed hostkey checking on a whim recently, and
-here is how I quickly zoomed into the relevant code using
-[our public zoekt instance](http://cs.bazel.build):
-
-* [hash host ssh](http://cs.bazel.build/search?q=hash+host+ssh&num=50): more than 20k results in 750 files, in 3 seconds
-
-* [hash host r:openssh](http://cs.bazel.build/search?q=hash+host+r%3Aopenssh&num=50): 6k results in 114 files, in 20ms
-
-* [hash host r:openssh known_host](http://cs.bazel.build/search?q=hash+host+r%3Aopenssh+known_host&num=50): 4k result in 42 files, in 13ms
-
-the last query still yielded a substantial number of results, but the
-function `hash_host` that I was looking for was the 3rd result from
-the first file.
-
-## What features make a code search engine great?
-
-Often, you don't know exactly what you are looking for, until you
-found it. Code search is effective because you can formulate an
-approximate query, and then refine it based on results you got. For
-this to work, you need the following features:
-
-* Coverage: the code that interests you should be available for searching
-
-* Speed: search should return useful results quickly (sub-second), so
- you can iterate on queries
-
-* Approximate queries: matching should be done case insensitively, on
- arbitrary substrings, so we don't have to know what we are looking
- for in advance.
-
-* Filtering: we can winnow down results by composing more specific queries
-
-* Ranking: interesting results (eg. function definitions, whole word
- matches) should be at the top.
-
-## How does `zoekt` provide for these?
-
-* Coverage: `zoekt` comes with tools to mirror parts of common Git
- hosting sites. `cs.bazel.build` uses this to index most of the
- Google authored open source software on github.com and
- googlesource.com.
-
-* Speed: `zoekt` uses an index based on positional trigrams. For rare
- strings, eg. `nienhuys`, this typically yields results in ~10ms if
- the operating system caches are warm.
-
-* Approximate queries: `zoekt` supports substring patterns and regular
- expressions, and can do case-insensitive matching on UTF-8 text.
-
-* Filtering: you can filter query by adding extra atoms (eg. `f:\.go$`
- limits to Go source code), and filter out terms with `-`, so
- `\blinus\b -torvalds` finds the Linuses other than Linus Torvalds.
-
-* Ranking: zoekt uses
- [ctags](https://github.com/universal-ctags/ctags) to find
- declarations, and these are boosted in the search ranking.
-
-
-## How does this compare to `grep -r`?
-
-Grep lets you find arbitrary substrings, but it doesn't scale to large
-corpuses, and lacks filtering and ranking.
-
-## What about my IDE?
-
-If your project fits into your IDE, than that is great.
-Unfortunately, loading projects into IDEs is slow, cumbersome, and not
-supported by all projects.
-
-## What about the search on `github.com`?
-
-Github's search has great coverage, but unfortunately, its search
-functionality doesn't support arbitrary substrings. For example, a
-query [for part of my
-surname](https://github.com/search?utf8=%E2%9C%93&q=nienhuy&type=Code)
-does not turn up anything (except this document), while
-[my complete
-name](https://github.com/search?utf8=%E2%9C%93&q=nienhuys&type=Code)
-does.
-
-## What about Etsy/Hound?
-
-[Etsy/hound](https://github.com/etsy/hound) is a code search engine
-which supports regular expressions over large corpuses, it is about
-10x slower than zoekt. However, there is only rudimentary support for
-filtering, and there is no symbol ranking.
-
-## What about livegrep?
-
-[livegrep](https://livegrep.com) is a code search engine which
-supports regular expressions over large corpuses. However, due to its
-indexing technique, it requires a lot of RAM and CPU. There is only
-rudimentary support for filtering, and there is no symbol ranking.
-
-## How much resources does `zoekt` require?
-
-The search server should have local SSD to store the index file (which
-is 3.5x the corpus size), and have at least 20% more RAM than the
-corpus size.
-
-## Can I index multiple branches?
-
-Yes. You can index 64 branches (see also
-https://github.com/google/zoekt/issues/32). Files that are identical
-across branches take up space just once in the index.
-
-## How fast is the search?
-
-Rare strings, are extremely fast to retrieve, for example `r:torvalds
-crazy` (search "crazy" in the linux kernel) typically takes [about
-7-10ms on
-cs.bazel.build](http://cs.bazel.build/search?q=r%3Atorvalds+crazy&num=70).
-
-The speed for common strings is dominated by how many results you want
-to see. For example [r:torvalds license] can give some results
-quickly, but producing [all 86k
-results](http://cs.bazel.build/search?q=r%3Atorvalds+license&num=50000)
-takes between 100ms and 1 second. Then, streaming the results to your
-browser, and rendering the HTML takes several seconds.
-
-## How fast is the indexer?
-
-The Linux kernel (55K files, 545M data) takes about 160s to index on
-my x250 laptop using a single thread. The process can be parallelized
-for speedup.
-
-## What does [cs.bazel.build](https://cs.bazel.build/) run on?
-
-Currently, it runs on a single Google Cloud VM with 16 vCPUs, 60G RAM and an
-attached physical SSD.
-
-## How does `zoekt` work?
-
-In short, it splits up the file in trigrams (groups of 3 unicode
-characters), and stores the offset of each occurrence. Substrings are
-found by searching different trigrams from the query at the correct
-distance apart.
-
-## I want to know more
-
-Some further background documentation
-
- * [Designdoc](design.md) for technical details
- * [Godoc](https://godoc.org/github.com/google/zoekt)
- * Gerrit 2016 user summit: [slides](https://storage.googleapis.com/gerrit-talks/summit/2016/zoekt.pdf)
- * Gerrit 2017 user summit: [transcript](https://gitenterprise.me/2017/11/01/gerrit-user-summit-zoekt-code-search-engine/), [slides](https://storage.googleapis.com/gerrit-talks/summit/2017/Zoekt%20-%20improved%20codesearch.pdf), [video](https://www.youtube.com/watch?v=_-KTAvgJYdI)
diff --git a/doc/indexing.md b/doc/indexing.md
deleted file mode 100644
index c6563c1..0000000
--- a/doc/indexing.md
+++ /dev/null
@@ -1,15 +0,0 @@
-
-# Configuration parameters
-
-Parameters are in the `zoekt` section of the git-config.
-
-* `name`: name of the repository, typically HOST/PATH, eg. `github.com/hanwen/usb`.
-
-* `web-url`: base URL for linking to files, commits, and the repository, eg.
-`https://github.com/hanwen/usb`
-
-* `web-url-type`: type of URL, eg. github. Supported are cgit,
- gitiles, gitweb and cgit.
-
-* `github-stars`, `github-forks`, `github-watchers`,
- `github-subscribers`: counters for github interactions
diff --git a/eval.go b/eval.go
deleted file mode 100644
index de32847..0000000
--- a/eval.go
+++ /dev/null
@@ -1,504 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "context"
- "fmt"
- "log"
- "regexp/syntax"
- "sort"
- "strings"
-
- "golang.org/x/net/trace"
-
- "github.com/google/zoekt/query"
-)
-
-const maxUInt16 = 0xffff
-
-// DebugScore controls whether we collect data on match scores are
-// constructed. Intended for use in tests.
-var DebugScore = false
-
-func (m *FileMatch) addScore(what string, s float64) {
- if DebugScore {
- m.Debug += fmt.Sprintf("%s:%f, ", what, s)
- }
- m.Score += s
-}
-
-func (d *indexData) simplify(in query.Q) query.Q {
- eval := query.Map(in, func(q query.Q) query.Q {
- if r, ok := q.(*query.Repo); ok {
- return &query.Const{Value: strings.Contains(d.repoMetaData.Name, r.Pattern)}
- }
- if l, ok := q.(*query.Language); ok {
- _, has := d.metaData.LanguageMap[l.Language]
- if !has {
- return &query.Const{Value: false}
- }
- }
- return q
- })
- return query.Simplify(eval)
-}
-
-func (o *SearchOptions) SetDefaults() {
- if o.ShardMaxMatchCount == 0 {
- // We cap the total number of matches, so overly broad
- // searches don't crash the machine.
- o.ShardMaxMatchCount = 100000
- }
- if o.TotalMaxMatchCount == 0 {
- o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount
- }
- if o.ShardMaxImportantMatch == 0 {
- o.ShardMaxImportantMatch = 10
- }
- if o.TotalMaxImportantMatch == 0 {
- o.TotalMaxImportantMatch = 10 * o.ShardMaxImportantMatch
- }
-}
-
-func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) (sr *SearchResult, err error) {
- copyOpts := *opts
- opts = ©Opts
- opts.SetDefaults()
- importantMatchCount := 0
-
- var res SearchResult
- if len(d.fileNameIndex) == 0 {
- return &res, nil
- }
-
- select {
- case <-ctx.Done():
- res.Stats.ShardsSkipped++
- return &res, nil
- default:
- }
-
- tr := trace.New("indexData.Search", d.file.Name())
- tr.LazyPrintf("opts: %+v", opts)
- defer func() {
- if sr != nil {
- tr.LazyPrintf("num files: %d", len(sr.Files))
- tr.LazyPrintf("stats: %+v", sr.Stats)
- }
- if err != nil {
- tr.LazyPrintf("error: %v", err)
- tr.SetError()
- }
- tr.Finish()
- }()
-
- q = d.simplify(q)
- tr.LazyLog(q, true)
- if c, ok := q.(*query.Const); ok && !c.Value {
- return &res, nil
- }
-
- if opts.EstimateDocCount {
- res.Stats.ShardFilesConsidered = len(d.fileBranchMasks)
- return &res, nil
- }
-
- q = query.Map(q, query.ExpandFileContent)
-
- mt, err := d.newMatchTree(q)
- if err != nil {
- return nil, err
- }
-
- totalAtomCount := 0
- visitMatchTree(mt, func(t matchTree) {
- totalAtomCount++
- })
-
- cp := &contentProvider{
- id: d,
- stats: &res.Stats,
- }
-
- docCount := uint32(len(d.fileBranchMasks))
- lastDoc := int(-1)
-
-nextFileMatch:
- for {
- canceled := false
- select {
- case <-ctx.Done():
- canceled = true
- default:
- }
-
- nextDoc := mt.nextDoc()
- if int(nextDoc) <= lastDoc {
- nextDoc = uint32(lastDoc + 1)
- }
- if nextDoc >= docCount {
- break
- }
- lastDoc = int(nextDoc)
-
- if canceled || (res.Stats.MatchCount >= opts.ShardMaxMatchCount && opts.ShardMaxMatchCount > 0) ||
- (opts.ShardMaxImportantMatch > 0 && importantMatchCount >= opts.ShardMaxImportantMatch) {
- res.Stats.FilesSkipped += d.repoListEntry.Stats.Documents - lastDoc
- break
- }
-
- res.Stats.FilesConsidered++
- mt.prepare(nextDoc)
-
- cp.setDocument(nextDoc)
-
- known := make(map[matchTree]bool)
- for cost := costMin; cost <= costMax; cost++ {
- v, ok := mt.matches(cp, cost, known)
- if ok && !v {
- continue nextFileMatch
- }
-
- if cost == costMax && !ok {
- log.Panicf("did not decide. Repo %s, doc %d, known %v",
- d.repoMetaData.Name, nextDoc, known)
- }
- }
-
- fileMatch := FileMatch{
- Repository: d.repoMetaData.Name,
- FileName: string(d.fileName(nextDoc)),
- Checksum: d.getChecksum(nextDoc),
- Language: d.languageMap[d.languages[nextDoc]],
- }
-
- if s := d.subRepos[nextDoc]; s > 0 {
- if s >= uint32(len(d.subRepoPaths)) {
- log.Panicf("corrupt index: subrepo %d beyond %v", s, d.subRepoPaths)
- }
- path := d.subRepoPaths[s]
- fileMatch.SubRepositoryPath = path
- sr := d.repoMetaData.SubRepoMap[path]
- fileMatch.SubRepositoryName = sr.Name
- if idx := d.branchIndex(nextDoc); idx >= 0 {
- fileMatch.Version = sr.Branches[idx].Version
- }
- } else {
- idx := d.branchIndex(nextDoc)
- if idx >= 0 {
- fileMatch.Version = d.repoMetaData.Branches[idx].Version
- }
- }
-
- atomMatchCount := 0
- visitMatches(mt, known, func(mt matchTree) {
- atomMatchCount++
- })
- finalCands := gatherMatches(mt, known)
-
- if len(finalCands) == 0 {
- nm := d.fileName(nextDoc)
- finalCands = append(finalCands,
- &candidateMatch{
- caseSensitive: false,
- fileName: true,
- substrBytes: nm,
- substrLowered: nm,
- file: nextDoc,
- runeOffset: 0,
- byteOffset: 0,
- byteMatchSz: uint32(len(nm)),
- })
- }
- fileMatch.LineMatches = cp.fillMatches(finalCands)
-
- maxFileScore := 0.0
- for i := range fileMatch.LineMatches {
- if maxFileScore < fileMatch.LineMatches[i].Score {
- maxFileScore = fileMatch.LineMatches[i].Score
- }
-
- // Order by ordering in file.
- fileMatch.LineMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.LineMatches))))
- }
-
- // Maintain ordering of input files. This
- // strictly dominates the in-file ordering of
- // the matches.
- fileMatch.addScore("fragment", maxFileScore)
- fileMatch.addScore("atom", float64(atomMatchCount)/float64(totalAtomCount)*scoreFactorAtomMatch)
-
- // Prefer earlier docs.
- fileMatch.addScore("doc-order", scoreFileOrderFactor*(1.0-float64(nextDoc)/float64(len(d.boundaries))))
- fileMatch.addScore("shard-order", scoreShardRankFactor*float64(d.repoMetaData.Rank)/maxUInt16)
-
- if fileMatch.Score > scoreImportantThreshold {
- importantMatchCount++
- }
- fileMatch.Branches = d.gatherBranches(nextDoc, mt, known)
- sortMatchesByScore(fileMatch.LineMatches)
- if opts.Whole {
- fileMatch.Content = cp.data(false)
- }
-
- res.Files = append(res.Files, fileMatch)
- res.Stats.MatchCount += len(fileMatch.LineMatches)
- res.Stats.FileCount++
- }
- SortFilesByScore(res.Files)
-
- addRepo(&res, &d.repoMetaData)
- for _, v := range d.repoMetaData.SubRepoMap {
- addRepo(&res, v)
- }
-
- visitMatchTree(mt, func(mt matchTree) {
- if atom, ok := mt.(interface{ updateStats(*Stats) }); ok {
- atom.updateStats(&res.Stats)
- }
- })
- return &res, nil
-}
-
-func addRepo(res *SearchResult, repo *Repository) {
- if res.RepoURLs == nil {
- res.RepoURLs = map[string]string{}
- }
- res.RepoURLs[repo.Name] = repo.FileURLTemplate
-
- if res.LineFragments == nil {
- res.LineFragments = map[string]string{}
- }
- res.LineFragments[repo.Name] = repo.LineFragmentTemplate
-}
-
-type sortByOffsetSlice []*candidateMatch
-
-func (m sortByOffsetSlice) Len() int { return len(m) }
-func (m sortByOffsetSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
-func (m sortByOffsetSlice) Less(i, j int) bool {
- return m[i].byteOffset < m[j].byteOffset
-}
-
-// Gather matches from this document. This never returns a mixture of
-// filename/content matches: if there are content matches, all
-// filename matches are trimmed from the result. The matches are
-// returned in document order and are non-overlapping.
-func gatherMatches(mt matchTree, known map[matchTree]bool) []*candidateMatch {
- var cands []*candidateMatch
- visitMatches(mt, known, func(mt matchTree) {
- if smt, ok := mt.(*substrMatchTree); ok {
- cands = append(cands, smt.current...)
- }
- if rmt, ok := mt.(*regexpMatchTree); ok {
- cands = append(cands, rmt.found...)
- }
- })
-
- foundContentMatch := false
- for _, c := range cands {
- if !c.fileName {
- foundContentMatch = true
- break
- }
- }
-
- res := cands[:0]
- for _, c := range cands {
- if !foundContentMatch || !c.fileName {
- res = append(res, c)
- }
- }
- cands = res
-
- // Merge adjacent candidates. This guarantees that the matches
- // are non-overlapping.
- sort.Sort((sortByOffsetSlice)(cands))
- res = cands[:0]
- for i, c := range cands {
- if i == 0 {
- res = append(res, c)
- continue
- }
- last := res[len(res)-1]
- lastEnd := last.byteOffset + last.byteMatchSz
- end := c.byteOffset + c.byteMatchSz
- if lastEnd >= c.byteOffset {
- if end > lastEnd {
- last.byteMatchSz = end - last.byteOffset
- }
- continue
- }
-
- res = append(res, c)
- }
-
- return res
-}
-
-func (d *indexData) branchIndex(docID uint32) int {
- mask := d.fileBranchMasks[docID]
- idx := 0
- for mask != 0 {
- if mask&0x1 != 0 {
- return idx
- }
- idx++
- mask >>= 1
- }
- return -1
-}
-
-// gatherBranches returns a list of branch names.
-func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTree]bool) []string {
- foundBranchQuery := false
- var branches []string
-
- visitMatches(mt, known, func(mt matchTree) {
- bq, ok := mt.(*branchQueryMatchTree)
- if ok {
- foundBranchQuery = true
- branches = append(branches,
- d.branchNames[uint(bq.mask)])
- }
- })
-
- if !foundBranchQuery {
- mask := d.fileBranchMasks[docID]
- id := uint32(1)
- for mask != 0 {
- if mask&0x1 != 0 {
- branches = append(branches, d.branchNames[uint(id)])
- }
- id <<= 1
- mask >>= 1
- }
- }
- return branches
-}
-
-func (d *indexData) List(ctx context.Context, q query.Q) (rl *RepoList, err error) {
- tr := trace.New("indexData.List", d.file.Name())
- defer func() {
- if rl != nil {
- tr.LazyPrintf("repos size: %d", len(rl.Repos))
- tr.LazyPrintf("crashes: %d", rl.Crashes)
- }
- if err != nil {
- tr.LazyPrintf("error: %v", err)
- tr.SetError()
- }
- tr.Finish()
- }()
-
- q = d.simplify(q)
- tr.LazyLog(q, true)
- c, ok := q.(*query.Const)
-
- if !ok {
- return nil, fmt.Errorf("List should receive Repo-only query")
- }
-
- l := &RepoList{}
- if c.Value {
- l.Repos = append(l.Repos, &d.repoListEntry)
- }
- return l, nil
-}
-
-// regexpToMatchTreeRecursive converts a regular expression to a matchTree mt. If
-// mt is equivalent to the input r, isEqual = true and the matchTree can be used
-// in place of the regex r. If singleLine = true, then the matchTree and all
-// its children only match terms on the same line. singleLine is used during
-// recursion to decide whether to return an andLineMatchTree (singleLine = true)
-// or a andMatchTree (singleLine = false).
-func (d *indexData) regexpToMatchTreeRecursive(r *syntax.Regexp, minTextSize int, fileName bool, caseSensitive bool) (mt matchTree, isEqual bool, singleLine bool, err error) {
- // TODO - we could perhaps transform Begin/EndText in '\n'?
- // TODO - we could perhaps transform CharClass in (OrQuery )
- // if there are just a few runes, and part of a OpConcat?
- switch r.Op {
- case syntax.OpLiteral:
- s := string(r.Rune)
- if len(s) >= minTextSize {
- mt, err := d.newSubstringMatchTree(&query.Substring{Pattern: s, FileName: fileName, CaseSensitive: caseSensitive})
- return mt, true, !strings.Contains(s, "\n"), err
- }
- case syntax.OpCapture:
- return d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive)
-
- case syntax.OpPlus:
- return d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive)
-
- case syntax.OpRepeat:
- if r.Min == 1 {
- return d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive)
- } else if r.Min > 1 {
- // (x){2,} can't be expressed precisely by the matchTree
- mt, _, singleLine, err := d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive)
- return mt, false, singleLine, err
- }
- case syntax.OpConcat, syntax.OpAlternate:
- var qs []matchTree
- isEq := true
- singleLine = true
- for _, sr := range r.Sub {
- if sq, subIsEq, subSingleLine, err := d.regexpToMatchTreeRecursive(sr, minTextSize, fileName, caseSensitive); sq != nil {
- if err != nil {
- return nil, false, false, err
- }
- isEq = isEq && subIsEq
- singleLine = singleLine && subSingleLine
- qs = append(qs, sq)
- }
- }
- if r.Op == syntax.OpConcat {
- if len(qs) > 1 {
- isEq = false
- }
- newQs := make([]matchTree, 0, len(qs))
- for _, q := range qs {
- if _, ok := q.(*bruteForceMatchTree); ok {
- continue
- }
- newQs = append(newQs, q)
- }
- if len(newQs) == 1 {
- return newQs[0], isEq, singleLine, nil
- }
- if len(newQs) == 0 {
- return &bruteForceMatchTree{}, isEq, singleLine, nil
- }
- if singleLine {
- return &andLineMatchTree{andMatchTree{children: newQs}}, isEq, singleLine, nil
- }
- return &andMatchTree{newQs}, isEq, singleLine, nil
- }
- for _, q := range qs {
- if _, ok := q.(*bruteForceMatchTree); ok {
- return q, isEq, false, nil
- }
- }
- if len(qs) == 0 {
- return &noMatchTree{"const"}, isEq, false, nil
- }
- return &orMatchTree{qs}, isEq, false, nil
- case syntax.OpStar:
- if r.Sub[0].Op == syntax.OpAnyCharNotNL {
- return &bruteForceMatchTree{}, false, true, nil
- }
- }
- return &bruteForceMatchTree{}, false, false, nil
-}
diff --git a/eval_test.go b/eval_test.go
deleted file mode 100644
index ce16a60..0000000
--- a/eval_test.go
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright 2020 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "reflect"
- "regexp/syntax"
- "strings"
- "testing"
-
- "github.com/google/zoekt/query"
-)
-
-var opnames = map[syntax.Op]string{
- syntax.OpNoMatch: "OpNoMatch",
- syntax.OpEmptyMatch: "OpEmptyMatch",
- syntax.OpLiteral: "OpLiteral",
- syntax.OpCharClass: "OpCharClass",
- syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
- syntax.OpAnyChar: "OpAnyChar",
- syntax.OpBeginLine: "OpBeginLine",
- syntax.OpEndLine: "OpEndLine",
- syntax.OpBeginText: "OpBeginText",
- syntax.OpEndText: "OpEndText",
- syntax.OpWordBoundary: "OpWordBoundary",
- syntax.OpNoWordBoundary: "OpNoWordBoundary",
- syntax.OpCapture: "OpCapture",
- syntax.OpStar: "OpStar",
- syntax.OpPlus: "OpPlus",
- syntax.OpQuest: "OpQuest",
- syntax.OpRepeat: "OpRepeat",
- syntax.OpConcat: "OpConcat",
- syntax.OpAlternate: "OpAlternate",
-}
-
-func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
- t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
- for _, s := range r.Sub {
- printRegexp(t, s, lvl+1)
- }
-}
-
-func substrMT(pattern string) matchTree {
- d := &indexData{}
- mt, _ := d.newSubstringMatchTree(&query.Substring{
- Pattern: pattern,
- })
- return mt
-}
-
-func TestRegexpParse(t *testing.T) {
- type testcase struct {
- in string
- query matchTree
- isEquivalent bool
- }
-
- cases := []testcase{
- {"(foo|)bar", substrMT("bar"), false},
- {"(foo|)", &bruteForceMatchTree{}, false},
- {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
- &orMatchTree{[]matchTree{
- substrMT("foo"),
- substrMT("bar"),
- }},
- substrMT("baz"),
- substrMT("bla"),
- }}, false},
- {
- "^[a-z](People)+barrabas$",
- &andMatchTree{[]matchTree{
- substrMT("People"),
- substrMT("barrabas"),
- }}, false,
- },
- {"foo", substrMT("foo"), true},
- {"^foo", substrMT("foo"), false},
- {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false},
- {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
- substrMT("thread"),
- substrMT("needle"),
- substrMT("haystack"),
- }}, true},
- {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
- substrMT("foo"),
- substrMT("bar"),
- }}}, false},
- {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
- substrMT("foo"),
- substrMT("bar"),
- }}, false},
- {"(foo){2,}", substrMT("foo"), false},
- {"(...)(...)", &bruteForceMatchTree{}, false},
- }
-
- for _, c := range cases {
- r, err := syntax.Parse(c.in, syntax.Perl)
- if err != nil {
- t.Errorf("Parse(%q): %v", c.in, err)
- continue
- }
- d := indexData{}
- q := query.Regexp{
- Regexp: r,
- }
- gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
- if !reflect.DeepEqual(c.query, gotQuery) {
- printRegexp(t, r, 0)
- t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
- }
- if isEq != c.isEquivalent {
- printRegexp(t, r, 0)
- t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
- }
- }
-}
diff --git a/gitindex/clone.go b/gitindex/clone.go
deleted file mode 100644
index 11b2c87..0000000
--- a/gitindex/clone.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "bytes"
- "log"
- "os"
- "os/exec"
- "path/filepath"
- "sort"
-
- git "github.com/go-git/go-git/v5"
- "github.com/go-git/go-git/v5/config"
-)
-
-// CloneRepo clones one repository, adding the given config
-// settings. It returns the bare repo directory. The `name` argument
-// determines where the repo is stored relative to `destDir`. Returns
-// the directory of the repository.
-func CloneRepo(destDir, name, cloneURL string, settings map[string]string) (string, error) {
- parent := filepath.Join(destDir, filepath.Dir(name))
- if err := os.MkdirAll(parent, 0o755); err != nil {
- return "", err
- }
-
- repoDest := filepath.Join(parent, filepath.Base(name)+".git")
- if _, err := os.Lstat(repoDest); err == nil {
- return "", nil
- }
-
- var keys []string
- for k := range settings {
- keys = append(keys, k)
- }
- sort.Strings(keys)
-
- var config []string
- for _, k := range keys {
- if settings[k] != "" {
- config = append(config, "--config", k+"="+settings[k])
- }
- }
-
- cmd := exec.Command(
- "git", "clone", "--bare", "--verbose", "--progress",
- )
- cmd.Args = append(cmd.Args, config...)
- cmd.Args = append(cmd.Args, cloneURL, repoDest)
-
- // Prevent prompting
- cmd.Stdin = &bytes.Buffer{}
- log.Println("running:", cmd.Args)
- if err := cmd.Run(); err != nil {
- return "", err
- }
-
- if err := setFetch(repoDest, "origin", "+refs/heads/*:refs/heads/*"); err != nil {
- log.Printf("addFetch: %v", err)
- }
- return repoDest, nil
-}
-
-func setFetch(repoDir, remote, refspec string) error {
- repo, err := git.PlainOpen(repoDir)
- if err != nil {
- return err
- }
-
- cfg, err := repo.Config()
- if err != nil {
- return err
- }
-
- rm := cfg.Remotes[remote]
- if rm != nil {
- rm.Fetch = []config.RefSpec{config.RefSpec(refspec)}
- }
- if err := repo.Storer.SetConfig(cfg); err != nil {
- return err
- }
-
- return nil
-}
diff --git a/gitindex/clone_test.go b/gitindex/clone_test.go
deleted file mode 100644
index 8445416..0000000
--- a/gitindex/clone_test.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2019 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "io/ioutil"
- "os"
- "os/exec"
- "testing"
-
- git "github.com/go-git/go-git/v5"
-)
-
-func TestSetRemote(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(dir)
- script := `mkdir orig
-cd orig
-git init
-cd ..
-git clone orig/.git clone.git
-`
-
- cmd := exec.Command("/bin/sh", "-euxc", script)
- cmd.Dir = dir
-
- if out, err := cmd.CombinedOutput(); err != nil {
- t.Fatalf("execution error: %v, output %s", err, out)
- }
-
- r := dir + "/clone.git"
- if err := setFetch(r, "origin", "+refs/heads/*:refs/heads/*"); err != nil {
- t.Fatalf("addFetch: %v", err)
- }
-
- repo, err := git.PlainOpen(r)
- if err != nil {
- t.Fatal("PlainOpen", err)
- }
-
- rm, err := repo.Remote("origin")
- if err != nil {
- t.Fatal("Remote", err)
- }
- if got, want := rm.Config().Fetch[0].String(), "+refs/heads/*:refs/heads/*"; got != want {
- t.Fatalf("got %q want %q", got, want)
- }
-}
diff --git a/gitindex/delete.go b/gitindex/delete.go
deleted file mode 100644
index f53e1af..0000000
--- a/gitindex/delete.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package gitindex
-
-import (
- "fmt"
- "log"
- "net/url"
- "os"
- "path/filepath"
-)
-
-// DeleteRepos deletes stale repos under a specific path in disk. The `names`
-// argument stores names of repos retrieved from the git hosting site
-// and is used along with the `filter` argument to decide on repo deletion.
-func DeleteRepos(baseDir string, urlPrefix *url.URL, names map[string]struct{}, filter *Filter) error {
- paths, err := ListRepos(baseDir, urlPrefix)
- if err != nil {
- return err
- }
- var toDelete []string
- for _, p := range paths {
- _, exists := names[p]
- if filter.Include(filepath.Base(p)) && !exists {
- toDelete = append(toDelete, p)
- }
- }
-
- if len(toDelete) > 0 {
- log.Printf("deleting repos %v", toDelete)
- }
-
- var errs []string
- for _, d := range toDelete {
- if err := os.RemoveAll(filepath.Join(baseDir, d)); err != nil {
- errs = append(errs, err.Error())
- }
- }
- if len(errs) > 0 {
- return fmt.Errorf("errors: %v", errs)
- }
- return nil
-}
diff --git a/gitindex/delete_test.go b/gitindex/delete_test.go
deleted file mode 100644
index 381e6e0..0000000
--- a/gitindex/delete_test.go
+++ /dev/null
@@ -1,86 +0,0 @@
-package gitindex
-
-import (
- "io/ioutil"
- "net/url"
- "os"
- "path/filepath"
- "reflect"
- "testing"
-)
-
-func TestDeleteRepos(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createSubmoduleRepo(dir); err != nil {
- t.Error("createSubmoduleRepo", err)
- }
-
- reposBefore, err := FindGitRepos(dir)
- if err != nil {
- t.Error("FindGitRepos", err)
- }
-
- gotBefore := map[string]struct{}{}
- for _, r := range reposBefore {
- p, err := filepath.Rel(dir, r)
- if err != nil {
- t.Fatalf("Relative: %v", err)
- }
-
- gotBefore[p] = struct{}{}
- }
-
- wantBefore := map[string]struct{}{
- "gerrit.googlesource.com/bdir.git": {},
- "gerrit.googlesource.com/sub/bdir.git": {},
- "adir/.git": {},
- "bdir/.git": {},
- "gerrit.googlesource.com/adir.git": {},
- }
-
- if !reflect.DeepEqual(gotBefore, wantBefore) {
- t.Fatalf("got %v want %v", gotBefore, wantBefore)
- }
-
- aURL, _ := url.Parse("http://gerrit.googlesource.com")
- aURL.Path = "sub"
- names := map[string]struct{}{
- "bdir/.git": {},
- "gerrit.googlesource.com/adir.git": {},
- }
- filter, _ := NewFilter("", "")
-
- err = DeleteRepos(dir, aURL, names, filter)
- if err != nil {
- t.Fatalf("DeleteRepos: %T", err)
- }
- reposAfter, err := FindGitRepos(dir)
- if err != nil {
- t.Error("FindGitRepos", err)
- }
-
- gotAfter := map[string]struct{}{}
- for _, r := range reposAfter {
- p, err := filepath.Rel(dir, r)
- if err != nil {
- t.Fatalf("Relative: %v", err)
- }
-
- gotAfter[p] = struct{}{}
- }
- wantAfter := map[string]struct{}{
- "gerrit.googlesource.com/bdir.git": {},
- "adir/.git": {},
- "bdir/.git": {},
- "gerrit.googlesource.com/adir.git": {},
- }
-
- if !reflect.DeepEqual(gotAfter, wantAfter) {
- t.Errorf("got %v want %v", gotAfter, wantAfter)
- }
-}
diff --git a/gitindex/filter.go b/gitindex/filter.go
deleted file mode 100644
index eba82f9..0000000
--- a/gitindex/filter.go
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import "regexp"
-
-// Filter is a include/exclude filter to be used for repo names.
-type Filter struct {
- inc, exc *regexp.Regexp
-}
-
-// Include returns true if the name passes the filter.
-func (f *Filter) Include(name string) bool {
- if f.inc != nil {
- if !f.inc.MatchString(name) {
- return false
- }
- }
- if f.exc != nil {
- if f.exc.MatchString(name) {
- return false
- }
- }
- return true
-}
-
-// NewFilter creates a new filter.
-func NewFilter(includeRegex, excludeRegex string) (*Filter, error) {
- f := &Filter{}
- var err error
- if includeRegex != "" {
- f.inc, err = regexp.Compile(includeRegex)
-
- if err != nil {
- return nil, err
- }
- }
- if excludeRegex != "" {
- f.exc, err = regexp.Compile(excludeRegex)
- if err != nil {
- return nil, err
- }
- }
-
- return f, nil
-}
diff --git a/gitindex/index.go b/gitindex/index.go
deleted file mode 100644
index 5715406..0000000
--- a/gitindex/index.go
+++ /dev/null
@@ -1,521 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package gitindex provides functions for indexing Git repositories.
-package gitindex
-
-import (
- "bytes"
- "fmt"
- "io"
- "log"
- "math"
- "net/url"
- "os"
- "path/filepath"
- "sort"
- "strconv"
- "strings"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
-
- "github.com/go-git/go-git/v5/config"
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/object"
-
- git "github.com/go-git/go-git/v5"
-)
-
-// RepoModTime returns the time of last fetch of a git repository.
-func RepoModTime(dir string) (time.Time, error) {
- var last time.Time
- refDir := filepath.Join(dir, "refs")
- if _, err := os.Lstat(refDir); err == nil {
- if err := filepath.Walk(refDir,
- func(_ string, fi os.FileInfo, _ error) error {
- if !fi.IsDir() && last.Before(fi.ModTime()) {
- last = fi.ModTime()
- }
- return nil
- }); err != nil {
- return last, err
- }
- }
-
- // git gc compresses refs into the following file:
- for _, fn := range []string{"info/refs", "packed-refs"} {
- if fi, err := os.Lstat(filepath.Join(dir, fn)); err == nil && !fi.IsDir() && last.Before(fi.ModTime()) {
- last = fi.ModTime()
- }
- }
-
- return last, nil
-}
-
-// FindGitRepos finds directories holding git repositories below the
-// given directory. It will find both bare and the ".git" dirs in
-// non-bare repositories. It returns the full path including the dir
-// passed in.
-func FindGitRepos(dir string) ([]string, error) {
- arg, err := filepath.Abs(dir)
- if err != nil {
- return nil, err
- }
- var dirs []string
- if err := filepath.Walk(arg, func(name string, fi os.FileInfo, err error) error {
- // Best-effort, ignore filepath.Walk failing
- if err != nil {
- return nil
- }
-
- if fi, err := os.Lstat(filepath.Join(name, ".git")); err == nil && fi.IsDir() {
- dirs = append(dirs, filepath.Join(name, ".git"))
- return filepath.SkipDir
- }
-
- if !strings.HasSuffix(name, ".git") || !fi.IsDir() {
- return nil
- }
-
- fi, err = os.Lstat(filepath.Join(name, "objects"))
- if err != nil || !fi.IsDir() {
- return nil
- }
-
- dirs = append(dirs, name)
- return filepath.SkipDir
- }); err != nil {
- return nil, err
- }
-
- return dirs, nil
-}
-
-// setTemplates fills in URL templates for known git hosting
-// sites.
-func setTemplates(repo *zoekt.Repository, u *url.URL, typ string) error {
- repo.URL = u.String()
- switch typ {
- case "gitiles":
- /// eg. https://gerrit.googlesource.com/gitiles/+/master/tools/run_dev.sh#20
- repo.CommitURLTemplate = u.String() + "/+/{{.Version}}"
- repo.FileURLTemplate = u.String() + "/+/{{.Version}}/{{.Path}}"
- repo.LineFragmentTemplate = "#{{.LineNumber}}"
- case "github":
- // eg. https://github.com/hanwen/go-fuse/blob/notify/genversion.sh#L10
- repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}"
- repo.FileURLTemplate = u.String() + "/blob/{{.Version}}/{{.Path}}"
- repo.LineFragmentTemplate = "#L{{.LineNumber}}"
- case "cgit":
- // http://git.savannah.gnu.org/cgit/lilypond.git/tree/elisp/lilypond-mode.el?h=dev/philh&id=b2ca0fefe3018477aaca23b6f672c7199ba5238e#n100
- repo.CommitURLTemplate = u.String() + "/commit/?id={{.Version}}"
- repo.FileURLTemplate = u.String() + "/tree/{{.Path}}/?id={{.Version}}"
- repo.LineFragmentTemplate = "#n{{.LineNumber}}"
- case "gitweb":
- // https://gerrit.libreoffice.org/gitweb?p=online.git;a=blob;f=Makefile.am;h=cfcfd7c36fbae10e269653dc57a9b68c92d4c10b;hb=848145503bf7b98ce4a4aa0a858a0d71dd0dbb26#l10
- repo.FileURLTemplate = u.String() + ";a=blob;f={{.Path}};hb={{.Version}}"
- repo.CommitURLTemplate = u.String() + ";a=commit;h={{.Version}}"
- repo.LineFragmentTemplate = "#l{{.LineNumber}}"
- case "source.bazel.build":
- // https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9
- // https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9:tools/cpp/BUILD.empty;l=10
- repo.CommitURLTemplate = u.String() + "/+/{{.Version}}"
- repo.FileURLTemplate = u.String() + "/+/{{.Version}}:{{.Path}}"
- repo.LineFragmentTemplate = ";l={{.LineNumber}}"
- case "bitbucket-server":
- // https://<bitbucketserver-host>/projects/<project>/repos/<repo>/commits/5be7ca73b898bf17a08e607918accfdeafe1e0bc
- // https://<bitbucketserver-host>/projects/<project>/repos/<repo>/browse/<file>?at=5be7ca73b898bf17a08e607918accfdeafe1e0bc
- repo.CommitURLTemplate = u.String() + "/commits/{{.Version}}"
- repo.FileURLTemplate = u.String() + "/{{.Path}}?at={{.Version}}"
- repo.LineFragmentTemplate = "#{{.LineNumber}}"
- case "gitlab":
- repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}"
- repo.FileURLTemplate = u.String() + "/blob/{{.Version}}/{{.Path}}"
- repo.LineFragmentTemplate = "#L{{.LineNumber}}"
- default:
- return fmt.Errorf("URL scheme type %q unknown", typ)
- }
- return nil
-}
-
-// getCommit returns a tree object for the given reference.
-func getCommit(repo *git.Repository, prefix, ref string) (*object.Commit, error) {
- sha1, err := repo.ResolveRevision(plumbing.Revision(ref))
- // ref might be a branch name (e.g. "master") add branch prefix and try again.
- if err != nil {
- sha1, err = repo.ResolveRevision(plumbing.Revision(filepath.Join(prefix, ref)))
- }
- if err != nil {
- return nil, err
- }
-
- commitObj, err := repo.CommitObject(*sha1)
- if err != nil {
- return nil, err
- }
- return commitObj, nil
-}
-
-func configLookupRemoteURL(cfg *config.Config, key string) string {
- rc := cfg.Remotes[key]
- if rc == nil || len(rc.URLs) == 0 {
- return ""
- }
- return rc.URLs[0]
-}
-
-func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error {
- repo, err := git.PlainOpen(repoDir)
- if err != nil {
- return err
- }
-
- cfg, err := repo.Config()
- if err != nil {
- return err
- }
-
- sec := cfg.Raw.Section("zoekt")
-
- webURLStr := sec.Options.Get("web-url")
- webURLType := sec.Options.Get("web-url-type")
-
- if webURLType != "" && webURLStr != "" {
- webURL, err := url.Parse(webURLStr)
- if err != nil {
- return err
- }
- if err := setTemplates(desc, webURL, webURLType); err != nil {
- return err
- }
- }
-
- name := sec.Options.Get("name")
- if name != "" {
- desc.Name = name
- } else {
- remoteURL := configLookupRemoteURL(cfg, "origin")
- if remoteURL == "" {
- return nil
- }
- u, err := url.Parse(remoteURL)
- if err != nil {
- return err
- }
- if err := SetTemplatesFromOrigin(desc, u); err != nil {
- return err
- }
- }
-
- if desc.RawConfig == nil {
- desc.RawConfig = map[string]string{}
- }
- for _, o := range sec.Options {
- desc.RawConfig[o.Key] = o.Value
- }
-
- // Ranking info.
-
- // Github:
- traction := 0
- for _, s := range []string{"github-stars", "github-forks", "github-watchers", "github-subscribers"} {
- f, err := strconv.Atoi(sec.Options.Get(s))
- if err == nil {
- traction += f
- }
- }
-
- if strings.Contains(desc.Name, "googlesource.com/") && traction == 0 {
- // Pretend everything on googlesource.com has 1000
- // github stars.
- traction = 1000
- }
-
- if traction > 0 {
- l := math.Log(float64(traction))
- desc.Rank = uint16((1.0 - 1.0/math.Pow(1+l, 0.6)) * 10000)
- }
-
- return nil
-}
-
-// SetTemplatesFromOrigin fills in templates based on the origin URL.
-func SetTemplatesFromOrigin(desc *zoekt.Repository, u *url.URL) error {
- desc.Name = filepath.Join(u.Host, strings.TrimSuffix(u.Path, ".git"))
-
- if strings.HasSuffix(u.Host, ".googlesource.com") {
- return setTemplates(desc, u, "gitiles")
- } else if u.Host == "github.com" {
- u.Path = strings.TrimSuffix(u.Path, ".git")
- return setTemplates(desc, u, "github")
- } else {
- return fmt.Errorf("unknown git hosting site %q", u)
- }
-}
-
-// The Options structs controls details of the indexing process.
-type Options struct {
- // The repository to be indexed.
- RepoDir string
-
- // If set, follow submodule links. This requires RepoCacheDir to be set.
- Submodules bool
-
- // If set, skip indexing if the existing index shard is newer
- // than the refs in the repository.
- Incremental bool
-
- // Don't error out if some branch is missing
- AllowMissingBranch bool
-
- // Specifies the root of a Repository cache. Needed for submodule indexing.
- RepoCacheDir string
-
- // Indexing options.
- BuildOptions build.Options
-
- // Prefix of the branch to index, e.g. `remotes/origin`.
- BranchPrefix string
-
- // List of branch names to index, e.g. []string{"HEAD", "stable"}
- Branches []string
-}
-
-func expandBranches(repo *git.Repository, bs []string, prefix string) ([]string, error) {
- var result []string
- for _, b := range bs {
- if b == "HEAD" {
- ref, err := repo.Head()
- if err != nil {
- return nil, err
- }
-
- result = append(result, strings.TrimPrefix(ref.Name().String(), prefix))
- continue
- }
-
- if strings.Contains(b, "*") {
- iter, err := repo.Branches()
- if err != nil {
- return nil, err
- }
-
- defer iter.Close()
- for {
- ref, err := iter.Next()
- if err == io.EOF {
- break
- }
- if err != nil {
- return nil, err
- }
-
- name := ref.Name().Short()
- if matched, err := filepath.Match(b, name); err != nil {
- return nil, err
- } else if !matched {
- continue
- }
-
- result = append(result, strings.TrimPrefix(name, prefix))
- }
- continue
- }
-
- result = append(result, b)
- }
-
- return result, nil
-}
-
-// IndexGitRepo indexes the git repository as specified by the options.
-func IndexGitRepo(opts Options) error {
- // Set max thresholds, since we use them in this function.
- opts.BuildOptions.SetDefaults()
- if opts.RepoDir == "" {
- return fmt.Errorf("gitindex: must set RepoDir")
- }
-
- opts.BuildOptions.RepositoryDescription.Source = opts.RepoDir
- repo, err := git.PlainOpen(opts.RepoDir)
- if err != nil {
- return err
- }
-
- if err := setTemplatesFromConfig(&opts.BuildOptions.RepositoryDescription, opts.RepoDir); err != nil {
- log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err)
- }
-
- repoCache := NewRepoCache(opts.RepoCacheDir)
-
- // branch => (path, sha1) => repo.
- repos := map[fileKey]BlobLocation{}
-
- // fileKey => branches
- branchMap := map[fileKey][]string{}
-
- // Branch => Repo => SHA1
- branchVersions := map[string]map[string]plumbing.Hash{}
-
- branches, err := expandBranches(repo, opts.Branches, opts.BranchPrefix)
- if err != nil {
- return err
- }
- for _, b := range branches {
- commit, err := getCommit(repo, opts.BranchPrefix, b)
- if err != nil {
- if opts.AllowMissingBranch && err.Error() == "reference not found" {
- continue
- }
-
- return err
- }
-
- opts.BuildOptions.RepositoryDescription.Branches = append(opts.BuildOptions.RepositoryDescription.Branches, zoekt.RepositoryBranch{
- Name: b,
- Version: commit.Hash.String(),
- })
-
- tree, err := commit.Tree()
- if err != nil {
- return err
- }
-
- files, subVersions, err := TreeToFiles(repo, tree, opts.BuildOptions.RepositoryDescription.URL, repoCache)
- if err != nil {
- return err
- }
- for k, v := range files {
- repos[k] = v
- branchMap[k] = append(branchMap[k], b)
- }
-
- branchVersions[b] = subVersions
- }
-
- if opts.Incremental && opts.BuildOptions.IncrementalSkipIndexing() {
- return nil
- }
-
- reposByPath := map[string]BlobLocation{}
- for key, location := range repos {
- reposByPath[key.SubRepoPath] = location
- }
-
- opts.BuildOptions.SubRepositories = map[string]*zoekt.Repository{}
- for path, location := range reposByPath {
- tpl := opts.BuildOptions.RepositoryDescription
- if path != "" {
- tpl = zoekt.Repository{URL: location.URL.String()}
- if err := SetTemplatesFromOrigin(&tpl, location.URL); err != nil {
- log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, location.URL, err)
- }
- }
- opts.BuildOptions.SubRepositories[path] = &tpl
- }
- for _, br := range opts.BuildOptions.RepositoryDescription.Branches {
- for path, repo := range opts.BuildOptions.SubRepositories {
- id := branchVersions[br.Name][path]
- repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{
- Name: br.Name,
- Version: id.String(),
- })
- }
- }
-
- builder, err := build.NewBuilder(opts.BuildOptions)
- if err != nil {
- return err
- }
- defer builder.Finish()
-
- var names []string
- fileKeys := map[string][]fileKey{}
- for key := range repos {
- n := key.FullPath()
- fileKeys[n] = append(fileKeys[n], key)
- names = append(names, n)
- }
-
- sort.Strings(names)
- names = uniq(names)
-
- for _, name := range names {
- keys := fileKeys[name]
-
- for _, key := range keys {
- brs := branchMap[key]
- blob, err := repos[key].Repo.BlobObject(key.ID)
- if err != nil {
- return err
- }
-
- if blob.Size > int64(opts.BuildOptions.SizeMax) && !opts.BuildOptions.IgnoreSizeMax(key.FullPath()) {
- if err := builder.Add(zoekt.Document{
- SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.BuildOptions.SizeMax),
- Name: key.FullPath(),
- Branches: brs,
- SubRepositoryPath: key.SubRepoPath,
- }); err != nil {
- return err
- }
- continue
- }
-
- contents, err := blobContents(blob)
- if err != nil {
- return err
- }
- if err := builder.Add(zoekt.Document{
- SubRepositoryPath: key.SubRepoPath,
- Name: key.FullPath(),
- Content: contents,
- Branches: brs,
- }); err != nil {
- return err
- }
- }
- }
- return builder.Finish()
-}
-
-func blobContents(blob *object.Blob) ([]byte, error) {
- r, err := blob.Reader()
- if err != nil {
- return nil, err
- }
- defer r.Close()
-
- var buf bytes.Buffer
- buf.Grow(int(blob.Size))
- _, err = buf.ReadFrom(r)
- if err != nil {
- return nil, err
- }
- return buf.Bytes(), nil
-}
-
-func uniq(ss []string) []string {
- result := ss[:0]
- var last string
- for i, s := range ss {
- if i == 0 || s != last {
- result = append(result, s)
- }
- last = s
- }
- return result
-}
diff --git a/gitindex/index_test.go b/gitindex/index_test.go
deleted file mode 100644
index bc24796..0000000
--- a/gitindex/index_test.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2021 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "io/ioutil"
- "os"
- "os/exec"
- "path/filepath"
- "testing"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
-)
-
-func TestIndexEmptyRepo(t *testing.T) {
- tmp, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir %v", err)
- }
- defer os.RemoveAll(tmp)
-
- cmd := exec.Command("git", "init", "-b", "master", "repo")
- cmd.Dir = tmp
-
- if err := cmd.Run(); err != nil {
- t.Fatalf("cmd.Run: %v", err)
- }
-
- desc := zoekt.Repository{
- Name: "repo",
- }
- opts := Options{
- RepoDir: filepath.Join(tmp, "repo", ".git"),
- BuildOptions: build.Options{
- RepositoryDescription: desc,
- IndexDir: tmp,
- },
- }
-
- if err := IndexGitRepo(opts); err != nil {
- t.Fatalf("IndexGitRepo: %v", err)
- }
-}
diff --git a/gitindex/repocache.go b/gitindex/repocache.go
deleted file mode 100644
index a8528f5..0000000
--- a/gitindex/repocache.go
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "net/url"
- "os"
- "path/filepath"
- "strings"
- "sync"
-
- git "github.com/go-git/go-git/v5"
-)
-
-// RepoCache is a set of repositories on the file system, named and
-// stored by URL.
-type RepoCache struct {
- baseDir string
-
- reposMu sync.Mutex
- repos map[string]*git.Repository
-}
-
-// NewRepoCache creates a new RepoCache rooted at the given directory.
-func NewRepoCache(dir string) *RepoCache {
- return &RepoCache{
- baseDir: dir,
- repos: make(map[string]*git.Repository),
- }
-}
-
-func repoKeyStr(key string) string {
- if !strings.HasSuffix(key, ".git") {
- key += ".git"
- }
- return key
-}
-
-func repoKey(u *url.URL) string {
- return repoKeyStr(filepath.Join(u.Host, u.Path))
-}
-
-// Path returns the absolute path of the bare repository.
-func Path(baseDir string, name string) string {
- key := repoKeyStr(name)
- return filepath.Join(baseDir, key)
-}
-
-func (rc *RepoCache) Path(u *url.URL) string {
- key := repoKey(u)
- return filepath.Join(rc.baseDir, key)
-}
-
-// Open opens a git repository. The cache retains a pointer to the
-// repository.
-func (rc *RepoCache) Open(u *url.URL) (*git.Repository, error) {
- dir := rc.Path(u)
- rc.reposMu.Lock()
- defer rc.reposMu.Unlock()
-
- key := repoKey(u)
- r := rc.repos[key]
- if r != nil {
- return r, nil
- }
-
- repo, err := git.PlainOpen(dir)
- if err == nil {
- rc.repos[key] = repo
- }
- return repo, err
-}
-
-// ListRepos returns paths to repos on disk that start with the given
-// URL prefix. The paths are relative to baseDir, and typically
-// include a ".git" suffix.
-func ListRepos(baseDir string, u *url.URL) ([]string, error) {
- key := filepath.Join(u.Host, u.Path)
-
- var paths []string
- walk := func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
- if !info.IsDir() {
- return nil
- }
- if strings.HasSuffix(path, ".git") && !strings.HasSuffix(path, "/.git") {
- _, err := git.PlainOpen(path)
- if err == nil {
- p, err := filepath.Rel(baseDir, path)
- if err == nil {
- paths = append(paths, p)
- }
- }
- return filepath.SkipDir
- }
- return nil
- }
-
- if err := filepath.Walk(filepath.Join(baseDir, key), walk); err != nil {
- return nil, err
- }
- return paths, nil
-}
diff --git a/gitindex/repocache_test.go b/gitindex/repocache_test.go
deleted file mode 100644
index 8d22d3a..0000000
--- a/gitindex/repocache_test.go
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "io/ioutil"
- "net/url"
- "os"
- "reflect"
- "sort"
- "testing"
-)
-
-func TestListReposNonExistent(t *testing.T) {
- u, err := url.Parse("https://gerrit.googlesource.com/")
- if err != nil {
- t.Fatalf("url.Parse: %v", err)
- }
-
- rs, err := ListRepos("/doesnotexist", u)
- if err == nil {
- t.Fatalf("ListRepos(/doesnotexist): %v", rs)
- }
-}
-
-func TestListRepos(t *testing.T) {
- tmp, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir %v", err)
- }
- defer os.RemoveAll(tmp)
- if err := createSubmoduleRepo(tmp); err != nil {
- t.Fatalf("createSubmoduleRepo %v", err)
- }
-
- u, err := url.Parse("https://gerrit.googlesource.com/")
- if err != nil {
- t.Fatalf("url.Parse: %v", err)
- }
- rs, err := ListRepos(tmp, u)
- if err != nil {
- t.Fatalf("ListRepos(%s): %v", u, err)
- }
-
- want := []string{
- "gerrit.googlesource.com/adir.git",
- "gerrit.googlesource.com/bdir.git",
- "gerrit.googlesource.com/sub/bdir.git",
- }
- sort.Strings(rs)
-
- if !reflect.DeepEqual(rs, want) {
- t.Fatalf("got %v, want %v", rs, want)
- }
-}
diff --git a/gitindex/submodule.go b/gitindex/submodule.go
deleted file mode 100644
index 0836c89..0000000
--- a/gitindex/submodule.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "bytes"
-
- "github.com/go-git/go-git/v5/plumbing/format/config"
-)
-
-// SubmoduleEntry represent one entry in a .gitmodules file
-type SubmoduleEntry struct {
- Path string
- URL string
- Branch string
-}
-
-// ParseGitModules parses the contents of a .gitmodules file.
-func ParseGitModules(content []byte) (map[string]*SubmoduleEntry, error) {
- dec := config.NewDecoder(bytes.NewBuffer(content))
- cfg := &config.Config{}
-
- if err := dec.Decode(cfg); err != nil {
- return nil, err
- }
-
- result := map[string]*SubmoduleEntry{}
- for _, s := range cfg.Sections {
- if s.Name != "submodule" {
- continue
- }
-
- for _, ss := range s.Subsections {
- name := ss.Name
- e := &SubmoduleEntry{}
- for _, o := range ss.Options {
- switch o.Key {
- case "branch":
- e.Branch = o.Value
- case "path":
- e.Path = o.Value
- case "url":
- e.URL = o.Value
- }
- }
-
- result[name] = e
- }
- }
-
- return result, nil
-}
diff --git a/gitindex/submodule_test.go b/gitindex/submodule_test.go
deleted file mode 100644
index ca7fef3..0000000
--- a/gitindex/submodule_test.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "reflect"
- "testing"
-)
-
-func TestParseGitModules(t *testing.T) {
- testData := `[submodule "plugins/abc"]
- path = plugins/abc
- url = ../plugins/abc
- branch = .`
-
- got, err := ParseGitModules([]byte(testData))
- if err != nil {
- t.Fatalf("ParseGitModules: %T", err)
- }
-
- want := map[string]*SubmoduleEntry{
- "plugins/abc": {
- Path: "plugins/abc",
- URL: "../plugins/abc",
- Branch: ".",
- },
- }
- if !reflect.DeepEqual(got, want) {
- t.Fatalf("got %v, want %v", got, want)
- }
-}
diff --git a/gitindex/tree.go b/gitindex/tree.go
deleted file mode 100644
index 145cfd6..0000000
--- a/gitindex/tree.go
+++ /dev/null
@@ -1,217 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "fmt"
- "io"
- "log"
- "net/url"
- "path"
- "path/filepath"
- "strings"
-
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/filemode"
- "github.com/go-git/go-git/v5/plumbing/object"
-
- git "github.com/go-git/go-git/v5"
-)
-
-// repoWalker walks a tree, recursing into submodules.
-type repoWalker struct {
- repo *git.Repository
-
- repoURL *url.URL
- tree map[fileKey]BlobLocation
-
- // Path => SubmoduleEntry
- submodules map[string]*SubmoduleEntry
-
- // Path => commit SHA1
- subRepoVersions map[string]plumbing.Hash
- repoCache *RepoCache
-}
-
-// subURL returns the URL for a submodule.
-func (w *repoWalker) subURL(relURL string) (*url.URL, error) {
- if w.repoURL == nil {
- return nil, fmt.Errorf("no URL for base repo")
- }
- if strings.HasPrefix(relURL, "../") {
- u := *w.repoURL
- u.Path = path.Join(u.Path, relURL)
- return &u, nil
- }
-
- return url.Parse(relURL)
-}
-
-// newRepoWalker creates a new repoWalker.
-func newRepoWalker(r *git.Repository, repoURL string, repoCache *RepoCache) *repoWalker {
- u, _ := url.Parse(repoURL)
- return &repoWalker{
- repo: r,
- repoURL: u,
- tree: map[fileKey]BlobLocation{},
- repoCache: repoCache,
- subRepoVersions: map[string]plumbing.Hash{},
- }
-}
-
-// parseModuleMap initializes rw.submodules.
-func (rw *repoWalker) parseModuleMap(t *object.Tree) error {
- modEntry, _ := t.File(".gitmodules")
- if modEntry != nil {
- c, err := blobContents(&modEntry.Blob)
- if err != nil {
- return err
- }
- mods, err := ParseGitModules(c)
- if err != nil {
- return err
- }
- rw.submodules = map[string]*SubmoduleEntry{}
- for _, entry := range mods {
- rw.submodules[entry.Path] = entry
- }
- }
- return nil
-}
-
-// TreeToFiles fetches the blob SHA1s for a tree. If repoCache is
-// non-nil, recurse into submodules. In addition, it returns a mapping
-// that indicates in which repo each SHA1 can be found.
-func TreeToFiles(r *git.Repository, t *object.Tree,
- repoURL string, repoCache *RepoCache) (map[fileKey]BlobLocation, map[string]plumbing.Hash, error) {
- rw := newRepoWalker(r, repoURL, repoCache)
-
- if err := rw.parseModuleMap(t); err != nil {
- return nil, nil, err
- }
-
- tw := object.NewTreeWalker(t, true, make(map[plumbing.Hash]bool))
- defer tw.Close()
- for {
- name, entry, err := tw.Next()
- if err == io.EOF {
- break
- }
- if err := rw.handleEntry(name, &entry); err != nil {
- return nil, nil, err
- }
- }
- return rw.tree, rw.subRepoVersions, nil
-}
-
-func (r *repoWalker) tryHandleSubmodule(p string, id *plumbing.Hash) error {
- if err := r.handleSubmodule(p, id); err != nil {
- log.Printf("submodule %s: ignoring error %v", p, err)
- }
- return nil
-}
-
-func (r *repoWalker) handleSubmodule(p string, id *plumbing.Hash) error {
- submod := r.submodules[p]
- if submod == nil {
- return fmt.Errorf("no entry for submodule path %q", r.repoURL)
- }
-
- subURL, err := r.subURL(submod.URL)
- if err != nil {
- return err
- }
-
- subRepo, err := r.repoCache.Open(subURL)
- if err != nil {
- return err
- }
-
- obj, err := subRepo.CommitObject(*id)
- if err != nil {
- return err
- }
- tree, err := subRepo.TreeObject(obj.TreeHash)
- if err != nil {
- return err
- }
-
- r.subRepoVersions[p] = *id
-
- subTree, subVersions, err := TreeToFiles(subRepo, tree, subURL.String(), r.repoCache)
- if err != nil {
- return err
- }
- for k, repo := range subTree {
- r.tree[fileKey{
- SubRepoPath: filepath.Join(p, k.SubRepoPath),
- Path: k.Path,
- ID: k.ID,
- }] = repo
- }
- for k, v := range subVersions {
- r.subRepoVersions[filepath.Join(p, k)] = v
- }
- return nil
-}
-
-func (r *repoWalker) handleEntry(p string, e *object.TreeEntry) error {
- if e.Mode == filemode.Submodule && r.repoCache != nil {
- if err := r.tryHandleSubmodule(p, &e.Hash); err != nil {
- return fmt.Errorf("submodule %s: %v", p, err)
- }
- }
-
- switch e.Mode {
- case filemode.Regular, filemode.Executable:
- default:
- return nil
- }
-
- r.tree[fileKey{
- Path: p,
- ID: e.Hash,
- }] = BlobLocation{
- Repo: r.repo,
- URL: r.repoURL,
- }
- return nil
-}
-
-// fileKey describes a blob at a location in the final tree. We also
-// record the subrepository from where it came.
-type fileKey struct {
- SubRepoPath string
- Path string
- ID plumbing.Hash
-}
-
-func (k *fileKey) FullPath() string {
- return filepath.Join(k.SubRepoPath, k.Path)
-}
-
-// BlobLocation holds data where a blob can be found.
-type BlobLocation struct {
- Repo *git.Repository
- URL *url.URL
-}
-
-func (l *BlobLocation) Blob(id *plumbing.Hash) ([]byte, error) {
- blob, err := l.Repo.BlobObject(*id)
- if err != nil {
- return nil, err
- }
- return blobContents(blob)
-}
diff --git a/gitindex/tree_test.go b/gitindex/tree_test.go
deleted file mode 100644
index eff31b3..0000000
--- a/gitindex/tree_test.go
+++ /dev/null
@@ -1,486 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gitindex
-
-import (
- "bytes"
- "context"
- "fmt"
- "io/ioutil"
- "net/url"
- "os"
- "os/exec"
- "path/filepath"
- "reflect"
- "sort"
- "testing"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/build"
- "github.com/google/zoekt/query"
- "github.com/google/zoekt/shards"
-)
-
-func createSubmoduleRepo(dir string) error {
- if err := os.MkdirAll(dir, 0o755); err != nil {
- return err
- }
- script := `mkdir adir bdir
-cd adir
-git init -b master
-mkdir subdir
-echo acont > afile
-echo sub-cont > subdir/sub-file
-git add afile subdir/sub-file
-git config user.email "you@example.com"
-git config user.name "Your Name"
-git commit -am amsg
-
-cd ..
-cd bdir
-git init -b master
-echo bcont > bfile
-git add bfile
-git config user.email "you@example.com"
-git config user.name "Your Name"
-git commit -am bmsg
-
-cd ../adir
-git submodule add --name bname -- ../bdir bname
-git commit -am bmodmsg
-cat .gitmodules
-cd ..
-mkdir gerrit.googlesource.com
-git clone --bare adir gerrit.googlesource.com/adir.git
-git clone --bare bdir gerrit.googlesource.com/bdir.git
-
-mkdir gerrit.googlesource.com/bogus.git
-mkdir gerrit.googlesource.com/sub
-git clone --bare bdir gerrit.googlesource.com/sub/bdir.git
-
-cat << EOF > gerrit.googlesource.com/adir.git/config
-[core]
- repositoryformatversion = 0
- filemode = true
- bare = true
-[remote "origin"]
- url = http://gerrit.googlesource.com/adir
-[branch "master"]
- remote = origin
- merge = refs/heads/master
-EOF
-`
- cmd := exec.Command("/bin/sh", "-euxc", script)
- cmd.Dir = dir
- if out, err := cmd.CombinedOutput(); err != nil {
- return fmt.Errorf("execution error: %v, output %s", err, out)
- }
- return nil
-}
-
-func TestFindGitRepos(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createSubmoduleRepo(dir); err != nil {
- t.Error("createSubmoduleRepo", err)
- }
- repos, err := FindGitRepos(dir)
- if err != nil {
- t.Error("FindGitRepos", err)
- }
-
- got := map[string]bool{}
- for _, r := range repos {
- p, err := filepath.Rel(dir, r)
- if err != nil {
- t.Fatalf("Relative: %v", err)
- }
-
- got[p] = true
- }
-
- want := map[string]bool{
- "gerrit.googlesource.com/bdir.git": true,
- "gerrit.googlesource.com/sub/bdir.git": true,
- "adir/.git": true,
- "bdir/.git": true,
- "gerrit.googlesource.com/adir.git": true,
- }
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %v want %v", got, want)
- }
-}
-
-func TestTreeToFiles(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createSubmoduleRepo(dir); err != nil {
- t.Fatalf("TempDir: %v", err)
- }
-
- cache := NewRepoCache(dir)
-
- aURL, _ := url.Parse("http://gerrit.googlesource.com/adir")
- repo, err := cache.Open(aURL)
- if err != nil {
- t.Fatalf("Open: %v", err)
- }
-
- headRef, err := repo.Head()
- if err != nil {
- t.Fatalf("HEAD tree: %v", err)
- }
- commit, err := repo.CommitObject(headRef.Hash())
- if err != nil {
- t.Fatalf("commit obj HEAD: %v", err)
- }
-
- tree, err := repo.TreeObject(commit.TreeHash)
- if err != nil {
- t.Fatalf("AsTree: %v", err)
- }
-
- files, versions, err := TreeToFiles(repo, tree, aURL.String(), cache)
- if err != nil {
- t.Fatalf("TreeToFiles: %v", err)
- }
-
- bnameHash := versions["bname"]
- if entry, err := tree.FindEntry("bname"); err != nil {
- t.Fatalf("FindEntry %v", err)
- } else if !bytes.Equal(bnameHash[:], entry.Hash[:]) {
- t.Fatalf("got 'bname' versions %v, want %v", bnameHash, entry.Hash)
- }
-
- var paths []string
- for k := range files {
- paths = append(paths, k.FullPath())
- }
- sort.Strings(paths)
-
- want := []string{".gitmodules", "afile", "bname/bfile", "subdir/sub-file"}
- if !reflect.DeepEqual(paths, want) {
- t.Errorf("got %v, want %v", paths, want)
- }
-}
-
-func TestSubmoduleIndex(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createSubmoduleRepo(dir); err != nil {
- t.Fatalf("createSubmoduleRepo: %v", err)
- }
-
- indexDir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(indexDir)
-
- buildOpts := build.Options{
- IndexDir: indexDir,
- }
- opts := Options{
- RepoDir: filepath.Join(dir, "gerrit.googlesource.com", "adir.git"),
- BuildOptions: buildOpts,
- BranchPrefix: "refs/heads/",
- Branches: []string{"master"},
- Submodules: true,
- Incremental: true,
- RepoCacheDir: dir,
- }
- if err := IndexGitRepo(opts); err != nil {
- t.Fatalf("IndexGitRepo: %v", err)
- }
-
- searcher, err := shards.NewDirectorySearcher(indexDir)
- if err != nil {
- t.Fatal("NewDirectorySearcher", err)
- }
- defer searcher.Close()
-
- results, err := searcher.Search(context.Background(),
- &query.Substring{Pattern: "bcont"},
- &zoekt.SearchOptions{})
- if err != nil {
- t.Fatal("Search", err)
- }
-
- if len(results.Files) != 1 {
- t.Fatalf("got search result %v, want 1 file", results.Files)
- }
-
- file := results.Files[0]
- if got, want := file.SubRepositoryName, "gerrit.googlesource.com/bdir"; got != want {
- t.Errorf("got subrepo name %q, want %q", got, want)
- }
- if got, want := file.SubRepositoryPath, "bname"; got != want {
- t.Errorf("got subrepo path %q, want %q", got, want)
- }
-
- subVersion := file.Version
- if len(subVersion) != 40 {
- t.Fatalf("got %q, want hex sha1", subVersion)
- }
-
- if results, err := searcher.Search(context.Background(), &query.Substring{Pattern: "acont"}, &zoekt.SearchOptions{}); err != nil {
- t.Fatalf("Search('acont'): %v", err)
- } else if len(results.Files) != 1 {
- t.Errorf("got %v, want 1 result", results.Files)
- } else if f := results.Files[0]; f.Version == subVersion {
- t.Errorf("version in super repo matched version is subrepo.")
- }
-}
-
-func TestAllowMissingBranch(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
- if err := createSubmoduleRepo(dir); err != nil {
- t.Fatalf("createSubmoduleRepo: %v", err)
- }
-
- indexDir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(indexDir)
-
- buildOpts := build.Options{
- IndexDir: indexDir,
- }
-
- opts := Options{
- RepoDir: filepath.Join(dir, "gerrit.googlesource.com", "adir.git"),
- BuildOptions: buildOpts,
- BranchPrefix: "refs/heads/",
- Branches: []string{"master", "nonexist"},
- Submodules: true,
- Incremental: true,
- RepoCacheDir: dir,
- }
- if err := IndexGitRepo(opts); err == nil {
- t.Fatalf("IndexGitRepo(nonexist) succeeded")
- }
- opts.AllowMissingBranch = true
- if err := IndexGitRepo(opts); err != nil {
- t.Fatalf("IndexGitRepo(nonexist, allow): %v", err)
- }
-}
-
-func createMultibranchRepo(dir string) error {
- if err := os.MkdirAll(dir, 0o755); err != nil {
- return err
- }
- script := `mkdir repo
-cd repo
-git init -b master
-mkdir subdir
-echo acont > afile
-echo sub-cont > subdir/sub-file
-git add afile subdir/sub-file
-git config user.email "you@example.com"
-git config user.name "Your Name"
-git commit -am amsg
-
-git branch branchdir/a
-
-echo acont >> afile
-git add afile subdir/sub-file
-git commit -am amsg
-
-git branch branchdir/b
-
-git branch c
-
-git update-ref refs/meta/config HEAD
-`
- cmd := exec.Command("/bin/sh", "-euxc", script)
- cmd.Dir = dir
- if out, err := cmd.CombinedOutput(); err != nil {
- return fmt.Errorf("execution error: %v, output %s", err, out)
- }
- return nil
-}
-
-func TestBranchWildcard(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createMultibranchRepo(dir); err != nil {
- t.Fatalf("createMultibranchRepo: %v", err)
- }
-
- indexDir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(indexDir)
-
- buildOpts := build.Options{
- IndexDir: indexDir,
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- },
- }
- buildOpts.SetDefaults()
-
- opts := Options{
- RepoDir: filepath.Join(dir + "/repo"),
- BuildOptions: buildOpts,
- BranchPrefix: "refs/heads",
- Branches: []string{"branchdir/*"},
- Submodules: true,
- Incremental: true,
- }
- if err := IndexGitRepo(opts); err != nil {
- t.Fatalf("IndexGitRepo: %v", err)
- }
-
- searcher, err := shards.NewDirectorySearcher(indexDir)
- if err != nil {
- t.Fatal("NewDirectorySearcher", err)
- }
- defer searcher.Close()
-
- if rlist, err := searcher.List(context.Background(), &query.Repo{Pattern: ""}); err != nil {
- t.Fatalf("List(): %v", err)
- } else if len(rlist.Repos) != 1 {
- t.Errorf("got %v, want 1 result", rlist.Repos)
- } else if repo := rlist.Repos[0]; len(repo.Repository.Branches) != 2 {
- t.Errorf("got branches %v, want 2", repo.Repository.Branches)
- } else if repo := rlist.Repos[0]; repo.Stats.Documents != 3 {
- t.Errorf("got document count %d, want 3", repo.Stats.Documents)
- }
-}
-
-func TestSkipSubmodules(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createSubmoduleRepo(dir); err != nil {
- t.Fatalf("createMultibranchRepo: %v", err)
- }
-
- indexDir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(indexDir)
-
- buildOpts := build.Options{
- IndexDir: indexDir,
- RepositoryDescription: zoekt.Repository{
- Name: "gerrit.googlesource.com/adir",
- },
- }
- if err := os.Rename(dir+"/gerrit.googlesource.com/bdir.git",
- dir+"/gerrit.googlesource.com/notexist.git"); err != nil {
- t.Fatalf("Rename: %v", err)
- }
-
- opts := Options{
- RepoDir: filepath.Join(dir, "gerrit.googlesource.com", "adir.git"),
- BuildOptions: buildOpts,
- BranchPrefix: "refs/heads",
- Branches: []string{"master"},
- Submodules: false,
- }
- if err := IndexGitRepo(opts); err != nil {
- t.Fatalf("IndexGitRepo: %v", err)
- }
-}
-
-func TestFullAndShortRefNames(t *testing.T) {
- dir, err := ioutil.TempDir("", "git")
- if err != nil {
- t.Fatalf("TempDir: %v", err)
- }
- defer os.RemoveAll(dir)
-
- if err := createMultibranchRepo(dir); err != nil {
- t.Fatalf("createMultibranchRepo: %v", err)
- }
-
- indexDir, err := ioutil.TempDir("", "index-")
- if err != nil {
- t.Fatal(err)
- }
- os.RemoveAll(indexDir)
-
- buildOpts := build.Options{
- IndexDir: indexDir,
- RepositoryDescription: zoekt.Repository{
- Name: "repo",
- },
- }
- buildOpts.SetDefaults()
-
- opts := Options{
- RepoDir: filepath.Join(dir + "/repo"),
- BuildOptions: buildOpts,
- BranchPrefix: "refs/heads",
- Branches: []string{"refs/heads/master", "branchdir/a", "refs/meta/config"},
- Submodules: false,
- Incremental: false,
- AllowMissingBranch: false,
- }
- if err := IndexGitRepo(opts); err != nil {
- t.Fatalf("IndexGitRepo: %v", err)
- }
-
- searcher, err := shards.NewDirectorySearcher(indexDir)
- if err != nil {
- t.Fatal("NewDirectorySearcher", err)
- }
- defer searcher.Close()
-
- if rlist, err := searcher.List(context.Background(), &query.Repo{Pattern: ""}); err != nil {
- t.Fatalf("List(): %v", err)
- } else if len(rlist.Repos) != 1 {
- t.Errorf("got %v, want 1 result", rlist.Repos)
- } else if repo := rlist.Repos[0]; len(repo.Repository.Branches) != 3 {
- t.Errorf("got branches %v, want 3", repo.Repository.Branches)
- }
-}
-
-func TestUniq(t *testing.T) {
- in := []string{"a", "b", "b", "c", "c"}
- want := []string{"a", "b", "c"}
- got := uniq(in)
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %v, want %v", got, want)
- }
-}
diff --git a/go.mod b/go.mod
deleted file mode 100644
index 90ae146..0000000
--- a/go.mod
+++ /dev/null
@@ -1,27 +0,0 @@
-module github.com/google/zoekt
-
-go 1.13
-
-require (
- github.com/andygrunwald/go-gerrit v0.0.0-20191101112536-3f5e365ccf57
- github.com/bmatcuk/doublestar v1.3.4
- github.com/fsnotify/fsnotify v1.4.7
- github.com/gfleury/go-bitbucket-v1 v0.0.0-20200104105711-ddbafbb02522
- github.com/go-git/go-git/v5 v5.0.0
- github.com/golang/protobuf v1.3.3 // indirect
- github.com/google/go-cmp v0.5.5
- github.com/google/go-github/v27 v27.0.6
- github.com/google/slothfs v0.0.0-20190417171004-6b42407d9230
- github.com/kylelemons/godebug v1.1.0
- github.com/mitchellh/mapstructure v1.1.2 // indirect
- github.com/pkg/errors v0.9.1 // indirect
- github.com/prometheus/client_golang v1.5.1
- github.com/xanzy/go-gitlab v0.25.0
- go.uber.org/automaxprocs v1.3.0
- golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6
- golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
- golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
- google.golang.org/appengine v1.6.5 // indirect
- gopkg.in/yaml.v2 v2.2.8 // indirect
- humungus.tedunangst.com/r/gerc v0.1.2
-)
diff --git a/go.sum b/go.sum
deleted file mode 100644
index 4b1a482..0000000
--- a/go.sum
+++ /dev/null
@@ -1,212 +0,0 @@
-cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
-github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
-github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 h1:uSoVVbwJiQipAclBbw+8quDsfcvFjOpI5iCf4p/cqCs=
-github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs=
-github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
-github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
-github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
-github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
-github.com/andygrunwald/go-gerrit v0.0.0-20191101112536-3f5e365ccf57 h1:wtSQ14h8qAUezER6QPfYmCh5+W5Ly1lVruhm/QeOVUE=
-github.com/andygrunwald/go-gerrit v0.0.0-20191101112536-3f5e365ccf57/go.mod h1:0iuRQp6WJ44ts+iihy5E/WlPqfg5RNeQxOmzRkxCdtk=
-github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA=
-github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
-github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
-github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
-github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
-github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
-github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
-github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
-github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0=
-github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
-github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
-github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
-github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
-github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
-github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ=
-github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
-github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
-github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
-github.com/gfleury/go-bitbucket-v1 v0.0.0-20200104105711-ddbafbb02522 h1:hrfa10FjSX7jqupn/o8vyEsJ807SyKvuf+iFiEytTN8=
-github.com/gfleury/go-bitbucket-v1 v0.0.0-20200104105711-ddbafbb02522/go.mod h1:Se0U4YUmRkRAOh8kD7KXz+3VCUBmvTFcdWP2QYYRjjc=
-github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0=
-github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
-github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4=
-github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E=
-github.com/go-git/go-billy/v5 v5.0.0 h1:7NQHvd9FVid8VL4qVUMm8XifBK+2xCoZ2lSk0agRrHM=
-github.com/go-git/go-billy/v5 v5.0.0/go.mod h1:pmpqyWchKfYfrkb/UVH4otLvyi/5gJlGI4Hb3ZqZ3W0=
-github.com/go-git/go-git-fixtures/v4 v4.0.1 h1:q+IFMfLx200Q3scvt2hN79JsEzy4AmBTp/pqnefH+Bc=
-github.com/go-git/go-git-fixtures/v4 v4.0.1/go.mod h1:m+ICp2rF3jDhFgEZ/8yziagdT1C+ZpZcrJjappBCDSw=
-github.com/go-git/go-git/v5 v5.0.0 h1:k5RWPm4iJwYtfWoxIJy4wJX9ON7ihPeZZYC1fLYDnpg=
-github.com/go-git/go-git/v5 v5.0.0/go.mod h1:oYD8y9kWsGINPFJoLdaScGCN6dlKg23blmClfZwtUVA=
-github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
-github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
-github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
-github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
-github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
-github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
-github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I=
-github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
-github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
-github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
-github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-github/v27 v27.0.6 h1:oiOZuBmGHvrGM1X9uNUAUlLgp5r1UUO/M/KnbHnLRlQ=
-github.com/google/go-github/v27 v27.0.6/go.mod h1:/0Gr8pJ55COkmv+S/yPKCczSkUPIM/LnFyubufRNIS0=
-github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk=
-github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
-github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
-github.com/google/slothfs v0.0.0-20190417171004-6b42407d9230 h1:iBLrJ79cF90CZmpskySqhPvzrWr9njBYEsOZubXLZlc=
-github.com/google/slothfs v0.0.0-20190417171004-6b42407d9230/go.mod h1:kzvK/MFjZSNdFgc1tCZML3E1nVvnB4/npSKEuvMoECU=
-github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
-github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
-github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
-github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
-github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
-github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
-github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd h1:Coekwdh0v2wtGp9Gmz1Ze3eVRAWJMLokvN3QjdzCHLY=
-github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
-github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
-github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
-github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
-github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
-github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
-github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
-github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
-github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
-github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
-github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
-github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE=
-github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
-github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
-github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
-github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
-github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
-github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
-github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
-github.com/prometheus/client_golang v1.5.1 h1:bdHYieyGlH+6OLEk2YQha8THib30KP0/yD0YH9m6xcA=
-github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
-github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
-github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
-github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
-github.com/prometheus/common v0.9.1 h1:KOMtN28tlbam3/7ZKEYKHhKoJZYYj3gMH4uc62x7X7U=
-github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
-github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
-github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
-github.com/prometheus/procfs v0.0.8 h1:+fpWZdT24pJBiqJdAwYBjPSk+5YmQzYNPYzQsdzLkt8=
-github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
-github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
-github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
-github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
-github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
-github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
-github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
-github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
-github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
-github.com/xanzy/go-gitlab v0.25.0 h1:G5aTZeqZd66Q6qMVieBfmHBsPpF0jY92zCLAMpULe3I=
-github.com/xanzy/go-gitlab v0.25.0/go.mod h1:t4Bmvnxj7k37S4Y17lfLx+nLqkf/oQwT2HagfWKv5Og=
-github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
-github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
-go.uber.org/automaxprocs v1.3.0 h1:II28aZoGdaglS5vVNnspf28lnZpXScxtIozx1lAjdb0=
-go.uber.org/automaxprocs v1.3.0/go.mod h1:9CWT6lKIep8U41DDaPiH6eFscnTyjfTANNQNx6LrIcA=
-golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
-golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073 h1:xMPOj6Pz6UipU1wXLkrtqpHbR0AVFnyPEQq/wRWz9lM=
-golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
-golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
-golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181108082009-03003ca0c849/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
-golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 h1:0PC75Fz/kyMGhL0e1QnypqK2kQMqKt9csD1GnMJR+Zk=
-golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
-golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw=
-golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
-golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c=
-golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
-golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
-golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
-golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
-google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
-google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
-google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM=
-google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
-gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
-gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
-gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
-gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
-gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
-gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
-humungus.tedunangst.com/r/gerc v0.1.2 h1:eW5yTbRLFFWSu/RpTdkLxaVPlNlFUzxc02VQsftzg64=
-humungus.tedunangst.com/r/gerc v0.1.2/go.mod h1:tuYnDVV3WEGI9NEX5/3Iz5xVNimFzN4+83qZvFf/GUg=
diff --git a/hititer.go b/hititer.go
deleted file mode 100644
index 356c505..0000000
--- a/hititer.go
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "encoding/binary"
- "fmt"
-)
-
-// hitIterator finds potential search matches, measured in offsets of
-// the concatenation of all documents.
-type hitIterator interface {
- // Return the first hit, or maxUInt32 if none.
- first() uint32
-
- // Skip until past limit. The argument maxUInt32 should be
- // treated specially.
- next(limit uint32)
-
- // Return how many bytes were read.
- updateStats(s *Stats)
-}
-
-// distanceHitIterator looks for hits at a fixed distance apart.
-type distanceHitIterator struct {
- started bool
- distance uint32
- i1 hitIterator
- i2 hitIterator
-}
-
-func (i *distanceHitIterator) String() string {
- return fmt.Sprintf("dist(%d, %v, %v)", i.distance, i.i1, i.i2)
-}
-
-func (i *distanceHitIterator) findNext() {
- for {
- var p1, p2 uint32
- p1 = i.i1.first()
- p2 = i.i2.first()
- if p1 == maxUInt32 || p2 == maxUInt32 {
- i.i1.next(maxUInt32)
- break
- }
-
- if p1+i.distance < p2 {
- i.i1.next(p2 - i.distance - 1)
- } else if p1+i.distance > p2 {
- i.i2.next(p1 + i.distance - 1)
- } else {
- break
- }
- }
-}
-
-func (i *distanceHitIterator) first() uint32 {
- if !i.started {
- i.findNext()
- i.started = true
- }
- return i.i1.first()
-}
-
-func (i *distanceHitIterator) updateStats(s *Stats) {
- i.i1.updateStats(s)
- i.i2.updateStats(s)
-}
-
-func (i *distanceHitIterator) next(limit uint32) {
- i.i1.next(limit)
- l2 := limit + i.distance
-
- if l2 < limit { // overflow.
- l2 = maxUInt32
- }
- i.i2.next(l2)
- i.findNext()
-}
-
-func (d *indexData) newDistanceTrigramIter(ng1, ng2 ngram, dist uint32, caseSensitive, fileName bool) (hitIterator, error) {
- if dist == 0 {
- return nil, fmt.Errorf("d == 0")
- }
-
- i1, err := d.trigramHitIterator(ng1, caseSensitive, fileName)
- if err != nil {
- return nil, err
- }
- i2, err := d.trigramHitIterator(ng2, caseSensitive, fileName)
- if err != nil {
- return nil, err
- }
- return &distanceHitIterator{
- i1: i1,
- i2: i2,
- distance: dist,
- }, nil
-}
-
-func (d *indexData) trigramHitIterator(ng ngram, caseSensitive, fileName bool) (hitIterator, error) {
- variants := []ngram{ng}
- if !caseSensitive {
- variants = generateCaseNgrams(ng)
- }
-
- iters := make([]hitIterator, 0, len(variants))
- for _, v := range variants {
- if fileName {
- blob := d.fileNameNgrams[v]
- if len(blob) > 0 {
- iters = append(iters, &inMemoryIterator{
- d.fileNameNgrams[v],
- v,
- })
- }
- continue
- }
-
- sec := d.ngrams[v]
- blob, err := d.readSectionBlob(sec)
- if err != nil {
- return nil, err
- }
- if len(blob) > 0 {
- iters = append(iters, newCompressedPostingIterator(blob, v))
- }
- }
-
- if len(iters) == 1 {
- return iters[0], nil
- }
- return &mergingIterator{
- iters: iters,
- }, nil
-}
-
-// inMemoryIterator is hitIterator that goes over an in-memory uint32 posting list.
-type inMemoryIterator struct {
- postings []uint32
- what ngram
-}
-
-func (i *inMemoryIterator) String() string {
- return fmt.Sprintf("mem(%s):%v", i.what, i.postings)
-}
-
-func (i *inMemoryIterator) first() uint32 {
- if len(i.postings) > 0 {
- return i.postings[0]
- }
- return maxUInt32
-}
-
-func (i *inMemoryIterator) updateStats(s *Stats) {
-}
-
-func (i *inMemoryIterator) next(limit uint32) {
- if limit == maxUInt32 {
- i.postings = nil
- }
-
- for len(i.postings) > 0 && i.postings[0] <= limit {
- i.postings = i.postings[1:]
- }
-}
-
-// compressedPostingIterator goes over a delta varint encoded posting
-// list.
-type compressedPostingIterator struct {
- blob, orig []byte
- _first uint32
- what ngram
-}
-
-func newCompressedPostingIterator(b []byte, w ngram) *compressedPostingIterator {
- d, sz := binary.Uvarint(b)
- return &compressedPostingIterator{
- _first: uint32(d),
- blob: b[sz:],
- orig: b,
- what: w,
- }
-}
-
-func (i *compressedPostingIterator) String() string {
- return fmt.Sprintf("compressed(%s, %d, [%d bytes])", i.what, i._first, len(i.blob))
-}
-
-func (i *compressedPostingIterator) first() uint32 {
- return i._first
-}
-
-func (i *compressedPostingIterator) next(limit uint32) {
- if limit == maxUInt32 {
- i.blob = nil
- i._first = maxUInt32
- return
- }
-
- for i._first <= limit && len(i.blob) > 0 {
- delta, sz := binary.Uvarint(i.blob)
- i._first += uint32(delta)
- i.blob = i.blob[sz:]
- }
-
- if i._first <= limit && len(i.blob) == 0 {
- i._first = maxUInt32
- }
-}
-
-func (i *compressedPostingIterator) updateStats(s *Stats) {
- s.IndexBytesLoaded += int64(len(i.orig) - len(i.blob))
-}
-
-// mergingIterator forms the merge of a set of hitIterators, to
-// implement an OR operation at the hit level.
-type mergingIterator struct {
- iters []hitIterator
-}
-
-func (i *mergingIterator) String() string {
- return fmt.Sprintf("merge:%v", i.iters)
-}
-
-func (i *mergingIterator) updateStats(s *Stats) {
- for _, j := range i.iters {
- j.updateStats(s)
- }
-}
-
-func (i *mergingIterator) first() uint32 {
- r := uint32(maxUInt32)
- for _, j := range i.iters {
- f := j.first()
- if f < r {
- r = f
- }
- }
-
- return r
-}
-
-func (i *mergingIterator) next(limit uint32) {
- for _, j := range i.iters {
- j.next(limit)
- }
-}
diff --git a/hititer_test.go b/hititer_test.go
deleted file mode 100644
index 0c276c1..0000000
--- a/hititer_test.go
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright 2019 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "fmt"
- "math/rand"
- "reflect"
- "sort"
- "testing"
- "testing/quick"
-
- "github.com/google/go-cmp/cmp"
-)
-
-func TestCompressedPostingIterator_limit(t *testing.T) {
- f := func(nums, limits []uint32) bool {
- if len(nums) == 0 || len(limits) == 0 {
- return true
- }
-
- nums = sortedUnique(nums)
- sort.Slice(limits, func(i, j int) bool { return limits[i] < limits[j] })
-
- want := doHitIterator(&inMemoryIterator{postings: nums}, limits)
-
- it := newCompressedPostingIterator(toDeltas(nums), stringToNGram("abc"))
- got := doHitIterator(it, limits)
- if !reflect.DeepEqual(want, got) {
- t.Log(cmp.Diff(want, got))
- return false
- }
- return true
- }
- if err := quick.Check(f, nil); err != nil {
- t.Error(err)
- }
-}
-
-func doHitIterator(it hitIterator, limits []uint32) []uint32 {
- var nums []uint32
- for _, limit := range limits {
- it.next(limit)
- nums = append(nums, it.first())
- }
- return nums
-}
-
-func BenchmarkCompressedPostingIterator(b *testing.B) {
- cases := []struct{ size, limitSize int }{
- {100, 50},
- {10000, 100},
- {10000, 1000},
- {10000, 10000},
- {100000, 100},
- {100000, 1000},
- {100000, 10000},
- {100000, 100000},
- }
- for _, tt := range cases {
- b.Run(fmt.Sprintf("%d_%d", tt.size, tt.limitSize), func(b *testing.B) {
- benchmarkCompressedPostingIterator(b, tt.size, tt.limitSize)
- })
- }
-}
-
-func benchmarkCompressedPostingIterator(b *testing.B, size, limitsSize int) {
- nums := genUints32(size)
- limits := genUints32(limitsSize)
-
- nums = sortedUnique(nums)
- sort.Slice(limits, func(i, j int) bool { return limits[i] < limits[j] })
-
- ng := stringToNGram("abc")
- deltas := toDeltas(nums)
-
- b.ResetTimer()
-
- for n := 0; n < b.N; n++ {
- it := newCompressedPostingIterator(deltas, ng)
- for _, limit := range limits {
- it.next(limit)
- _ = it.first()
- }
- var s Stats
- it.updateStats(&s)
- b.SetBytes(s.IndexBytesLoaded)
- }
-}
-
-func genUints32(size int) []uint32 {
- // Deterministic for benchmarks
- r := rand.New(rand.NewSource(int64(size)))
- nums := make([]uint32, size)
- for i := range nums {
- nums[i] = r.Uint32()
- }
- return nums
-}
diff --git a/index_test.go b/index_test.go
deleted file mode 100644
index 0f84197..0000000
--- a/index_test.go
+++ /dev/null
@@ -1,1938 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "bytes"
- "context"
- "fmt"
- "reflect"
- "regexp/syntax"
- "strings"
- "testing"
-
- "github.com/kylelemons/godebug/pretty"
-
- "github.com/google/zoekt/query"
-)
-
-func clearScores(r *SearchResult) {
- for i := range r.Files {
- r.Files[i].Score = 0.0
- for j := range r.Files[i].LineMatches {
- r.Files[i].LineMatches[j].Score = 0.0
- }
- r.Files[i].Checksum = nil
- r.Files[i].Debug = ""
- }
-}
-
-func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
- b, err := NewIndexBuilder(repo)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- for i, d := range docs {
- if err := b.Add(d); err != nil {
- t.Fatalf("Add %d: %v", i, err)
- }
- }
- return b
-}
-
-func TestBoundary(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: []byte("x the")},
- Document{Name: "f1", Content: []byte("reader")})
- res := searchForTest(t, b, &query.Substring{Pattern: "there"})
- if len(res.Files) > 0 {
- t.Fatalf("got %v, want no matches", res.Files)
- }
-}
-
-func TestDocSectionInvalid(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
- doc := Document{
- Name: "f1",
- Content: []byte("01234567890123"),
- Symbols: []DocumentSection{{5, 8}, {7, 9}},
- }
-
- if err := b.Add(doc); err == nil {
- t.Errorf("overlapping doc sections should fail")
- }
-
- doc = Document{
- Name: "f1",
- Content: []byte("01234567890123"),
- Symbols: []DocumentSection{{0, 20}},
- }
-
- if err := b.Add(doc); err == nil {
- t.Errorf("doc sections beyond EOF should fail")
- }
-}
-
-func TestBasic(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f2",
- Content: []byte("to carry water in the no later bla"),
- // ------------- 0123456789012345678901234567890123456789
- })
-
- res := searchForTest(t, b, &query.Substring{
- Pattern: "water",
- CaseSensitive: true,
- })
- fmatches := res.Files
- if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 matches", fmatches)
- }
-
- got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
- want := "f2:9"
- if got != want {
- t.Errorf("1: got %s, want %s", got, want)
- }
-}
-
-func TestEmptyIndex(t *testing.T) {
- b := testIndexBuilder(t, nil)
- searcher := searcherForTest(t, b)
-
- var opts SearchOptions
- if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
- t.Fatalf("Search: %v", err)
- }
-
- if _, err := searcher.List(context.Background(), &query.Repo{}); err != nil {
- t.Fatalf("List: %v", err)
- }
-
- if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
- t.Fatalf("Search: %v", err)
- }
-}
-
-type memSeeker struct {
- data []byte
-}
-
-func (s *memSeeker) Name() string {
- return "memseeker"
-}
-
-func (s *memSeeker) Close() {}
-func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
- return s.data[off : off+sz], nil
-}
-
-func (s *memSeeker) Size() (uint32, error) {
- return uint32(len(s.data)), nil
-}
-
-func TestNewlines(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
-
- sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
-
- matches := sres.Files
- want := []FileMatch{{
- FileName: "filename",
- LineMatches: []LineMatch{
- {
- LineFragments: []LineFragmentMatch{{
- Offset: 8,
- LineOffset: 2,
- MatchLength: 3,
- }},
- Line: []byte("line2"),
- LineStart: 6,
- LineEnd: 11,
- LineNumber: 2,
- },
- },
- }}
-
- if !reflect.DeepEqual(matches, want) {
- t.Errorf("got %v, want %v", matches, want)
- }
-}
-
-// A result spanning multiple lines should have LineMatches that only cover
-// single lines.
-func TestQueryNewlines(t *testing.T) {
- text := "line1\nline2\nbla"
- b := testIndexBuilder(t, nil,
- Document{Name: "filename", Content: []byte(text)})
- sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
- matches := sres.Files
- if len(matches) != 1 {
- t.Fatalf("got %d file matches, want exactly one", len(matches))
- }
- m := matches[0]
- if len(m.LineMatches) != 2 {
- t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
- }
-}
-
-func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
- searcher := searcherForTest(t, b)
- var opts SearchOptions
- if len(o) > 0 {
- opts = o[0]
- }
- res, err := searcher.Search(context.Background(), q, &opts)
- if err != nil {
- t.Fatalf("Search(%s): %v", q, err)
- }
- clearScores(res)
- return res
-}
-
-func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
- var buf bytes.Buffer
- b.Write(&buf)
- f := &memSeeker{buf.Bytes()}
-
- searcher, err := NewSearcher(f)
- if err != nil {
- t.Fatalf("NewSearcher: %v", err)
- }
-
- return searcher
-}
-
-func TestFileBasedSearch(t *testing.T) {
- c1 := []byte("I love bananas without skin")
- // -----------0123456789012345678901234567890123456789
- c2 := []byte("In Dutch, ananas means pineapple")
- // -----------0123456789012345678901234567890123456789
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: c1},
- Document{Name: "f2", Content: c2},
- )
- sres := searchForTest(t, b, &query.Substring{
- CaseSensitive: false,
- Pattern: "ananas",
- })
-
- matches := sres.Files
- if len(matches) != 2 {
- t.Fatalf("got %v, want 2 matches", matches)
- }
- if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
- t.Fatalf("got %v, want matches {f1,f2}", matches)
- }
- if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
- t.Fatalf("got %#v, want offsets 10,8", matches)
- }
-}
-
-func TestCaseFold(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
- // ---------- 012345678901234567890123456
- )
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "bananas",
- CaseSensitive: true,
- })
- matches := sres.Files
- if len(matches) != 0 {
- t.Errorf("foldcase: got %#v, want 0 matches", matches)
- }
-
- sres = searchForTest(t, b,
- &query.Substring{
- Pattern: "BaNaNAS",
- CaseSensitive: true,
- })
- matches = sres.Files
- if len(matches) != 1 {
- t.Errorf("no foldcase: got %v, want 1 matches", matches)
- } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
- t.Errorf("foldcase: got %v, want offsets 7", matches)
- }
-}
-
-func TestAndSearch(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("f1", []byte("x banana y"))
- b.AddFile("f2", []byte("x apple y"))
- b.AddFile("f3", []byte("x banana apple y"))
- // ---------------------0123456789012345
- sres := searchForTest(t, b, query.NewAnd(
- &query.Substring{
- Pattern: "banana",
- },
- &query.Substring{
- Pattern: "apple",
- },
- ))
- matches := sres.Files
- if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
- t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
- }
-
- if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
- t.Fatalf("got %#v, want offsets 2,9", matches)
- }
-
- wantStats := Stats{
- FilesLoaded: 1,
- ContentBytesLoaded: 18,
- IndexBytesLoaded: 8,
- NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
- MatchCount: 1,
- FileCount: 1,
- FilesConsidered: 2,
- }
- if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
- t.Errorf("got stats diff %s", diff)
- }
-}
-
-func TestAndNegateSearch(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("f1", []byte("x banana y"))
- b.AddFile("f4", []byte("x banana apple y"))
- // ---------------------0123456789012345
- sres := searchForTest(t, b, query.NewAnd(
- &query.Substring{
- Pattern: "banana",
- },
- &query.Not{Child: &query.Substring{
- Pattern: "apple",
- }}))
-
- matches := sres.Files
-
- if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match", matches)
- }
- if matches[0].FileName != "f1" {
- t.Fatalf("got match %#v, want FileName: f1", matches[0])
- }
- if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
- t.Fatalf("got %v, want offsets 2,9", matches)
- }
-}
-
-func TestNegativeMatchesOnlyShortcut(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("f1", []byte("x banana y"))
- b.AddFile("f2", []byte("x appelmoes y"))
- b.AddFile("f3", []byte("x appelmoes y"))
- b.AddFile("f3", []byte("x appelmoes y"))
-
- sres := searchForTest(t, b, query.NewAnd(
- &query.Substring{
- Pattern: "banana",
- },
- &query.Not{Child: &query.Substring{
- Pattern: "appel",
- }}))
-
- if sres.Stats.FilesConsidered != 1 {
- t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
- }
-}
-
-func TestFileSearch(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banzana", []byte("x orange y"))
- // --------0123456
- b.AddFile("banana", []byte("x apple y"))
- // --------789012
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "anan",
- FileName: true,
- })
-
- matches := sres.Files
- if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match", matches)
- }
-
- got := matches[0].LineMatches[0]
- want := LineMatch{
- Line: []byte("banana"),
- LineFragments: []LineFragmentMatch{{
- Offset: 1,
- LineOffset: 1,
- MatchLength: 4,
- }},
- FileName: true,
- }
-
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %#v, want %#v", got, want)
- }
-}
-
-func TestFileCase(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("BANANA", []byte("x orange y"))
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "banana",
- FileName: true,
- })
-
- matches := sres.Files
- if len(matches) != 1 || matches[0].FileName != "BANANA" {
- t.Fatalf("got %v, want 1 match 'BANANA'", matches)
- }
-}
-
-func TestFileRegexpSearchBruteForce(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banzana", []byte("x orange y"))
- // --------------------------0123456879
- b.AddFile("banana", []byte("x apple y"))
- sres := searchForTest(t, b, &query.Regexp{
- Regexp: mustParseRE("[qn][zx]"),
- FileName: true,
- })
-
- matches := sres.Files
- if len(matches) != 1 || matches[0].FileName != "banzana" {
- t.Fatalf("got %v, want 1 match on 'banzana'", matches)
- }
-}
-
-func TestFileRegexpSearchShortString(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banana.py", []byte("x orange y"))
- sres := searchForTest(t, b, &query.Regexp{
- Regexp: mustParseRE("ana.py"),
- FileName: true,
- })
-
- matches := sres.Files
- if len(matches) != 1 || matches[0].FileName != "banana.py" {
- t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
- }
-}
-
-func TestFileSubstringSearchBruteForce(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("BANZANA", []byte("x orange y"))
- b.AddFile("banana", []byte("x apple y"))
-
- q := &query.Substring{
- Pattern: "z",
- FileName: true,
- }
-
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
- t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
- }
-}
-
-func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("BANZANA", []byte("x orange y"))
- b.AddFile("bananaq", []byte("x apple y"))
-
- q := &query.Substring{
- Pattern: "q",
- FileName: true,
- }
-
- res := searchForTest(t, b, q)
- if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
- t.Fatalf("got %v, want 1 match in %q", res.Files, want)
- }
-}
-
-func TestSearchMatchAll(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banzana", []byte("x orange y"))
- // --------------------------0123456879
- b.AddFile("banana", []byte("x apple y"))
- sres := searchForTest(t, b, &query.Const{Value: true})
-
- matches := sres.Files
- if len(matches) != 2 {
- t.Fatalf("got %v, want 2 matches", matches)
- }
-}
-
-func TestSearchNewline(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banzana", []byte("abcd\ndefg"))
- sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
-
- // Just check that we don't crash.
-
- matches := sres.Files
- if len(matches) != 1 {
- t.Fatalf("got %v, want 1 matches", matches)
- }
-}
-
-func TestSearchMatchAllRegexp(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banzana", []byte("abcd"))
- // --------------------------0123456879
- b.AddFile("banana", []byte("pqrs"))
- sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
-
- matches := sres.Files
- if len(matches) != 2 || sres.Stats.MatchCount != 2 {
- t.Fatalf("got %v, want 2 matches", matches)
- }
- if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
- t.Fatalf("want 4 chars in every file, got %#v", matches)
- }
-}
-
-func TestFileRestriction(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- b.AddFile("banana1", []byte("x orange y"))
- // --------------------------0123456879
- b.AddFile("banana2", []byte("x apple y"))
- b.AddFile("orange", []byte("x apple y"))
- sres := searchForTest(t, b, query.NewAnd(
- &query.Substring{
- Pattern: "banana",
- FileName: true,
- },
- &query.Substring{
- Pattern: "apple",
- }))
-
- matches := sres.Files
- if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match", matches)
- }
-
- match := matches[0].LineMatches[0]
- got := string(match.Line)
- want := "x apple y"
- if got != want {
- t.Errorf("got match %#v, want line %q", match, want)
- }
-}
-
-func TestFileNameBoundary(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "banana2", Content: []byte("x apple y")},
- Document{Name: "helpers.go", Content: []byte("x apple y")},
- Document{Name: "foo", Content: []byte("x apple y")})
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "helpers.go",
- FileName: true,
- })
-
- matches := sres.Files
- if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match", matches)
- }
-}
-
-func TestWordBoundaryRanking(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: []byte("xbytex xbytex")},
- Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
- // -----------------------------------0123456 789012 34567890
- Document{Name: "f3", Content: []byte("xbytex ybytex")})
-
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "byte",
- })
-
- if len(sres.Files) != 3 {
- t.Fatalf("got %#v, want 3 files", sres.Files)
- }
-
- file0 := sres.Files[0]
- if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
- t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
- }
-
- if file0.LineMatches[0].LineFragments[0].Offset != 13 {
- t.Fatalf("got first match %#v, want full word match", sres.Files[0].LineMatches[0])
- }
- if file0.LineMatches[1].LineFragments[0].Offset != 7 {
- t.Fatalf("got second match %#v, want partial word match", sres.Files[0].LineMatches[1])
- }
-}
-
-func TestDocumentOrder(t *testing.T) {
- var docs []Document
- for i := 0; i < 3; i++ {
- docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
- }
-
- b := testIndexBuilder(t, nil, docs...)
-
- sres := searchForTest(t, b, query.NewAnd(
- &query.Substring{
- Pattern: "needle",
- }))
-
- want := []string{"f0", "f1", "f2"}
- var got []string
- for _, f := range sres.Files {
- got = append(got, f.FileName)
- }
- if !reflect.DeepEqual(got, want) {
- t.Fatalf("got %v, want %v", got, want)
- }
-}
-
-func TestBranchMask(t *testing.T) {
- b := testIndexBuilder(t, &Repository{
- Branches: []RepositoryBranch{
- {"master", "v-master"},
- {"stable", "v-stable"},
- {"bonzai", "v-bonzai"},
- },
- }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
- Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
- Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
- Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
- )
-
- sres := searchForTest(t, b, query.NewAnd(
- &query.Substring{
- Pattern: "needle",
- },
- &query.Branch{
- Pattern: "table",
- }))
-
- if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
- t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
- }
-
- if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
- t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
- }
-}
-
-func TestBranchLimit(t *testing.T) {
- for limit := 64; limit <= 65; limit++ {
- r := &Repository{}
- for i := 0; i < limit; i++ {
- s := fmt.Sprintf("b%d", i)
- r.Branches = append(r.Branches, RepositoryBranch{
- s, "v-" + s,
- })
- }
- _, err := NewIndexBuilder(r)
- if limit == 64 && err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- } else if limit == 65 && err == nil {
- t.Fatalf("NewIndexBuilder succeeded")
- }
- }
-}
-
-func TestBranchReport(t *testing.T) {
- branches := []string{"stable", "master"}
- b := testIndexBuilder(t, &Repository{
- Branches: []RepositoryBranch{
- {"stable", "vs"},
- {"master", "vm"},
- },
- },
- Document{Name: "f2", Content: []byte("needle"), Branches: branches})
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "needle",
- })
- if len(sres.Files) != 1 {
- t.Fatalf("got %v, want 1 result from f2", sres.Files)
- }
-
- f := sres.Files[0]
- if !reflect.DeepEqual(f.Branches, branches) {
- t.Fatalf("got branches %q, want %q", f.Branches, branches)
- }
-}
-
-func TestBranchVersions(t *testing.T) {
- b := testIndexBuilder(t, &Repository{
- Branches: []RepositoryBranch{
- {"stable", "v-stable"},
- {"master", "v-master"},
- },
- }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
-
- sres := searchForTest(t, b, &query.Substring{
- Pattern: "needle",
- })
- if len(sres.Files) != 1 {
- t.Fatalf("got %v, want 1 result from f2", sres.Files)
- }
-
- f := sres.Files[0]
- if f.Version != "v-master" {
- t.Fatalf("got file %#v, want version 'v-master'", f)
- }
-}
-
-func mustParseRE(s string) *syntax.Regexp {
- r, err := syntax.Parse(s, 0)
- if err != nil {
- panic(err)
- }
-
- return r
-}
-
-func TestRegexp(t *testing.T) {
- content := []byte("needle the bla")
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- })
- // ------------------------------01234567890123
-
- sres := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("dle.*bla"),
- })
-
- if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
- }
-
- got := sres.Files[0].LineMatches[0]
- want := LineMatch{
- LineFragments: []LineFragmentMatch{{
- LineOffset: 3,
- Offset: 3,
- MatchLength: 11,
- }},
- Line: content,
- FileName: false,
- LineNumber: 1,
- LineStart: 0,
- LineEnd: 14,
- }
-
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %#v, want %#v", got, want)
- }
-}
-
-func TestRegexpFile(t *testing.T) {
- content := []byte("needle the bla")
- // ----------------01234567890123
-
- name := "let's play: find the mussel"
- b := testIndexBuilder(t, nil,
- Document{Name: name, Content: content},
- Document{Name: "play.txt", Content: content})
-
- sres := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("play.*mussel"),
- FileName: true,
- })
-
- if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
- }
-
- if sres.Files[0].FileName != name {
- t.Errorf("got match %#v, want name %q", sres.Files[0], name)
- }
-}
-
-func TestRegexpOrder(t *testing.T) {
- content := []byte("bla the needle")
- // ----------------01234567890123
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
-
- sres := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("dle.*bla"),
- })
-
- if len(sres.Files) != 0 {
- t.Fatalf("got %v, want 0 matches", sres.Files)
- }
-}
-
-func TestRepoName(t *testing.T) {
- content := []byte("bla the needle")
- // ----------------01234567890123
- b := testIndexBuilder(t, &Repository{Name: "bla"},
- Document{Name: "f1", Content: content})
-
- sres := searchForTest(t, b,
- query.NewAnd(
- &query.Substring{Pattern: "needle"},
- &query.Repo{Pattern: "foo"},
- ))
-
- if len(sres.Files) != 0 {
- t.Fatalf("got %v, want 0 matches", sres.Files)
- }
-
- if sres.Stats.FilesConsidered > 0 {
- t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
- }
-
- sres = searchForTest(t, b,
- query.NewAnd(
- &query.Substring{Pattern: "needle"},
- &query.Repo{Pattern: "bla"},
- ))
- if len(sres.Files) != 1 {
- t.Fatalf("got %v, want 1 match", sres.Files)
- }
-}
-
-func TestMergeMatches(t *testing.T) {
- content := []byte("blablabla")
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
-
- sres := searchForTest(t, b,
- &query.Substring{Pattern: "bla"})
- if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 match", sres.Files)
- }
-}
-
-func TestRepoURL(t *testing.T) {
- content := []byte("blablabla")
- b := testIndexBuilder(t, &Repository{
- Name: "name",
- URL: "URL",
- CommitURLTemplate: "commit",
- FileURLTemplate: "file-url",
- LineFragmentTemplate: "fragment",
- }, Document{Name: "f1", Content: content})
-
- sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
-
- if sres.RepoURLs["name"] != "file-url" {
- t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
- }
- if sres.LineFragments["name"] != "fragment" {
- t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
- }
-}
-
-func TestRegexpCaseSensitive(t *testing.T) {
- content := []byte("bla\nfunc unmarshalGitiles\n")
- b := testIndexBuilder(t, nil, Document{
- Name: "f1",
- Content: content,
- })
-
- res := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("func.*Gitiles"),
- CaseSensitive: true,
- })
-
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want one match", res.Files)
- }
-}
-
-func TestRegexpCaseFolding(t *testing.T) {
- content := []byte("bla\nfunc unmarshalGitiles\n")
-
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
- res := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("func.*GITILES"),
- CaseSensitive: false,
- })
-
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want one match", res.Files)
- }
-}
-
-func TestCaseRegexp(t *testing.T) {
- content := []byte("BLABLABLA")
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
- res := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("[xb][xl][xa]"),
- CaseSensitive: true,
- })
-
- if len(res.Files) > 0 {
- t.Fatalf("got %v, want no matches", res.Files)
- }
-}
-
-func TestNegativeRegexp(t *testing.T) {
- content := []byte("BLABLABLA needle bla")
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
- res := searchForTest(t, b,
- query.NewAnd(
- &query.Substring{
- Pattern: "needle",
- },
- &query.Not{
- Child: &query.Regexp{
- Regexp: mustParseRE(".cs"),
- },
- }))
-
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want 1 match", res.Files)
- }
-}
-
-func TestSymbolRank(t *testing.T) {
- content := []byte("func bla() blubxxxxx")
- // ----------------01234567890123456789
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- }, Document{
- Name: "f2",
- Content: content,
- Symbols: []DocumentSection{{5, 8}},
- }, Document{
- Name: "f3",
- Content: content,
- })
-
- res := searchForTest(t, b,
- &query.Substring{
- CaseSensitive: false,
- Pattern: "bla",
- })
-
- if len(res.Files) != 3 {
- t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
- }
- if res.Files[0].FileName != "f2" {
- t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
- }
-}
-
-func TestSymbolRankRegexpUTF8(t *testing.T) {
- prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
- content := []byte(prefix +
- "func bla() blub")
- // ------012345678901234
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- }, Document{
- Name: "f2",
- Content: content,
- Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
- }, Document{
- Name: "f3",
- Content: content,
- })
-
- res := searchForTest(t, b,
- &query.Regexp{
- Regexp: mustParseRE("b.a"),
- })
-
- if len(res.Files) != 3 {
- t.Fatalf("got %#v, want 3 files", res.Files)
- }
- if res.Files[0].FileName != "f2" {
- t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
- }
-}
-
-func TestPartialSymbolRank(t *testing.T) {
- content := []byte("func bla() blub")
- // ----------------012345678901234
-
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- Symbols: []DocumentSection{{4, 9}},
- }, Document{
- Name: "f2",
- Content: content,
- Symbols: []DocumentSection{{4, 8}},
- }, Document{
- Name: "f3",
- Content: content,
- Symbols: []DocumentSection{{4, 9}},
- })
-
- res := searchForTest(t, b,
- &query.Substring{
- Pattern: "bla",
- })
-
- if len(res.Files) != 3 {
- t.Fatalf("got %#v, want 3 files", res.Files)
- }
- if res.Files[0].FileName != "f2" {
- t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
- }
-}
-
-func TestNegativeRepo(t *testing.T) {
- content := []byte("bla the needle")
- // ----------------01234567890123
- b := testIndexBuilder(t, &Repository{
- Name: "bla",
- }, Document{Name: "f1", Content: content})
-
- sres := searchForTest(t, b,
- query.NewAnd(
- &query.Substring{Pattern: "needle"},
- &query.Not{Child: &query.Repo{Pattern: "bla"}},
- ))
-
- if len(sres.Files) != 0 {
- t.Fatalf("got %v, want 0 matches", sres.Files)
- }
-}
-
-func TestListRepos(t *testing.T) {
- content := []byte("bla the needle")
- // ----------------01234567890123
- b := testIndexBuilder(t, &Repository{
- Name: "reponame",
- },
- Document{Name: "f1", Content: content},
- Document{Name: "f2", Content: content})
-
- searcher := searcherForTest(t, b)
- q := &query.Repo{Pattern: "epo"}
- res, err := searcher.List(context.Background(), q)
- if err != nil {
- t.Fatalf("List(%v): %v", q, err)
- }
- if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
- t.Fatalf("got %v, want 1 matches", res)
- }
- if got := res.Repos[0].Stats.Shards; got != 1 {
- t.Fatalf("got %d, want 1 shard", got)
- }
- q = &query.Repo{Pattern: "bla"}
- res, err = searcher.List(context.Background(), q)
- if err != nil {
- t.Fatalf("List(%v): %v", q, err)
- }
- if len(res.Repos) != 0 {
- t.Fatalf("got %v, want 0 matches", res)
- }
-}
-
-func TestMetadata(t *testing.T) {
- content := []byte("bla the needle")
- // ----------------01234567890123
- b := testIndexBuilder(t, &Repository{
- Name: "reponame",
- }, Document{Name: "f1", Content: content},
- Document{Name: "f2", Content: content})
-
- var buf bytes.Buffer
- b.Write(&buf)
- f := &memSeeker{buf.Bytes()}
-
- rd, _, err := ReadMetadata(f)
- if err != nil {
- t.Fatalf("ReadMetadata: %v", err)
- }
-
- if got, want := rd.Name, "reponame"; got != want {
- t.Fatalf("got %q want %q", got, want)
- }
-}
-
-func TestOr(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: []byte("needle")},
- Document{Name: "f2", Content: []byte("banana")})
- sres := searchForTest(t, b, query.NewOr(
- &query.Substring{Pattern: "needle"},
- &query.Substring{Pattern: "banana"}))
-
- if len(sres.Files) != 2 {
- t.Fatalf("got %v, want 2 files", sres.Files)
- }
-}
-
-func TestAtomCountScore(t *testing.T) {
- b := testIndexBuilder(t,
- &Repository{
- Branches: []RepositoryBranch{
- {"branches", "v1"},
- {"needle", "v2"},
- },
- },
- Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
- Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
- Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
-
- sres := searchForTest(t, b,
- query.NewOr(
- &query.Substring{Pattern: "needle"},
- &query.Substring{Pattern: "needle", FileName: true},
- &query.Branch{Pattern: "needle"},
- ))
- var got []string
- for _, f := range sres.Files {
- got = append(got, f.FileName)
- }
- want := []string{"needle-file-branch", "needle-file", "f1"}
- if !reflect.DeepEqual(got, want) {
- t.Errorf("got %v, want %v", got, want)
- }
-}
-
-func TestImportantCutoff(t *testing.T) {
- content := []byte("func bla() blub")
- // ----------------012345678901234
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- Symbols: []DocumentSection{{5, 8}},
- }, Document{
- Name: "f2",
- Content: content,
- })
- opts := SearchOptions{
- ShardMaxImportantMatch: 1,
- }
-
- sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}, opts)
- if len(sres.Files) != 1 || sres.Files[0].FileName != "f1" {
- t.Errorf("got %v, wanted 1 match 'f1'", sres.Files)
- }
-}
-
-func TestFrequency(t *testing.T) {
- content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
- // ----------------012345678901234
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- })
-
- sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
- if len(sres.Files) != 0 {
- t.Errorf("got %v, wanted 0 matches", sres.Files)
- }
-}
-
-func TestMatchNewline(t *testing.T) {
- re, err := syntax.Parse("[^a]a", syntax.ClassNL)
- if err != nil {
- t.Fatalf("syntax.Parse: %v", err)
- }
-
- content := []byte("pqr\nalex")
- // ----------------0123 4567
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- })
-
- sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
- if len(sres.Files) != 1 {
- t.Errorf("got %v, wanted 1 matches", sres.Files)
- } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
- t.Errorf("got match line %q, want %q", l, content)
- }
-}
-
-func TestSubRepo(t *testing.T) {
- subRepos := map[string]*Repository{
- "sub": {
- Name: "sub-name",
- LineFragmentTemplate: "sub-line",
- },
- }
-
- content := []byte("pqr\nalex")
- // ----------------0123 4567
-
- b := testIndexBuilder(t, &Repository{
- SubRepoMap: subRepos,
- }, Document{
- Name: "sub/f1",
- Content: content,
- SubRepositoryPath: "sub",
- })
-
- sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
- if len(sres.Files) != 1 {
- t.Fatalf("got %v, wanted 1 matches", sres.Files)
- }
-
- f := sres.Files[0]
- if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
- t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
- }
-
- if sres.LineFragments["sub-name"] != "sub-line" {
- t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
- }
-}
-
-func TestSearchEither(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: []byte("bla needle bla")},
- Document{Name: "needle-file-branch", Content: []byte("bla content")})
-
- sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
- if len(sres.Files) != 2 {
- t.Fatalf("got %v, wanted 2 matches", sres.Files)
- }
-
- sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
- if len(sres.Files) != 1 {
- t.Fatalf("got %v, wanted 1 match", sres.Files)
- }
-
- if got, want := sres.Files[0].FileName, "f1"; got != want {
- t.Errorf("got %q, want %q", got, want)
- }
-}
-
-func TestUnicodeExactMatch(t *testing.T) {
- needle := "néédlÉ"
- content := []byte("blá blá " + needle + " blâ")
- // ----------------01234567 8
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
-
- if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
- t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
- }
-}
-
-func TestUnicodeCoverContent(t *testing.T) {
- needle := "néédlÉ"
- content := []byte("blá blá " + needle + " blâ")
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
-
- if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
- t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
- }
-
- res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
- if len(res.Files) != 1 {
- t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
- }
-
- if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
- t.Errorf("got %d want %d", got, want)
- }
-}
-
-func TestUnicodeNonCoverContent(t *testing.T) {
- needle := "nééáádlÉ"
- //---------01234567
- content := []byte("blá blá " + needle + " blâ")
- // ----------------01234567 8901234 5678
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: content})
-
- res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
- if len(res.Files) != 1 {
- t.Fatalf("got %v, wanted 1 match", res.Files)
- }
-
- if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
- t.Errorf("got %d want %d", got, want)
- }
-}
-
-const kelvinCodePoint = 8490
-
-func TestUnicodeVariableLength(t *testing.T) {
- lower := 'k'
- upper := rune(kelvinCodePoint)
-
- needle := "nee" + string([]rune{lower}) + "eed"
- corpus := []byte("nee" + string([]rune{upper}) + "eed" +
- " ee" + string([]rune{lower}) + "ee" +
- " ee" + string([]rune{upper}) + "ee")
-
- b := testIndexBuilder(t, nil,
- Document{Name: "f1", Content: []byte(corpus)})
-
- res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
- if len(res.Files) != 1 {
- t.Fatalf("got %v, wanted 1 match", res.Files)
- }
-}
-
-func TestUnicodeFileStartOffsets(t *testing.T) {
- unicode := "世界"
- wat := "waaaaaat"
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(unicode),
- },
- Document{
- Name: "f2",
- Content: []byte(wat),
- },
- )
- q := &query.Substring{Pattern: wat, Content: true}
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Fatalf("got %v, wanted 1 match", res.Files)
- }
-}
-
-func TestLongFileUTF8(t *testing.T) {
- needle := "neeedle"
-
- // 6 bytes.
- unicode := "世界"
- content := []byte(strings.Repeat(unicode, 100) + needle)
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(strings.Repeat("a", 50)),
- },
- Document{
- Name: "f2",
- Content: content,
- })
-
- q := &query.Substring{Pattern: needle, Content: true}
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Errorf("got %v, want 1 result", res)
- }
-}
-
-func TestEstimateDocCount(t *testing.T) {
- content := []byte("bla needle bla")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: content},
- Document{Name: "f2", Content: content},
- )
-
- if sres := searchForTest(t, b,
- query.NewAnd(
- &query.Substring{Pattern: "needle"},
- &query.Repo{Pattern: "reponame"},
- ), SearchOptions{
- EstimateDocCount: true,
- }); sres.Stats.ShardFilesConsidered != 2 {
- t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
- }
- if sres := searchForTest(t, b,
- query.NewAnd(
- &query.Substring{Pattern: "needle"},
- &query.Repo{Pattern: "nomatch"},
- ), SearchOptions{
- EstimateDocCount: true,
- }); sres.Stats.ShardFilesConsidered != 0 {
- t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
- }
-}
-
-func TestUTF8CorrectCorpus(t *testing.T) {
- needle := "neeedle"
-
- // 6 bytes.
- unicode := "世界"
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(strings.Repeat(unicode, 100)),
- },
- Document{
- Name: "xxxxxneeedle",
- Content: []byte("hello"),
- })
-
- q := &query.Substring{Pattern: needle, FileName: true}
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Errorf("got %v, want 1 result", res)
- }
-}
-
-func TestBuilderStats(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(strings.Repeat("abcd", 1024)),
- })
- var buf bytes.Buffer
- b.Write(&buf)
-
- if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
- t.Errorf("got %d, want %d", got, want)
- }
-}
-
-func TestIOStats(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(strings.Repeat("abcd", 1024)),
- })
-
- q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
- res := searchForTest(t, b, q)
-
- // 4096 (content) + 2 (overhead: newlines or doc sections)
- if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
- t.Errorf("got content I/O %d, want %d", got, want)
- }
-
- // 1024 entries, each 4 bytes apart. 4 fits into single byte
- // delta encoded.
- if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
- t.Errorf("got index I/O %d, want %d", got, want)
- }
-}
-
-func TestStartLineAnchor(t *testing.T) {
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(
- `hello
-start of middle of line
-`),
- })
-
- q, err := query.Parse("^start")
- if err != nil {
- t.Errorf("parse: %v", err)
- }
-
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Errorf("got %v, want 1 file", res.Files)
- }
-
- q, err = query.Parse("^middle")
- if err != nil {
- t.Errorf("parse: %v", err)
- }
- res = searchForTest(t, b, q)
- if len(res.Files) != 0 {
- t.Errorf("got %v, want 0 files", res.Files)
- }
-}
-
-func TestAndOrUnicode(t *testing.T) {
- q, err := query.Parse("orange.*apple")
- if err != nil {
- t.Errorf("parse: %v", err)
- }
- finalQ := query.NewAnd(q,
- query.NewOr(query.NewAnd(&query.Repo{Pattern: "name"},
- query.NewOr(&query.Branch{Pattern: "master"}))))
-
- b := testIndexBuilder(t, &Repository{
- Name: "name",
- Branches: []RepositoryBranch{{"master", "master-version"}},
- }, Document{
- Name: "f2",
- Content: []byte("orange\u2318apple"),
- // --------------0123456 78901
- Branches: []string{"master"},
- })
-
- res := searchForTest(t, b, finalQ)
- if len(res.Files) != 1 {
- t.Errorf("got %v, want 1 result", res.Files)
- }
-}
-
-func TestAndShort(t *testing.T) {
- content := []byte("bla needle at orange bla")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: content},
- Document{Name: "f2", Content: []byte("xx at xx")},
- Document{Name: "f3", Content: []byte("yy orange xx")},
- )
-
- q := query.NewAnd(&query.Substring{Pattern: "at"},
- &query.Substring{Pattern: "orange"})
-
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
- t.Errorf("got %v, want 1 result", res.Files)
- }
-}
-
-func TestNoCollectRegexpSubstring(t *testing.T) {
- content := []byte("bla final bla\nfoo final, foo")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: content},
- )
-
- q := &query.Regexp{
- Regexp: mustParseRE("final[,.]"),
- }
-
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want 1 result", res.Files)
- }
- if f := res.Files[0]; len(f.LineMatches) != 1 {
- t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
- }
-}
-
-func printLineMatches(ms []LineMatch) string {
- var ss []string
- for _, m := range ms {
- ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
- }
-
- return strings.Join(ss, ", ")
-}
-
-func TestLang(t *testing.T) {
- content := []byte("bla needle bla")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: content},
- Document{Name: "f2", Language: "java", Content: content},
- Document{Name: "f3", Language: "cpp", Content: content},
- )
-
- q := query.NewAnd(&query.Substring{Pattern: "needle"},
- &query.Language{Language: "cpp"})
-
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want 1 result in f3", res.Files)
- }
- f := res.Files[0]
- if f.FileName != "f3" || f.Language != "cpp" {
- t.Fatalf("got %v, want 1 match with language cpp", f)
- }
-}
-
-func TestLangShortcut(t *testing.T) {
- content := []byte("bla needle bla")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f2", Language: "java", Content: content},
- Document{Name: "f3", Language: "cpp", Content: content},
- )
-
- q := query.NewAnd(&query.Substring{Pattern: "needle"},
- &query.Language{Language: "fortran"})
-
- res := searchForTest(t, b, q)
- if len(res.Files) != 0 {
- t.Fatalf("got %v, want 0 results", res.Files)
- }
- if res.Stats.IndexBytesLoaded > 0 {
- t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
- }
-}
-
-func TestNoTextMatchAtoms(t *testing.T) {
- content := []byte("bla needle bla")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: content},
- Document{Name: "f2", Language: "java", Content: content},
- Document{Name: "f3", Language: "cpp", Content: content},
- )
- q := query.NewAnd(&query.Language{Language: "java"})
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want 1 result in f3", res.Files)
- }
-}
-
-func TestNoPositiveAtoms(t *testing.T) {
- content := []byte("bla needle bla")
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: content},
- Document{Name: "f2", Content: content},
- )
-
- q := query.NewAnd(
- &query.Not{Child: &query.Substring{Pattern: "xyz"}},
- &query.Repo{Pattern: "reponame"})
- res := searchForTest(t, b, q)
- if len(res.Files) != 2 {
- t.Fatalf("got %v, want 2 results in f3", res.Files)
- }
-}
-
-func TestSymbolBoundaryStart(t *testing.T) {
- content := []byte("start\nbla bla\nend")
- // ----------------012345 67890123 456
-
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{
- Name: "f1",
- Content: content,
- Symbols: []DocumentSection{{0, 5}, {14, 17}},
- },
- )
- q := &query.Symbol{
- Atom: &query.Substring{Pattern: "start"},
- }
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 line in 1 file", res.Files)
- }
- m := res.Files[0].LineMatches[0].LineFragments[0]
- if m.Offset != 0 {
- t.Fatalf("got offset %d want 0", m.Offset)
- }
-}
-
-func TestSymbolBoundaryEnd(t *testing.T) {
- content := []byte("start\nbla bla\nend")
- // ----------------012345 67890123 456
-
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{
- Name: "f1",
- Content: content,
- Symbols: []DocumentSection{{14, 17}},
- },
- )
- q := &query.Symbol{
- Atom: &query.Substring{Pattern: "end"},
- }
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 line in 1 file", res.Files)
- }
- m := res.Files[0].LineMatches[0].LineFragments[0]
- if m.Offset != 14 {
- t.Fatalf("got offset %d want 0", m.Offset)
- }
-}
-
-func TestSymbolAtom(t *testing.T) {
- content := []byte("bla\nsymblabla\nbla")
- // ----------------0123 456789012
-
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{
- Name: "f1",
- Content: content,
- Symbols: []DocumentSection{{4, 12}},
- },
- )
- q := &query.Symbol{
- Atom: &query.Substring{Pattern: "bla"},
- }
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 line in 1 file", res.Files)
- }
- m := res.Files[0].LineMatches[0].LineFragments[0]
- if m.Offset != 7 || m.MatchLength != 3 {
- t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
- }
-}
-
-func TestSymbolAtomExact(t *testing.T) {
- content := []byte("bla\nsym\nbla\nsym\nasymb")
- // ----------------0123 4567 89012
-
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{
- Name: "f1",
- Content: content,
- Symbols: []DocumentSection{{4, 7}},
- },
- )
- q := &query.Symbol{
- Atom: &query.Substring{Pattern: "sym"},
- }
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
- t.Fatalf("got %v, want 1 line in 1 file", res.Files)
- }
- m := res.Files[0].LineMatches[0].LineFragments[0]
- if m.Offset != 4 {
- t.Fatalf("got offset %d, want 7", m.Offset)
- }
-}
-
-func TestHitIterTerminate(t *testing.T) {
- // contrived input: trigram frequencies forces selecting abc +
- // def for the distance iteration. There is no match, so this
- // will advance the compressedPostingIterator to beyond the
- // end.
- content := []byte("abc bcdbcd cdecde abcabc def efg")
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- },
- )
- searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
-}
-
-func TestDistanceHitIterBailLast(t *testing.T) {
- content := []byte("AST AST AST UASH")
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: content,
- },
- )
- res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
- if len(res.Files) != 0 {
- t.Fatalf("got %v, want no results", res.Files)
- }
-}
-
-func TestDocumentSectionRuneBoundary(t *testing.T) {
- content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- for i, sec := range []DocumentSection{
- {2, 6},
- {3, 7},
- } {
- if err := b.Add(Document{
- Name: "f1",
- Content: []byte(content),
- Symbols: []DocumentSection{sec},
- }); err == nil {
- t.Errorf("%d: Add succeeded", i)
- }
- }
-}
-
-func TestUnicodeQuery(t *testing.T) {
- content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
- b := testIndexBuilder(t, nil,
- Document{
- Name: "f1",
- Content: []byte(content),
- },
- )
-
- q := &query.Substring{Pattern: content}
- res := searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Fatalf("want 1 match, got %v", res.Files)
- }
-
- f := res.Files[0]
- if len(f.LineMatches) != 1 {
- t.Fatalf("want 1 line, got %v", f.LineMatches)
- }
- l := f.LineMatches[0]
-
- if len(l.LineFragments) != 1 {
- t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
- }
- fr := l.LineFragments[0]
- if fr.MatchLength != len(content) {
- t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
- }
-}
-
-func TestSkipInvalidContent(t *testing.T) {
- for _, content := range []string{
- // Binary
- "abc def \x00 abc",
- } {
-
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- if err := b.Add(Document{
- Name: "f1",
- Content: []byte(content),
- }); err != nil {
- t.Fatal(err)
- }
-
- q := &query.Substring{Pattern: "abc def"}
- res := searchForTest(t, b, q)
- if len(res.Files) != 0 {
- t.Fatalf("got %v, want no results", res.Files)
- }
-
- q = &query.Substring{Pattern: "NOT-INDEXED"}
- res = searchForTest(t, b, q)
- if len(res.Files) != 1 {
- t.Fatalf("got %v, want 1 result", res.Files)
- }
- }
-}
-
-func TestCheckText(t *testing.T) {
- for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
- if err := CheckText([]byte(text), 20000); err != nil {
- t.Errorf("CheckText(%q): %v", text, err)
- }
- }
- for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
- if err := CheckText([]byte(text), 15); err == nil {
- t.Errorf("CheckText(%q) succeeded", text)
- }
- }
-}
-
-func TestLineAnd(t *testing.T) {
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
- Document{Name: "f2", Content: []byte("apple orange\nbanana")},
- Document{Name: "f3", Content: []byte("banana grape")},
- )
- pattern := "(apple)(?-s:.)*?(banana)"
- r, _ := syntax.Parse(pattern, syntax.Perl)
-
- q := query.Regexp{
- Regexp: r,
- Content: true,
- }
- res := searchForTest(t, b, &q)
- wantRegexpCount := 1
- if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
- t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
- }
- if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
- t.Errorf("got %v, want 1 result", res.Files)
- }
-}
-
-func TestLineAndFileName(t *testing.T) {
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: []byte("apple banana\ngrape")},
- Document{Name: "f2", Content: []byte("apple banana\norange")},
- Document{Name: "apple banana", Content: []byte("banana grape")},
- )
- pattern := "(apple)(?-s:.)*?(banana)"
- r, _ := syntax.Parse(pattern, syntax.Perl)
-
- q := query.Regexp{
- Regexp: r,
- FileName: true,
- }
- res := searchForTest(t, b, &q)
- wantRegexpCount := 1
- if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
- t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
- }
- if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
- t.Errorf("got %v, want 1 result", res.Files)
- }
-}
-
-func TestMultiLineRegex(t *testing.T) {
- b := testIndexBuilder(t, &Repository{Name: "reponame"},
- Document{Name: "f1", Content: []byte("apple banana\ngrape")},
- Document{Name: "f2", Content: []byte("apple orange")},
- Document{Name: "f3", Content: []byte("grape apple")},
- )
- pattern := "(apple).*?[[:space:]].*?(grape)"
- r, _ := syntax.Parse(pattern, syntax.Perl)
-
- q := query.Regexp{
- Regexp: r,
- }
- res := searchForTest(t, b, &q)
- wantRegexpCount := 2
- if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
- t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
- }
- if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
- t.Errorf("got %v, want 1 result", res.Files)
- }
-}
diff --git a/indexbuilder.go b/indexbuilder.go
deleted file mode 100644
index b5c87aa..0000000
--- a/indexbuilder.go
+++ /dev/null
@@ -1,414 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "bytes"
- "encoding/binary"
- "fmt"
- "hash/crc64"
- "html/template"
- "log"
- "path/filepath"
- "sort"
- "unicode/utf8"
-)
-
-var _ = log.Println
-
-const ngramSize = 3
-
-type searchableString struct {
- data []byte
-}
-
-// Filled by the linker (see build-deploy.sh)
-var Version string
-
-// Store character (unicode codepoint) offset (in bytes) this often.
-const runeOffsetFrequency = 100
-
-type postingsBuilder struct {
- postings map[ngram][]byte
- lastOffsets map[ngram]uint32
-
- // To support UTF-8 searching, we must map back runes to byte
- // offsets. As a first attempt, we sample regularly. The
- // precise offset can be found by walking from the recorded
- // offset to the desired rune.
- runeOffsets []uint32
- runeCount uint32
-
- isPlainASCII bool
-
- endRunes []uint32
- endByte uint32
-}
-
-func newPostingsBuilder() *postingsBuilder {
- return &postingsBuilder{
- postings: map[ngram][]byte{},
- lastOffsets: map[ngram]uint32{},
- isPlainASCII: true,
- }
-}
-
-// Store trigram offsets for the given UTF-8 data. The
-// DocumentSections must correspond to rune boundaries in the UTF-8
-// data.
-func (s *postingsBuilder) newSearchableString(data []byte, byteSections []DocumentSection) (*searchableString, []DocumentSection, error) {
- dest := searchableString{
- data: data,
- }
- var buf [8]byte
- var runeGram [3]rune
-
- var runeIndex uint32
- byteCount := 0
- dataSz := uint32(len(data))
-
- byteSectionBoundaries := make([]uint32, 0, 2*len(byteSections))
- for _, s := range byteSections {
- byteSectionBoundaries = append(byteSectionBoundaries, s.Start, s.End)
- }
- var runeSectionBoundaries []uint32
-
- endRune := s.runeCount
- for ; len(data) > 0; runeIndex++ {
- c, sz := utf8.DecodeRune(data)
- if sz > 1 {
- s.isPlainASCII = false
- }
- data = data[sz:]
-
- runeGram[0], runeGram[1], runeGram[2] = runeGram[1], runeGram[2], c
-
- if idx := s.runeCount + runeIndex; idx%runeOffsetFrequency == 0 {
- s.runeOffsets = append(s.runeOffsets, s.endByte+uint32(byteCount))
- }
- for len(byteSectionBoundaries) > 0 && byteSectionBoundaries[0] == uint32(byteCount) {
- runeSectionBoundaries = append(runeSectionBoundaries,
- endRune+uint32(runeIndex))
- byteSectionBoundaries = byteSectionBoundaries[1:]
- }
-
- byteCount += sz
-
- if runeIndex < 2 {
- continue
- }
-
- ng := runesToNGram(runeGram)
- lastOff := s.lastOffsets[ng]
- newOff := endRune + uint32(runeIndex) - 2
-
- m := binary.PutUvarint(buf[:], uint64(newOff-lastOff))
- s.postings[ng] = append(s.postings[ng], buf[:m]...)
- s.lastOffsets[ng] = newOff
- }
- s.runeCount += runeIndex
-
- for len(byteSectionBoundaries) > 0 && byteSectionBoundaries[0] < uint32(byteCount) {
- return nil, nil, fmt.Errorf("no rune for section boundary at byte %d", byteSectionBoundaries[0])
- }
-
- // Handle symbol definition that ends at file end. This can
- // happen for labels at the end of .bat files.
-
- for len(byteSectionBoundaries) > 0 && byteSectionBoundaries[0] == uint32(byteCount) {
- runeSectionBoundaries = append(runeSectionBoundaries,
- endRune+runeIndex)
- byteSectionBoundaries = byteSectionBoundaries[1:]
- }
- runeSecs := make([]DocumentSection, 0, len(byteSections))
- for i := 0; i < len(runeSectionBoundaries); i += 2 {
- runeSecs = append(runeSecs, DocumentSection{
- Start: runeSectionBoundaries[i],
- End: runeSectionBoundaries[i+1],
- })
- }
-
- s.endRunes = append(s.endRunes, s.runeCount)
- s.endByte += dataSz
- return &dest, runeSecs, nil
-}
-
-// IndexBuilder builds a single index shard.
-type IndexBuilder struct {
- contentStrings []*searchableString
- nameStrings []*searchableString
- docSections [][]DocumentSection
- runeDocSections []DocumentSection
-
- checksums []byte
-
- branchMasks []uint64
- subRepos []uint32
-
- contentPostings *postingsBuilder
- namePostings *postingsBuilder
-
- // root repository
- repo Repository
-
- // name to index.
- subRepoIndices map[string]uint32
-
- // language => language code
- languageMap map[string]byte
-
- // languages codes
- languages []byte
-}
-
-func (d *Repository) verify() error {
- for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} {
- if _, err := template.New("").Parse(t); err != nil {
- return err
- }
- }
- return nil
-}
-
-// ContentSize returns the number of content bytes so far ingested.
-func (b *IndexBuilder) ContentSize() uint32 {
- // Add the name too so we don't skip building index if we have
- // lots of empty files.
- return b.contentPostings.endByte + b.namePostings.endByte
-}
-
-// NewIndexBuilder creates a fresh IndexBuilder. The passed in
-// Repository contains repo metadata, and may be set to nil.
-func NewIndexBuilder(r *Repository) (*IndexBuilder, error) {
- b := &IndexBuilder{
- contentPostings: newPostingsBuilder(),
- namePostings: newPostingsBuilder(),
- languageMap: map[string]byte{},
- }
-
- if r == nil {
- r = &Repository{}
- }
- if err := b.setRepository(r); err != nil {
- return nil, err
- }
- return b, nil
-}
-
-func (b *IndexBuilder) setRepository(desc *Repository) error {
- if len(b.contentStrings) > 0 {
- return fmt.Errorf("setRepository called after adding files")
- }
- if err := desc.verify(); err != nil {
- return err
- }
-
- if len(desc.Branches) > 64 {
- return fmt.Errorf("too many branches")
- }
-
- b.repo = *desc
-
- // copy subrepomap without root
- b.repo.SubRepoMap = map[string]*Repository{}
- for k, v := range desc.SubRepoMap {
- if k != "" {
- b.repo.SubRepoMap[k] = v
- }
- }
-
- b.populateSubRepoIndices()
- return nil
-}
-
-type DocumentSection struct {
- Start, End uint32
-}
-
-// Document holds a document (file) to index.
-type Document struct {
- Name string
- Content []byte
- Branches []string
- SubRepositoryPath string
- Language string
-
- // If set, something is wrong with the file contents, and this
- // is the reason it wasn't indexed.
- SkipReason string
-
- // Document sections for symbols. Offsets should use bytes.
- Symbols []DocumentSection
-}
-
-type docSectionSlice []DocumentSection
-
-func (m docSectionSlice) Len() int { return len(m) }
-func (m docSectionSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
-func (m docSectionSlice) Less(i, j int) bool { return m[i].Start < m[j].Start }
-
-// AddFile is a convenience wrapper for Add
-func (b *IndexBuilder) AddFile(name string, content []byte) error {
- return b.Add(Document{Name: name, Content: content})
-}
-
-// CheckText returns a reason why the given contents are probably not source texts.
-func CheckText(content []byte, maxTrigramCount int) error {
- if len(content) == 0 {
- return nil
- }
-
- if len(content) < ngramSize {
- return fmt.Errorf("file size smaller than %d", ngramSize)
- }
-
- trigrams := map[ngram]struct{}{}
-
- var cur [3]rune
- byteCount := 0
- for len(content) > 0 {
- if content[0] == 0 {
- return fmt.Errorf("binary data at byte offset %d", byteCount)
- }
-
- r, sz := utf8.DecodeRune(content)
- content = content[sz:]
- byteCount += sz
-
- cur[0], cur[1], cur[2] = cur[1], cur[2], r
- if cur[0] == 0 {
- // start of file.
- continue
- }
-
- trigrams[runesToNGram(cur)] = struct{}{}
- if len(trigrams) > maxTrigramCount {
- // probably not text.
- return fmt.Errorf("number of trigrams exceeds %d", maxTrigramCount)
- }
- }
- return nil
-}
-
-func (b *IndexBuilder) populateSubRepoIndices() {
- if b.subRepoIndices != nil {
- return
- }
- paths := []string{""}
- for k := range b.repo.SubRepoMap {
- paths = append(paths, k)
- }
- sort.Strings(paths)
- b.subRepoIndices = make(map[string]uint32, len(paths))
- for i, p := range paths {
- b.subRepoIndices[p] = uint32(i)
- }
-}
-
-const notIndexedMarker = "NOT-INDEXED: "
-
-// Add a file which only occurs in certain branches.
-func (b *IndexBuilder) Add(doc Document) error {
- hasher := crc64.New(crc64.MakeTable(crc64.ISO))
-
- if idx := bytes.IndexByte(doc.Content, 0); idx >= 0 {
- doc.SkipReason = fmt.Sprintf("binary content at byte offset %d", idx)
- doc.Language = "binary"
- }
-
- if doc.SkipReason != "" {
- doc.Content = []byte(notIndexedMarker + doc.SkipReason)
- doc.Symbols = nil
- if doc.Language == "" {
- doc.Language = "skipped"
- }
- }
-
- sort.Sort(docSectionSlice(doc.Symbols))
- var last DocumentSection
- for i, s := range doc.Symbols {
- if i > 0 {
- if last.End > s.Start {
- return fmt.Errorf("sections overlap")
- }
- }
- last = s
- }
- if last.End > uint32(len(doc.Content)) {
- return fmt.Errorf("section goes past end of content")
- }
-
- if doc.SubRepositoryPath != "" {
- rel, err := filepath.Rel(doc.SubRepositoryPath, doc.Name)
- if err != nil || rel == doc.Name {
- return fmt.Errorf("path %q must start subrepo path %q", doc.Name, doc.SubRepositoryPath)
- }
- }
- docStr, runeSecs, err := b.contentPostings.newSearchableString(doc.Content, doc.Symbols)
- if err != nil {
- return err
- }
- nameStr, _, err := b.namePostings.newSearchableString([]byte(doc.Name), nil)
- if err != nil {
- return err
- }
-
- subRepoIdx, ok := b.subRepoIndices[doc.SubRepositoryPath]
- if !ok {
- return fmt.Errorf("unknown subrepo path %q", doc.SubRepositoryPath)
- }
-
- var mask uint64
- for _, br := range doc.Branches {
- m := b.branchMask(br)
- if m == 0 {
- return fmt.Errorf("no branch found for %s", br)
- }
- mask |= m
- }
-
- b.subRepos = append(b.subRepos, subRepoIdx)
-
- hasher.Write(doc.Content)
-
- b.contentStrings = append(b.contentStrings, docStr)
- b.runeDocSections = append(b.runeDocSections, runeSecs...)
-
- b.nameStrings = append(b.nameStrings, nameStr)
- b.docSections = append(b.docSections, doc.Symbols)
- b.branchMasks = append(b.branchMasks, mask)
- b.checksums = append(b.checksums, hasher.Sum(nil)...)
-
- langCode, ok := b.languageMap[doc.Language]
- if !ok {
- if len(b.languageMap) >= 255 {
- return fmt.Errorf("too many languages")
- }
- langCode = byte(len(b.languageMap))
- b.languageMap[doc.Language] = langCode
- }
- b.languages = append(b.languages, langCode)
-
- return nil
-}
-
-func (b *IndexBuilder) branchMask(br string) uint64 {
- for i, b := range b.repo.Branches {
- if b.Name == br {
- return uint64(1) << uint(i)
- }
- }
- return 0
-}
diff --git a/indexdata.go b/indexdata.go
deleted file mode 100644
index 4262116..0000000
--- a/indexdata.go
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "fmt"
- "hash/crc64"
- "unicode/utf8"
-
- "github.com/google/zoekt/query"
-)
-
-// indexData holds the pattern-independent data that we have to have
-// in memory to search. Most of the memory is taken up by the ngram =>
-// offset index.
-type indexData struct {
- file IndexFile
-
- ngrams map[ngram]simpleSection
-
- newlinesStart uint32
- newlinesIndex []uint32
-
- docSectionsStart uint32
- docSectionsIndex []uint32
-
- runeDocSections []DocumentSection
-
- // rune offset=>byte offset mapping, relative to the start of the content corpus
- runeOffsets []uint32
-
- // offsets of file contents; includes end of last file
- boundariesStart uint32
- boundaries []uint32
-
- // rune offsets for the file content boundaries
- fileEndRunes []uint32
-
- fileNameContent []byte
- fileNameIndex []uint32
- fileNameNgrams map[ngram][]uint32
-
- // rune offset=>byte offset mapping, relative to the start of the filename corpus
- fileNameRuneOffsets []uint32
-
- // rune offsets for the file name boundaries
- fileNameEndRunes []uint32
-
- fileBranchMasks []uint64
-
- // mask (power of 2) => name
- branchNames map[uint]string
-
- // name => mask (power of 2)
- branchIDs map[string]uint
-
- metaData IndexMetadata
- repoMetaData Repository
-
- subRepos []uint32
- subRepoPaths []string
-
- // Checksums for all the files, at 8-byte intervals
- checksums []byte
-
- // languages for all the files.
- languages []byte
-
- // inverse of LanguageMap in metaData
- languageMap map[byte]string
-
- repoListEntry RepoListEntry
-}
-
-func (d *indexData) getChecksum(idx uint32) []byte {
- start := crc64.Size * idx
- return d.checksums[start : start+crc64.Size]
-}
-
-func (d *indexData) calculateStats() {
- var last uint32
- if len(d.boundaries) > 0 {
- last += d.boundaries[len(d.boundaries)-1]
- }
-
- lastFN := last
- if len(d.fileNameIndex) > 0 {
- lastFN = d.fileNameIndex[len(d.fileNameIndex)-1]
- }
-
- stats := RepoStats{
- IndexBytes: int64(d.memoryUse()),
- ContentBytes: int64(int(last) + int(lastFN)),
- Documents: len(d.newlinesIndex) - 1,
- Shards: 1,
- }
- d.repoListEntry = RepoListEntry{
- Repository: d.repoMetaData,
- IndexMetadata: d.metaData,
- Stats: stats,
- }
-}
-
-func (d *indexData) String() string {
- return fmt.Sprintf("shard(%s)", d.file.Name())
-}
-
-func (d *indexData) memoryUse() int {
- sz := 0
- for _, a := range [][]uint32{
- d.newlinesIndex, d.docSectionsIndex,
- d.boundaries, d.fileNameIndex,
- d.runeOffsets, d.fileNameRuneOffsets,
- d.fileEndRunes, d.fileNameEndRunes,
- } {
- sz += 4 * len(a)
- }
- sz += 8 * len(d.runeDocSections)
- sz += 8 * len(d.fileBranchMasks)
- sz += 12 * len(d.ngrams)
- for _, v := range d.fileNameNgrams {
- sz += 4*len(v) + 4
- }
- return sz
-}
-
-const maxUInt32 = 0xffffffff
-
-func firstMinarg(xs []uint32) uint32 {
- m := uint32(maxUInt32)
- j := len(xs)
- for i, x := range xs {
- if x < m {
- m = x
- j = i
- }
- }
- return uint32(j)
-}
-
-func lastMinarg(xs []uint32) uint32 {
- m := uint32(maxUInt32)
- j := len(xs)
- for i, x := range xs {
- if x <= m {
- m = x
- j = i
- }
- }
- return uint32(j)
-}
-
-func (data *indexData) ngramFrequency(ng ngram, filename bool) uint32 {
- if filename {
- return uint32(len(data.fileNameNgrams[ng]))
- }
-
- return data.ngrams[ng].sz
-}
-
-type ngramIterationResults struct {
- matchIterator
-
- caseSensitive bool
- fileName bool
- substrBytes []byte
- substrLowered []byte
-}
-
-func (r *ngramIterationResults) String() string {
- return fmt.Sprintf("wrapper(%v)", r.matchIterator)
-}
-
-func (r *ngramIterationResults) candidates() []*candidateMatch {
- cs := r.matchIterator.candidates()
- for _, c := range cs {
- c.caseSensitive = r.caseSensitive
- c.fileName = r.fileName
- c.substrBytes = r.substrBytes
- c.substrLowered = r.substrLowered
- }
- return cs
-}
-
-func (d *indexData) iterateNgrams(query *query.Substring) (*ngramIterationResults, error) {
- str := query.Pattern
-
- // Find the 2 least common ngrams from the string.
- ngramOffs := splitNGrams([]byte(query.Pattern))
- frequencies := make([]uint32, 0, len(ngramOffs))
- for _, o := range ngramOffs {
- var freq uint32
- if query.CaseSensitive {
- freq = d.ngramFrequency(o.ngram, query.FileName)
- } else {
- for _, v := range generateCaseNgrams(o.ngram) {
- freq += d.ngramFrequency(v, query.FileName)
- }
- }
-
- if freq == 0 {
- return &ngramIterationResults{
- matchIterator: &noMatchTree{
- Why: "freq=0",
- },
- }, nil
- }
-
- frequencies = append(frequencies, freq)
- }
- firstI := firstMinarg(frequencies)
- frequencies[firstI] = maxUInt32
- lastI := lastMinarg(frequencies)
- if firstI > lastI {
- lastI, firstI = firstI, lastI
- }
-
- firstNG := ngramOffs[firstI].ngram
- lastNG := ngramOffs[lastI].ngram
- iter := &ngramDocIterator{
- leftPad: firstI,
- rightPad: uint32(utf8.RuneCountInString(str)) - firstI,
- }
- if query.FileName {
- iter.ends = d.fileNameEndRunes
- } else {
- iter.ends = d.fileEndRunes
- }
-
- if firstI != lastI {
- i, err := d.newDistanceTrigramIter(firstNG, lastNG, lastI-firstI, query.CaseSensitive, query.FileName)
- if err != nil {
- return nil, err
- }
-
- iter.iter = i
- } else {
- hitIter, err := d.trigramHitIterator(lastNG, query.CaseSensitive, query.FileName)
- if err != nil {
- return nil, err
- }
- iter.iter = hitIter
- }
-
- patBytes := []byte(query.Pattern)
- lowerPatBytes := toLower(patBytes)
-
- return &ngramIterationResults{
- matchIterator: iter,
- caseSensitive: query.CaseSensitive,
- fileName: query.FileName,
- substrBytes: patBytes,
- substrLowered: lowerPatBytes,
- }, nil
-}
-
-func (d *indexData) fileName(i uint32) []byte {
- return d.fileNameContent[d.fileNameIndex[i]:d.fileNameIndex[i+1]]
-}
-
-func (s *indexData) Close() {
- s.file.Close()
-}
diff --git a/indexfile_other.go b/indexfile_other.go
deleted file mode 100644
index 1c03fe8..0000000
--- a/indexfile_other.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build !linux,!darwin
-
-package zoekt
-
-import (
- "fmt"
- "os"
-)
-
-// NewIndexFile returns a new index file. The index file takes
-// ownership of the passed in file, and may close it.
-func NewIndexFile(f *os.File) (IndexFile, error) {
- return &indexFileFromOS{f}, nil
-}
-
-type indexFileFromOS struct {
- f *os.File
-}
-
-func (f *indexFileFromOS) Read(off, sz uint32) ([]byte, error) {
- r := make([]byte, sz)
- _, err := f.f.ReadAt(r, int64(off))
- return r, err
-}
-
-func (f indexFileFromOS) Size() (uint32, error) {
- fi, err := f.f.Stat()
- if err != nil {
- return 0, err
- }
-
- sz := fi.Size()
-
- if sz >= maxUInt32 {
- return 0, fmt.Errorf("overflow")
- }
-
- return uint32(sz), nil
-}
-
-func (f indexFileFromOS) Close() {
- f.f.Close()
-}
-
-func (f indexFileFromOS) Name() string {
- return f.f.Name()
-}
diff --git a/indexfile_unix.go b/indexfile_unix.go
deleted file mode 100644
index d7e9402..0000000
--- a/indexfile_unix.go
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build linux darwin
-
-package zoekt
-
-import (
- "fmt"
- "os"
- "syscall"
-)
-
-type mmapedIndexFile struct {
- name string
- size uint32
- data []byte
-}
-
-func (f *mmapedIndexFile) Read(off, sz uint32) ([]byte, error) {
- if off+sz > uint32(len(f.data)) {
- return nil, fmt.Errorf("out of bounds: %d, len %d", off+sz, len(f.data))
- }
- return f.data[off : off+sz], nil
-}
-
-func (f *mmapedIndexFile) Name() string {
- return f.name
-}
-
-func (f *mmapedIndexFile) Size() (uint32, error) {
- return f.size, nil
-}
-
-func (f *mmapedIndexFile) Close() {
- syscall.Munmap(f.data)
-}
-
-// NewIndexFile returns a new index file. The index file takes
-// ownership of the passed in file, and may close it.
-func NewIndexFile(f *os.File) (IndexFile, error) {
- defer f.Close()
-
- fi, err := f.Stat()
- if err != nil {
- return nil, err
- }
-
- sz := fi.Size()
- if sz >= maxUInt32 {
- return nil, fmt.Errorf("file %s too large: %d", f.Name(), sz)
- }
- r := &mmapedIndexFile{
- name: f.Name(),
- size: uint32(sz),
- }
-
- rounded := (r.size + 4095) &^ 4095
- r.data, err = syscall.Mmap(int(f.Fd()), 0, int(rounded), syscall.PROT_READ, syscall.MAP_SHARED)
- if err != nil {
- return nil, err
- }
-
- return r, err
-}
diff --git a/matchiter.go b/matchiter.go
deleted file mode 100644
index c76e1b8..0000000
--- a/matchiter.go
+++ /dev/null
@@ -1,280 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "bytes"
- "fmt"
- "sort"
- "unicode/utf8"
-
- "github.com/google/zoekt/query"
-)
-
-// candidateMatch is a candidate match for a substring.
-type candidateMatch struct {
- caseSensitive bool
- fileName bool
-
- substrBytes []byte
- substrLowered []byte
-
- file uint32
-
- // Offsets are relative to the start of the filename or file contents.
- runeOffset uint32
- byteOffset uint32
- byteMatchSz uint32
-}
-
-// Matches content against the substring, and populates byteMatchSz on success
-func (m *candidateMatch) matchContent(content []byte) bool {
- if m.caseSensitive {
- comp := bytes.Equal(m.substrBytes, content[m.byteOffset:m.byteOffset+uint32(len(m.substrBytes))])
-
- m.byteMatchSz = uint32(len(m.substrBytes))
- return comp
- } else {
- // It is tempting to try a simple ASCII based
- // comparison if possible, but we need more
- // information. Simple ASCII chars have unicode upper
- // case variants (the ASCII 'k' has the Kelvin symbol
- // as upper case variant). We can only degrade to
- // ASCII if we are sure that both the corpus and the
- // query is ASCII only
- sz, ok := caseFoldingEqualsRunes(m.substrLowered, content[m.byteOffset:])
- m.byteMatchSz = uint32(sz)
- return ok
- }
-}
-
-// line returns the line holding the match. If the match starts with
-// the newline ending line M, we return M. The line is characterized
-// by its linenumber (base-1, byte index of line start, byte index of
-// line end). The line end is the index of a newline, or the filesize
-// (if matching the last line of the file.)
-func (m *candidateMatch) line(newlines []uint32, fileSize uint32) (lineNum, lineStart, lineEnd int) {
- idx := sort.Search(len(newlines), func(n int) bool {
- return newlines[n] >= m.byteOffset
- })
-
- end := int(fileSize)
- if idx < len(newlines) {
- end = int(newlines[idx])
- }
-
- start := 0
- if idx > 0 {
- start = int(newlines[idx-1] + 1)
- }
-
- return idx + 1, start, end
-}
-
-// matchIterator is a docIterator that produces candidateMatches for a given document
-type matchIterator interface {
- docIterator
-
- candidates() []*candidateMatch
- updateStats(*Stats)
-}
-
-// noMatchTree is both matchIterator and matchTree that matches nothing.
-type noMatchTree struct {
- Why string
-}
-
-func (t *noMatchTree) String() string {
- return fmt.Sprintf("not(%q)", t.Why)
-}
-
-func (t *noMatchTree) candidates() []*candidateMatch {
- return nil
-}
-
-func (t *noMatchTree) nextDoc() uint32 {
- return maxUInt32
-}
-
-func (t *noMatchTree) prepare(uint32) {}
-
-func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- return false, true
-}
-
-func (t *noMatchTree) updateStats(*Stats) {}
-
-func (m *candidateMatch) String() string {
- return fmt.Sprintf("%d:%d", m.file, m.runeOffset)
-}
-
-type ngramDocIterator struct {
- leftPad uint32
- rightPad uint32
-
- iter hitIterator
- ends []uint32
-
- // mutable
- fileIdx uint32
- matchCount int
-}
-
-// nextFileIndex returns the smallest index j of ends such that
-// ends[j] > offset, assuming ends[f] <= offset.
-func nextFileIndex(offset, f uint32, ends []uint32) uint32 {
- d := uint32(1)
- for f < uint32(len(ends)) && ends[f] <= offset {
- if f+d < uint32(len(ends)) && ends[f+d] <= offset {
- f += d
- d *= 2
- } else if d > 1 {
- d = d/4 + 1
- } else {
- f++
- }
- }
- return f
-}
-
-func (i *ngramDocIterator) nextDoc() uint32 {
- i.fileIdx = nextFileIndex(i.iter.first(), i.fileIdx, i.ends)
- if i.fileIdx >= uint32(len(i.ends)) {
- return maxUInt32
- }
- return i.fileIdx
-}
-
-func (i *ngramDocIterator) String() string {
- return fmt.Sprintf("ngram(L=%d,R=%d,%v)", i.leftPad, i.rightPad, i.iter)
-}
-
-func (i *ngramDocIterator) prepare(nextDoc uint32) {
- var start uint32
- if nextDoc > 0 {
- start = i.ends[nextDoc-1]
- }
- if start > 0 {
- i.iter.next(start + i.leftPad - 1)
- }
- i.fileIdx = nextDoc
-}
-
-func (i *ngramDocIterator) updateStats(s *Stats) {
- i.iter.updateStats(s)
- s.NgramMatches += i.matchCount
-}
-
-func (i *ngramDocIterator) candidates() []*candidateMatch {
- if i.fileIdx >= uint32(len(i.ends)) {
- return nil
- }
-
- var fileStart uint32
- if i.fileIdx > 0 {
- fileStart = i.ends[i.fileIdx-1]
- }
- fileEnd := i.ends[i.fileIdx]
-
- var candidates []*candidateMatch
- for {
- p1 := i.iter.first()
- if p1 == maxUInt32 || p1 >= i.ends[i.fileIdx] {
- break
- }
- i.iter.next(p1)
-
- if p1 < i.leftPad+fileStart || p1+i.rightPad > fileEnd {
- continue
- }
-
- candidates = append(candidates, &candidateMatch{
- file: uint32(i.fileIdx),
- runeOffset: p1 - fileStart - i.leftPad,
- })
- }
- i.matchCount += len(candidates)
- return candidates
-}
-
-type trimBySectionMatchIter struct {
- matchIterator
-
- patternSize uint32
- fileEndRunes []uint32
-
- // mutable
- doc uint32
- sections []DocumentSection
-}
-
-func (i *trimBySectionMatchIter) String() string {
- return fmt.Sprintf("trimSection(sz=%d, %v)", i.patternSize, i.matchIterator)
-}
-
-func (d *indexData) newTrimByDocSectionIter(q *query.Substring, iter matchIterator) *trimBySectionMatchIter {
- return &trimBySectionMatchIter{
- matchIterator: iter,
- patternSize: uint32(utf8.RuneCountInString(q.Pattern)),
- fileEndRunes: d.fileEndRunes,
- sections: d.runeDocSections,
- }
-}
-
-func (i *trimBySectionMatchIter) prepare(doc uint32) {
- i.matchIterator.prepare(doc)
- i.doc = doc
-
- var fileStart uint32
- if doc > 0 {
- fileStart = i.fileEndRunes[doc-1]
- }
-
- for len(i.sections) > 0 && i.sections[0].Start < fileStart {
- i.sections = i.sections[1:]
- }
-}
-
-func (i *trimBySectionMatchIter) candidates() []*candidateMatch {
- var fileStart uint32
- if i.doc > 0 {
- fileStart = i.fileEndRunes[i.doc-1]
- }
-
- ms := i.matchIterator.candidates()
- trimmed := ms[:0]
- for len(i.sections) > 0 && len(ms) > 0 {
- start := fileStart + ms[0].runeOffset
- end := start + i.patternSize
- if start >= i.sections[0].End {
- i.sections = i.sections[1:]
- continue
- }
-
- if start < i.sections[0].Start {
- ms = ms[1:]
- continue
- }
-
- // here we have: sec.Start <= start < sec.End
- if end <= i.sections[0].End {
- // complete match falls inside section.
- trimmed = append(trimmed, ms[0])
- }
-
- ms = ms[1:]
- }
- return trimmed
-}
diff --git a/matchtree.go b/matchtree.go
deleted file mode 100644
index 66d9372..0000000
--- a/matchtree.go
+++ /dev/null
@@ -1,743 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "fmt"
- "log"
- "regexp"
- "strings"
- "unicode/utf8"
-
- "github.com/google/zoekt/query"
-)
-
-// A docIterator iterates over documents in order.
-type docIterator interface {
- // provide the next document where we can may find something
- // interesting.
- nextDoc() uint32
-
- // clears any per-document state of the docIterator, and
- // prepares for evaluating the given doc. The argument is
- // strictly increasing over time.
- prepare(nextDoc uint32)
-}
-
-const (
- costConst = 0
- costMemory = 1
- costContent = 2
- costRegexp = 3
-)
-
-const (
- costMin = costConst
- costMax = costRegexp
-)
-
-// An expression tree coupled with matches. The matchtree has two
-// functions:
-//
-// * it implements boolean combinations (and, or, not)
-//
-// * it implements shortcuts, where we skip documents (for example: if
-// there are no trigram matches, we can be sure there are no substring
-// matches). The matchtree iterates over the documents as they are
-// ordered in the shard.
-//
-// The general process for a given (shard, query) is
-//
-// - construct matchTree for the query
-//
-// - find all different leaf matchTrees (substring, regexp, etc.)
-//
-// in a loop:
-//
-// - find next doc to process using nextDoc
-//
-// - evaluate atoms (leaf expressions that match text)
-//
-// - evaluate the tree using matches(), storing the result in map.
-//
-// - if the complete tree returns (matches() == true) for the document,
-// collect all text matches by looking at leaf matchTrees
-//
-type matchTree interface {
- docIterator
-
- // returns whether this matches, and if we are sure.
- matches(cp *contentProvider, cost int, known map[matchTree]bool) (match bool, sure bool)
-}
-
-// docMatchTree iterates over documents for which predicate(docID) returns true.
-type docMatchTree struct {
- // the number of documents in a shard.
- numDocs uint32
-
- predicate func(docID uint32) bool
-
- // provides additional information about the reason why the docMatchTree was
- // created.
- reason string
-
- // mutable
- firstDone bool
- docID uint32
-}
-
-type bruteForceMatchTree struct {
- // mutable
- firstDone bool
- docID uint32
-}
-
-type andLineMatchTree struct {
- andMatchTree
-}
-
-type andMatchTree struct {
- children []matchTree
-}
-
-type orMatchTree struct {
- children []matchTree
-}
-
-type notMatchTree struct {
- child matchTree
-}
-
-// Don't visit this subtree for collecting matches.
-type noVisitMatchTree struct {
- matchTree
-}
-
-type regexpMatchTree struct {
- regexp *regexp.Regexp
-
- fileName bool
-
- // mutable
- reEvaluated bool
- found []*candidateMatch
-
- // nextDoc, prepare.
- bruteForceMatchTree
-}
-
-type substrMatchTree struct {
- matchIterator
-
- query *query.Substring
- caseSensitive bool
- fileName bool
-
- // mutable
- current []*candidateMatch
- contEvaluated bool
-}
-
-type branchQueryMatchTree struct {
- fileMasks []uint64
- mask uint64
-
- // mutable
- firstDone bool
- docID uint32
-}
-
-// all prepare methods
-
-func (t *bruteForceMatchTree) prepare(doc uint32) {
- t.docID = doc
- t.firstDone = true
-}
-
-func (t *docMatchTree) prepare(doc uint32) {
- t.docID = doc
- t.firstDone = true
-}
-
-func (t *andMatchTree) prepare(doc uint32) {
- for _, c := range t.children {
- c.prepare(doc)
- }
-}
-
-func (t *regexpMatchTree) prepare(doc uint32) {
- t.found = t.found[:0]
- t.reEvaluated = false
- t.bruteForceMatchTree.prepare(doc)
-}
-
-func (t *orMatchTree) prepare(doc uint32) {
- for _, c := range t.children {
- c.prepare(doc)
- }
-}
-
-func (t *notMatchTree) prepare(doc uint32) {
- t.child.prepare(doc)
-}
-
-func (t *substrMatchTree) prepare(nextDoc uint32) {
- t.matchIterator.prepare(nextDoc)
- t.current = t.matchIterator.candidates()
- t.contEvaluated = false
-}
-
-func (t *branchQueryMatchTree) prepare(doc uint32) {
- t.firstDone = true
- t.docID = doc
-}
-
-// nextDoc
-
-func (t *docMatchTree) nextDoc() uint32 {
- var start uint32
- if t.firstDone {
- start = t.docID + 1
- }
- for i := start; i < t.numDocs; i++ {
- if t.predicate(i) {
- return i
- }
- }
- return maxUInt32
-}
-
-func (t *bruteForceMatchTree) nextDoc() uint32 {
- if !t.firstDone {
- return 0
- }
- return t.docID + 1
-}
-
-func (t *andMatchTree) nextDoc() uint32 {
- var max uint32
- for _, c := range t.children {
- m := c.nextDoc()
- if m > max {
- max = m
- }
- }
- return max
-}
-
-func (t *orMatchTree) nextDoc() uint32 {
- min := uint32(maxUInt32)
- for _, c := range t.children {
- m := c.nextDoc()
- if m < min {
- min = m
- }
- }
- return min
-}
-
-func (t *notMatchTree) nextDoc() uint32 {
- return 0
-}
-
-func (t *branchQueryMatchTree) nextDoc() uint32 {
- var start uint32
- if t.firstDone {
- start = t.docID + 1
- }
-
- for i := start; i < uint32(len(t.fileMasks)); i++ {
- if (t.mask & t.fileMasks[i]) != 0 {
- return i
- }
- }
- return maxUInt32
-}
-
-// all String methods
-
-func (t *bruteForceMatchTree) String() string {
- return "all"
-}
-
-func (t *docMatchTree) String() string {
- return fmt.Sprintf("doc(%s)", t.reason)
-}
-
-func (t *andMatchTree) String() string {
- return fmt.Sprintf("and%v", t.children)
-}
-
-func (t *regexpMatchTree) String() string {
- return fmt.Sprintf("re(%s)", t.regexp)
-}
-
-func (t *orMatchTree) String() string {
- return fmt.Sprintf("or%v", t.children)
-}
-
-func (t *notMatchTree) String() string {
- return fmt.Sprintf("not(%v)", t.child)
-}
-
-func (t *substrMatchTree) String() string {
- f := ""
- if t.fileName {
- f = "f"
- }
-
- return fmt.Sprintf("%ssubstr(%q, %v, %v)", f, t.query.Pattern, t.current, t.matchIterator)
-}
-
-func (t *branchQueryMatchTree) String() string {
- return fmt.Sprintf("branch(%x)", t.mask)
-}
-
-// visitMatches visits all atoms in matchTree. Note: This visits
-// noVisitMatchTree. For collecting matches use visitMatches.
-func visitMatchTree(t matchTree, f func(matchTree)) {
- switch s := t.(type) {
- case *andMatchTree:
- for _, ch := range s.children {
- visitMatchTree(ch, f)
- }
- case *orMatchTree:
- for _, ch := range s.children {
- visitMatchTree(ch, f)
- }
- case *andLineMatchTree:
- visitMatchTree(&s.andMatchTree, f)
- case *noVisitMatchTree:
- visitMatchTree(s.matchTree, f)
- case *notMatchTree:
- visitMatchTree(s.child, f)
- default:
- f(t)
- }
-}
-
-// visitMatches visits all atoms which can contribute matches. Note: This
-// skips noVisitMatchTree.
-func visitMatches(t matchTree, known map[matchTree]bool, f func(matchTree)) {
- switch s := t.(type) {
- case *andMatchTree:
- for _, ch := range s.children {
- if known[ch] {
- visitMatches(ch, known, f)
- }
- }
- case *andLineMatchTree:
- visitMatches(&s.andMatchTree, known, f)
- case *orMatchTree:
- for _, ch := range s.children {
- if known[ch] {
- visitMatches(ch, known, f)
- }
- }
- case *notMatchTree:
- case *noVisitMatchTree:
- // don't collect into negative trees.
- default:
- f(s)
- }
-}
-
-// all matches() methods.
-
-func (t *docMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- return t.predicate(cp.idx), true
-}
-
-func (t *bruteForceMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- return true, true
-}
-
-// andLineMatchTree is a performance optimization of andMatchTree. For content
-// searches we don't want to run the regex engine if there is no line that
-// contains matches from all terms.
-func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- matches, sure := t.andMatchTree.matches(cp, cost, known)
- if !(sure && matches) {
- return matches, sure
- }
-
- // find child with fewest candidates
- min := maxUInt32
- fewestChildren := 0
- for ix, child := range t.children {
- v, ok := child.(*substrMatchTree)
- // make sure we are running a content search and that all candidates are a
- // substrMatchTree
- if !ok || v.fileName {
- return matches, sure
- }
- if len(v.current) < min {
- min = len(v.current)
- fewestChildren = ix
- }
- }
-
- type lineRange struct {
- start int
- end int
- }
- lines := make([]lineRange, 0, len(t.children[fewestChildren].(*substrMatchTree).current))
- prev := -1
- for _, candidate := range t.children[fewestChildren].(*substrMatchTree).current {
- line, byteStart, byteEnd := candidate.line(cp.newlines(), cp.fileSize)
- if line == prev {
- continue
- }
- prev = line
- lines = append(lines, lineRange{byteStart, byteEnd})
- }
-
- // children keeps track of the children's candidates we have already seen.
- children := make([][]*candidateMatch, 0, len(t.children)-1)
- for j, child := range t.children {
- if j == fewestChildren {
- continue
- }
- children = append(children, child.(*substrMatchTree).current)
- }
-
-nextLine:
- for i := 0; i < len(lines); i++ {
- hits := 1
- nextChild:
- for j := range children {
- nextCandidate:
- for len(children[j]) > 0 {
- candidate := children[j][0]
- bo := int(cp.findOffset(false, candidate.runeOffset))
- if bo < lines[i].start {
- children[j] = children[j][1:]
- continue nextCandidate
- }
- if bo <= lines[i].end {
- hits++
- continue nextChild
- }
- // move the `lines` iterator forward until bo <= line.end
- for i < len(lines) && bo > lines[i].end {
- i++
- }
- i--
- continue nextLine
- }
- }
- // return early once we found any line that contains matches from all children
- if hits == len(t.children) {
- return matches, true
- }
- }
- return false, true
-}
-
-func (t *andMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- sure := true
-
- for _, ch := range t.children {
- v, ok := evalMatchTree(cp, cost, known, ch)
- if ok && !v {
- return false, true
- }
- if !ok {
- sure = false
- }
- }
-
- return true, sure
-}
-
-func (t *orMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- matches := false
- sure := true
- for _, ch := range t.children {
- v, ok := evalMatchTree(cp, cost, known, ch)
- if ok {
- // we could short-circuit, but we want to use
- // the other possibilities as a ranking
- // signal.
- matches = matches || v
- } else {
- sure = false
- }
- }
- return matches, sure
-}
-
-func (t *branchQueryMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- return t.fileMasks[t.docID]&t.mask != 0, true
-}
-
-func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- if t.reEvaluated {
- return len(t.found) > 0, true
- }
-
- if cost < costRegexp {
- return false, false
- }
-
- cp.stats.RegexpsConsidered++
- idxs := t.regexp.FindAllIndex(cp.data(t.fileName), -1)
- found := t.found[:0]
- for _, idx := range idxs {
- cm := &candidateMatch{
- byteOffset: uint32(idx[0]),
- byteMatchSz: uint32(idx[1] - idx[0]),
- fileName: t.fileName,
- }
-
- found = append(found, cm)
- }
- t.found = found
- t.reEvaluated = true
-
- return len(t.found) > 0, true
-}
-
-// breakMatchesOnNewlines returns matches resulting from breaking each element
-// of cms on newlines within text.
-func breakMatchesOnNewlines(cms []*candidateMatch, text []byte) []*candidateMatch {
- var lineCMs []*candidateMatch
- for _, cm := range cms {
- lineCMs = append(lineCMs, breakOnNewlines(cm, text)...)
- }
- return lineCMs
-}
-
-// breakOnNewlines returns matches resulting from breaking cm on newlines
-// within text.
-func breakOnNewlines(cm *candidateMatch, text []byte) []*candidateMatch {
- var cms []*candidateMatch
- addMe := &candidateMatch{}
- *addMe = *cm
- for i := uint32(cm.byteOffset); i < cm.byteOffset+cm.byteMatchSz; i++ {
- if text[i] == '\n' {
- addMe.byteMatchSz = i - addMe.byteOffset
- if addMe.byteMatchSz != 0 {
- cms = append(cms, addMe)
- }
-
- addMe = &candidateMatch{}
- *addMe = *cm
- addMe.byteOffset = i + 1
- }
- }
- addMe.byteMatchSz = cm.byteOffset + cm.byteMatchSz - addMe.byteOffset
- if addMe.byteMatchSz != 0 {
- cms = append(cms, addMe)
- }
- return cms
-}
-
-func evalMatchTree(cp *contentProvider, cost int, known map[matchTree]bool, mt matchTree) (bool, bool) {
- if v, ok := known[mt]; ok {
- return v, true
- }
-
- v, ok := mt.matches(cp, cost, known)
- if ok {
- known[mt] = v
- }
-
- return v, ok
-}
-
-func (t *notMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- v, ok := evalMatchTree(cp, cost, known, t.child)
- return !v, ok
-}
-
-func (t *substrMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
- if t.contEvaluated {
- return len(t.current) > 0, true
- }
-
- if len(t.current) == 0 {
- return false, true
- }
-
- if t.fileName && cost < costMemory {
- return false, false
- }
-
- if !t.fileName && cost < costContent {
- return false, false
- }
-
- pruned := t.current[:0]
- for _, m := range t.current {
- if m.byteOffset == 0 && m.runeOffset > 0 {
- m.byteOffset = cp.findOffset(m.fileName, m.runeOffset)
- }
- if m.matchContent(cp.data(m.fileName)) {
- pruned = append(pruned, m)
- }
- }
- t.current = pruned
- t.contEvaluated = true
-
- return len(t.current) > 0, true
-}
-
-func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
- if q == nil {
- return nil, fmt.Errorf("got nil (sub)query")
- }
- switch s := q.(type) {
- case *query.Regexp:
- // RegexpToMatchTreeRecursive tries to distill a matchTree that matches a
- // superset of the regexp. If the returned matchTree is equivalent to the
- // original regexp, it returns true. An equivalent matchTree has the same
- // behaviour as the original regexp and can be used instead.
- //
- subMT, isEq, _, err := d.regexpToMatchTreeRecursive(s.Regexp, ngramSize, s.FileName, s.CaseSensitive)
- if err != nil {
- return nil, err
- }
- // if the query can be used in place of the regexp
- // return the subtree
- if isEq {
- return subMT, nil
- }
-
- prefix := ""
- if !s.CaseSensitive {
- prefix = "(?i)"
- }
-
- tr := ®expMatchTree{
- regexp: regexp.MustCompile(prefix + s.Regexp.String()),
- fileName: s.FileName,
- }
-
- return &andMatchTree{
- children: []matchTree{
- tr, &noVisitMatchTree{subMT},
- },
- }, nil
- case *query.And:
- var r []matchTree
- for _, ch := range s.Children {
- ct, err := d.newMatchTree(ch)
- if err != nil {
- return nil, err
- }
- r = append(r, ct)
- }
- return &andMatchTree{r}, nil
- case *query.Or:
- var r []matchTree
- for _, ch := range s.Children {
- ct, err := d.newMatchTree(ch)
- if err != nil {
- return nil, err
- }
- r = append(r, ct)
- }
- return &orMatchTree{r}, nil
- case *query.Not:
- ct, err := d.newMatchTree(s.Child)
- return ¬MatchTree{
- child: ct,
- }, err
-
- case *query.Substring:
- return d.newSubstringMatchTree(s)
-
- case *query.Branch:
- mask := uint64(0)
- if s.Pattern == "HEAD" {
- mask = 1
- } else {
- for nm, m := range d.branchIDs {
- if strings.Contains(nm, s.Pattern) {
- mask |= uint64(m)
- }
- }
- }
- return &branchQueryMatchTree{
- mask: mask,
- fileMasks: d.fileBranchMasks,
- }, nil
- case *query.Const:
- if s.Value {
- return &bruteForceMatchTree{}, nil
- } else {
- return &noMatchTree{"const"}, nil
- }
- case *query.Language:
- code, ok := d.metaData.LanguageMap[s.Language]
- if !ok {
- return &noMatchTree{"lang"}, nil
- }
- return &docMatchTree{
- reason: "language",
- numDocs: uint32(len(d.languages)),
- predicate: func(docID uint32) bool {
- return d.languages[docID] == code
- },
- }, nil
-
- case *query.Symbol:
- mt, err := d.newSubstringMatchTree(s.Atom)
- if err != nil {
- return nil, err
- }
-
- if _, ok := mt.(*regexpMatchTree); ok {
- return nil, fmt.Errorf("regexps and short queries not implemented for symbol search")
- }
- subMT, ok := mt.(*substrMatchTree)
- if !ok {
- return nil, fmt.Errorf("found %T inside query.Symbol", mt)
- }
-
- subMT.matchIterator = d.newTrimByDocSectionIter(s.Atom, subMT.matchIterator)
- return subMT, nil
- }
- log.Panicf("type %T", q)
- return nil, nil
-}
-
-func (d *indexData) newSubstringMatchTree(s *query.Substring) (matchTree, error) {
- st := &substrMatchTree{
- query: s,
- caseSensitive: s.CaseSensitive,
- fileName: s.FileName,
- }
-
- if utf8.RuneCountInString(s.Pattern) < ngramSize {
- prefix := ""
- if !s.CaseSensitive {
- prefix = "(?i)"
- }
- t := ®expMatchTree{
- regexp: regexp.MustCompile(prefix + regexp.QuoteMeta(s.Pattern)),
- fileName: s.FileName,
- }
- return t, nil
- }
-
- result, err := d.iterateNgrams(s)
- if err != nil {
- return nil, err
- }
- st.matchIterator = result
- return st, nil
-}
diff --git a/matchtree_test.go b/matchtree_test.go
deleted file mode 100644
index 07aa4a5..0000000
--- a/matchtree_test.go
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "reflect"
- "testing"
-
- "github.com/google/zoekt/query"
-)
-
-func Test_breakOnNewlines(t *testing.T) {
- type args struct {
- cm *candidateMatch
- text []byte
- }
- tests := []struct {
- name string
- args args
- want []*candidateMatch
- }{
- {
- name: "trivial case",
- args: args{
- cm: &candidateMatch{
- byteOffset: 0,
- byteMatchSz: 0,
- },
- text: nil,
- },
- want: nil,
- },
- {
- name: "no newlines",
- args: args{
- cm: &candidateMatch{
- byteOffset: 0,
- byteMatchSz: 1,
- },
- text: []byte("a"),
- },
- want: []*candidateMatch{
- {
- byteOffset: 0,
- byteMatchSz: 1,
- },
- },
- },
- {
- name: "newline at start",
- args: args{
- cm: &candidateMatch{
- byteOffset: 0,
- byteMatchSz: 2,
- },
- text: []byte("\na"),
- },
- want: []*candidateMatch{
- {
- byteOffset: 1,
- byteMatchSz: 1,
- },
- },
- },
- {
- name: "newline at end",
- args: args{
- cm: &candidateMatch{
- byteOffset: 0,
- byteMatchSz: 2,
- },
- text: []byte("a\n"),
- },
- want: []*candidateMatch{
- {
- byteOffset: 0,
- byteMatchSz: 1,
- },
- },
- },
- {
- name: "newline in middle",
- args: args{
- cm: &candidateMatch{
- byteOffset: 0,
- byteMatchSz: 3,
- },
- text: []byte("a\nb"),
- },
- want: []*candidateMatch{
- {
- byteOffset: 0,
- byteMatchSz: 1,
- },
- {
- byteOffset: 2,
- byteMatchSz: 1,
- },
- },
- },
- {
- name: "two newlines",
- args: args{
- cm: &candidateMatch{
- byteOffset: 0,
- byteMatchSz: 5,
- },
- text: []byte("a\nb\nc"),
- },
- want: []*candidateMatch{
- {
- byteOffset: 0,
- byteMatchSz: 1,
- },
- {
- byteOffset: 2,
- byteMatchSz: 1,
- },
- {
- byteOffset: 4,
- byteMatchSz: 1,
- },
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
- type PrintableCm struct {
- byteOffset uint32
- byteMatchSz uint32
- }
- var got2, want2 []PrintableCm
- for _, g := range got {
- got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
- }
- for _, w := range tt.want {
- want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
- }
- t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
- }
- })
- }
-}
-
-func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
- tests := []struct {
- query string
- skip bool
- }{
- {query: "^foo", skip: false},
- {query: "foo", skip: true},
- {query: "thread|needle|haystack", skip: true},
- {query: "contain(er|ing)", skip: false},
- {query: "thread (needle|haystack)", skip: true},
- {query: "thread (needle|)", skip: false},
- }
-
- for _, tt := range tests {
- q, err := query.Parse(tt.query)
- if err != nil {
- t.Errorf("Error parsing query: %s", "sym:"+tt.query)
- continue
- }
-
- d := &indexData{}
- mt, err := d.newMatchTree(q)
- if err != nil {
- t.Errorf("Error creating match tree from query: %s", q)
- continue
- }
-
- visitMatchTree(mt, func(m matchTree) {
- if _, ok := m.(*regexpMatchTree); ok && tt.skip {
- t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
- }
- })
- }
-}
diff --git a/query/bits.go b/query/bits.go
deleted file mode 100644
index 8b199d2..0000000
--- a/query/bits.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-func toLower(in []byte) []byte {
- out := make([]byte, len(in))
- for i, c := range in {
- if c >= 'A' && c <= 'Z' {
- c = c - 'A' + 'a'
- }
- out[i] = c
- }
- return out
-}
diff --git a/query/parse.go b/query/parse.go
deleted file mode 100644
index ecbebf7..0000000
--- a/query/parse.go
+++ /dev/null
@@ -1,474 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-import (
- "bytes"
- "fmt"
- "log"
- "regexp/syntax"
-)
-
-var _ = log.Printf
-
-type SuggestQueryError struct {
- Message string
- Suggestion string
-}
-
-func (e *SuggestQueryError) Error() string {
- return fmt.Sprintf("%s. Suggestion: %s", e.Message, e.Suggestion)
-}
-
-// parseStringLiteral parses a string literal, consumes the starting
-// quote too.
-func parseStringLiteral(in []byte) (lit []byte, n int, err error) {
- left := in[1:]
- found := false
-
-loop:
- for len(left) > 0 {
- c := left[0]
- left = left[1:]
- switch c {
- case '"':
- found = true
- break loop
- case '\\':
- // TODO - other escape sequences.
- if len(left) == 0 {
- return nil, 0, fmt.Errorf("query: missing char after \\")
- }
- c = left[0]
- left = left[1:]
-
- lit = append(lit, c)
- default:
- lit = append(lit, c)
- }
- }
- if !found {
- return nil, 0, fmt.Errorf("query: unterminated quoted string")
- }
- return lit, len(in) - len(left), nil
-}
-
-// orOperator is a placeholder intermediate so we can represent [A,
-// or, B] before we convert it to Or{A, B}
-type orOperator struct{}
-
-func (o *orOperator) String() string {
- return "orOp"
-}
-
-func isSpace(c byte) bool {
- return c == ' ' || c == '\t'
-}
-
-// Parse parses a string into a query.
-func Parse(qStr string) (Q, error) {
- b := []byte(qStr)
-
- qs, _, err := parseExprList(b)
- if err != nil {
- return nil, err
- }
-
- q, err := parseOperators(qs)
- if err != nil {
- return nil, err
- }
-
- return Simplify(q), nil
-}
-
-// parseExpr parses a single expression, returning the result, and the
-// number of bytes consumed.
-func parseExpr(in []byte) (Q, int, error) {
- b := in[:]
- var expr Q
- for len(b) > 0 && isSpace(b[0]) {
- b = b[1:]
- }
-
- tok, err := nextToken(b)
- if err != nil {
- return nil, 0, err
- }
- if tok == nil {
- return nil, 0, nil
- }
- b = b[len(tok.Input):]
-
- text := string(tok.Text)
- switch tok.Type {
- case tokCase:
- switch text {
- case "yes":
- case "no":
- case "auto":
- default:
- return nil, 0, fmt.Errorf("query: unknown case argument %q, want {yes,no,auto}", text)
- }
- expr = &caseQ{text}
- case tokRepo:
- expr = &Repo{Pattern: text}
- case tokBranch:
- expr = &Branch{Pattern: text}
- case tokText, tokRegex:
- q, err := regexpQuery(text, false, false)
- if err != nil {
- return nil, 0, err
- }
- expr = q
- case tokFile:
- q, err := regexpQuery(text, false, true)
- if err != nil {
- return nil, 0, err
- }
- expr = q
-
- case tokContent:
- q, err := regexpQuery(text, true, false)
- if err != nil {
- return nil, 0, err
- }
- expr = q
- case tokLang:
- expr = &Language{Language: text}
-
- case tokSym:
- if text == "" {
- return nil, 0, fmt.Errorf("the sym: atom must have an argument")
- }
- expr = &Symbol{&Substring{Pattern: text}}
-
- case tokParenClose:
- // Caller must consume paren.
- expr = nil
-
- case tokParenOpen:
- qs, n, err := parseExprList(b)
- b = b[n:]
- if err != nil {
- return nil, 0, err
- }
-
- pTok, err := nextToken(b)
- if err != nil {
- return nil, 0, err
- }
- if pTok == nil || pTok.Type != tokParenClose {
- return nil, 0, fmt.Errorf("query: missing close paren, got token %v", pTok)
- }
-
- b = b[len(pTok.Input):]
- expr, err = parseOperators(qs)
- if err != nil {
- return nil, 0, err
- }
- case tokNegate:
- subQ, n, err := parseExpr(b)
- if err != nil {
- return nil, 0, err
- }
- if subQ == nil {
- return nil, 0, fmt.Errorf("query: '-' operator needs an argument")
- }
- b = b[n:]
- expr = &Not{subQ}
-
- }
-
- return expr, len(in) - len(b), nil
-}
-
-// regexpQuery parses an atom into either a regular expression, or a
-// simple substring atom.
-func regexpQuery(text string, content, file bool) (Q, error) {
- var expr Q
-
- r, err := syntax.Parse(text, syntax.ClassNL|syntax.PerlX|syntax.UnicodeGroups)
- if err != nil {
- return nil, err
- }
-
- if r.Op == syntax.OpLiteral {
- expr = &Substring{
- Pattern: string(r.Rune),
- FileName: file,
- Content: content,
- }
- } else {
- expr = &Regexp{
- Regexp: r,
- FileName: file,
- Content: content,
- }
- }
-
- return expr, nil
-}
-
-// parseOperators interprets the orOperator in a list of queries.
-func parseOperators(in []Q) (Q, error) {
- top := &Or{}
- cur := &And{}
-
- seenOr := false
- for _, q := range in {
- if _, ok := q.(*orOperator); ok {
- seenOr = true
- if len(cur.Children) == 0 {
- return nil, fmt.Errorf("query: OR operator should have operand")
- }
- top.Children = append(top.Children, cur)
- cur = &And{}
- } else {
- cur.Children = append(cur.Children, q)
- }
- }
-
- if seenOr && len(cur.Children) == 0 {
- return nil, fmt.Errorf("query: OR operator should have operand")
- }
- top.Children = append(top.Children, cur)
- return top, nil
-}
-
-// parseExprList parses a list of query expressions. It is the
-// workhorse of the Parse function.
-func parseExprList(in []byte) ([]Q, int, error) {
- b := in[:]
- var qs []Q
- for len(b) > 0 {
- for len(b) > 0 && isSpace(b[0]) {
- b = b[1:]
- }
- tok, _ := nextToken(b)
- if tok != nil && tok.Type == tokParenClose {
- break
- } else if tok != nil && tok.Type == tokOr {
- qs = append(qs, &orOperator{})
- b = b[len(tok.Input):]
- continue
- }
-
- q, n, err := parseExpr(b)
- if err != nil {
- return nil, 0, err
- }
-
- if q == nil {
- // eof or a ')'
- break
- }
- qs = append(qs, q)
- b = b[n:]
- }
-
- setCase := "auto"
- newQS := qs[:0]
- for _, q := range qs {
- if sc, ok := q.(*caseQ); ok {
- setCase = sc.Flavor
- } else {
- newQS = append(newQS, q)
- }
- }
- qs = mapQueryList(newQS, func(q Q) Q {
- if sc, ok := q.(setCaser); ok {
- sc.setCase(setCase)
- }
- return q
- })
- return qs, len(in) - len(b), nil
-}
-
-type token struct {
- Type int
- // The value of the token
- Text []byte
-
- // The input that we consumed to form the token.
- Input []byte
-}
-
-func (t *token) String() string {
- return fmt.Sprintf("%s:%q", tokNames[t.Type], t.Text)
-}
-
-// token types.
-const (
- tokText = 0
- tokFile = 1
- tokRepo = 2
- tokCase = 3
- tokBranch = 4
- tokParenOpen = 5
- tokParenClose = 6
- tokError = 7
- tokNegate = 8
- tokRegex = 9
- tokOr = 10
- tokContent = 11
- tokLang = 12
- tokSym = 13
-)
-
-var tokNames = map[int]string{
- tokBranch: "Branch",
- tokCase: "Case",
- tokError: "Error",
- tokFile: "File",
- tokNegate: "Negate",
- tokOr: "Or",
- tokParenClose: "ParenClose",
- tokParenOpen: "ParenOpen",
- tokRegex: "Regex",
- tokRepo: "Repo",
- tokText: "Text",
- tokLang: "Language",
- tokSym: "Symbol",
-}
-
-var prefixes = map[string]int{
- "b:": tokBranch,
- "branch:": tokBranch,
- "c:": tokContent,
- "case:": tokCase,
- "content:": tokContent,
- "f:": tokFile,
- "file:": tokFile,
- "r:": tokRepo,
- "regex:": tokRegex,
- "repo:": tokRepo,
- "lang:": tokLang,
- "sym:": tokSym,
-}
-
-var reservedWords = map[string]int{
- "or": tokOr,
-}
-
-func (t *token) setType() {
- // After we consumed the input, we have to interpret some of the text,
- // eg. to distinguish between ")" the text and ) the query grouping
- // parenthesis.
- if len(t.Text) == 1 && t.Text[0] == '(' {
- t.Type = tokParenOpen
- }
- if len(t.Text) == 1 && t.Text[0] == ')' {
- t.Type = tokParenClose
- }
-
- for w, typ := range reservedWords {
- if string(t.Text) == w && string(t.Input) == w {
- t.Type = typ
- break
- }
- }
-
- for pref, typ := range prefixes {
- if !bytes.HasPrefix(t.Input, []byte(pref)) {
- continue
- }
-
- t.Text = t.Text[len(pref):]
- t.Type = typ
- break
- }
-}
-
-// nextToken returns the next token from the given input.
-func nextToken(in []byte) (*token, error) {
- left := in[:]
- parenCount := 0
- var cur token
- if len(left) == 0 {
- return nil, nil
- }
-
- if left[0] == '-' {
- return &token{
- Type: tokNegate,
- Text: []byte{'-'},
- Input: in[:1],
- }, nil
- }
-
- foundSpace := false
-
-loop:
- for len(left) > 0 {
- c := left[0]
- switch c {
- case '(':
- parenCount++
- cur.Text = append(cur.Text, c)
- left = left[1:]
- case ')':
- if parenCount == 0 {
- if len(cur.Text) == 0 {
- cur.Text = []byte{')'}
- left = left[1:]
- }
- break loop
- }
-
- cur.Text = append(cur.Text, c)
- left = left[1:]
- parenCount--
-
- case '"':
- t, n, err := parseStringLiteral(left)
- if err != nil {
- return nil, err
- }
- cur.Text = append(cur.Text, t...)
- left = left[n:]
- case '\\':
- left = left[1:]
- if len(left) == 0 {
- return nil, fmt.Errorf("query: lone \\ at end")
- }
- c := left[0]
- cur.Text = append(cur.Text, '\\', c)
- left = left[1:]
-
- case ' ', '\n', '\t':
- if parenCount > 0 {
- foundSpace = true
- }
- break loop
- default:
- cur.Text = append(cur.Text, c)
- left = left[1:]
- }
- }
-
- if len(cur.Text) == 0 {
- return nil, nil
- }
-
- if foundSpace && cur.Text[0] == '(' {
- cur.Text = cur.Text[:1]
- cur.Input = in[:1]
- } else {
- cur.Input = in[:len(in)-len(left)]
- }
- cur.setType()
- return &cur, nil
-}
diff --git a/query/parse_test.go b/query/parse_test.go
deleted file mode 100644
index 8acd719..0000000
--- a/query/parse_test.go
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-import (
- "log"
- "reflect"
- "regexp/syntax"
- "testing"
-)
-
-func mustParseRE(s string) *syntax.Regexp {
- r, err := syntax.Parse(s, syntax.ClassNL|syntax.PerlX|syntax.UnicodeGroups)
- if err != nil {
- log.Panicf("parsing %q: %v", s, err)
- }
- return r
-}
-
-func TestParseQuery(t *testing.T) {
- type testcase struct {
- in string
- want Q
- }
-
- for _, c := range []testcase{
- {`\bword\b`, &Regexp{Regexp: mustParseRE(`\bword\b`)}},
- {"fi\"le:bla\"", &Substring{Pattern: "file:bla"}},
- {"abc or def", NewOr(&Substring{Pattern: "abc"}, &Substring{Pattern: "def"})},
- {"(abc or def)", NewOr(&Substring{Pattern: "abc"}, &Substring{Pattern: "def"})},
- {"(ppp qqq or rrr sss)", NewOr(
- NewAnd(&Substring{Pattern: "ppp"}, &Substring{Pattern: "qqq"}),
- NewAnd(&Substring{Pattern: "rrr"}, &Substring{Pattern: "sss"}))},
- {"((x) ora b(z(d)))", NewAnd(
- &Regexp{Regexp: mustParseRE("(x)")},
- &Substring{Pattern: "ora"},
- &Regexp{Regexp: mustParseRE("b(z(d))")})},
- {"( )", &Const{Value: true}},
- {"(abc)(de)", &Regexp{Regexp: mustParseRE("(abc)(de)")}},
- {"sub-pixel", &Substring{Pattern: "sub-pixel"}},
- {"abc", &Substring{Pattern: "abc"}},
- {"ABC", &Substring{Pattern: "ABC", CaseSensitive: true}},
- {"\"abc bcd\"", &Substring{Pattern: "abc bcd"}},
- {"abc bcd", NewAnd(
- &Substring{Pattern: "abc"},
- &Substring{Pattern: "bcd"})},
- {"f:fs", &Substring{Pattern: "fs", FileName: true}},
- {"fs", &Substring{Pattern: "fs"}},
- {"-abc", &Not{&Substring{Pattern: "abc"}}},
- {"abccase:yes", &Substring{Pattern: "abccase:yes"}},
- {"file:abc", &Substring{Pattern: "abc", FileName: true}},
- {"branch:pqr", &Branch{Pattern: "pqr"}},
- {"((x) )", &Regexp{Regexp: mustParseRE("(x)")}},
- {"file:helpers\\.go byte", NewAnd(
- &Substring{Pattern: "helpers.go", FileName: true},
- &Substring{Pattern: "byte"})},
- {"(abc def)", NewAnd(
- &Substring{Pattern: "abc"},
- &Substring{Pattern: "def"})},
- {"(abc def", nil},
- {"regex:abc[p-q]", &Regexp{Regexp: mustParseRE("abc[p-q]")}},
- {"aBc[p-q]", &Regexp{Regexp: mustParseRE("aBc[p-q]"), CaseSensitive: true}},
- {"aBc[p-q] case:auto", &Regexp{Regexp: mustParseRE("aBc[p-q]"), CaseSensitive: true}},
- {"repo:go", &Repo{"go"}},
-
- {"file:\"\"", &Const{true}},
- {"abc.*def", &Regexp{Regexp: mustParseRE("abc.*def")}},
- {"abc\\.\\*def", &Substring{Pattern: "abc.*def"}},
- {"(abc)", &Regexp{Regexp: mustParseRE("(abc)")}},
-
- {"c:abc", &Substring{Pattern: "abc", Content: true}},
- {"content:abc", &Substring{Pattern: "abc", Content: true}},
-
- {"lang:c++", &Language{"c++"}},
- {"sym:pqr", &Symbol{&Substring{Pattern: "pqr"}}},
- {"sym:Pqr", &Symbol{&Substring{Pattern: "Pqr", CaseSensitive: true}}},
-
- // case
- {"abc case:yes", &Substring{Pattern: "abc", CaseSensitive: true}},
- {"abc case:auto", &Substring{Pattern: "abc", CaseSensitive: false}},
- {"ABC case:auto", &Substring{Pattern: "ABC", CaseSensitive: true}},
- {"ABC case:\"auto\"", &Substring{Pattern: "ABC", CaseSensitive: true}},
- {"abc -f:def case:yes", NewAnd(
- &Substring{Pattern: "abc", CaseSensitive: true},
- &Not{Child: &Substring{Pattern: "def", FileName: true, CaseSensitive: true}},
- )},
-
- // errors.
- {"--", nil},
- {"\"abc", nil},
- {"\"a\\", nil},
- {"case:foo", nil},
-
- {"sym:", nil},
- {"abc or", nil},
- {"or abc", nil},
- {"def or or abc", nil},
-
- {"", &Const{Value: true}},
- } {
- got, err := Parse(c.in)
- if (c.want == nil) != (err != nil) {
- t.Errorf("Parse(%q): error %v, want %v", c.in, err, c.want)
- } else if got != nil {
- if !reflect.DeepEqual(got, c.want) {
- t.Errorf("Parse(%s): got %v want %v", c.in, got, c.want)
- }
- }
- }
-}
-
-func TestTokenize(t *testing.T) {
- type testcase struct {
- in string
- typ int
- text string
- }
-
- cases := []testcase{
- {"file:bla", tokFile, "bla"},
- {"file:bla ", tokFile, "bla"},
- {"f:bla ", tokFile, "bla"},
- {"(abc def) ", tokParenOpen, "("},
- {"(abcdef)", tokText, "(abcdef)"},
- {"(abc)(de)", tokText, "(abc)(de)"},
- {"(ab(c)def) ", tokText, "(ab(c)def)"},
- {"(ab\\ def) ", tokText, "(ab\\ def)"},
- {") ", tokParenClose, ")"},
- {"a(bc))", tokText, "a(bc)"},
- {"abc) ", tokText, "abc"},
- {"file:\"bla\"", tokFile, "bla"},
- {"\"file:bla\"", tokText, "file:bla"},
- {"\\", tokError, ""},
- {"o\"r\" bla", tokText, "or"},
- {"or bla", tokOr, "or"},
- {"ar bla", tokText, "ar"},
- }
- for _, c := range cases {
- tok, err := nextToken([]byte(c.in))
- if err != nil {
- tok = &token{Type: tokError}
- }
- if tok.Type != c.typ {
- t.Errorf("%s: got type %d, want %d", c.in, tok.Type, c.typ)
- continue
- }
-
- if string(tok.Text) != c.text {
- t.Errorf("%s: got text %q, want %q", c.in, tok.Text, c.text)
- }
- }
-}
diff --git a/query/query.go b/query/query.go
deleted file mode 100644
index 31741a0..0000000
--- a/query/query.go
+++ /dev/null
@@ -1,394 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-import (
- "fmt"
- "log"
- "reflect"
- "regexp/syntax"
- "strings"
-)
-
-var _ = log.Println
-
-// Q is a representation for a possibly hierarchical search query.
-type Q interface {
- String() string
-}
-
-// RegexpQuery is a query looking for regular expressions matches.
-type Regexp struct {
- Regexp *syntax.Regexp
- FileName bool
- Content bool
- CaseSensitive bool
-}
-
-// Symbol finds a string that is a symbol.
-type Symbol struct {
- Atom *Substring
-}
-
-func (s *Symbol) String() string {
- return fmt.Sprintf("sym:%s", s.Atom)
-}
-
-func (q *Regexp) String() string {
- pref := ""
- if q.FileName {
- pref = "file_"
- }
- if q.CaseSensitive {
- pref = "case_" + pref
- }
- return fmt.Sprintf("%sregex:%q", pref, q.Regexp.String())
-}
-
-type caseQ struct {
- Flavor string
-}
-
-func (c *caseQ) String() string {
- return "case:" + c.Flavor
-}
-
-type Language struct {
- Language string
-}
-
-func (l *Language) String() string {
- return "lang:" + l.Language
-}
-
-type Const struct {
- Value bool
-}
-
-func (q *Const) String() string {
- if q.Value {
- return "TRUE"
- }
- return "FALSE"
-}
-
-type Repo struct {
- Pattern string
-}
-
-func (q *Repo) String() string {
- return fmt.Sprintf("repo:%s", q.Pattern)
-}
-
-// Substring is the most basic query: a query for a substring.
-type Substring struct {
- Pattern string
- CaseSensitive bool
-
- // Match only filename
- FileName bool
-
- // Match only content
- Content bool
-}
-
-func (q *Substring) String() string {
- s := ""
-
- t := ""
- if q.FileName {
- t = "file_"
- } else if q.Content {
- t = "content_"
- }
-
- s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern)
- if q.CaseSensitive {
- s = "case_" + s
- }
- return s
-}
-
-type setCaser interface {
- setCase(string)
-}
-
-func (q *Substring) setCase(k string) {
- switch k {
- case "yes":
- q.CaseSensitive = true
- case "no":
- q.CaseSensitive = false
- case "auto":
- // TODO - unicode
- q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern))))
- }
-}
-
-func (q *Symbol) setCase(k string) {
- q.Atom.setCase(k)
-}
-
-func (q *Regexp) setCase(k string) {
- switch k {
- case "yes":
- q.CaseSensitive = true
- case "no":
- q.CaseSensitive = false
- case "auto":
- q.CaseSensitive = (q.Regexp.String() != LowerRegexp(q.Regexp).String())
- }
-}
-
-// Or is matched when any of its children is matched.
-type Or struct {
- Children []Q
-}
-
-func (q *Or) String() string {
- var sub []string
- for _, ch := range q.Children {
- sub = append(sub, ch.String())
- }
- return fmt.Sprintf("(or %s)", strings.Join(sub, " "))
-}
-
-// Not inverts the meaning of its child.
-type Not struct {
- Child Q
-}
-
-func (q *Not) String() string {
- return fmt.Sprintf("(not %s)", q.Child)
-}
-
-// And is matched when all its children are.
-type And struct {
- Children []Q
-}
-
-func (q *And) String() string {
- var sub []string
- for _, ch := range q.Children {
- sub = append(sub, ch.String())
- }
- return fmt.Sprintf("(and %s)", strings.Join(sub, " "))
-}
-
-// NewAnd is syntactic sugar for constructing And queries.
-func NewAnd(qs ...Q) Q {
- return &And{Children: qs}
-}
-
-// NewOr is syntactic sugar for constructing Or queries.
-func NewOr(qs ...Q) Q {
- return &Or{Children: qs}
-}
-
-// Branch limits search to a specific branch.
-type Branch struct {
- Pattern string
-}
-
-func (q *Branch) String() string {
- return fmt.Sprintf("branch:%q", q.Pattern)
-}
-
-func queryChildren(q Q) []Q {
- switch s := q.(type) {
- case *And:
- return s.Children
- case *Or:
- return s.Children
- }
- return nil
-}
-
-func flattenAndOr(children []Q, typ Q) ([]Q, bool) {
- var flat []Q
- changed := false
- for _, ch := range children {
- ch, subChanged := flatten(ch)
- changed = changed || subChanged
- if reflect.TypeOf(ch) == reflect.TypeOf(typ) {
- changed = true
- subChildren := queryChildren(ch)
- if subChildren != nil {
- flat = append(flat, subChildren...)
- }
- } else {
- flat = append(flat, ch)
- }
- }
-
- return flat, changed
-}
-
-// (and (and x y) z) => (and x y z) , the same for "or"
-func flatten(q Q) (Q, bool) {
- switch s := q.(type) {
- case *And:
- if len(s.Children) == 1 {
- return s.Children[0], true
- }
- flatChildren, changed := flattenAndOr(s.Children, s)
- return &And{flatChildren}, changed
- case *Or:
- if len(s.Children) == 1 {
- return s.Children[0], true
- }
- flatChildren, changed := flattenAndOr(s.Children, s)
- return &Or{flatChildren}, changed
- case *Not:
- child, changed := flatten(s.Child)
- return &Not{child}, changed
- default:
- return q, false
- }
-}
-
-func mapQueryList(qs []Q, f func(Q) Q) []Q {
- neg := make([]Q, len(qs))
- for i, sub := range qs {
- neg[i] = Map(sub, f)
- }
- return neg
-}
-
-func invertConst(q Q) Q {
- c, ok := q.(*Const)
- if ok {
- return &Const{!c.Value}
- }
- return q
-}
-
-func evalAndOrConstants(q Q, children []Q) Q {
- _, isAnd := q.(*And)
-
- children = mapQueryList(children, evalConstants)
-
- newCH := children[:0]
- for _, ch := range children {
- c, ok := ch.(*Const)
- if ok {
- if c.Value == isAnd {
- continue
- } else {
- return ch
- }
- }
- newCH = append(newCH, ch)
- }
- if len(newCH) == 0 {
- return &Const{isAnd}
- }
- if isAnd {
- return &And{newCH}
- }
- return &Or{newCH}
-}
-
-func evalConstants(q Q) Q {
- switch s := q.(type) {
- case *And:
- return evalAndOrConstants(q, s.Children)
- case *Or:
- return evalAndOrConstants(q, s.Children)
- case *Not:
- ch := evalConstants(s.Child)
- if _, ok := ch.(*Const); ok {
- return invertConst(ch)
- }
- return &Not{ch}
- case *Substring:
- if len(s.Pattern) == 0 {
- return &Const{true}
- }
- case *Regexp:
- if s.Regexp.Op == syntax.OpEmptyMatch {
- return &Const{true}
- }
- case *Branch:
- if s.Pattern == "" {
- return &Const{true}
- }
- }
- return q
-}
-
-func Simplify(q Q) Q {
- q = evalConstants(q)
- for {
- var changed bool
- q, changed = flatten(q)
- if !changed {
- break
- }
- }
-
- return q
-}
-
-// Map runs f over the q.
-func Map(q Q, f func(q Q) Q) Q {
- switch s := q.(type) {
- case *And:
- q = &And{Children: mapQueryList(s.Children, f)}
- case *Or:
- q = &Or{Children: mapQueryList(s.Children, f)}
- case *Not:
- q = &Not{Child: Map(s.Child, f)}
- }
- return f(q)
-}
-
-// Expand expands Substr queries into (OR file_substr content_substr)
-// queries, and the same for Regexp queries..
-func ExpandFileContent(q Q) Q {
- switch s := q.(type) {
- case *Substring:
- if !s.FileName && !s.Content {
- f := *s
- f.FileName = true
- c := *s
- c.Content = true
- return NewOr(&f, &c)
- }
- case *Regexp:
- if !s.FileName && !s.Content {
- f := *s
- f.FileName = true
- c := *s
- c.Content = true
- return NewOr(&f, &c)
- }
- }
- return q
-}
-
-// VisitAtoms runs `v` on all atom queries within `q`.
-func VisitAtoms(q Q, v func(q Q)) {
- Map(q, func(iQ Q) Q {
- switch iQ.(type) {
- case *And:
- case *Or:
- case *Not:
- default:
- v(iQ)
- }
- return iQ
- })
-}
diff --git a/query/query_test.go b/query/query_test.go
deleted file mode 100644
index 0d85b29..0000000
--- a/query/query_test.go
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-import (
- "log"
- "reflect"
- "testing"
-)
-
-var _ = log.Println
-
-func TestQueryString(t *testing.T) {
- q := &Or{[]Q{
- &And{[]Q{
- &Substring{Pattern: "hoi"},
- &Not{&Substring{Pattern: "hai"}},
- }},
- }}
- got := q.String()
- want := `(or (and substr:"hoi" (not substr:"hai")))`
-
- if got != want {
- t.Errorf("got %s, want %s", got, want)
- }
-}
-
-func TestSimplify(t *testing.T) {
- type testcase struct {
- in Q
- want Q
- }
-
- cases := []testcase{
- {
- in: NewOr(
- NewOr(
- NewAnd(&Substring{Pattern: "hoi"},
- &Not{&Substring{Pattern: "hai"}}),
- NewOr(
- &Substring{Pattern: "zip"},
- &Substring{Pattern: "zap"},
- ))),
- want: NewOr(
- NewAnd(
- &Substring{Pattern: "hoi"},
- &Not{&Substring{Pattern: "hai"}}),
- &Substring{Pattern: "zip"},
- &Substring{Pattern: "zap"}),
- },
- {in: &And{}, want: &Const{true}},
- {in: &Or{}, want: &Const{false}},
- {in: NewAnd(&Const{true}, &Const{false}), want: &Const{false}},
- {in: NewOr(&Const{false}, &Const{true}), want: &Const{true}},
- {in: &Not{&Const{true}}, want: &Const{false}},
- {
- in: NewAnd(
- &Substring{Pattern: "byte"},
- &Not{NewAnd(&Substring{Pattern: "byte"})}),
- want: NewAnd(
- &Substring{Pattern: "byte"},
- &Not{&Substring{Pattern: "byte"}}),
- },
- }
-
- for _, c := range cases {
- got := Simplify(c.in)
- if !reflect.DeepEqual(got, c.want) {
- t.Errorf("got %s, want %s", got, c.want)
- }
- }
-}
-
-func TestMap(t *testing.T) {
- in := NewAnd(&Substring{Pattern: "bla"}, &Not{&Repo{"foo"}})
- out := NewAnd(&Substring{Pattern: "bla"}, &Not{&Const{false}})
-
- f := func(q Q) Q {
- if _, ok := q.(*Repo); ok {
- return &Const{false}
- }
- return q
- }
- got := Map(in, f)
- if !reflect.DeepEqual(got, out) {
- t.Errorf("got %v, want %v", got, out)
- }
-}
-
-func TestVisitAtoms(t *testing.T) {
- in := NewAnd(&Substring{}, &Repo{}, &Not{&Const{}})
- count := 0
- VisitAtoms(in, func(q Q) {
- count++
- })
- if count != 3 {
- t.Errorf("got %d, want 3", count)
- }
-}
diff --git a/query/regexp.go b/query/regexp.go
deleted file mode 100644
index 64c9def..0000000
--- a/query/regexp.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-import (
- "log"
- "regexp/syntax"
-)
-
-var _ = log.Println
-
-func LowerRegexp(r *syntax.Regexp) *syntax.Regexp {
- newRE := *r
- switch r.Op {
- case syntax.OpLiteral, syntax.OpCharClass:
- newRE.Rune = make([]rune, len(r.Rune))
- for i, c := range r.Rune {
- if c >= 'A' && c <= 'Z' {
- newRE.Rune[i] = c + 'a' - 'A'
- } else {
- newRE.Rune[i] = c
- }
- }
- default:
- newRE.Sub = make([]*syntax.Regexp, len(newRE.Sub))
- for i, s := range r.Sub {
- newRE.Sub[i] = LowerRegexp(s)
- }
- }
-
- return &newRE
-}
diff --git a/query/regexp_test.go b/query/regexp_test.go
deleted file mode 100644
index a3a12a0..0000000
--- a/query/regexp_test.go
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package query
-
-import (
- "regexp/syntax"
- "strings"
- "testing"
-)
-
-var opnames = map[syntax.Op]string{
- syntax.OpNoMatch: "OpNoMatch",
- syntax.OpEmptyMatch: "OpEmptyMatch",
- syntax.OpLiteral: "OpLiteral",
- syntax.OpCharClass: "OpCharClass",
- syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
- syntax.OpAnyChar: "OpAnyChar",
- syntax.OpBeginLine: "OpBeginLine",
- syntax.OpEndLine: "OpEndLine",
- syntax.OpBeginText: "OpBeginText",
- syntax.OpEndText: "OpEndText",
- syntax.OpWordBoundary: "OpWordBoundary",
- syntax.OpNoWordBoundary: "OpNoWordBoundary",
- syntax.OpCapture: "OpCapture",
- syntax.OpStar: "OpStar",
- syntax.OpPlus: "OpPlus",
- syntax.OpQuest: "OpQuest",
- syntax.OpRepeat: "OpRepeat",
- syntax.OpConcat: "OpConcat",
- syntax.OpAlternate: "OpAlternate",
-}
-
-func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
- t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
- for _, s := range r.Sub {
- printRegexp(t, s, lvl+1)
- }
-}
-
-func TestLowerRegexp(t *testing.T) {
- in := "[a-zA-Z]fooBAR"
- re := mustParseRE(in)
- in = re.String()
- got := LowerRegexp(re)
- want := "[a-za-z]foobar"
- if got.String() != want {
- printRegexp(t, re, 0)
- printRegexp(t, got, 0)
- t.Errorf("got %s, want %s", got, want)
- }
-
- if re.String() != in {
- t.Errorf("got mutated original %s want %s", re.String(), in)
- }
-}
diff --git a/read.go b/read.go
deleted file mode 100644
index 8351a23..0000000
--- a/read.go
+++ /dev/null
@@ -1,484 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "encoding/binary"
- "encoding/json"
- "fmt"
- "log"
- "sort"
-)
-
-// IndexFile is a file suitable for concurrent read access. For performance
-// reasons, it allows a mmap'd implementation.
-type IndexFile interface {
- Read(off uint32, sz uint32) ([]byte, error)
- Size() (uint32, error)
- Close()
- Name() string
-}
-
-// reader is a stateful file
-type reader struct {
- r IndexFile
- off uint32
-}
-
-func (r *reader) seek(off uint32) {
- r.off = off
-}
-
-func (r *reader) U32() (uint32, error) {
- b, err := r.r.Read(r.off, 4)
- r.off += 4
- if err != nil {
- return 0, err
- }
- return binary.BigEndian.Uint32(b), nil
-}
-
-func (r *reader) U64() (uint64, error) {
- b, err := r.r.Read(r.off, 8)
- r.off += 8
- if err != nil {
- return 0, err
- }
- return binary.BigEndian.Uint64(b), nil
-}
-
-func (r *reader) ReadByte() (byte, error) {
- b, err := r.r.Read(r.off, 1)
- r.off += 1
- if err != nil {
- return 0, err
- }
- return b[0], nil
-}
-
-func (r *reader) Varint() (uint64, error) {
- v, err := binary.ReadUvarint(r)
- if err != nil {
- return 0, err
- }
- return v, nil
-}
-
-func (r *reader) Str() (string, error) {
- slen, err := r.Varint()
- if err != nil {
- return "", err
- }
- b, err := r.r.Read(r.off, uint32(slen))
- if err != nil {
- return "", err
- }
- r.off += uint32(slen)
- return string(b), nil
-}
-
-func (r *reader) readTOC(toc *indexTOC) error {
- sz, err := r.r.Size()
- if err != nil {
- return err
- }
- r.off = sz - 8
-
- var tocSection simpleSection
- if err := tocSection.read(r); err != nil {
- return err
- }
-
- r.seek(tocSection.off)
-
- sectionCount, err := r.U32()
- if err != nil {
- return err
- }
-
- if sectionCount == 0 {
- // tagged sections are indicated by a 0 sectionCount,
- // and then a list of string-tagged type-indicated sections.
- secs := toc.sectionsTagged()
- for r.off < tocSection.off+tocSection.sz {
- tag, err := r.Str()
- if err != nil {
- return err
- }
- kind, err := r.Varint()
- if err != nil {
- return err
- }
- sec := secs[tag]
- if sec != nil && sec.kind() == sectionKind(kind) {
- // happy path
- if err := sec.read(r); err != nil {
- return err
- }
- continue
- }
- // error case: skip over unknown section
- if sec == nil {
- log.Printf("file %s TOC has unknown section %q", r.r.Name(), tag)
- } else {
- return fmt.Errorf("file %s TOC section %q expects kind %d, got kind %d", r.r.Name(), tag,
- kind, sec.kind())
- }
- if kind == 0 {
- (&simpleSection{}).read(r)
- } else if kind == 1 {
- (&compoundSection{}).read(r)
- }
- }
- } else {
- // TODO: Remove this branch when ReaderMinFeatureVersion >= 10
-
- secs := toc.sections()
-
- if len(secs) != int(sectionCount) {
- return fmt.Errorf("section count mismatch: got %d want %d", sectionCount, len(secs))
- }
-
- for _, s := range secs {
- if err := s.read(r); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-func (r *indexData) readSectionBlob(sec simpleSection) ([]byte, error) {
- return r.file.Read(sec.off, sec.sz)
-}
-
-func readSectionU32(f IndexFile, sec simpleSection) ([]uint32, error) {
- if sec.sz%4 != 0 {
- return nil, fmt.Errorf("barf: section size %% 4 != 0: sz %d ", sec.sz)
- }
- blob, err := f.Read(sec.off, sec.sz)
- if err != nil {
- return nil, err
- }
- arr := make([]uint32, 0, len(blob)/4)
- for len(blob) > 0 {
- arr = append(arr, binary.BigEndian.Uint32(blob))
- blob = blob[4:]
- }
- return arr, nil
-}
-
-func readSectionU64(f IndexFile, sec simpleSection) ([]uint64, error) {
- if sec.sz%8 != 0 {
- return nil, fmt.Errorf("barf: section size %% 8 != 0: sz %d ", sec.sz)
- }
- blob, err := f.Read(sec.off, sec.sz)
- if err != nil {
- return nil, err
- }
- arr := make([]uint64, 0, len(blob)/8)
- for len(blob) > 0 {
- arr = append(arr, binary.BigEndian.Uint64(blob))
- blob = blob[8:]
- }
- return arr, nil
-}
-
-func (r *reader) readJSON(data interface{}, sec *simpleSection) error {
- blob, err := r.r.Read(sec.off, sec.sz)
- if err != nil {
- return err
- }
-
- return json.Unmarshal(blob, data)
-}
-
-func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) {
- d := indexData{
- file: r.r,
- ngrams: map[ngram]simpleSection{},
- fileNameNgrams: map[ngram][]uint32{},
- branchIDs: map[string]uint{},
- branchNames: map[uint]string{},
- }
-
- blob, err := d.readSectionBlob(toc.metaData)
- if err != nil {
- return nil, err
- }
-
- if err := json.Unmarshal(blob, &d.metaData); err != nil {
- return nil, err
- }
-
- if d.metaData.IndexFormatVersion != IndexFormatVersion {
- return nil, fmt.Errorf("file is v%d, want v%d", d.metaData.IndexFormatVersion, IndexFormatVersion)
- }
-
- if d.metaData.IndexFeatureVersion < ReadMinFeatureVersion {
- return nil, fmt.Errorf("file is feature version %d, want feature version >= %d", d.metaData.IndexFeatureVersion, ReadMinFeatureVersion)
- }
-
- if d.metaData.IndexMinReaderVersion > FeatureVersion {
- return nil, fmt.Errorf("file needs read feature version >= %d, have read feature version %d", d.metaData.IndexMinReaderVersion, FeatureVersion)
- }
-
- blob, err = d.readSectionBlob(toc.repoMetaData)
- if err != nil {
- return nil, err
- }
- if err := json.Unmarshal(blob, &d.repoMetaData); err != nil {
- return nil, err
- }
-
- d.boundariesStart = toc.fileContents.data.off
- d.boundaries = toc.fileContents.relativeIndex()
- d.newlinesStart = toc.newlines.data.off
- d.newlinesIndex = toc.newlines.relativeIndex()
- d.docSectionsStart = toc.fileSections.data.off
- d.docSectionsIndex = toc.fileSections.relativeIndex()
-
- d.checksums, err = d.readSectionBlob(toc.contentChecksums)
- if err != nil {
- return nil, err
- }
-
- d.languages, err = d.readSectionBlob(toc.languages)
- if err != nil {
- return nil, err
- }
-
- d.ngrams, err = d.readNgrams(toc)
- if err != nil {
- return nil, err
- }
-
- d.fileBranchMasks, err = readSectionU64(d.file, toc.branchMasks)
- if err != nil {
- return nil, err
- }
-
- d.fileNameContent, err = d.readSectionBlob(toc.fileNames.data)
- if err != nil {
- return nil, err
- }
-
- d.fileNameIndex = toc.fileNames.relativeIndex()
-
- d.fileNameNgrams, err = d.readFileNameNgrams(toc)
- if err != nil {
- return nil, err
- }
-
- for j, br := range d.repoMetaData.Branches {
- id := uint(1) << uint(j)
- d.branchIDs[br.Name] = id
- d.branchNames[id] = br.Name
- }
-
- blob, err = d.readSectionBlob(toc.runeDocSections)
- if err != nil {
- return nil, err
- }
- d.runeDocSections = unmarshalDocSections(blob, nil)
-
- for sect, dest := range map[simpleSection]*[]uint32{
- toc.subRepos: &d.subRepos,
- toc.runeOffsets: &d.runeOffsets,
- toc.nameRuneOffsets: &d.fileNameRuneOffsets,
- toc.nameEndRunes: &d.fileNameEndRunes,
- toc.fileEndRunes: &d.fileEndRunes,
- } {
- if blob, err := d.readSectionBlob(sect); err != nil {
- return nil, err
- } else {
- *dest = fromSizedDeltas(blob, nil)
- }
- }
-
- keys := []string{""}
- for k := range d.repoMetaData.SubRepoMap {
- if k != "" { // we used to marshal "" in SubRepoMap. Prevent adding twice.
- keys = append(keys, k)
- }
- }
- sort.Strings(keys)
- d.subRepoPaths = keys
-
- d.languageMap = map[byte]string{}
- for k, v := range d.metaData.LanguageMap {
- d.languageMap[v] = k
- }
-
- if err := d.verify(); err != nil {
- return nil, err
- }
-
- d.calculateStats()
- return &d, nil
-}
-
-const ngramEncoding = 8
-
-func (d *indexData) readNgrams(toc *indexTOC) (map[ngram]simpleSection, error) {
- textContent, err := d.readSectionBlob(toc.ngramText)
- if err != nil {
- return nil, err
- }
- postingsIndex := toc.postings.relativeIndex()
-
- ngrams := make(map[ngram]simpleSection, len(textContent)/ngramEncoding)
- for i := 0; i < len(textContent); i += ngramEncoding {
- j := i / ngramEncoding
- ng := ngram(binary.BigEndian.Uint64(textContent[i : i+ngramEncoding]))
- ngrams[ng] = simpleSection{
- toc.postings.data.off + postingsIndex[j],
- postingsIndex[j+1] - postingsIndex[j],
- }
- }
-
- return ngrams, nil
-}
-
-func (d *indexData) readFileNameNgrams(toc *indexTOC) (map[ngram][]uint32, error) {
- nameNgramText, err := d.readSectionBlob(toc.nameNgramText)
- if err != nil {
- return nil, err
- }
-
- fileNamePostingsData, err := d.readSectionBlob(toc.namePostings.data)
- if err != nil {
- return nil, err
- }
-
- fileNamePostingsIndex := toc.namePostings.relativeIndex()
-
- fileNameNgrams := make(map[ngram][]uint32, len(nameNgramText)/ngramEncoding)
- for i := 0; i < len(nameNgramText); i += ngramEncoding {
- j := i / ngramEncoding
- off := fileNamePostingsIndex[j]
- end := fileNamePostingsIndex[j+1]
- ng := ngram(binary.BigEndian.Uint64(nameNgramText[i : i+ngramEncoding]))
- fileNameNgrams[ng] = fromDeltas(fileNamePostingsData[off:end], nil)
- }
-
- return fileNameNgrams, nil
-}
-
-func (d *indexData) verify() error {
- // This is not an exhaustive check: the postings can easily
- // generate OOB acccesses, and are expensive to check, but this lets us rule out
- // other sources of OOB access.
- n := len(d.fileNameIndex)
- if n == 0 {
- return nil
- }
-
- n--
- for what, got := range map[string]int{
- "boundaries": len(d.boundaries) - 1,
- "branch masks": len(d.fileBranchMasks),
- "doc section index": len(d.docSectionsIndex) - 1,
- "newlines index": len(d.newlinesIndex) - 1,
- } {
- if got != n {
- return fmt.Errorf("got %s %d, want %d", what, got, n)
- }
- }
- return nil
-}
-
-func (d *indexData) readContents(i uint32) ([]byte, error) {
- return d.readSectionBlob(simpleSection{
- off: d.boundariesStart + d.boundaries[i],
- sz: d.boundaries[i+1] - d.boundaries[i],
- })
-}
-
-func (d *indexData) readContentSlice(off uint32, sz uint32) ([]byte, error) {
- // TODO(hanwen): cap result if it is at the end of the content
- // section.
- return d.readSectionBlob(simpleSection{
- off: d.boundariesStart + off,
- sz: sz,
- })
-}
-
-func (d *indexData) readNewlines(i uint32, buf []uint32) ([]uint32, uint32, error) {
- sec := simpleSection{
- off: d.newlinesStart + d.newlinesIndex[i],
- sz: d.newlinesIndex[i+1] - d.newlinesIndex[i],
- }
- blob, err := d.readSectionBlob(sec)
- if err != nil {
- return nil, 0, err
- }
-
- return fromSizedDeltas(blob, buf), sec.sz, nil
-}
-
-func (d *indexData) readDocSections(i uint32, buf []DocumentSection) ([]DocumentSection, uint32, error) {
- sec := simpleSection{
- off: d.docSectionsStart + d.docSectionsIndex[i],
- sz: d.docSectionsIndex[i+1] - d.docSectionsIndex[i],
- }
- blob, err := d.readSectionBlob(sec)
- if err != nil {
- return nil, 0, err
- }
-
- return unmarshalDocSections(blob, buf), sec.sz, nil
-}
-
-// NewSearcher creates a Searcher for a single index file. Search
-// results coming from this searcher are valid only for the lifetime
-// of the Searcher itself, ie. []byte members should be copied into
-// fresh buffers if the result is to survive closing the shard.
-func NewSearcher(r IndexFile) (Searcher, error) {
- rd := &reader{r: r}
-
- var toc indexTOC
- if err := rd.readTOC(&toc); err != nil {
- return nil, err
- }
- indexData, err := rd.readIndexData(&toc)
- if err != nil {
- return nil, err
- }
- indexData.file = r
- return indexData, nil
-}
-
-// ReadMetadata returns the metadata of index shard without reading
-// the index data. The IndexFile is not closed.
-func ReadMetadata(inf IndexFile) (*Repository, *IndexMetadata, error) {
- rd := &reader{r: inf}
- var toc indexTOC
- if err := rd.readTOC(&toc); err != nil {
- return nil, nil, err
- }
-
- var md IndexMetadata
- if err := rd.readJSON(&md, &toc.metaData); err != nil {
- return nil, nil, err
- }
-
- var repo Repository
- if err := rd.readJSON(&repo, &toc.repoMetaData); err != nil {
- return nil, nil, err
- }
-
- return &repo, &md, nil
-}
diff --git a/read_test.go b/read_test.go
deleted file mode 100644
index b6b57cd..0000000
--- a/read_test.go
+++ /dev/null
@@ -1,178 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "bytes"
- "flag"
- "fmt"
- "io/fs"
- "os"
- "path"
- "reflect"
- "testing"
-)
-
-var update = flag.Bool("update", false, "update the golden files of this test")
-
-func TestReadWrite(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- if err := b.AddFile("filename", []byte("abcde")); err != nil {
- t.Fatalf("AddFile: %v", err)
- }
-
- var buf bytes.Buffer
- b.Write(&buf)
- f := &memSeeker{buf.Bytes()}
-
- r := reader{r: f}
-
- var toc indexTOC
- err = r.readTOC(&toc)
-
- if err != nil {
- t.Errorf("got read error %v", err)
- }
- if toc.fileContents.data.sz != 5 {
- t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
- }
-
- data, err := r.readIndexData(&toc)
- if err != nil {
- t.Fatalf("readIndexData: %v", err)
- }
- if got := data.fileName(0); string(got) != "filename" {
- t.Errorf("got filename %q, want %q", got, "filename")
- }
-
- if len(data.ngrams) != 3 {
- t.Fatalf("got ngrams %v, want 3 ngrams", data.ngrams)
- }
-
- if _, ok := data.ngrams[stringToNGram("bcq")]; ok {
- t.Errorf("found ngram bcd in %v", data.ngrams)
- }
-}
-
-func TestReadWriteNames(t *testing.T) {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- if err := b.AddFile("abCd", []byte("")); err != nil {
- t.Fatalf("AddFile: %v", err)
- }
-
- var buf bytes.Buffer
- b.Write(&buf)
- f := &memSeeker{buf.Bytes()}
-
- r := reader{r: f}
-
- var toc indexTOC
- if err := r.readTOC(&toc); err != nil {
- t.Errorf("got read error %v", err)
- }
- if toc.fileNames.data.sz != 4 {
- t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
- }
-
- data, err := r.readIndexData(&toc)
- if err != nil {
- t.Fatalf("readIndexData: %v", err)
- }
- if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
- t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
- }
- if got := data.fileNameNgrams[stringToNGram("bCd")]; !reflect.DeepEqual(got, []uint32{1}) {
- t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
- }
-}
-
-func TestBackwardsCompat(t *testing.T) {
- if *update {
- b, err := NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- if err := b.AddFile("filename", []byte("abcde")); err != nil {
- t.Fatalf("AddFile: %v", err)
- }
-
- var buf bytes.Buffer
- b.Write(&buf)
-
- outname := fmt.Sprintf("testdata/backcompat/new_v%d.%05d.zoekt", IndexFormatVersion, 0)
- t.Log("writing new file", outname)
-
- err = os.WriteFile(outname, buf.Bytes(), 0644)
- if err != nil {
- t.Fatalf("Creating output file: %v", err)
- }
- }
-
- compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
- if err != nil {
- t.Fatalf("fs.Glob: %v", err)
- }
-
- for _, fname := range compatibleFiles {
- t.Run(path.Base(fname),
- func(t *testing.T) {
- f, err := os.Open(fname)
- if err != nil {
- t.Fatal("os.Open", err)
- }
- idx, err := NewIndexFile(f)
- if err != nil {
- t.Fatal("NewIndexFile", err)
- }
- r := reader{r: idx}
-
- var toc indexTOC
- err = r.readTOC(&toc)
-
- if err != nil {
- t.Errorf("got read error %v", err)
- }
- if toc.fileContents.data.sz != 5 {
- t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
- }
-
- data, err := r.readIndexData(&toc)
- if err != nil {
- t.Fatalf("readIndexData: %v", err)
- }
- if got := data.fileName(0); string(got) != "filename" {
- t.Errorf("got filename %q, want %q", got, "filename")
- }
-
- if len(data.ngrams) != 3 {
- t.Fatalf("got ngrams %v, want 3 ngrams", data.ngrams)
- }
-
- if _, ok := data.ngrams[stringToNGram("bcq")]; ok {
- t.Errorf("found ngram bcd in %v", data.ngrams)
- }
- },
- )
- }
-}
diff --git a/section.go b/section.go
deleted file mode 100644
index 90e4e94..0000000
--- a/section.go
+++ /dev/null
@@ -1,183 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "encoding/binary"
- "io"
- "log"
-)
-
-var _ = log.Println
-
-// writer is an io.Writer that keeps track of errors and offsets
-type writer struct {
- err error
- w io.Writer
- off uint32
-}
-
-func (w *writer) Write(b []byte) error {
- if w.err != nil {
- return w.err
- }
-
- var n int
- n, w.err = w.w.Write(b)
- w.off += uint32(n)
- return w.err
-}
-
-func (w *writer) Off() uint32 { return w.off }
-
-func (w *writer) B(b byte) {
- s := []byte{b}
- w.Write(s)
-}
-
-func (w *writer) U32(n uint32) {
- var enc [4]byte
- binary.BigEndian.PutUint32(enc[:], n)
- w.Write(enc[:])
-}
-
-func (w *writer) U64(n uint64) {
- var enc [8]byte
- binary.BigEndian.PutUint64(enc[:], n)
- w.Write(enc[:])
-}
-
-func (w *writer) Varint(n uint32) {
- var enc [8]byte
- m := binary.PutUvarint(enc[:], uint64(n))
- w.Write(enc[:m])
-}
-
-func (w *writer) String(s string) {
- b := []byte(s)
- w.Varint(uint32(len(b)))
- w.Write(b)
-}
-
-func (s *simpleSection) start(w *writer) {
- s.off = w.Off()
-}
-
-func (s *simpleSection) end(w *writer) {
- s.sz = w.Off() - s.off
-}
-
-// section is a range of bytes in the index file.
-type section interface {
- read(*reader) error
- write(*writer)
- kind() sectionKind // simple or complex, used in serialization
-}
-
-type sectionKind int
-
-const (
- sectionKindSimple sectionKind = 0
- sectionKindComplex sectionKind = 1
-)
-
-// simpleSection is a simple range of bytes.
-type simpleSection struct {
- off uint32
- sz uint32
-}
-
-func (s *simpleSection) kind() sectionKind {
- return sectionKindSimple
-}
-
-func (s *simpleSection) read(r *reader) error {
- var err error
- s.off, err = r.U32()
- if err != nil {
- return err
- }
- s.sz, err = r.U32()
- if err != nil {
- return err
- }
- return nil
-}
-
-func (s *simpleSection) write(w *writer) {
- w.U32(s.off)
- w.U32(s.sz)
-}
-
-// compoundSection is a range of bytes containg a list of variable
-// sized items.
-type compoundSection struct {
- data simpleSection
-
- offsets []uint32
- index simpleSection
-}
-
-func (s *compoundSection) kind() sectionKind {
- return sectionKindComplex
-}
-
-func (s *compoundSection) start(w *writer) {
- s.data.start(w)
-}
-
-func (s *compoundSection) end(w *writer) {
- s.data.end(w)
- s.index.start(w)
- for _, o := range s.offsets {
- w.U32(o)
- }
- s.index.end(w)
-}
-
-func (s *compoundSection) addItem(w *writer, item []byte) {
- s.offsets = append(s.offsets, w.Off())
- w.Write(item)
-}
-
-func (s *compoundSection) write(w *writer) {
- s.data.write(w)
- s.index.write(w)
-}
-
-func (s *compoundSection) read(r *reader) error {
- if err := s.data.read(r); err != nil {
- return err
- }
- if err := s.index.read(r); err != nil {
- return err
- }
- var err error
- s.offsets, err = readSectionU32(r.r, s.index)
- return err
-}
-
-// relativeIndex returns the relative offsets of the items (first
-// element is 0), plus a final marking the end of the last item.
-func (s *compoundSection) relativeIndex() []uint32 {
- ri := make([]uint32, 0, len(s.offsets)+1)
- for _, o := range s.offsets {
- ri = append(ri, o-s.offsets[0])
- }
- if len(s.offsets) > 0 {
- ri = append(ri, s.data.sz)
- }
- return ri
-}
diff --git a/section_test.go b/section_test.go
deleted file mode 100644
index 19ed111..0000000
--- a/section_test.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "reflect"
- "testing"
-)
-
-func TestDeltas(t *testing.T) {
- in := []uint32{1, 72, 0xfff}
- out := toSizedDeltas(in)
- round := fromSizedDeltas(out, nil)
- if !reflect.DeepEqual(in, round) {
- t.Errorf("got %v, want %v", round, in)
- }
-}
diff --git a/shards/shards.go b/shards/shards.go
deleted file mode 100644
index 41afe40..0000000
--- a/shards/shards.go
+++ /dev/null
@@ -1,548 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package shards
-
-import (
- "context"
- "fmt"
- "log"
- "os"
- "runtime"
- "runtime/debug"
- "sort"
- "time"
-
- "golang.org/x/net/trace"
- "golang.org/x/sync/semaphore"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/query"
- "github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/client_golang/prometheus/promauto"
-)
-
-var (
- metricShardsLoaded = promauto.NewGauge(prometheus.GaugeOpts{
- Name: "zoekt_shards_loaded",
- Help: "The number of shards currently loaded",
- })
- metricShardsLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_shards_loaded_total",
- Help: "The total number of shards loaded",
- })
- metricShardsLoadFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_shards_load_failed_total",
- Help: "The total number of shard loads that failed",
- })
-
- metricSearchRunning = promauto.NewGauge(prometheus.GaugeOpts{
- Name: "zoekt_search_running",
- Help: "The number of concurrent search requests running",
- })
- metricSearchShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
- Name: "zoekt_search_shard_running",
- Help: "The number of concurrent search requests in a shard running",
- })
- metricSearchFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_failed_total",
- Help: "The total number of search requests that failed",
- })
- metricSearchDuration = promauto.NewHistogram(prometheus.HistogramOpts{
- Name: "zoekt_search_duration_seconds",
- Help: "The duration a search request took in seconds",
- Buckets: prometheus.DefBuckets, // DefBuckets good for service timings
- })
-
- // A Counter per Stat. Name should match field in zoekt.Stats.
- metricSearchContentBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_content_loaded_bytes_total",
- Help: "Total amount of I/O for reading contents",
- })
- metricSearchIndexBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_index_loaded_bytes_total",
- Help: "Total amount of I/O for reading from index",
- })
- metricSearchCrashesTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_crashes_total",
- Help: "Total number of search shards that had a crash",
- })
- metricSearchFileCountTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_file_count_total",
- Help: "Total number of files containing a match",
- })
- metricSearchShardFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_shard_files_considered_total",
- Help: "Total number of files in shards that we considered",
- })
- metricSearchFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_files_considered_total",
- Help: "Total files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true",
- })
- metricSearchFilesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_files_loaded_total",
- Help: "Total files for which we loaded file content to verify substring matches",
- })
- metricSearchFilesSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_files_skipped_total",
- Help: "Total candidate files whose contents weren't examined because we gathered enough matches",
- })
- metricSearchShardsSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_shards_skipped_total",
- Help: "Total shards that we did not process because a query was canceled",
- })
- metricSearchMatchCountTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_match_count_total",
- Help: "Total number of non-overlapping matches",
- })
- metricSearchNgramMatchesTotal = promauto.NewCounter(prometheus.CounterOpts{
- Name: "zoekt_search_ngram_matches_total",
- Help: "Total number of candidate matches as a result of searching ngrams",
- })
-
- metricListRunning = promauto.NewGauge(prometheus.GaugeOpts{
- Name: "zoekt_list_running",
- Help: "The number of concurrent list requests running",
- })
- metricListShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
- Name: "zoekt_list_shard_running",
- Help: "The number of concurrent list requests in a shard running",
- })
-)
-
-type rankedShard struct {
- zoekt.Searcher
- rank uint16
-}
-
-type shardedSearcher struct {
- // Limit the number of parallel queries. Since searching is
- // CPU bound, we can't do better than #CPU queries in
- // parallel. If we do so, we just create more memory
- // pressure.
- throttle *semaphore.Weighted
- capacity int64
-
- shards map[string]rankedShard
-
- rankedVersion uint64
- ranked []rankedShard
-}
-
-func newShardedSearcher(n int64) *shardedSearcher {
- ss := &shardedSearcher{
- shards: make(map[string]rankedShard),
- throttle: semaphore.NewWeighted(n),
- capacity: n,
- }
- return ss
-}
-
-// NewDirectorySearcher returns a searcher instance that loads all
-// shards corresponding to a glob into memory.
-func NewDirectorySearcher(dir string) (zoekt.Searcher, error) {
- ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
- tl := &loader{
- ss: ss,
- }
- dw, err := NewDirectoryWatcher(dir, tl)
- if err != nil {
- return nil, err
- }
-
- return &directorySearcher{
- Searcher: ss,
- directoryWatcher: dw,
- }, nil
-}
-
-type directorySearcher struct {
- zoekt.Searcher
-
- directoryWatcher *DirectoryWatcher
-}
-
-func (s *directorySearcher) Close() {
- // We need to Stop directoryWatcher first since it calls load/unload on
- // Searcher.
- s.directoryWatcher.Stop()
- s.Searcher.Close()
-}
-
-type loader struct {
- ss *shardedSearcher
-}
-
-func (tl *loader) load(key string) {
- shard, err := loadShard(key)
- if err != nil {
- metricShardsLoadFailedTotal.Inc()
- log.Printf("reloading: %s, err %v ", key, err)
- return
- }
-
- metricShardsLoadedTotal.Inc()
- tl.ss.replace(key, shard)
-}
-
-func (tl *loader) drop(key string) {
- tl.ss.replace(key, nil)
-}
-
-func (ss *shardedSearcher) String() string {
- return "shardedSearcher"
-}
-
-// Close closes references to open files. It may be called only once.
-func (ss *shardedSearcher) Close() {
- ss.lock()
- defer ss.unlock()
- for _, s := range ss.shards {
- s.Close()
- }
- ss.shards = make(map[string]rankedShard)
-}
-
-func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
- tr := trace.New("shardedSearcher.Search", "")
- tr.LazyLog(q, true)
- tr.LazyPrintf("opts: %+v", opts)
- overallStart := time.Now()
- metricSearchRunning.Inc()
- defer func() {
- metricSearchRunning.Dec()
- metricSearchDuration.Observe(time.Since(overallStart).Seconds())
- if sr != nil {
- metricSearchContentBytesLoadedTotal.Add(float64(sr.Stats.ContentBytesLoaded))
- metricSearchIndexBytesLoadedTotal.Add(float64(sr.Stats.IndexBytesLoaded))
- metricSearchCrashesTotal.Add(float64(sr.Stats.Crashes))
- metricSearchFileCountTotal.Add(float64(sr.Stats.FileCount))
- metricSearchShardFilesConsideredTotal.Add(float64(sr.Stats.ShardFilesConsidered))
- metricSearchFilesConsideredTotal.Add(float64(sr.Stats.FilesConsidered))
- metricSearchFilesLoadedTotal.Add(float64(sr.Stats.FilesLoaded))
- metricSearchFilesSkippedTotal.Add(float64(sr.Stats.FilesSkipped))
- metricSearchShardsSkippedTotal.Add(float64(sr.Stats.ShardsSkipped))
- metricSearchMatchCountTotal.Add(float64(sr.Stats.MatchCount))
- metricSearchNgramMatchesTotal.Add(float64(sr.Stats.NgramMatches))
-
- tr.LazyPrintf("num files: %d", len(sr.Files))
- tr.LazyPrintf("stats: %+v", sr.Stats)
- }
- if err != nil {
- metricSearchFailedTotal.Inc()
-
- tr.LazyPrintf("error: %v", err)
- tr.SetError()
- }
- tr.Finish()
- }()
-
- start := time.Now()
-
- aggregate := &zoekt.SearchResult{
- RepoURLs: map[string]string{},
- LineFragments: map[string]string{},
- }
-
- // This critical section is large, but we don't want to deal with
- // searches on shards that have just been closed.
- if err := ss.rlock(ctx); err != nil {
- return aggregate, err
- }
- defer ss.runlock()
- tr.LazyPrintf("acquired lock")
- aggregate.Wait = time.Since(start)
- start = time.Now()
-
- shards := ss.getShards()
- all := make(chan shardResult, len(shards))
-
- var childCtx context.Context
- var cancel context.CancelFunc
- if opts.MaxWallTime == 0 {
- childCtx, cancel = context.WithCancel(ctx)
- } else {
- childCtx, cancel = context.WithTimeout(ctx, opts.MaxWallTime)
- }
-
- defer cancel()
-
- // For each query, throttle the number of parallel
- // actions. Since searching is mostly CPU bound, we limit the
- // number of parallel searches. This reduces the peak working
- // set, which hopefully stops https://cs.bazel.build from crashing
- // when looking for the string "com".
- feeder := make(chan zoekt.Searcher, len(shards))
- for _, s := range shards {
- feeder <- s
- }
- close(feeder)
- for i := 0; i < runtime.GOMAXPROCS(0); i++ {
- go func() {
- for s := range feeder {
- searchOneShard(childCtx, s, q, opts, all)
- }
- }()
- }
-
- for range shards {
- r := <-all
- if r.err != nil {
- return nil, r.err
- }
- aggregate.Files = append(aggregate.Files, r.sr.Files...)
- aggregate.Stats.Add(r.sr.Stats)
-
- if len(r.sr.Files) > 0 {
- for k, v := range r.sr.RepoURLs {
- aggregate.RepoURLs[k] = v
- }
- for k, v := range r.sr.LineFragments {
- aggregate.LineFragments[k] = v
- }
- }
-
- if cancel != nil && opts.TotalMaxMatchCount > 0 && aggregate.Stats.MatchCount > opts.TotalMaxMatchCount {
- cancel()
- cancel = nil
- }
- }
-
- zoekt.SortFilesByScore(aggregate.Files)
- if max := opts.MaxDocDisplayCount; max > 0 && len(aggregate.Files) > max {
- aggregate.Files = aggregate.Files[:max]
- }
- for i := range aggregate.Files {
- copySlice(&aggregate.Files[i].Content)
- copySlice(&aggregate.Files[i].Checksum)
- for l := range aggregate.Files[i].LineMatches {
- copySlice(&aggregate.Files[i].LineMatches[l].Line)
- }
- }
-
- aggregate.Duration = time.Since(start)
- return aggregate, nil
-}
-
-func copySlice(src *[]byte) {
- dst := make([]byte, len(*src))
- copy(dst, *src)
- *src = dst
-}
-
-type shardResult struct {
- sr *zoekt.SearchResult
- err error
-}
-
-func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions, sink chan shardResult) {
- metricSearchShardRunning.Inc()
- defer func() {
- metricSearchShardRunning.Dec()
- if r := recover(); r != nil {
- log.Printf("crashed shard: %s: %s, %s", s.String(), r, debug.Stack())
-
- var r zoekt.SearchResult
- r.Stats.Crashes = 1
- sink <- shardResult{&r, nil}
- }
- }()
-
- ms, err := s.Search(ctx, q, opts)
- sink <- shardResult{ms, err}
-}
-
-func (ss *shardedSearcher) List(ctx context.Context, r query.Q) (rl *zoekt.RepoList, err error) {
- tr := trace.New("shardedSearcher.List", "")
- tr.LazyLog(r, true)
- metricListRunning.Inc()
- defer func() {
- metricListRunning.Dec()
- if rl != nil {
- tr.LazyPrintf("repos size: %d", len(rl.Repos))
- tr.LazyPrintf("crashes: %d", rl.Crashes)
- }
- if err != nil {
- tr.LazyPrintf("error: %v", err)
- tr.SetError()
- }
- tr.Finish()
- }()
-
- type res struct {
- rl *zoekt.RepoList
- err error
- }
-
- if err := ss.rlock(ctx); err != nil {
- return nil, err
- }
- defer ss.runlock()
- tr.LazyPrintf("acquired lock")
-
- shards := ss.getShards()
- shardCount := len(shards)
- all := make(chan res, shardCount)
- tr.LazyPrintf("shardCount: %d", len(shards))
-
- for _, s := range shards {
- go func(s zoekt.Searcher) {
- metricListShardRunning.Inc()
- defer func() {
- metricListShardRunning.Dec()
- if r := recover(); r != nil {
- all <- res{
- &zoekt.RepoList{Crashes: 1}, nil,
- }
- }
- }()
- ms, err := s.List(ctx, r)
- all <- res{ms, err}
- }(s.Searcher)
- }
-
- crashes := 0
- uniq := map[string]*zoekt.RepoListEntry{}
-
- for i := 0; i < shardCount; i++ {
- r := <-all
- if r.err != nil {
- return nil, r.err
- }
- crashes += r.rl.Crashes
- for _, r := range r.rl.Repos {
- prev, ok := uniq[r.Repository.Name]
- if !ok {
- cp := *r
- uniq[r.Repository.Name] = &cp
- } else {
- prev.Stats.Add(&r.Stats)
- }
- }
- }
-
- aggregate := make([]*zoekt.RepoListEntry, 0, len(uniq))
- for _, v := range uniq {
- aggregate = append(aggregate, v)
- }
- return &zoekt.RepoList{
- Repos: aggregate,
- Crashes: crashes,
- }, nil
-}
-
-func (s *shardedSearcher) rlock(ctx context.Context) error {
- return s.throttle.Acquire(ctx, 1)
-}
-
-// getShards returns the currently loaded shards. The shards must be accessed
-// under a rlock call. The shards are sorted by decreasing rank and should not
-// be mutated.
-func (s *shardedSearcher) getShards() []rankedShard {
- if len(s.ranked) > 0 {
- return s.ranked
- }
-
- var res []rankedShard
- for _, sh := range s.shards {
- res = append(res, sh)
- }
- sort.Slice(res, func(i, j int) bool {
- return res[i].rank > res[j].rank
- })
-
- // Cache ranked. We currently hold a read lock, so start a goroutine which
- // acquires a write lock to update. Use requiredVersion to ensure our
- // cached slice is still current after acquiring the write lock.
- go func(ranked []rankedShard, requiredVersion uint64) {
- s.lock()
- if s.rankedVersion == requiredVersion {
- s.ranked = ranked
- }
- s.unlock()
- }(res, s.rankedVersion)
-
- return res
-}
-
-func (s *shardedSearcher) runlock() {
- s.throttle.Release(1)
-}
-
-func (s *shardedSearcher) lock() {
- // won't error since context.Background won't expire
- _ = s.throttle.Acquire(context.Background(), s.capacity)
-}
-
-func (s *shardedSearcher) unlock() {
- s.throttle.Release(s.capacity)
-}
-
-func shardRank(s zoekt.Searcher) uint16 {
- q := query.Repo{}
- result, err := s.List(context.Background(), &q)
- if err != nil {
- return 0
- }
- if len(result.Repos) == 0 {
- return 0
- }
- return result.Repos[0].Repository.Rank
-}
-
-func (s *shardedSearcher) replace(key string, shard zoekt.Searcher) {
- var rank uint16
- if shard != nil {
- rank = shardRank(shard)
- }
-
- s.lock()
- defer s.unlock()
- old := s.shards[key]
- if old.Searcher != nil {
- old.Close()
- }
-
- if shard == nil {
- delete(s.shards, key)
- } else {
- s.shards[key] = rankedShard{
- rank: rank,
- Searcher: shard,
- }
- }
- s.rankedVersion++
- s.ranked = nil
-
- metricShardsLoaded.Set(float64(len(s.shards)))
-}
-
-func loadShard(fn string) (zoekt.Searcher, error) {
- f, err := os.Open(fn)
- if err != nil {
- return nil, err
- }
-
- iFile, err := zoekt.NewIndexFile(f)
- if err != nil {
- return nil, err
- }
- s, err := zoekt.NewSearcher(iFile)
- if err != nil {
- iFile.Close()
- return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
- }
-
- return s, nil
-}
diff --git a/shards/shards_test.go b/shards/shards_test.go
deleted file mode 100644
index 3df2025..0000000
--- a/shards/shards_test.go
+++ /dev/null
@@ -1,228 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package shards
-
-import (
- "bytes"
- "context"
- "fmt"
- "log"
- "os"
- "runtime"
- "testing"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/query"
-)
-
-type crashSearcher struct{}
-
-func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
- panic("search")
-}
-
-func (s *crashSearcher) List(ctx context.Context, q query.Q) (*zoekt.RepoList, error) {
- panic("list")
-}
-
-func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
- return &zoekt.RepoStats{}, nil
-}
-
-func (s *crashSearcher) Close() {}
-
-func (s *crashSearcher) String() string { return "crashSearcher" }
-
-func TestCrashResilience(t *testing.T) {
- out := &bytes.Buffer{}
- log.SetOutput(out)
- defer log.SetOutput(os.Stderr)
- ss := newShardedSearcher(2)
- ss.shards = map[string]rankedShard{
- "x": {Searcher: &crashSearcher{}},
- }
-
- q := &query.Substring{Pattern: "hoi"}
- opts := &zoekt.SearchOptions{}
- if res, err := ss.Search(context.Background(), q, opts); err != nil {
- t.Fatalf("Search: %v", err)
- } else if res.Stats.Crashes != 1 {
- t.Errorf("got stats %#v, want crashes = 1", res.Stats)
- }
-
- if res, err := ss.List(context.Background(), q); err != nil {
- t.Fatalf("List: %v", err)
- } else if res.Crashes != 1 {
- t.Errorf("got result %#v, want crashes = 1", res)
- }
-}
-
-type rankSearcher struct {
- rank uint16
-}
-
-func (s *rankSearcher) Close() {
-}
-
-func (s *rankSearcher) String() string {
- return ""
-}
-
-func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
- select {
- case <-ctx.Done():
- return &zoekt.SearchResult{}, nil
- default:
- }
-
- // Ugly, but without sleep it's too fast, and we can't
- // simulate the cutoff.
- time.Sleep(time.Millisecond)
- return &zoekt.SearchResult{
- Files: []zoekt.FileMatch{
- {
- FileName: fmt.Sprintf("f%d", s.rank),
- Score: float64(s.rank),
- },
- },
- Stats: zoekt.Stats{
- MatchCount: 1,
- },
- }, nil
-}
-
-func (s *rankSearcher) List(ctx context.Context, q query.Q) (*zoekt.RepoList, error) {
- return &zoekt.RepoList{
- Repos: []*zoekt.RepoListEntry{
- {Repository: zoekt.Repository{Rank: s.rank}},
- },
- }, nil
-}
-
-func TestOrderByShard(t *testing.T) {
- ss := newShardedSearcher(1)
-
- n := 10 * runtime.GOMAXPROCS(0)
- for i := 0; i < n; i++ {
- ss.replace(fmt.Sprintf("shard%d", i),
- &rankSearcher{
- rank: uint16(i),
- })
- }
-
- if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
- t.Errorf("Search: %v", err)
- } else if len(res.Files) != n {
- t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
- }
-
- opts := zoekt.SearchOptions{
- TotalMaxMatchCount: 3,
- }
- res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
- if err != nil {
- t.Errorf("Search: %v", err)
- }
-
- if len(res.Files) < opts.TotalMaxMatchCount {
- t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
- }
- if len(res.Files) == n {
- t.Errorf("got %d results, want < %d", len(res.Files), n)
- }
- for i, f := range res.Files {
- rev := n - 1 - i
- want := fmt.Sprintf("f%d", rev)
- got := f.FileName
-
- if got != want {
- t.Logf("%d: got %q, want %q", i, got, want)
- }
- }
-}
-
-type memSeeker struct {
- data []byte
-}
-
-func (s *memSeeker) Name() string {
- return "memseeker"
-}
-
-func (s *memSeeker) Close() {}
-func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
- return s.data[off : off+sz], nil
-}
-
-func (s *memSeeker) Size() (uint32, error) {
- return uint32(len(s.data)), nil
-}
-
-func TestUnloadIndex(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(nil)
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- for i, d := range []zoekt.Document{{
- Name: "filename",
- Content: []byte("needle needle needle"),
- }} {
- if err := b.Add(d); err != nil {
- t.Fatalf("Add %d: %v", i, err)
- }
- }
-
- var buf bytes.Buffer
- b.Write(&buf)
- indexBytes := buf.Bytes()
- indexFile := &memSeeker{indexBytes}
- searcher, err := zoekt.NewSearcher(indexFile)
- if err != nil {
- t.Fatalf("NewSearcher: %v", err)
- }
-
- ss := newShardedSearcher(2)
- ss.replace("key", searcher)
-
- var opts zoekt.SearchOptions
- q := &query.Substring{Pattern: "needle"}
- res, err := ss.Search(context.Background(), q, &opts)
- if err != nil {
- t.Fatalf("Search(%s): %v", q, err)
- }
-
- forbidden := byte(29)
- for i := range indexBytes {
- // non-ASCII
- indexBytes[i] = forbidden
- }
-
- for _, f := range res.Files {
- if bytes.Contains(f.Content, []byte{forbidden}) {
- t.Errorf("found %d in content %q", forbidden, f.Content)
- }
- if bytes.Contains(f.Checksum, []byte{forbidden}) {
- t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
- }
-
- for _, l := range f.LineMatches {
- if bytes.Contains(l.Line, []byte{forbidden}) {
- t.Errorf("found %d in line %q", forbidden, l.Line)
- }
- }
- }
-}
diff --git a/shards/watcher.go b/shards/watcher.go
deleted file mode 100644
index 62f2067..0000000
--- a/shards/watcher.go
+++ /dev/null
@@ -1,212 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package shards
-
-import (
- "fmt"
- "log"
- "os"
- "path/filepath"
- "runtime"
- "sort"
- "strings"
- "sync"
- "time"
-
- "github.com/fsnotify/fsnotify"
-)
-
-type shardLoader interface {
- // Load a new file. Should be safe for concurrent calls.
- load(filename string)
- drop(filename string)
-}
-
-type DirectoryWatcher struct {
- dir string
- timestamps map[string]time.Time
- loader shardLoader
-
- closeOnce sync.Once
- // quit is closed by Close to signal the directory watcher to stop.
- quit chan struct{}
- // stopped is closed once the directory watcher has stopped.
- stopped chan struct{}
-}
-
-func (sw *DirectoryWatcher) Stop() {
- sw.closeOnce.Do(func() {
- close(sw.quit)
- <-sw.stopped
- })
-}
-
-func NewDirectoryWatcher(dir string, loader shardLoader) (*DirectoryWatcher, error) {
- sw := &DirectoryWatcher{
- dir: dir,
- timestamps: map[string]time.Time{},
- loader: loader,
- quit: make(chan struct{}),
- stopped: make(chan struct{}),
- }
- if err := sw.scan(); err != nil {
- return nil, err
- }
-
- if err := sw.watch(); err != nil {
- return nil, err
- }
-
- return sw, nil
-}
-
-func (s *DirectoryWatcher) String() string {
- return fmt.Sprintf("shardWatcher(%s)", s.dir)
-}
-
-func (s *DirectoryWatcher) scan() error {
- fs, err := filepath.Glob(filepath.Join(s.dir, "*.zoekt"))
- if err != nil {
- return err
- }
-
- if len(s.timestamps) == 0 && len(fs) == 0 {
- return fmt.Errorf("directory %s is empty", s.dir)
- }
-
- ts := map[string]time.Time{}
- for _, fn := range fs {
- fi, err := os.Lstat(fn)
- if err != nil {
- continue
- }
-
- ts[fn] = fi.ModTime()
- }
-
- var toLoad []string
- for k, mtime := range ts {
- if t, ok := s.timestamps[k]; !ok || t != mtime {
- toLoad = append(toLoad, k)
- s.timestamps[k] = mtime
- }
- }
-
- var toDrop []string
- // Unload deleted shards.
- for k := range s.timestamps {
- if _, ok := ts[k]; !ok {
- toDrop = append(toDrop, k)
- delete(s.timestamps, k)
- }
- }
-
- if len(toDrop) > 0 {
- log.Printf("unloading %d shard(s)", len(toDrop))
- }
- for _, t := range toDrop {
- log.Printf("unloading: %s", filepath.Base(t))
- s.loader.drop(t)
- }
-
- if len(toLoad) == 0 {
- return nil
- }
-
- log.Printf("loading %d shard(s): %s", len(toLoad), humanTruncateList(toLoad, 5))
-
- // Limit amount of concurrent shard loads.
- throttle := make(chan struct{}, runtime.GOMAXPROCS(0))
- lastProgress := time.Now()
- for i, t := range toLoad {
- // If taking a while to start-up occasionally give a progress message
- if time.Since(lastProgress) > 10*time.Second {
- log.Printf("still need to load %d shards...", len(toLoad)-i)
- lastProgress = time.Now()
- }
-
- throttle <- struct{}{}
- go func(k string) {
- s.loader.load(k)
- <-throttle
- }(t)
- }
- for i := 0; i < cap(throttle); i++ {
- throttle <- struct{}{}
- }
-
- return nil
-}
-
-func humanTruncateList(paths []string, max int) string {
- sort.Strings(paths)
- var b strings.Builder
- for i, p := range paths {
- if i >= max {
- fmt.Fprintf(&b, "... %d more", len(paths)-i)
- break
- }
- if i > 0 {
- b.WriteString(", ")
- }
- b.WriteString(filepath.Base(p))
- }
- return b.String()
-}
-
-func (s *DirectoryWatcher) watch() error {
- watcher, err := fsnotify.NewWatcher()
- if err != nil {
- return err
- }
- if err := watcher.Add(s.dir); err != nil {
- return err
- }
-
- // intermediate signal channel so if there are multiple watcher.Events we
- // only call scan once.
- signal := make(chan struct{}, 1)
-
- go func() {
- for {
- select {
- case <-watcher.Events:
- select {
- case signal <- struct{}{}:
- default:
- }
- case err := <-watcher.Errors:
- // Ignore ErrEventOverflow since we rely on the presence of events so
- // safe to ignore.
- if err != nil && err != fsnotify.ErrEventOverflow {
- log.Println("watcher error:", err)
- }
- case <-s.quit:
- watcher.Close()
- close(signal)
- return
- }
- }
- }()
-
- go func() {
- defer close(s.stopped)
- for range signal {
- s.scan()
- }
- }()
-
- return nil
-}
diff --git a/shards/watcher_test.go b/shards/watcher_test.go
deleted file mode 100644
index 5aad02f..0000000
--- a/shards/watcher_test.go
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package shards
-
-import (
- "io/ioutil"
- "os"
- "path/filepath"
- "strings"
- "testing"
- "time"
-)
-
-type loggingLoader struct {
- loads chan string
- drops chan string
-}
-
-func (l *loggingLoader) load(k string) {
- l.loads <- k
-}
-
-func (l *loggingLoader) drop(k string) {
- l.drops <- k
-}
-
-func advanceFS() {
- time.Sleep(10 * time.Millisecond)
-}
-
-func TestDirWatcherUnloadOnce(t *testing.T) {
- dir, err := ioutil.TempDir("", "")
- if err != nil {
- t.Fatal(err)
- }
- defer os.RemoveAll(dir)
-
- logger := &loggingLoader{
- loads: make(chan string, 10),
- drops: make(chan string, 10),
- }
- _, err = NewDirectoryWatcher(dir, logger)
- if err == nil || !strings.Contains(err.Error(), "empty") {
- t.Fatalf("got %v, want 'empty'", err)
- }
-
- shard := filepath.Join(dir, "foo.zoekt")
- if err := ioutil.WriteFile(shard, []byte("hello"), 0o644); err != nil {
- t.Fatalf("WriteFile: %v", err)
- }
-
- dw, err := NewDirectoryWatcher(dir, logger)
- if err != nil {
- t.Fatalf("NewDirectoryWatcher: %v", err)
- }
- defer dw.Stop()
-
- if got := <-logger.loads; got != shard {
- t.Fatalf("got load event %v, want %v", got, shard)
- }
-
- // Must sleep because of FS timestamp resolution.
- advanceFS()
- if err := ioutil.WriteFile(shard, []byte("changed"), 0o644); err != nil {
- t.Fatalf("WriteFile: %v", err)
- }
-
- if got := <-logger.loads; got != shard {
- t.Fatalf("got load event %v, want %v", got, shard)
- }
-
- advanceFS()
- if err := os.Remove(shard); err != nil {
- t.Fatalf("Remove: %v", err)
- }
-
- if got := <-logger.drops; got != shard {
- t.Fatalf("got drops event %v, want %v", got, shard)
- }
-
- advanceFS()
- if err := ioutil.WriteFile(shard+".bla", []byte("changed"), 0o644); err != nil {
- t.Fatalf("WriteFile: %v", err)
- }
-
- dw.Stop()
-
- select {
- case k := <-logger.loads:
- t.Errorf("spurious load of %q", k)
- case k := <-logger.drops:
- t.Errorf("spurious drops of %q", k)
- default:
- }
-}
-
-func TestHumanTruncateList(t *testing.T) {
- paths := []string{
- "dir/1",
- "dir/2",
- "dir/3",
- "dir/4",
- }
-
- assert := func(max int, want string) {
- got := humanTruncateList(paths, max)
- if got != want {
- t.Errorf("unexpected humanTruncateList max=%d.\ngot: %s\nwant: %s", max, got, want)
- }
- }
-
- assert(1, "1... 3 more")
- assert(2, "1, 2... 2 more")
- assert(3, "1, 2, 3... 1 more")
- assert(4, "1, 2, 3, 4")
- assert(5, "1, 2, 3, 4")
-}
diff --git a/testdata/backcompat/static_toc_v15.00000.zoekt b/testdata/backcompat/static_toc_v15.00000.zoekt
deleted file mode 100644
index a070892..0000000
--- a/testdata/backcompat/static_toc_v15.00000.zoekt
+++ /dev/null
Binary files differ
diff --git a/toc.go b/toc.go
deleted file mode 100644
index 9eab283..0000000
--- a/toc.go
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2017 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-// FormatVersion is a version number. It is increased every time the
-// on-disk index format is changed.
-// 5: subrepositories.
-// 6: remove size prefix for posting varint list.
-// 7: move subrepos into Repository struct.
-// 8: move repoMetaData out of indexMetadata
-// 9: use bigendian uint64 for trigrams.
-// 10: sections for rune offsets.
-// 11: file ends in rune offsets.
-// 12: 64-bit branchmasks.
-// 13: content checksums
-// 14: languages
-// 15: rune based symbol sections
-// 16 (TBA): TODO: remove fallback parsing in readTOC
-const IndexFormatVersion = 15
-
-// FeatureVersion is increased if a feature is added that requires reindexing data
-// without changing the format version
-// 2: Rank field for shards.
-// 3: Rank documents within shards
-// 4: Dedup file bugfix
-// 5: Remove max line size limit
-// 6: Include '#' into the LineFragment template
-// 7: Record skip reasons in the index.
-// 8: Record source path in the index.
-// 9: Bump default max file size.
-// 10: Switch to a more flexible TOC format.
-const FeatureVersion = 10
-
-// WriteMinFeatureVersion and ReadMinFeatureVersion constrain forwards and backwards
-// compatibility. For example, if a new way to encode filenameNgrams on disk is
-// added using a new section but the old one is retained, this would only bump
-// FeatureVersion, since the previous version can read the file and ignore the
-// new section, but the index files should be regenerated.
-// When the new encoding is fully rolled out and stable, the section with the old
-// encoding and the associated reader can be removed, and WriteMinFeatureVersion and
-// ReadMinFeatureVersion can be set to the current FeatureVersion, indicating
-// that the reader must handle the new version and that older versions are no
-// longer valid.
-// In this way, compatibility with arbitrary version offsets can be indicated.
-
-// WriteMinFeatureVersion constrains forwards compatibility by emitting files
-// that won't load in zoekt with a FeatureVersion below it.
-const WriteMinFeatureVersion = 10
-
-// ReadMinFeatureVersion constrains backwards compatibility by refusing to
-// load a file with a FeatureVersion below it.
-const ReadMinFeatureVersion = 8
-
-type indexTOC struct {
- fileContents compoundSection
- fileNames compoundSection
- fileSections compoundSection
- postings compoundSection
- newlines compoundSection
- ngramText simpleSection
- runeOffsets simpleSection
- fileEndRunes simpleSection
- languages simpleSection
-
- branchMasks simpleSection
- subRepos simpleSection
-
- nameNgramText simpleSection
- namePostings compoundSection
- nameRuneOffsets simpleSection
- metaData simpleSection
- repoMetaData simpleSection
- nameEndRunes simpleSection
- contentChecksums simpleSection
- runeDocSections simpleSection
-}
-
-func (t *indexTOC) sections() []section {
- // This old sections list is only needed to maintain backwards compatibility,
- // and can be removed when a migration to tagged sections is complete.
- return []section{
- // This must be first, so it can be reliably read across
- // file format versions.
- &t.metaData,
- &t.repoMetaData,
- &t.fileContents,
- &t.fileNames,
- &t.fileSections,
- &t.newlines,
- &t.ngramText,
- &t.postings,
- &t.nameNgramText,
- &t.namePostings,
- &t.branchMasks,
- &t.subRepos,
- &t.runeOffsets,
- &t.nameRuneOffsets,
- &t.fileEndRunes,
- &t.nameEndRunes,
- &t.contentChecksums,
- &t.languages,
- &t.runeDocSections,
- }
-}
-
-type taggedSection struct {
- tag string
- sec section
-}
-
-func (t *indexTOC) sectionsTagged() map[string]section {
- out := map[string]section{}
- for _, ent := range t.sectionsTaggedList() {
- out[ent.tag] = ent.sec
- }
- for _, ent := range t.sectionsTaggedCompatibilityList() {
- out[ent.tag] = ent.sec
- }
- return out
-}
-
-func (t *indexTOC) sectionsTaggedList() []taggedSection {
- return []taggedSection{
- {"metadata", &t.metaData},
- {"repoMetaData", &t.repoMetaData},
- {"fileContents", &t.fileContents},
- {"fileNames", &t.fileNames},
- {"fileSections", &t.fileSections},
- {"newlines", &t.newlines},
- {"ngramText", &t.ngramText},
- {"postings", &t.postings},
- {"nameNgramText", &t.nameNgramText},
- {"namePostings", &t.namePostings},
- {"branchMasks", &t.branchMasks},
- {"subRepos", &t.subRepos},
- {"runeOffsets", &t.runeOffsets},
- {"nameRuneOffsets", &t.nameRuneOffsets},
- {"fileEndRunes", &t.fileEndRunes},
- {"nameEndRunes", &t.nameEndRunes},
- {"contentChecksums", &t.contentChecksums},
- {"languages", &t.languages},
- {"runeDocSections", &t.runeDocSections},
- }
-}
-
-// sectionsTaggedCompatibilityList returns a list of sections that will be
-// handled or converted for backwards compatiblity, but aren't written by
-// the current iteration of the indexer.
-func (t *indexTOC) sectionsTaggedCompatibilityList() []taggedSection {
- return []taggedSection{}
-}
diff --git a/web/api.go b/web/api.go
deleted file mode 100644
index 8eca345..0000000
--- a/web/api.go
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package web
-
-import (
- "time"
-
- "github.com/google/zoekt"
-)
-
-type LastInput struct {
- Query string
- Num int
-
- // If set, focus on the search box.
- AutoFocus bool
-}
-
-// Result holds the data provided to the search results template.
-type ResultInput struct {
- Last LastInput
- QueryStr string
- Query string
- Stats zoekt.Stats
- Duration time.Duration
- FileMatches []*FileMatch
- SearchOptions string
-}
-
-// FileMatch holds the per file data provided to search results template
-type FileMatch struct {
- FileName string
- Repo string
- ResultID string
- Language string
- // If this was a duplicate result, this will contain the file
- // of the first match.
- DuplicateID string
-
- Branches []string
- Matches []Match
- URL string
-}
-
-// Match holds the per line data provided to the search results template
-type Match struct {
- URL string
- FileName string
- LineNum int
-
- Fragments []Fragment
-}
-
-// Fragment holds data of a single contiguous match within in a line
-// for the results template.
-type Fragment struct {
- Pre string
- Match string
- Post string
-}
-
-// SearchBoxInput is provided to the SearchBox template.
-type SearchBoxInput struct {
- Last LastInput
- Stats *zoekt.RepoStats
- Version string
- Uptime time.Duration
-}
-
-// RepoListInput is provided to the RepoList template.
-type RepoListInput struct {
- Last LastInput
- Stats zoekt.RepoStats
- Repos []Repository
-}
-
-// Branch holds the metadata for a indexed branch.
-type Branch struct {
- Name string
- Version string
- URL string
-}
-
-// Repository holds the metadata for an indexed repository.
-type Repository struct {
- Name string
- URL string
- IndexTime time.Time
- Branches []Branch
- Files int64
-
- // Total amount of content bytes.
- Size int64
-}
-
-// PrintInput is provided to the server.Print template.
-type PrintInput struct {
- Repo, Name string
- Lines []string
- Last LastInput
-}
diff --git a/web/e2e_test.go b/web/e2e_test.go
deleted file mode 100644
index 479441b..0000000
--- a/web/e2e_test.go
+++ /dev/null
@@ -1,442 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package web
-
-import (
- "bytes"
- "context"
- "fmt"
- "io/ioutil"
- "log"
- "net/http"
- "net/http/httptest"
- "strings"
- "testing"
- "time"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/query"
-)
-
-// TODO(hanwen): cut & paste from ../ . Should create internal test
-// util package.
-type memSeeker struct {
- data []byte
-}
-
-func (s *memSeeker) Close() {}
-func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
- return s.data[off : off+sz], nil
-}
-
-func (s *memSeeker) Size() (uint32, error) {
- return uint32(len(s.data)), nil
-}
-
-func (s *memSeeker) Name() string {
- return "memSeeker"
-}
-
-func searcherForTest(t *testing.T, b *zoekt.IndexBuilder) zoekt.Searcher {
- var buf bytes.Buffer
- b.Write(&buf)
- f := &memSeeker{buf.Bytes()}
-
- searcher, err := zoekt.NewSearcher(f)
- if err != nil {
- t.Fatalf("NewSearcher: %v", err)
- }
-
- return searcher
-}
-
-func TestBasic(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
- Name: "name",
- URL: "repo-url",
- CommitURLTemplate: "{{.Version}}",
- FileURLTemplate: "file-url",
- LineFragmentTemplate: "#line",
- Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}},
- })
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
- if err := b.Add(zoekt.Document{
- Name: "f2",
- Content: []byte("to carry water in the no later bla"),
- // ------------- 0123456789012345678901234567890123
- // ------------- 0 1 2 3
- Branches: []string{"master"},
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
-
- s := searcherForTest(t, b)
- srv := Server{
- Searcher: s,
- Top: Top,
- HTML: true,
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- nowStr := time.Now().Format("Jan 02, 2006 15:04")
- for req, needles := range map[string][]string{
- "/": {"from 1 repositories"},
- "/search?q=water": {
- "href=\"file-url#line",
- "carry <b>water</b>",
- },
- "/search?q=r:": {
- "1234\">master",
- "Found 1 repositories",
- nowStr,
- "repo-url\">name",
- "1 files (36)",
- },
- "/search?q=magic": {
- `value=magic`,
- },
- "/robots.txt": {
- "disallow: /search",
- },
- } {
- checkNeedles(t, ts, req, needles)
- }
-}
-
-func TestPrint(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
- Name: "name",
- URL: "repo-url",
- CommitURLTemplate: "{{.Version}}",
- FileURLTemplate: "file-url",
- LineFragmentTemplate: "line",
- Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}},
- })
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
- if err := b.Add(zoekt.Document{
- Name: "f2",
- Content: []byte("to carry water in the no later bla"),
- Branches: []string{"master"},
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
-
- if err := b.Add(zoekt.Document{
- Name: "dir/f2",
- Content: []byte("blabla"),
- Branches: []string{"master"},
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
-
- s := searcherForTest(t, b)
- srv := Server{
- Searcher: s,
- Top: Top,
- HTML: true,
- Print: true,
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- for req, needles := range map[string][]string{
- "/print?q=bla&r=name&f=f2": {
- `pre id="l1" class="inline-pre"><span class="noselect"><a href="#l1">`,
- },
- } {
- checkNeedles(t, ts, req, needles)
- }
-}
-
-func TestPrintDefault(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
- Name: "name",
- URL: "repo-url",
- Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}},
- })
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
- if err := b.Add(zoekt.Document{
- Name: "f2",
- Content: []byte("to carry water in the no later bla"),
- Branches: []string{"master"},
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
- s := searcherForTest(t, b)
- srv := Server{
- Searcher: s,
- Top: Top,
- HTML: true,
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- for req, needles := range map[string][]string{
- "/search?q=water": {
- `href="print?`,
- },
- } {
- checkNeedles(t, ts, req, needles)
- }
-}
-
-func checkNeedles(t *testing.T, ts *httptest.Server, req string, needles []string) {
- res, err := http.Get(ts.URL + req)
- if err != nil {
- t.Fatal(err)
- }
- resultBytes, err := ioutil.ReadAll(res.Body)
- res.Body.Close()
- if err != nil {
- log.Fatal(err)
- }
-
- result := string(resultBytes)
- for _, want := range needles {
- if !strings.Contains(result, want) {
- t.Errorf("query %q: result did not have %q: %s", req, want, result)
- }
- }
- if notWant := "crashed"; strings.Contains(result, notWant) {
- t.Errorf("result has %q: %s", notWant, result)
- }
- if notWant := "bytes skipped)..."; strings.Contains(result, notWant) {
- t.Errorf("result has %q: %s", notWant, result)
- }
-}
-
-type crashSearcher struct {
- zoekt.Searcher
-}
-
-func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
- res := zoekt.SearchResult{}
- res.Stats.Crashes = 1
- return &res, nil
-}
-
-func TestCrash(t *testing.T) {
- srv := Server{
- Searcher: &crashSearcher{},
- Top: Top,
- HTML: true,
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- res, err := http.Get(ts.URL + "/search?q=water")
- if err != nil {
- t.Fatal(err)
- }
- resultBytes, err := ioutil.ReadAll(res.Body)
- res.Body.Close()
- if err != nil {
- t.Fatal(err)
- }
-
- result := string(resultBytes)
- if want := "1 shards crashed"; !strings.Contains(result, want) {
- t.Errorf("result did not have %q: %s", want, result)
- }
-}
-
-func TestHostCustomization(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
- Name: "name",
- })
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
- if err := b.Add(zoekt.Document{
- Name: "file",
- Content: []byte("bla"),
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
-
- s := searcherForTest(t, b)
- srv := Server{
- Searcher: s,
- Top: Top,
- HTML: true,
- HostCustomQueries: map[string]string{
- "myproject.io": "r:myproject",
- },
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- req, err := http.NewRequest("GET", ts.URL, &bytes.Buffer{})
- if err != nil {
- t.Fatalf("NewRequest: %v", err)
- }
- req.Host = "myproject.io"
- res, err := (&http.Client{}).Do(req)
- if err != nil {
- t.Fatalf("Do(%v): %v", req, err)
- }
- resultBytes, err := ioutil.ReadAll(res.Body)
- res.Body.Close()
- if err != nil {
- t.Fatalf("ReadAll: %v", err)
- }
-
- if got, want := string(resultBytes), "r:myproject"; !strings.Contains(got, want) {
- t.Fatalf("got %s, want substring %q", got, want)
- }
-}
-
-func TestDupResult(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
- Name: "name",
- })
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- for i := 0; i < 2; i++ {
- if err := b.Add(zoekt.Document{
- Name: fmt.Sprintf("file%d", i),
- Content: []byte("bla"),
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
- }
- s := searcherForTest(t, b)
- srv := Server{
- Searcher: s,
- Top: Top,
- HTML: true,
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- req, err := http.NewRequest("GET", ts.URL+"/search?q=bla", &bytes.Buffer{})
- if err != nil {
- t.Fatalf("NewRequest: %v", err)
- }
- res, err := (&http.Client{}).Do(req)
- if err != nil {
- t.Fatalf("Do(%v): %v", req, err)
- }
- resultBytes, err := ioutil.ReadAll(res.Body)
- res.Body.Close()
- if err != nil {
- t.Fatalf("ReadAll: %v", err)
- }
-
- if got, want := string(resultBytes), "Duplicate result"; !strings.Contains(got, want) {
- t.Fatalf("got %s, want substring %q", got, want)
- }
-}
-
-func TestTruncateLine(t *testing.T) {
- b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
- Name: "name",
- })
- if err != nil {
- t.Fatalf("NewIndexBuilder: %v", err)
- }
-
- largePadding := bytes.Repeat([]byte{'a'}, 100*1000) // 100kb
- if err := b.Add(zoekt.Document{
- Name: "file",
- Content: append(append(largePadding, []byte("helloworld")...), largePadding...),
- }); err != nil {
- t.Fatalf("Add: %v", err)
- }
- s := searcherForTest(t, b)
- srv := Server{
- Searcher: s,
- Top: Top,
- HTML: true,
- }
-
- mux, err := NewMux(&srv)
- if err != nil {
- t.Fatalf("NewMux: %v", err)
- }
-
- ts := httptest.NewServer(mux)
- defer ts.Close()
-
- req, err := http.NewRequest("GET", ts.URL+"/search?q=helloworld", &bytes.Buffer{})
- if err != nil {
- t.Fatalf("NewRequest: %v", err)
- }
- res, err := (&http.Client{}).Do(req)
- if err != nil {
- t.Fatalf("Do(%v): %v", req, err)
- }
- resultBytes, err := ioutil.ReadAll(res.Body)
- res.Body.Close()
- if err != nil {
- t.Fatalf("ReadAll: %v", err)
- }
-
- if got, want := len(resultBytes)/1000, 10; got > want {
- t.Fatalf("got %dkb response, want <= %dkb", got, want)
- }
- result := string(resultBytes)
- if want := "aa<b>helloworld</b>aa"; !strings.Contains(result, want) {
- t.Fatalf("got %s, want substring %q", result, want)
- }
- if want := "bytes skipped)..."; !strings.Contains(result, want) {
- t.Fatalf("got %s, want substring %q", result, want)
- }
-}
diff --git a/web/server.go b/web/server.go
deleted file mode 100644
index 94a16ac..0000000
--- a/web/server.go
+++ /dev/null
@@ -1,582 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package web
-
-import (
- "bytes"
- "fmt"
- "html/template"
- "log"
- "net"
- "net/http"
- "regexp"
- "regexp/syntax"
- "sort"
- "strconv"
- "strings"
- "sync"
- "time"
-
- "golang.org/x/net/context"
-
- "github.com/google/zoekt"
- "github.com/google/zoekt/query"
-)
-
-var Funcmap = template.FuncMap{
- "Inc": func(orig int) int {
- return orig + 1
- },
- "More": func(orig int) int {
- return orig * 3
- },
- "HumanUnit": func(orig int64) string {
- b := orig
- suffix := ""
- if orig > 10*(1<<30) {
- suffix = "G"
- b = orig / (1 << 30)
- } else if orig > 10*(1<<20) {
- suffix = "M"
- b = orig / (1 << 20)
- } else if orig > 10*(1<<10) {
- suffix = "K"
- b = orig / (1 << 10)
- }
-
- return fmt.Sprintf("%d%s", b, suffix)
- },
- "LimitPre": func(limit int, pre string) string {
- if len(pre) < limit {
- return pre
- }
- return fmt.Sprintf("...(%d bytes skipped)...%s", len(pre)-limit, pre[len(pre)-limit:])
- },
- "LimitPost": func(limit int, post string) string {
- if len(post) < limit {
- return post
- }
- return fmt.Sprintf("%s...(%d bytes skipped)...", post[:limit], len(post)-limit)
- },
-}
-
-const defaultNumResults = 50
-
-type Server struct {
- Searcher zoekt.Searcher
-
- // Serve HTML interface
- HTML bool
-
- // If set, show files from the index.
- Print bool
-
- // Version string for this server.
- Version string
-
- // Depending on the Host header, add a query to the entry
- // page. For example, when serving on "search.myproject.org"
- // we could add "r:myproject" automatically. This allows a
- // single instance to serve as search engine for multiple
- // domains.
- HostCustomQueries map[string]string
-
- // This should contain the following templates: "didyoumean"
- // (for suggestions), "repolist" (for the repo search result
- // page), "result" for the search results, "search" (for the
- // opening page), "box" for the search query input element and
- // "print" for the show file functionality.
- Top *template.Template
-
- didYouMean *template.Template
- repolist *template.Template
- search *template.Template
- result *template.Template
- print *template.Template
- about *template.Template
- robots *template.Template
-
- startTime time.Time
-
- templateMu sync.Mutex
- templateCache map[string]*template.Template
-
- lastStatsMu sync.Mutex
- lastStats *zoekt.RepoStats
- lastStatsTS time.Time
-}
-
-func (s *Server) getTemplate(str string) *template.Template {
- s.templateMu.Lock()
- defer s.templateMu.Unlock()
- t := s.templateCache[str]
- if t != nil {
- return t
- }
-
- t, err := template.New("cache").Parse(str)
- if err != nil {
- log.Printf("template parse error: %v", err)
- t = template.Must(template.New("empty").Parse(""))
- }
- s.templateCache[str] = t
- return t
-}
-
-func NewMux(s *Server) (*http.ServeMux, error) {
- s.print = s.Top.Lookup("print")
- if s.print == nil {
- return nil, fmt.Errorf("missing template 'print'")
- }
-
- for k, v := range map[string]**template.Template{
- "didyoumean": &s.didYouMean,
- "results": &s.result,
- "print": &s.print,
- "search": &s.search,
- "repolist": &s.repolist,
- "about": &s.about,
- "robots": &s.robots,
- } {
- *v = s.Top.Lookup(k)
- if *v == nil {
- return nil, fmt.Errorf("missing template %q", k)
- }
- }
-
- s.templateCache = map[string]*template.Template{}
- s.startTime = time.Now()
-
- mux := http.NewServeMux()
-
- if s.HTML {
- mux.HandleFunc("/robots.txt", s.serveRobots)
- mux.HandleFunc("/search", s.serveSearch)
- mux.HandleFunc("/", s.serveSearchBox)
- mux.HandleFunc("/about", s.serveAbout)
- mux.HandleFunc("/print", s.servePrint)
- }
-
- return mux, nil
-}
-
-func (s *Server) serveSearch(w http.ResponseWriter, r *http.Request) {
- err := s.serveSearchErr(w, r)
-
- if suggest, ok := err.(*query.SuggestQueryError); ok {
- var buf bytes.Buffer
- if err := s.didYouMean.Execute(&buf, suggest); err != nil {
- http.Error(w, err.Error(), http.StatusTeapot)
- }
-
- w.Write(buf.Bytes())
- return
- }
-
- if err != nil {
- http.Error(w, err.Error(), http.StatusTeapot)
- }
-}
-
-func (s *Server) serveSearchErr(w http.ResponseWriter, r *http.Request) error {
- qvals := r.URL.Query()
- queryStr := qvals.Get("q")
- if queryStr == "" {
- return fmt.Errorf("no query found")
- }
-
- q, err := query.Parse(queryStr)
- if err != nil {
- return err
- }
-
- repoOnly := true
- query.VisitAtoms(q, func(q query.Q) {
- _, ok := q.(*query.Repo)
- repoOnly = repoOnly && ok
- })
- if repoOnly {
- return s.serveListReposErr(q, queryStr, w, r)
- }
-
- numStr := qvals.Get("num")
-
- num, err := strconv.Atoi(numStr)
- if err != nil || num <= 0 {
- num = defaultNumResults
- }
-
- sOpts := zoekt.SearchOptions{
- MaxWallTime: 10 * time.Second,
- }
-
- sOpts.SetDefaults()
-
- ctx := r.Context()
- if result, err := s.Searcher.Search(ctx, q, &zoekt.SearchOptions{EstimateDocCount: true}); err != nil {
- return err
- } else if numdocs := result.ShardFilesConsidered; numdocs > 10000 {
- // If the search touches many shards and many files, we
- // have to limit the number of matches. This setting
- // is based on the number of documents eligible after
- // considering reponames, so large repos (both
- // android, chromium are about 500k files) aren't
- // covered fairly.
-
- // 10k docs, 50 num -> max match = (250 + 250 / 10)
- sOpts.ShardMaxMatchCount = num*5 + (5*num)/(numdocs/1000)
-
- // 10k docs, 50 num -> max important match = 4
- sOpts.ShardMaxImportantMatch = num/20 + num/(numdocs/500)
- } else {
- // Virtually no limits for a small corpus; important
- // matches are just as expensive as normal matches.
- n := numdocs + num*100
- sOpts.ShardMaxImportantMatch = n
- sOpts.ShardMaxMatchCount = n
- sOpts.TotalMaxMatchCount = n
- sOpts.TotalMaxImportantMatch = n
- }
- sOpts.MaxDocDisplayCount = num
-
- result, err := s.Searcher.Search(ctx, q, &sOpts)
- if err != nil {
- return err
- }
-
- fileMatches, err := s.formatResults(result, queryStr, s.Print)
- if err != nil {
- return err
- }
-
- res := ResultInput{
- Last: LastInput{
- Query: queryStr,
- Num: num,
- AutoFocus: true,
- },
- Stats: result.Stats,
- Query: q.String(),
- QueryStr: queryStr,
- SearchOptions: sOpts.String(),
- FileMatches: fileMatches,
- }
- if res.Stats.Wait < res.Stats.Duration/10 {
- // Suppress queueing stats if they are neglible.
- res.Stats.Wait = 0
- }
-
- var buf bytes.Buffer
- if err := s.result.Execute(&buf, &res); err != nil {
- return err
- }
-
- w.Write(buf.Bytes())
- return nil
-}
-
-func (s *Server) servePrint(w http.ResponseWriter, r *http.Request) {
- err := s.servePrintErr(w, r)
- if err != nil {
- http.Error(w, err.Error(), http.StatusTeapot)
- }
-}
-
-const statsStaleNess = 30 * time.Second
-
-func (s *Server) fetchStats(ctx context.Context) (*zoekt.RepoStats, error) {
- s.lastStatsMu.Lock()
- stats := s.lastStats
- if time.Since(s.lastStatsTS) > statsStaleNess {
- stats = nil
- }
- s.lastStatsMu.Unlock()
-
- if stats != nil {
- return stats, nil
- }
-
- repos, err := s.Searcher.List(ctx, &query.Const{Value: true})
- if err != nil {
- return nil, err
- }
-
- stats = &zoekt.RepoStats{}
- names := map[string]struct{}{}
- for _, r := range repos.Repos {
- stats.Add(&r.Stats)
- names[r.Repository.Name] = struct{}{}
- }
- stats.Repos = len(names)
-
- s.lastStatsMu.Lock()
- s.lastStatsTS = time.Now()
- s.lastStats = stats
- s.lastStatsMu.Unlock()
-
- return stats, nil
-}
-
-func (s *Server) serveSearchBoxErr(w http.ResponseWriter, r *http.Request) error {
- stats, err := s.fetchStats(r.Context())
- if err != nil {
- return err
- }
- d := SearchBoxInput{
- Last: LastInput{
- Num: defaultNumResults,
- AutoFocus: true,
- },
- Stats: stats,
- Version: s.Version,
- Uptime: time.Since(s.startTime),
- }
-
- d.Last.Query = r.URL.Query().Get("q")
- if d.Last.Query == "" {
- custom := s.HostCustomQueries[r.Host]
- if custom == "" {
- host, _, _ := net.SplitHostPort(r.Host)
- custom = s.HostCustomQueries[host]
- }
-
- if custom != "" {
- d.Last.Query = custom + " "
- }
- }
-
- var buf bytes.Buffer
- if err := s.search.Execute(&buf, &d); err != nil {
- return err
- }
- w.Write(buf.Bytes())
- return nil
-}
-
-func (s *Server) serveSearchBox(w http.ResponseWriter, r *http.Request) {
- if err := s.serveSearchBoxErr(w, r); err != nil {
- http.Error(w, err.Error(), http.StatusTeapot)
- }
-}
-
-func (s *Server) serveAboutErr(w http.ResponseWriter, r *http.Request) error {
- stats, err := s.fetchStats(r.Context())
- if err != nil {
- return err
- }
-
- d := SearchBoxInput{
- Stats: stats,
- Version: s.Version,
- Uptime: time.Since(s.startTime),
- }
-
- var buf bytes.Buffer
- if err := s.about.Execute(&buf, &d); err != nil {
- return err
- }
- w.Write(buf.Bytes())
- return nil
-}
-
-func (s *Server) serveAbout(w http.ResponseWriter, r *http.Request) {
- if err := s.serveAboutErr(w, r); err != nil {
- http.Error(w, err.Error(), http.StatusTeapot)
- }
-}
-
-func (s *Server) serveRobotsErr(w http.ResponseWriter, r *http.Request) error {
- data := struct{}{}
- var buf bytes.Buffer
- if err := s.robots.Execute(&buf, &data); err != nil {
- return err
- }
- w.Write(buf.Bytes())
- return nil
-}
-
-func (s *Server) serveRobots(w http.ResponseWriter, r *http.Request) {
- if err := s.serveRobotsErr(w, r); err != nil {
- http.Error(w, err.Error(), http.StatusTeapot)
- }
-}
-
-func (s *Server) serveListReposErr(q query.Q, qStr string, w http.ResponseWriter, r *http.Request) error {
- ctx := r.Context()
- repos, err := s.Searcher.List(ctx, q)
- if err != nil {
- return err
- }
-
- qvals := r.URL.Query()
- order := qvals.Get("order")
- switch order {
- case "", "name", "revname":
- sort.Slice(repos.Repos, func(i, j int) bool {
- return strings.Compare(repos.Repos[i].Repository.Name, repos.Repos[j].Repository.Name) < 0
- })
- case "size", "revsize":
- sort.Slice(repos.Repos, func(i, j int) bool {
- return repos.Repos[i].Stats.ContentBytes < repos.Repos[j].Stats.ContentBytes
- })
- case "time", "revtime":
- sort.Slice(repos.Repos, func(i, j int) bool {
- return repos.Repos[i].IndexMetadata.IndexTime.Before(
- repos.Repos[j].IndexMetadata.IndexTime)
- })
- default:
- return fmt.Errorf("got unknown sort key %q, allowed [rev]name, [rev]time, [rev]size", order)
- }
- if strings.HasPrefix(order, "rev") {
- for i, j := 0, len(repos.Repos)-1; i < j; {
- repos.Repos[i], repos.Repos[j] = repos.Repos[j], repos.Repos[i]
- i++
- j--
-
- }
- }
-
- aggregate := zoekt.RepoStats{
- Repos: len(repos.Repos),
- }
- for _, s := range repos.Repos {
- aggregate.Add(&s.Stats)
- }
- res := RepoListInput{
- Last: LastInput{
- Query: qStr,
- AutoFocus: true,
- },
- Stats: aggregate,
- }
-
- numStr := qvals.Get("num")
- num, err := strconv.Atoi(numStr)
- if err != nil || num <= 0 {
- num = 0
- }
- if num > 0 {
- if num > len(repos.Repos) {
- num = len(repos.Repos)
- }
-
- repos.Repos = repos.Repos[:num]
- }
-
- for _, r := range repos.Repos {
- t := s.getTemplate(r.Repository.CommitURLTemplate)
-
- repo := Repository{
- Name: r.Repository.Name,
- URL: r.Repository.URL,
- IndexTime: r.IndexMetadata.IndexTime,
- Size: r.Stats.ContentBytes,
- Files: int64(r.Stats.Documents),
- }
- for _, b := range r.Repository.Branches {
- var buf bytes.Buffer
- if err := t.Execute(&buf, b); err != nil {
- return err
- }
- repo.Branches = append(repo.Branches,
- Branch{
- Name: b.Name,
- Version: b.Version,
- URL: buf.String(),
- })
- }
- res.Repos = append(res.Repos, repo)
- }
-
- var buf bytes.Buffer
- if err := s.repolist.Execute(&buf, &res); err != nil {
- return err
- }
-
- w.Write(buf.Bytes())
- return nil
-}
-
-func (s *Server) servePrintErr(w http.ResponseWriter, r *http.Request) error {
- qvals := r.URL.Query()
- fileStr := qvals.Get("f")
- repoStr := qvals.Get("r")
- queryStr := qvals.Get("q")
- numStr := qvals.Get("num")
- num, err := strconv.Atoi(numStr)
- if err != nil || num <= 0 {
- num = defaultNumResults
- }
-
- re, err := syntax.Parse("^"+regexp.QuoteMeta(fileStr)+"$", 0)
- if err != nil {
- return err
- }
- qs := []query.Q{
- &query.Regexp{Regexp: re, FileName: true, CaseSensitive: true},
- &query.Repo{Pattern: repoStr},
- }
-
- if branchStr := qvals.Get("b"); branchStr != "" {
- qs = append(qs, &query.Branch{Pattern: branchStr})
- }
-
- q := &query.And{Children: qs}
-
- sOpts := zoekt.SearchOptions{
- Whole: true,
- }
-
- ctx := r.Context()
- result, err := s.Searcher.Search(ctx, q, &sOpts)
- if err != nil {
- return err
- }
-
- if len(result.Files) != 1 {
- var ss []string
- for _, n := range result.Files {
- ss = append(ss, n.FileName)
- }
- return fmt.Errorf("ambiguous result: %v", ss)
- }
-
- f := result.Files[0]
-
- byteLines := bytes.Split(f.Content, []byte{'\n'})
- strLines := make([]string, 0, len(byteLines))
- for _, l := range byteLines {
- strLines = append(strLines, string(l))
- }
-
- d := PrintInput{
- Name: f.FileName,
- Repo: f.Repository,
- Lines: strLines,
- Last: LastInput{
- Query: queryStr,
- Num: num,
- AutoFocus: false,
- },
- }
-
- var buf bytes.Buffer
- if err := s.print.Execute(&buf, &d); err != nil {
- return err
- }
-
- w.Write(buf.Bytes())
- return nil
-}
diff --git a/web/snippets.go b/web/snippets.go
deleted file mode 100644
index 1b6112e..0000000
--- a/web/snippets.go
+++ /dev/null
@@ -1,149 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package web
-
-import (
- "bytes"
- "html/template"
- "log"
- "net/url"
- "strconv"
- "strings"
-
- "github.com/google/zoekt"
-)
-
-func (s *Server) formatResults(result *zoekt.SearchResult, query string, localPrint bool) ([]*FileMatch, error) {
- var fmatches []*FileMatch
-
- templateMap := map[string]*template.Template{}
- fragmentMap := map[string]*template.Template{}
- if !localPrint {
- for repo, str := range result.RepoURLs {
- if str != "" {
- templateMap[repo] = s.getTemplate(str)
- }
- }
- for repo, str := range result.LineFragments {
- if str != "" {
- fragmentMap[repo] = s.getTemplate(str)
- }
- }
- }
- getFragment := func(repo string, linenum int) string {
- tpl := fragmentMap[repo]
-
- if tpl == nil || localPrint {
- return "#l" + strconv.Itoa(linenum)
- }
-
- var buf bytes.Buffer
- if err := tpl.Execute(&buf, map[string]string{
- "LineNumber": strconv.Itoa(linenum),
- }); err != nil {
- log.Printf("fragment template: %v", err)
- return ""
- }
- return buf.String()
- }
- getURL := func(repo, filename string, branches []string, version string) string {
- tpl := templateMap[repo]
- if localPrint || tpl == nil {
- v := make(url.Values)
- v.Add("r", repo)
- v.Add("f", filename)
- v.Add("q", query)
- if len(branches) > 0 {
- v.Add("b", branches[0])
- }
- return "print?" + v.Encode()
- }
-
- var buf bytes.Buffer
- b := ""
- if len(branches) > 0 {
- b = branches[0]
- }
- err := tpl.Execute(&buf, map[string]string{
- "Branch": b,
- "Version": version,
- "Path": filename,
- })
- if err != nil {
- log.Printf("url template: %v", err)
- return ""
- }
- return buf.String()
- }
-
- // hash => result-id
- seenFiles := map[string]string{}
- for _, f := range result.Files {
- fMatch := FileMatch{
- FileName: f.FileName,
- Repo: f.Repository,
- ResultID: f.Repository + ":" + f.FileName,
- Branches: f.Branches,
- Language: f.Language,
- }
-
- if dup, ok := seenFiles[string(f.Checksum)]; ok {
- fMatch.DuplicateID = dup
- } else {
- seenFiles[string(f.Checksum)] = fMatch.ResultID
- }
-
- if f.SubRepositoryName != "" {
- fn := strings.TrimPrefix(fMatch.FileName[len(f.SubRepositoryPath):], "/")
- fMatch.URL = getURL(f.SubRepositoryName, fn, f.Branches, f.Version)
- } else {
- fMatch.URL = getURL(f.Repository, f.FileName, f.Branches, f.Version)
- }
-
- for _, m := range f.LineMatches {
- fragment := getFragment(f.Repository, m.LineNumber)
- if !strings.HasPrefix(fragment, "#") && !strings.HasPrefix(fragment, ";") {
- // TODO - remove this is backward compatibility glue.
- fragment = "#" + fragment
- }
- md := Match{
- FileName: f.FileName,
- LineNum: m.LineNumber,
- URL: fMatch.URL + fragment,
- }
-
- lastEnd := 0
- line := m.Line
- for i, f := range m.LineFragments {
- l := f.LineOffset
- e := l + f.MatchLength
-
- frag := Fragment{
- Pre: string(line[lastEnd:l]),
- Match: string(line[l:e]),
- }
- if i == len(m.LineFragments)-1 {
- frag.Post = string(m.Line[e:])
- }
-
- md.Fragments = append(md.Fragments, frag)
- lastEnd = e
- }
- fMatch.Matches = append(fMatch.Matches, md)
- }
- fmatches = append(fmatches, &fMatch)
- }
- return fmatches, nil
-}
diff --git a/web/templates.go b/web/templates.go
deleted file mode 100644
index eea9bb7..0000000
--- a/web/templates.go
+++ /dev/null
@@ -1,409 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package web
-
-import (
- "html/template"
- "log"
-)
-
-// Top provides the standard templates in parsed form
-var Top = template.New("top").Funcs(Funcmap)
-
-// TemplateText contains the text of the standard templates.
-var TemplateText = map[string]string{
-
- "didyoumean": `
-<html>
-<head>
- <title>Error</title>
-</head>
-<body>
- <p>{{.Message}}. Did you mean <a href="/search?q={{.Suggestion}}">{{.Suggestion}}</a> ?
-</body>
-</html>
-`,
-
- "head": `
-<head>
-<meta charset="utf-8">
-<meta http-equiv="X-UA-Compatible" content="IE=edge">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<!-- Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) -->
-<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
-<style>
- #navsearchbox { width: 350px !important; }
- #maxhits { width: 100px !important; }
- .label-dup {
- border-width: 1px !important;
- border-style: solid !important;
- border-color: #aaa !important;
- color: black;
- }
- .noselect {
- user-select: none;
- }
- a.label-dup:hover {
- color: black;
- background: #ddd;
- }
- .result {
- display: block;
- content: " ";
- visibility: hidden;
- }
- .container-results {
- overflow: auto;
- max-height: calc(100% - 72px);
- }
- .inline-pre {
- border: unset;
- background-color: unset;
- margin: unset;
- padding: unset;
- overflow: unset;
- }
- :target { background-color: #ccf; }
- table tbody tr td { border: none !important; padding: 2px !important; }
-</style>
-</head>
- `,
-
- "jsdep": `
-<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
-<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
-`,
-
- // the template for the search box.
- "searchbox": `
-<form action="search">
- <div class="form-group form-group-lg">
- <div class="input-group input-group-lg">
- <input class="form-control" placeholder="Search for some code..." autofocus
- {{if .Query}}
- value={{.Query}}
- {{end}}
- id="searchbox" type="text" name="q">
- <div class="input-group-btn">
- <button class="btn btn-primary">Search</button>
- </div>
- </div>
- </div>
-</form>
-`,
-
- "navbar": `
-<nav class="navbar navbar-default">
- <div class="container-fluid">
- <div class="navbar-header">
- <a class="navbar-brand" href="/">Zoekt</a>
- <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar-collapse" aria-expanded="false">
- <span class="sr-only">Toggle navigation</span>
- <span class="icon-bar"></span>
- <span class="icon-bar"></span>
- <span class="icon-bar"></span>
- </button>
- </div>
- <div class="navbar-collapse collapse" id="navbar-collapse" aria-expanded="false" style="height: 1px;">
- <form class="navbar-form navbar-left" action="search">
- <div class="form-group">
- <input class="form-control"
- placeholder="Search for some code..." role="search"
- id="navsearchbox" type="text" name="q" autofocus
- {{if .Query}}
- value={{.Query}}
- {{end}}>
- <div class="input-group">
- <div class="input-group-addon">Max Results</div>
- <input class="form-control" type="number" id="maxhits" name="num" value="{{.Num}}">
- </div>
- <button class="btn btn-primary">Search</button>
- </div>
- </form>
- </div>
- </div>
-</nav>
-<script>
-document.onkeydown=function(e){
- var e = e || window.event;
- if (e.key == "/") {
- var navbox = document.getElementById("navsearchbox");
- if (document.activeElement !== navbox) {
- navbox.focus();
- return false;
- }
- }
-};
-</script>
-`,
- // search box for the entry page.
- "search": `
-<html>
-{{template "head"}}
-<title>Zoekt, en gij zult spinazie eten</title>
-<body>
- <div class="jumbotron">
- <div class="container">
- {{template "searchbox" .Last}}
- </div>
- </div>
-
- <div class="container">
- <div class="row">
- <div class="col-md-8">
- <h3>Search examples:</h3>
- <dl class="dl-horizontal">
- <dt><a href="search?q=needle">needle</a></dt><dd>search for "needle"</dd>
- <dt><a href="search?q=thread+or+needle">thread or needle</a></dt><dd>search for either "thread" or "needle"</dd>
- <dt><a href="search?q=class+needle">class needle</a></span></dt><dd>search for files containing both "class" and "needle"</dd>
- <dt><a href="search?q=class+Needle">class Needle</a></dt><dd>search for files containing both "class" (case insensitive) and "Needle" (case sensitive)</dd>
- <dt><a href="search?q=class+Needle+case:yes">class Needle case:yes</a></dt><dd>search for files containing "class" and "Needle", both case sensitively</dd>
- <dt><a href="search?q=%22class Needle%22">"class Needle"</a></dt><dd>search for files with the phrase "class Needle"</dd>
- <dt><a href="search?q=needle+-hay">needle -hay</a></dt><dd>search for files with the word "needle" but not the word "hay"</dd>
- <dt><a href="search?q=path+file:java">path file:java</a></dt><dd>search for the word "path" in files whose name contains "java"</dd>
- <dt><a href="search?q=needle+lang%3Apython&num=50">needle lang:python</a></dt><dd>search for "needle" in Python source code</dd>
- <dt><a href="search?q=f:%5C.c%24">f:\.c$</a></dt><dd>search for files whose name ends with ".c"</dd>
- <dt><a href="search?q=path+-file:java">path -file:java</a></dt><dd>search for the word "path" excluding files whose name contains "java"</dd>
- <dt><a href="search?q=foo.*bar">foo.*bar</a></dt><dd>search for the regular expression "foo.*bar"</dd>
- <dt><a href="search?q=-%28Path File%29 Stream">-(Path File) Stream</a></dt><dd>search "Stream", but exclude files containing both "Path" and "File"</dd>
- <dt><a href="search?q=-Path%5c+file+Stream">-Path\ file Stream</a></dt><dd>search "Stream", but exclude files containing "Path File"</dd>
- <dt><a href="search?q=sym:data">sym:data</a></span></dt><dd>search for symbol definitions containing "data"</dd>
- <dt><a href="search?q=phone+r:droid">phone r:droid</a></dt><dd>search for "phone" in repositories whose name contains "droid"</dd>
- <dt><a href="search?q=phone+b:master">phone b:master</a></dt><dd>for Git repos, find "phone" in files in branches whose name contains "master".</dd>
- <dt><a href="search?q=phone+b:HEAD">phone b:HEAD</a></dt><dd>for Git repos, find "phone" in the default ('HEAD') branch.</dd>
- </dl>
- </div>
- <div class="col-md-4">
- <h3>To list repositories, try:</h3>
- <dl class="dl-horizontal">
- <dt><a href="search?q=r:droid">r:droid</a></dt><dd>list repositories whose name contains "droid".</dd>
- <dt><a href="search?q=r:go+-r:google">r:go -r:google</a></dt><dd>list repositories whose name contains "go" but not "google".</dd>
- </dl>
- </div>
- </div>
- </div>
- <nav class="navbar navbar-default navbar-bottom">
- <div class="container">
- {{template "footerBoilerplate"}}
- <p class="navbar-text navbar-right">
- Used {{HumanUnit .Stats.IndexBytes}} mem for
- {{.Stats.Documents}} documents ({{HumanUnit .Stats.ContentBytes}})
- from {{.Stats.Repos}} repositories.
- </p>
- </div>
- </nav>
-</body>
-</html>
-`,
- "footerBoilerplate": `<a class="navbar-text" href="about">About</a>`,
- "results": `
-<html>
-{{template "head"}}
-<title>Results for {{.QueryStr}}</title>
-<script>
- function zoektAddQ(atom) {
- window.location.href = "/search?q=" + escape("{{.QueryStr}}" + " " + atom) +
- "&" + "num=" + {{.Last.Num}};
- }
-</script>
-<body id="results">
- {{template "navbar" .Last}}
- <div class="container-fluid container-results">
- <h5>
- {{if .Stats.Crashes}}<br><b>{{.Stats.Crashes}} shards crashed</b><br>{{end}}
- {{ $fileCount := len .FileMatches }}
- Found {{.Stats.MatchCount}} results in {{.Stats.FileCount}} files{{if or (lt $fileCount .Stats.FileCount) (or (gt .Stats.ShardsSkipped 0) (gt .Stats.FilesSkipped 0)) }},
- showing top {{ $fileCount }} files (<a rel="nofollow"
- href="search?q={{.Last.Query}}&num={{More .Last.Num}}">show more</a>).
- {{else}}.{{end}}
- </h5>
- {{range .FileMatches}}
- <table class="table table-hover table-condensed">
- <thead>
- <tr>
- <th>
- {{if .URL}}<a name="{{.ResultID}}" class="result"></a><a href="{{.URL}}" >{{else}}<a name="{{.ResultID}}">{{end}}
- <small>
- {{.Repo}}:{{.FileName}}</a>:
- <span style="font-weight: normal">[ {{if .Branches}}{{range .Branches}}<span class="label label-default">{{.}}</span>,{{end}}{{end}} ]</span>
- {{if .Language}}<button
- title="restrict search to files written in {{.Language}}"
- onclick="zoektAddQ('lang:{{.Language}}')" class="label label-primary">language {{.Language}}</button></span>{{end}}
- {{if .DuplicateID}}<a class="label label-dup" href="#{{.DuplicateID}}">Duplicate result</a>{{end}}
- </small>
- </th>
- </tr>
- </thead>
- {{if not .DuplicateID}}
- <tbody>
- {{range .Matches}}
- <tr>
- <td style="background-color: rgba(238, 238, 255, 0.6);">
- <pre class="inline-pre"><span class="noselect">{{if .URL}}<a href="{{.URL}}">{{end}}<u>{{.LineNum}}</u>{{if .URL}}</a>{{end}}: </span>{{range .Fragments}}{{LimitPre 100 .Pre}}<b>{{.Match}}</b>{{LimitPost 100 .Post}}{{end}}</pre>
- </td>
- </tr>
- {{end}}
- </tbody>
- {{end}}
- </table>
- {{end}}
-
- <nav class="navbar navbar-default navbar-bottom">
- <div class="container">
- {{template "footerBoilerplate"}}
- <p class="navbar-text navbar-right">
- Took {{.Stats.Duration}}{{if .Stats.Wait}}(queued: {{.Stats.Wait}}){{end}} for
- {{HumanUnit .Stats.IndexBytesLoaded}}B index data,
- {{.Stats.NgramMatches}} ngram matches,
- {{.Stats.FilesConsidered}} docs considered,
- {{.Stats.FilesLoaded}} docs ({{HumanUnit .Stats.ContentBytesLoaded}}B)
- loaded{{if or .Stats.FilesSkipped .Stats.ShardsSkipped}},
- {{.Stats.FilesSkipped}} docs and {{.Stats.ShardsSkipped}} shards skipped{{else}}.{{end}}
- </p>
- </div>
- </nav>
- </div>
- {{ template "jsdep"}}
-</body>
-</html>
-`,
-
- "repolist": `
-<html>
-{{template "head"}}
-<body id="results">
- <div class="container">
- {{template "navbar" .Last}}
- <div><b>
- Found {{.Stats.Repos}} repositories ({{.Stats.Documents}} files, {{HumanUnit .Stats.ContentBytes}}b content)
- </b></div>
- <table class="table table-hover table-condensed">
- <thead>
- <tr>
- <th>Name <a href="/search?q={{.Last.Query}}&order=name">▼</a><a href="/search?q={{.Last.Query}}&order=revname">▲</a></th>
- <th>Last updated <a href="/search?q={{.Last.Query}}&order=revtime">▼</a><a href="/search?q={{.Last.Query}}&order=time">▲</a></th>
- <th>Branches</th>
- <th>Size <a href="/search?q={{.Last.Query}}&order=revsize">▼</a><a href="/search?q={{.Last.Query}}&order=size">▲</a></th>
- </tr>
- </thead>
- <tbody>
- {{range .Repos}}
- <tr>
- <td>{{if .URL}}<a href="{{.URL}}">{{end}}{{.Name}}{{if .URL}}</a>{{end}}</td>
- <td><small>{{.IndexTime.Format "Jan 02, 2006 15:04"}}</small></td>
- <td style="vertical-align: middle;">
- {{range .Branches}}
- {{if .URL}}<tt><a class="label label-default small" href="{{.URL}}">{{end}}{{.Name}}{{if .URL}}</a> </tt>{{end}}
- {{end}}
- </td>
- <td><small>{{HumanUnit .Files}} files ({{HumanUnit .Size}})</small></td>
- </tr>
- {{end}}
- </tbody>
- </table>
- </div>
-
- <nav class="navbar navbar-default navbar-bottom">
- <div class="container">
- {{template "footerBoilerplate"}}
- <p class="navbar-text navbar-right">
- </p>
- </div>
- </nav>
-
- {{ template "jsdep"}}
-</body>
-</html>
-`,
-
- "print": `
-<html>
- {{template "head"}}
- <title>{{.Repo}}:{{.Name}}</title>
-<body id="results">
- {{template "navbar" .Last}}
- <div class="container-fluid container-results" >
- <div><b>{{.Name}}</b></div>
- <div class="table table-hover table-condensed" style="overflow:auto; background: #eef;">
- {{ range $index, $ln := .Lines}}
- <pre id="l{{Inc $index}}" class="inline-pre"><span class="noselect"><a href="#l{{Inc $index}}">{{Inc $index}}</a>: </span>{{$ln}}</pre>
- {{end}}
- </div>
- <nav class="navbar navbar-default navbar-bottom">
- <div class="container">
- {{template "footerBoilerplate"}}
- <p class="navbar-text navbar-right">
- </p>
- </div>
- </nav>
- </div>
- {{ template "jsdep"}}
-</body>
-</html>
-`,
-
- "about": `
-
-<html>
- {{template "head"}}
- <title>About <em>zoekt</em></title>
-<body>
-
-
- <div class="jumbotron">
- <div class="container">
- {{template "searchbox" .Last}}
- </div>
- </div>
-
- <div class="container">
- <p>
- This is <a href="http://github.com/google/zoekt"><em>zoekt</em> (IPA: /zukt/)</a>,
- an open-source full text search engine. It's pronounced roughly as you would
- pronounce "zooked" in English.
- </p>
- <p>
- {{if .Version}}<em>Zoekt</em> version {{.Version}}, uptime{{else}}Uptime{{end}} {{.Uptime}}
- </p>
-
- <p>
- Used {{HumanUnit .Stats.IndexBytes}} memory for
- {{.Stats.Documents}} documents ({{HumanUnit .Stats.ContentBytes}})
- from {{.Stats.Repos}} repositories.
- </p>
- </div>
-
- <nav class="navbar navbar-default navbar-bottom">
- <div class="container">
- {{template "footerBoilerplate"}}
- <p class="navbar-text navbar-right">
- </p>
- </div>
- </nav>
-`,
- "robots": `
-user-agent: *
-disallow: /search
-`,
-}
-
-func init() {
- for k, v := range TemplateText {
- _, err := Top.New(k).Parse(v)
- if err != nil {
- log.Panicf("parse(%s): %v:", k, err)
- }
- }
-}
diff --git a/write.go b/write.go
deleted file mode 100644
index 7a89167..0000000
--- a/write.go
+++ /dev/null
@@ -1,178 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zoekt
-
-import (
- "bufio"
- "bytes"
- "encoding/binary"
- "encoding/json"
- "io"
- "sort"
- "time"
-)
-
-func (w *writer) writeTOC(toc *indexTOC) {
- // Tagged sections are indicated with a 0 section count.
- // Tagged sections allow easier forwards and backwards
- // compatibility when evolving zoekt index files with new
- // sections.
- //
- // A tagged section is:
- // Varint TagLen, Tag String, Varint SecType, Section
- //
- // Section type is indicated because simpleSections and
- // compoundSections have different lengths.
- w.U32(0)
- secs := toc.sectionsTaggedList()
- for _, s := range secs {
- w.String(s.tag)
- w.Varint(uint32(s.sec.kind()))
- s.sec.write(w)
- }
-}
-
-func (s *compoundSection) writeStrings(w *writer, strs []*searchableString) {
- s.start(w)
- for _, f := range strs {
- s.addItem(w, f.data)
- }
- s.end(w)
-}
-
-func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection,
- charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection) {
- keys := make(ngramSlice, 0, len(s.postings))
- for k := range s.postings {
- keys = append(keys, k)
- }
- sort.Sort(keys)
-
- ngramText.start(w)
- for _, k := range keys {
- var buf [8]byte
- binary.BigEndian.PutUint64(buf[:], uint64(k))
- w.Write(buf[:])
- }
- ngramText.end(w)
-
- postings.start(w)
- for _, k := range keys {
- postings.addItem(w, s.postings[k])
- }
- postings.end(w)
-
- charOffsets.start(w)
- w.Write(toSizedDeltas(s.runeOffsets))
- charOffsets.end(w)
-
- endRunes.start(w)
- w.Write(toSizedDeltas(s.endRunes))
- endRunes.end(w)
-}
-
-func (b *IndexBuilder) Write(out io.Writer) error {
- buffered := bufio.NewWriterSize(out, 1<<20)
- defer buffered.Flush()
-
- w := &writer{w: buffered}
- toc := indexTOC{}
-
- toc.fileContents.writeStrings(w, b.contentStrings)
- toc.newlines.start(w)
- for _, f := range b.contentStrings {
- toc.newlines.addItem(w, toSizedDeltas(newLinesIndices(f.data)))
- }
- toc.newlines.end(w)
-
- toc.branchMasks.start(w)
- for _, m := range b.branchMasks {
- w.U64(m)
- }
- toc.branchMasks.end(w)
-
- toc.fileSections.start(w)
- for _, s := range b.docSections {
- toc.fileSections.addItem(w, marshalDocSections(s))
- }
- toc.fileSections.end(w)
-
- writePostings(w, b.contentPostings, &toc.ngramText, &toc.runeOffsets, &toc.postings, &toc.fileEndRunes)
-
- // names.
- toc.fileNames.writeStrings(w, b.nameStrings)
-
- writePostings(w, b.namePostings, &toc.nameNgramText, &toc.nameRuneOffsets, &toc.namePostings, &toc.nameEndRunes)
-
- toc.subRepos.start(w)
- w.Write(toSizedDeltas(b.subRepos))
- toc.subRepos.end(w)
-
- toc.contentChecksums.start(w)
- w.Write(b.checksums)
- toc.contentChecksums.end(w)
-
- toc.languages.start(w)
- w.Write(b.languages)
- toc.languages.end(w)
-
- toc.runeDocSections.start(w)
- w.Write(marshalDocSections(b.runeDocSections))
- toc.runeDocSections.end(w)
-
- if err := b.writeJSON(&IndexMetadata{
- IndexFormatVersion: IndexFormatVersion,
- IndexTime: time.Now(),
- IndexFeatureVersion: FeatureVersion,
- IndexMinReaderVersion: WriteMinFeatureVersion,
- PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII,
- LanguageMap: b.languageMap,
- ZoektVersion: Version,
- }, &toc.metaData, w); err != nil {
- return err
- }
- if err := b.writeJSON(b.repo, &toc.repoMetaData, w); err != nil {
- return err
- }
-
- var tocSection simpleSection
-
- tocSection.start(w)
- w.writeTOC(&toc)
- tocSection.end(w)
- tocSection.write(w)
- return w.err
-}
-
-func (b *IndexBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error {
- blob, err := json.Marshal(data)
- if err != nil {
- return err
- }
- sec.start(w)
- w.Write(blob)
- sec.end(w)
- return nil
-}
-
-func newLinesIndices(in []byte) []uint32 {
- out := make([]uint32, 0, bytes.Count(in, []byte{'\n'}))
- for i, c := range in {
- if c == '\n' {
- out = append(out, uint32(i))
- }
- }
- return out
-}