| // Copyright 2016 Google Inc. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package zoekt |
| |
| import ( |
| "bufio" |
| "bytes" |
| "encoding/binary" |
| "encoding/json" |
| "io" |
| "sort" |
| "time" |
| ) |
| |
| func (w *writer) writeTOC(toc *indexTOC) { |
| secs := toc.sections() |
| w.U32(uint32(len(secs))) |
| for _, s := range secs { |
| s.write(w) |
| } |
| } |
| |
| func (s *compoundSection) writeStrings(w *writer, strs []*searchableString) { |
| s.start(w) |
| for _, f := range strs { |
| s.addItem(w, f.data) |
| } |
| s.end(w) |
| } |
| |
| func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, |
| charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection) { |
| keys := make(ngramSlice, 0, len(s.postings)) |
| for k := range s.postings { |
| keys = append(keys, k) |
| } |
| sort.Sort(keys) |
| |
| ngramText.start(w) |
| for _, k := range keys { |
| var buf [8]byte |
| binary.BigEndian.PutUint64(buf[:], uint64(k)) |
| w.Write(buf[:]) |
| } |
| ngramText.end(w) |
| |
| postings.start(w) |
| for _, k := range keys { |
| postings.addItem(w, s.postings[k]) |
| } |
| postings.end(w) |
| |
| charOffsets.start(w) |
| w.Write(toSizedDeltas(s.runeOffsets)) |
| charOffsets.end(w) |
| |
| endRunes.start(w) |
| w.Write(toSizedDeltas(s.endRunes)) |
| endRunes.end(w) |
| } |
| |
| func (b *IndexBuilder) Write(out io.Writer) error { |
| buffered := bufio.NewWriterSize(out, 1<<20) |
| defer buffered.Flush() |
| |
| w := &writer{w: buffered} |
| toc := indexTOC{} |
| |
| toc.fileContents.writeStrings(w, b.contentStrings) |
| toc.newlines.start(w) |
| for _, f := range b.contentStrings { |
| toc.newlines.addItem(w, toSizedDeltas(newLinesIndices(f.data))) |
| } |
| toc.newlines.end(w) |
| |
| toc.branchMasks.start(w) |
| for _, m := range b.branchMasks { |
| w.U64(m) |
| } |
| toc.branchMasks.end(w) |
| |
| toc.fileSections.start(w) |
| for _, s := range b.docSections { |
| toc.fileSections.addItem(w, marshalDocSections(s)) |
| } |
| toc.fileSections.end(w) |
| |
| writePostings(w, b.contentPostings, &toc.ngramText, &toc.runeOffsets, &toc.postings, &toc.fileEndRunes) |
| |
| // names. |
| toc.fileNames.writeStrings(w, b.nameStrings) |
| |
| writePostings(w, b.namePostings, &toc.nameNgramText, &toc.nameRuneOffsets, &toc.namePostings, &toc.nameEndRunes) |
| |
| toc.subRepos.start(w) |
| w.Write(toSizedDeltas(b.subRepos)) |
| toc.subRepos.end(w) |
| |
| toc.contentChecksums.start(w) |
| w.Write(b.checksums) |
| toc.contentChecksums.end(w) |
| |
| toc.languages.start(w) |
| w.Write(b.languages) |
| toc.languages.end(w) |
| |
| toc.runeDocSections.start(w) |
| w.Write(marshalDocSections(b.runeDocSections)) |
| toc.runeDocSections.end(w) |
| |
| if err := b.writeJSON(&IndexMetadata{ |
| IndexFormatVersion: IndexFormatVersion, |
| IndexTime: time.Now(), |
| IndexFeatureVersion: FeatureVersion, |
| PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII, |
| LanguageMap: b.languageMap, |
| ZoektVersion: Version, |
| }, &toc.metaData, w); err != nil { |
| return err |
| } |
| if err := b.writeJSON(b.repo, &toc.repoMetaData, w); err != nil { |
| return err |
| } |
| |
| var tocSection simpleSection |
| |
| tocSection.start(w) |
| w.writeTOC(&toc) |
| tocSection.end(w) |
| tocSection.write(w) |
| return w.err |
| } |
| |
| func (b *IndexBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error { |
| blob, err := json.Marshal(data) |
| if err != nil { |
| return err |
| } |
| sec.start(w) |
| w.Write(blob) |
| sec.end(w) |
| return nil |
| } |
| |
// newLinesIndices returns the byte offsets of every '\n' in in, in ascending
// order, as uint32 values. The result is never nil; input without newlines
// yields an empty slice.
func newLinesIndices(in []byte) []uint32 {
	// Count first so the result slice is allocated exactly once.
	offsets := make([]uint32, 0, bytes.Count(in, []byte{'\n'}))
	for pos := 0; pos < len(in); {
		rel := bytes.IndexByte(in[pos:], '\n')
		if rel < 0 {
			break
		}
		offsets = append(offsets, uint32(pos+rel))
		pos += rel + 1
	}
	return offsets
}