blob: ce16a606bec3cfa0c85dacc23489f2ae808ac408 [file] [log] [blame]
// Copyright 2020 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zoekt
import (
"reflect"
"regexp/syntax"
"strings"
"testing"
"github.com/google/zoekt/query"
)
// opnames maps each regexp/syntax operator to the name of its Go constant.
// printRegexp uses it to log readable node names when dumping a parsed
// regular expression.
var opnames = map[syntax.Op]string{
	syntax.OpNoMatch:        "OpNoMatch",
	syntax.OpEmptyMatch:     "OpEmptyMatch",
	syntax.OpLiteral:        "OpLiteral",
	syntax.OpCharClass:      "OpCharClass",
	syntax.OpAnyCharNotNL:   "OpAnyCharNotNL",
	syntax.OpAnyChar:        "OpAnyChar",
	syntax.OpBeginLine:      "OpBeginLine",
	syntax.OpEndLine:        "OpEndLine",
	syntax.OpBeginText:      "OpBeginText",
	syntax.OpEndText:        "OpEndText",
	syntax.OpWordBoundary:   "OpWordBoundary",
	syntax.OpNoWordBoundary: "OpNoWordBoundary",
	syntax.OpCapture:        "OpCapture",
	syntax.OpStar:           "OpStar",
	syntax.OpPlus:           "OpPlus",
	syntax.OpQuest:          "OpQuest",
	syntax.OpRepeat:         "OpRepeat",
	syntax.OpConcat:         "OpConcat",
	syntax.OpAlternate:      "OpAlternate",
}
// printRegexp logs the operator tree rooted at r through t.Logf, one node per
// line. Each line holds lvl repetitions of the indent string, the node's
// operator name as found in opnames, and the number of direct
// sub-expressions. Children are logged recursively at lvl+1.
func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
	indent := strings.Repeat(" ", lvl)
	opName := opnames[r.Op]
	t.Logf("%s%s ch: %d", indent, opName, len(r.Sub))

	childLvl := lvl + 1
	for i := range r.Sub {
		printRegexp(t, r.Sub[i], childLvl)
	}
}
// substrMT returns the match tree that newSubstringMatchTree builds for a
// query.Substring whose Pattern is pattern (all other fields left at their
// zero values), using a zero-valued indexData. It is used to spell out the
// expected trees in the test table.
//
// It panics if newSubstringMatchTree reports an error, so that a broken
// fixture fails loudly instead of handing a nil tree to the comparison.
func substrMT(pattern string) matchTree {
	d := &indexData{}
	mt, err := d.newSubstringMatchTree(&query.Substring{
		Pattern: pattern,
	})
	if err != nil {
		panic("substrMT(" + pattern + "): " + err.Error())
	}
	return mt
}
// TestRegexpParse checks how indexData.regexpToMatchTreeRecursive translates
// parsed regular expressions into match trees.
//
// Each case holds a pattern (parsed with syntax.Perl), the match tree expected
// for it, and the expected value of the second result of
// regexpToMatchTreeRecursive. That second result presumably reports whether
// the tree matches exactly what the regexp matches; confirm against the
// implementation.
func TestRegexpParse(t *testing.T) {
	type testcase struct {
		in           string
		query        matchTree
		isEquivalent bool
	}

	cases := []testcase{
		{"(foo|)bar", substrMT("bar"), false},
		{"(foo|)", &bruteForceMatchTree{}, false},
		{"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
			&orMatchTree{[]matchTree{
				substrMT("foo"),
				substrMT("bar"),
			}},
			substrMT("baz"),
			substrMT("bla"),
		}}, false},
		{
			"^[a-z](People)+barrabas$",
			&andMatchTree{[]matchTree{
				substrMT("People"),
				substrMT("barrabas"),
			}}, false,
		},
		{"foo", substrMT("foo"), true},
		{"^foo", substrMT("foo"), false},
		{"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false},
		{"(thread|needle|haystack)", &orMatchTree{[]matchTree{
			substrMT("thread"),
			substrMT("needle"),
			substrMT("haystack"),
		}}, true},
		{"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
			substrMT("foo"),
			substrMT("bar"),
		}}}, false},
		{"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
			substrMT("foo"),
			substrMT("bar"),
		}}, false},
		{"(foo){2,}", substrMT("foo"), false},
		{"(...)(...)", &bruteForceMatchTree{}, false},
	}

	for _, c := range cases {
		r, err := syntax.Parse(c.in, syntax.Perl)
		if err != nil {
			t.Errorf("Parse(%q): %v", c.in, err)
			continue
		}

		d := indexData{}
		q := query.Regexp{
			Regexp: r,
		}
		// FileName and CaseSensitive are left at their zero values. The
		// literal 3 is presumably the minimum literal length worth indexing;
		// confirm against regexpToMatchTreeRecursive. The last two results
		// are not examined by this test.
		gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)

		treeOK := reflect.DeepEqual(c.query, gotQuery)
		equivOK := isEq == c.isEquivalent
		if treeOK && equivOK {
			continue
		}

		// Dump the parsed regexp once per failing case, ahead of the
		// specific failure messages.
		printRegexp(t, r, 0)
		if !treeOK {
			t.Errorf("regexpToMatchTreeRecursive(%q): got match tree %v, want %v", c.in, gotQuery, c.query)
		}
		if !equivOK {
			t.Errorf("regexpToMatchTreeRecursive(%q): got isEquivalent %v, want %v", c.in, isEq, c.isEquivalent)
		}
	}
}