| // Copyright 2020 Google Inc. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package zoekt |
| |
| import ( |
| "reflect" |
| "regexp/syntax" |
| "strings" |
| "testing" |
| |
| "github.com/google/zoekt/query" |
| ) |
| |
// opnames maps each regexp/syntax operator to the name of its Go constant.
// printRegexp uses it to label the nodes of a parsed expression.
var opnames = map[syntax.Op]string{
	// Leaf operators.
	syntax.OpNoMatch:      "OpNoMatch",
	syntax.OpEmptyMatch:   "OpEmptyMatch",
	syntax.OpLiteral:      "OpLiteral",
	syntax.OpCharClass:    "OpCharClass",
	syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
	syntax.OpAnyChar:      "OpAnyChar",

	// Position assertions.
	syntax.OpBeginLine:      "OpBeginLine",
	syntax.OpEndLine:        "OpEndLine",
	syntax.OpBeginText:      "OpBeginText",
	syntax.OpEndText:        "OpEndText",
	syntax.OpWordBoundary:   "OpWordBoundary",
	syntax.OpNoWordBoundary: "OpNoWordBoundary",

	// Operators that wrap or combine sub-expressions.
	syntax.OpCapture:   "OpCapture",
	syntax.OpStar:      "OpStar",
	syntax.OpPlus:      "OpPlus",
	syntax.OpQuest:     "OpQuest",
	syntax.OpRepeat:    "OpRepeat",
	syntax.OpConcat:    "OpConcat",
	syntax.OpAlternate: "OpAlternate",
}
| |
| func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) { |
| t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub)) |
| for _, s := range r.Sub { |
| printRegexp(t, s, lvl+1) |
| } |
| } |
| |
| func substrMT(pattern string) matchTree { |
| d := &indexData{} |
| mt, _ := d.newSubstringMatchTree(&query.Substring{ |
| Pattern: pattern, |
| }) |
| return mt |
| } |
| |
| func TestRegexpParse(t *testing.T) { |
| type testcase struct { |
| in string |
| query matchTree |
| isEquivalent bool |
| } |
| |
| cases := []testcase{ |
| {"(foo|)bar", substrMT("bar"), false}, |
| {"(foo|)", &bruteForceMatchTree{}, false}, |
| {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{ |
| &orMatchTree{[]matchTree{ |
| substrMT("foo"), |
| substrMT("bar"), |
| }}, |
| substrMT("baz"), |
| substrMT("bla"), |
| }}, false}, |
| { |
| "^[a-z](People)+barrabas$", |
| &andMatchTree{[]matchTree{ |
| substrMT("People"), |
| substrMT("barrabas"), |
| }}, false, |
| }, |
| {"foo", substrMT("foo"), true}, |
| {"^foo", substrMT("foo"), false}, |
| {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false}, |
| {"(thread|needle|haystack)", &orMatchTree{[]matchTree{ |
| substrMT("thread"), |
| substrMT("needle"), |
| substrMT("haystack"), |
| }}, true}, |
| {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{ |
| substrMT("foo"), |
| substrMT("bar"), |
| }}}, false}, |
| {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{ |
| substrMT("foo"), |
| substrMT("bar"), |
| }}, false}, |
| {"(foo){2,}", substrMT("foo"), false}, |
| {"(...)(...)", &bruteForceMatchTree{}, false}, |
| } |
| |
| for _, c := range cases { |
| r, err := syntax.Parse(c.in, syntax.Perl) |
| if err != nil { |
| t.Errorf("Parse(%q): %v", c.in, err) |
| continue |
| } |
| d := indexData{} |
| q := query.Regexp{ |
| Regexp: r, |
| } |
| gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive) |
| if !reflect.DeepEqual(c.query, gotQuery) { |
| printRegexp(t, r, 0) |
| t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query) |
| } |
| if isEq != c.isEquivalent { |
| printRegexp(t, r, 0) |
| t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent) |
| } |
| } |
| } |