diff --git a/fixtures/vql_queries.golden b/fixtures/vql_queries.golden index 325034d..c315917 100644 --- a/fixtures/vql_queries.golden +++ b/fixtures/vql_queries.golden @@ -323,7 +323,7 @@ "bar": 0 } ], - "035 Limit and order: SELECT * FROM test() ORDER BY foo DESC LIMIT 1 ": [ + "035 Limit and order: SELECT * FROM test() ORDER BY foo DESC LIMIT 1 ": [ { "foo": 4, "bar": 2 diff --git a/protocols/protocol_membership.go b/protocols/protocol_membership.go index 66ae910..c312da8 100644 --- a/protocols/protocol_membership.go +++ b/protocols/protocol_membership.go @@ -4,6 +4,7 @@ import ( "reflect" "strings" + "github.com/Velocidex/ordereddict" "www.velocidex.com/golang/vfilter/types" ) @@ -30,6 +31,13 @@ func (self MembershipDispatcher) Membership(scope types.Scope, a types.Any, b ty case types.Null, *types.Null, nil: return false + case *ordereddict.Dict: + a_str, ok := a.(string) + if ok { + _, pres := t.Get(a_str) + return pres + } + case string: // 'he' in 'hello' a_str, ok := a.(string) diff --git a/reformat/fixtures/formatting.golden b/reformat/fixtures/formatting.golden index 939d1d4..c3aa12f 100644 --- a/reformat/fixtures/formatting.golden +++ b/reformat/fixtures/formatting.golden @@ -8,6 +8,7 @@ FROM ALLUploads 1 MultiLine Comment: + /* This is a multiline comment */ @@ -33,10 +34,8 @@ LET LocalDeviceMajor <= (253, 7, -- loop 4 Comment above plugin arg: SELECT * -FROM info( - -- This is a comment for Foo - Foo=1, - Bar=2) +FROM info(-- This is a comment for Foo +Foo=1, Bar=2) @@ -85,18 +84,18 @@ FROM Artifact.Windows.EventLogs.EvtxHunter( 11 If Plugin: -LET DateAfterTime <= if( - condition=TargetTime, - then=timestamp(epoch=TargetTime.Unix - TargetTimeBox), - else=timestamp(epoch=now() - TargetTimeBox)) +LET DateAfterTime <= if(condition=TargetTime, + then=timestamp(epoch=TargetTime.Unix - TargetTimeBox), + else=timestamp(epoch=now() - TargetTimeBox)) 12 If Plugin with longer args: -LET DateAfterTime <= if( - condition=TargetTime, - then=timestamp(epoch=TargetTime.Unix - TargetTimeBox + SomeConstant), - else=timestamp(epoch=now() - TargetTimeBox)) +LET DateAfterTime <= if(condition=TargetTime, + then=timestamp(epoch=TargetTime.Unix - + TargetTimeBox + + SomeConstant), + else=timestamp(epoch=now() - TargetTimeBox)) @@ -155,32 +154,22 @@ LET Foo(x, y, z) = SELECT * 20 Nesting: SELECT * -FROM foreach( - row={ +FROM foreach(row={ SELECT * - FROM foreach( - row={ - SELECT * - FROM info - }, - query={ - SELECT * - FROM info() - }) + FROM foreach(row={ SELECT * FROM info }, query={ SELECT * FROM info() }) }, - query=ForeachQuery(X=1)) + query=ForeachQuery(X=1)) 21 Foreach: SELECT * -FROM foreach( - row={ +FROM foreach(row={ SELECT * FROM clients() - LIMIT 5 + LIMIT 5 }, - query={ + query={ SELECT client_info(client_id=client_id) FROM glob(globs=AGlob) }) @@ -189,9 +178,9 @@ FROM foreach( 22 Subquery: SELECT { - SELECT * - FROM info() - } AS Foo, + SELECT * + FROM info() + } AS Foo, Bar FROM scope() @@ -205,7 +194,7 @@ SELECT A AS First, FROM info(arg=1, arg2=3) WHERE 1 ORDER BY C -LIMIT 1 +LIMIT 1 @@ -215,3 +204,28 @@ FROM scope() +25 Very long LET with SELECT pushing far to right: +LET this_is_a_long_name = SELECT + *, + upload_directory( + accessor="collector", + file=RootPathSpec + "hello world " + _Components) AS UploadedFile + FROM ALLUploads + + + +26 Complex long lines. Args should line up on the function they are in.: +LET enumerate_path = SELECT + regex_replace(source=TargetPath, + re='''\%USERPROFILE\%''', + replace=Directory) AS TargetPath, + *, + check_exist(path=regex_replace(source=TargetPath, + re='''\%USERPROFILE\%''', + replace=Directory))[0] AS Exists, + MaxSize - rand(range=(MaxSize - MinSize)) - len( + list=unhex(string=MagicBytes)) - 7 AS _PaddingSize + FROM Honeyfiles + + + diff --git a/reformat/visitor.go b/reformat/visitor.go index 86d2743..e472ef4 100644 --- a/reformat/visitor.go +++ b/reformat/visitor.go @@ -1,6 +1,8 @@ package reformat import ( + "strings" + "www.velocidex.com/golang/vfilter" "www.velocidex.com/golang/vfilter/types" ) @@ -15,5 +17,11 @@ func ReFormatVQL(scope types.Scope, query string, visitor := vfilter.NewVisitor(scope, options) visitor.Visit(vql) - return visitor.ToString(), nil + lines := strings.Split(visitor.ToString(), "\n") + result := make([]string, 0, len(lines)) + for _, l := range lines { + result = append(result, strings.TrimRight(l, " ")) + } + + return strings.Join(result, "\n"), nil } diff --git a/reformat/visitor_test.go b/reformat/visitor_test.go index 2722d79..61e4f46 100644 --- a/reformat/visitor_test.go +++ b/reformat/visitor_test.go @@ -85,6 +85,22 @@ FROM Artifact.Windows.EventLogs.EvtxHunter(EvtxGlob='''%SystemRoot%\System32\Win {"Subquery", "SELECT {SELECT * FROM info()} AS Foo, Bar FROM scope()"}, {"Simple Statement", "SELECT A AS First, B AS Second, C, D FROM info(arg=1, arg2=3) WHERE 1 ORDER BY C LIMIT 1"}, {"Explain statements", "EXPLAIN SELECT 'A' FROM scope()"}, + {"Very long LET with SELECT pushing far to right", ` +LET this_is_a_long_name = SELECT *, upload_directory(accessor="collector", file=RootPathSpec + "hello world " + _Components) AS UploadedFile FROM ALLUploads +`}, + {"Complex long lines. Args should line up on the function they are in.", ` + LET enumerate_path = SELECT + regex_replace(source=TargetPath, + re='''\%USERPROFILE\%''', + replace=Directory) AS TargetPath, + *, + check_exist(path=regex_replace(source=TargetPath, + re='''\%USERPROFILE\%''', + replace=Directory))[0] AS Exists, + MaxSize - rand(range=(MaxSize - MinSize)) - len( + list=unhex(string=MagicBytes)) - 7 AS _PaddingSize + FROM Honeyfiles +`}, } func makeTestScope() types.Scope { @@ -100,7 +116,7 @@ func TestVQLQueries(t *testing.T) { golden := "" for idx, testCase := range reformatTests { - if false && idx != 24 { + if false && idx != 20 { continue } diff --git a/utils/debug.go b/utils/debug.go index ad9f775..05570e5 100644 --- a/utils/debug.go +++ b/utils/debug.go @@ -1,3 +1,15 @@ package utils +import "fmt" + +const ( + debug = false +) + func DlvBreak() {} + +func DebugPrint(format string) { + if debug { + fmt.Println(format) + } +} diff --git a/utils/dict/dict.go b/utils/dict/dict.go index 25f24dc..9f6c03b 100644 --- a/utils/dict/dict.go +++ b/utils/dict/dict.go @@ -89,13 +89,18 @@ func normalize_value(ctx context.Context, return result default: - a_value := reflect.Indirect(reflect.ValueOf(value)) - a_type := a_value.Type() + a_value := reflect.ValueOf(value) + a_value = reflect.Indirect(a_value) + a_type := reflect.TypeOf(value) if a_type == nil { return types.Null{} } if a_type.Kind() == reflect.Slice || a_type.Kind() == reflect.Array { + if types.IsNil(a_value) { + return types.Null{} + } + length := a_value.Len() result := make([]types.Any, 0, length) for i := 0; i < length; i++ { @@ -106,6 +111,10 @@ func normalize_value(ctx context.Context, // Map keys are random so they lead to unstable output } else if a_type.Kind() == reflect.Map { + if types.IsNil(a_value) { + return types.Null{} + } + result := ordereddict.NewDict() for _, key := range a_value.MapKeys() { str_key, ok := key.Interface().(string) diff --git a/vfilter_test.go b/vfilter_test.go index 776a531..f26742e 100644 --- a/vfilter_test.go +++ b/vfilter_test.go @@ -245,6 +245,8 @@ var execTestsSerialization = []execTest{ Set("foo", int64(1)). Set("bar", []Any{int64(2), int64(3)})}, + {"'foo' IN dict(foo=0)", true}, + // Associative // Relies on pre-populating the scope with a Dict. {"foo.bar.baz, foo.bar2", []float64{5, 7}}, diff --git a/visitor.go b/visitor.go index 89f3b78..5881099 100644 --- a/visitor.go +++ b/visitor.go @@ -10,6 +10,7 @@ import ( "www.velocidex.com/golang/vfilter/arg_parser" "www.velocidex.com/golang/vfilter/materializer" "www.velocidex.com/golang/vfilter/types" + "www.velocidex.com/golang/vfilter/utils" ) var ( @@ -41,6 +42,9 @@ type FormatOptions struct { ArgsOnNewLine bool BreakLines bool CollectCallSites bool + + // Set when we do a test reformat to try to lookahead. + test bool } type CallSite struct { @@ -51,11 +55,21 @@ type CallSite struct { type Visitor struct { CallSites []CallSite + + // Tokens added to the visitor as we encounter each token during + // parsing. Combining all the Fragments yields a reformatted + // query. Fragments []string scope types.Scope - indents []int - // Fragment offsets for the line feeds + // A list of indent points at each line break. Acts as a stack + // when an indent pushes a new indent point and an unindent pops + // it. + indents []int + + // Fragment offsets for the line feeds. Each represent the offset + // in the Fragment array where a line feed is inserted. Total + // length of this array represents the total number of lines. line_breaks []int // Current position along the line. @@ -67,6 +81,7 @@ type Visitor struct { max_width int max_line string + // Flag set when a comment is encountered. has_comments bool } @@ -75,19 +90,24 @@ func NewVisitor(scope types.Scope, options FormatOptions) *Visitor { scope: scope, line_breaks: []int{0}, opts: options, + indents: []int{0}, } } // Merge results from the in visitor to this visitor. func (self *Visitor) merge(in *Visitor) { - self.Fragments = in.Fragments + self.Fragments = append([]string{}, in.Fragments...) self.line_breaks = in.line_breaks - self.indents = in.indents + self.indents = append([]int{}, in.indents...) self.pos = in.pos self.max_width = in.max_width + self.max_line = in.max_line + self.has_comments = in.has_comments } func (self *Visitor) copy() *Visitor { + opts_copy := self.opts + return &Visitor{ Fragments: append([]string{}, self.Fragments...), scope: self.scope, @@ -95,7 +115,8 @@ func (self *Visitor) copy() *Visitor { line_breaks: append([]int{}, self.line_breaks...), pos: self.pos, max_width: self.max_width, - opts: self.opts, + max_line: self.max_line, + opts: opts_copy, } } @@ -104,12 +125,27 @@ func (self *Visitor) push_indent() { self.indents = append(self.indents, self.pos) } +// Add a line break and reset pos and indent to start of the line. +func (self *Visitor) new_line(pos int) { + self.indents = append(self.indents, pos) + self.pos = pos + + if self.opts.BreakLines { + self.Fragments = append(self.Fragments, "\n") + self.line_breaks = append(self.line_breaks, len(self.Fragments)) + self.Fragments = append(self.Fragments, strings.Repeat(" ", pos)) + + self.checkInvariant() + } +} + // Indent in from the last indent point. func (self *Visitor) indent_in() { last_indent := 0 if len(self.indents) > 0 { last_indent = self.indents[len(self.indents)-1] } + self.indents = append(self.indents, last_indent+2) } @@ -134,15 +170,17 @@ func (self *Visitor) line_break() { if !self.opts.BreakLines { switch last_fragment { // Do not follow these with a space. - case " ", "(", "{": + case " ", "(", "{", "=", "[", "=[": default: - self.push(" ") + if !strings.HasSuffix(last_fragment, " ") { + self.push(" ") + } } return } // Ensure no trailing spaces - if last_fragment == " " { + if strings.TrimSpace(last_fragment) == "" { self.pop() } @@ -150,10 +188,11 @@ func (self *Visitor) line_break() { if len(self.indents) > 0 { last_indent = self.indents[len(self.indents)-1] } + // Go back to start of the line - self.push("\n") self.pos = 0 - self.line_breaks = append(self.line_breaks, len(self.Fragments)-1) + self.Fragments = append(self.Fragments, "\n") + self.line_breaks = append(self.line_breaks, len(self.Fragments)) self.push(strings.Repeat(" ", last_indent)) } @@ -162,7 +201,7 @@ func (self *Visitor) current_line() string { return "" } - line_fragment_idx := self.line_breaks[len(self.line_breaks)-1] + 1 + line_fragment_idx := self.line_breaks[len(self.line_breaks)-1] if len(self.Fragments) < line_fragment_idx { return "" } @@ -200,8 +239,7 @@ func (self *Visitor) Visit(node interface{}) { case []*_Comment: for _, c := range t { - self.Visit(c) - self.line_break() + self.visitComment(c) } case *VQL: @@ -285,15 +323,30 @@ func (self *Visitor) visitStoredExpression(node *StoredExpression) { self.Visit(node.Expr) } +// Comments must be on their own lines func (self *Visitor) visitComment(node *_Comment) { + // C Style comments start with // and follow the arg. if node.Comment != nil { self.push(*node.Comment) + self.line_break() } + + // VQL Comments start with -- and generally follow the thing they + // are commenting. We must break line after wards to tell the + // parser to switch back to VQL mode. if node.VQLComment != nil { self.push(*node.VQLComment) + self.line_break() } + + // Multi line comments always start at column 0 and follow by a + // line break. if node.MultiLine != nil { + self.new_line(0) + defer self.pop_indent() + self.push(*node.MultiLine) + self.line_break() } } @@ -312,6 +365,11 @@ func (self *Visitor) visitParameterList(node *_ParameterList) { } } +func (self *Visitor) isCurrentLineSpace() bool { + return strings.TrimSpace(self.current_line()) == "" +} + +// Aliased expressions are elements after the SELECT separated by , func (self *Visitor) visitAliasedExpression(node *_AliasedExpression) { node.mu.Lock() defer node.mu.Unlock() @@ -324,43 +382,71 @@ func (self *Visitor) visitAliasedExpression(node *_AliasedExpression) { } if node.Expression != nil { - visitor, longest_line, does_it_fit := doesNodeFitInOneLine(self, node.Expression) - + // We have two choices here - fit the expression on the + // current line, or break lines if node.As != "" { - // Make sure we have enough room for the AS clause - if does_it_fit && longest_line+3+len(node.As) < self.opts.MaxWidthThreshold { - self.merge(visitor) - self.push(" AS ", node.As) - return - } - self.line_break() - - self.Visit(node.Expression) - self.push(" AS ", node.As) - return - } + node_as := node.As + self.abTest(node.Expression, + // Fit expression on one line. + func(self *Visitor, node interface{}) { + self.opts.BreakLines = false + self.Visit(node) + + self.push(" AS ", node_as) + }, + func(self *Visitor, node interface{}) { + // Break lines + if !self.isCurrentLineSpace() { + self.line_break() + } - // No AS Clause - if does_it_fit { - self.merge(visitor) + self.Visit(node) + self.push(" AS ", node_as) + }) return } - self.line_break() - self.Visit(node.Expression) + // No AS clause + self.abTest(node.Expression, + // Fit expression on one line. + func(self *Visitor, node interface{}) { + self.opts.BreakLines = false + self.opts.ArgsOnNewLine = false + self.Visit(node) + }, + func(self *Visitor, node interface{}) { + // Break lines + if !self.isCurrentLineSpace() { + self.line_break() + } + self.Visit(node) + }) return } else if node.SubSelect != nil { self.push("{", " ") - self.indent_in() - self.line_break() - self.Visit(node.SubSelect) + // We prefer the subquery to start at the begining of the line + // with a little indent. + if self.opts.BreakLines { + self.new_line(2) + defer self.pop_indent() + + self.indent_in() + defer self.pop_indent() + + self.push(" ") + self.Visit(node.SubSelect) + + // Align closing } to previous block + self.line_break() + self.push("}") + + } else { + self.Visit(node.SubSelect) + self.push(" }") + } - // Align closing } to previous block - self.pop_indent() - self.line_break() - self.push("}") if node.As != "" { self.push(" AS ", node.As) } @@ -372,6 +458,14 @@ func (self *Visitor) visitSymbolRef(node *_SymbolRef) { defer node.mu.Unlock() self.Visit(node.Comments) + + if self.pos > self.opts.IndentWidthThreshold { + self.indent_in() + defer self.pop_indent() + + self.line_break() + } + self.push(node.Symbol) if !node.Called && node.Parameters == nil { return @@ -395,52 +489,85 @@ func (self *Visitor) visitSymbolRef(node *_SymbolRef) { return } - longest_arg := 0 + // Here have two choices: + // 1. Try to fit the entire arg list on the same line. + // 2. Break each arg on its own line. + self.abTest(node, + // Fit everything on the one line. + func(self *Visitor, node_any interface{}) { + self.opts.BreakLines = false - // See if we can fit the arg list in one line - if !self.pluginUsesLineMode(node.Symbol) { - visitor, longest_arg_, ok := doesArgListFitInOneLine( - self, node.Parameters) - if ok { - self.merge(visitor) - return - } - longest_arg = longest_arg_ - } + node := node_any.(*_SymbolRef) - // Nope we break into lines. - self.push("(") + // Write all the args on the one line + self.push("(") + for idx, arg := range node.Parameters { + self.Visit(arg) + // Do not add , after the last parameter + if idx < len(node.Parameters)-1 { + self.push(",", " ") + } + } + self.push(")") + }, - // The width will be quite wide so we try to fit it a bit better - // on a new line by indenting 2 spots from the start of the block. - if self.opts.ArgsOnNewLine || - self.pluginUsesLineMode(node.Symbol) { - self.indent_in() - self.line_break() + // Break each arg on a separate line + func(self *Visitor, node_any interface{}) { + node := node_any.(*_SymbolRef) - } else if self.pos+longest_arg > self.opts.MaxWidthThreshold { - self.indent_in() - self.line_break() + self.push("(") - } else { - // Otherwise try to line up on the ( because it looks neater. - self.push_indent() - } + // Here we have a couple of choices: + // 1. Write the args starting immediately after the ( - for example + // plugin(Arg1=Foo + // Arg2=Bar) + // + // 2. Add another line break and indent a bit after the name of the plugin : + // plugin( + // Arg1=Foo, + // Arg2=Bar) + + self.abTest(node, + // Option 1: indent on start of ( + func(self *Visitor, node_any interface{}) { + self.push_indent() + defer self.pop_indent() + + for idx, arg := range node.Parameters { + if idx > 0 { + self.line_break() + } + + self.Visit(arg) + if idx < len(node.Parameters)-1 { + self.push(",", " ") + } + } - defer self.pop_indent() + self.push(")") + }, - for idx, arg := range node.Parameters { - if idx > 0 && self.opts.ArgsOnNewLine { - self.line_break() - } + // Option 2: Start a new line + func(self *Visitor, node_any interface{}) { + self.indent_in() + defer self.pop_indent() - self.Visit(arg) - if idx < len(node.Parameters)-1 { - self.push(",", " ") - } - } + self.line_break() + + for idx, arg := range node.Parameters { + if idx > 0 { + self.line_break() + } + + self.Visit(arg) + if idx < len(node.Parameters)-1 { + self.push(",", " ") + } + } - self.push(")") + self.push(")") + }) + }) } func (self *Visitor) visitAndExpression(node *_AndExpression) { @@ -629,11 +756,10 @@ func (self *Visitor) visitArgs(node *_Args) { if node.Comments != nil { self.has_comments = true - // If we are not breaking lines we dont adds the comment at - // all. - if self.opts.BreakLines { - self.Visit(node.Comments) - } + // We encountered a comment - we have to break lines from now + // on so we can preserve the comments. + self.opts.BreakLines = true + self.Visit(node.Comments) } if node.Right != nil { @@ -642,15 +768,27 @@ func (self *Visitor) visitArgs(node *_Args) { } else if node.SubSelect != nil { self.push(node.Left, "={") - self.indent_in() - self.line_break() - self.Visit(node.SubSelect) + // We prefer subquery to start at the begining of the line + // with a small indent. + if self.opts.BreakLines { + self.new_line(2) + self.indent_in() + defer self.pop_indent() - // Align closing } to previous block - self.pop_indent() - self.line_break() - self.push("}") + self.push(" ") + self.Visit(node.SubSelect) + + // Align closing } to previous block + self.pop_indent() + self.line_break() + self.push("}") + + } else { + self.push(" ") + self.Visit(node.SubSelect) + self.push(" }") + } } else if node.Array != nil { self.push(node.Left, "=[") @@ -693,80 +831,93 @@ func (self *Visitor) visitPlugin(node *Plugin) { return } - longest_arg := 0 + // Here have two choices: + // 1. Try to fit the entire arg list on the same line. + // 2. Break each arg on its own line. + self.abTest(node, + // Fit everything on the one line. + func(self *Visitor, node_any interface{}) { + self.opts.BreakLines = false - if !self.pluginUsesLineMode(node.Name) { - // Check if the arg list is going to fit on the current - // line. - // - // We need to format the args in the plugin args. There - // are 3 formatting styles: - // - // 1. All args fit on the same line. - // SELECT * FROM plugin(A="A", B="B") - // - // 2. There are many args but they are generally short. We - // format them one on each line lining up with the - // opening brace. - // SELECT * FROM plugin(A="A", - // B="B", - // C="C") - // - // 3. One of the args is so long that the line will - // overflow, in that case we move the indent point - // relative to the block start. - // SELECT * FROM plugin( - // A="A", - // B="Very long arg", - // C="C") - // - // The following code figures out which style is - // appropriate by calculating: - // * How long would the arg list be if formatted on the same line? - // * What is the length of each arg if formatted on its own? - // - // We do this by trying to format the args into a single - // line with a new visitor. This is effectively a - // lookahead/backtracking algorithm. - visitor, longest_arg_, ok := doesArgListFitInOneLine( - self, node.Args) - if ok { - // The args all fit in the same line, just merge the - // visitor - self.merge(visitor) - return - } + node := node_any.(*Plugin) - longest_arg = longest_arg_ - } + // Write all the args on the one line + self.push("(") + for idx, arg := range node.Args { + self.Visit(arg) + // Do not add , after the last parameter + if idx < len(node.Args)-1 { + self.push(",", " ") + } + } + self.push(")") - self.push("(") + }, - // The block will be very wide so we break it into a smaller - // block - if self.pluginUsesLineMode(node.Name) || - self.pos+longest_arg > self.opts.MaxWidthThreshold { - self.indent_in() - self.line_break() + // Break each arg on a separate line + func(self *Visitor, node_any interface{}) { + node := node_any.(*Plugin) - } else { - // Otherwise try to line up on the ( - self.push_indent() - } - defer self.pop_indent() + // Here we have a couple of choices: + // 1. Write the args starting immediately after the ( - for example + // plugin(Arg1=Foo + // Arg2=Bar) + // + // 2. Add another line break and indent a bit after the name of the plugin : + // plugin( + // Arg1=Foo, + // Arg2=Bar) - // Write args one per line - for idx, arg := range node.Args { - if idx > 0 { - // First arg inline - self.line_break() - } - self.Visit(arg) - if idx < len(node.Args)-1 { - self.push(",", " ") - } - } - self.push(")") + self.abTest(node, + // Option 1: indent on start of ( + func(self *Visitor, node_any interface{}) { + node := node_any.(*Plugin) + + self.push("(") + + self.push_indent() + defer self.pop_indent() + + for idx, arg := range node.Args { + if idx > 0 { + self.line_break() + } + + self.Visit(arg) + if idx < len(node.Args)-1 { + self.push(",", " ") + } + } + + self.push(")") + }, + + // Option 2: Start a new line + func(self *Visitor, node_any interface{}) { + node := node_any.(*Plugin) + + self.push("(") + + self.indent_in() + defer self.pop_indent() + + self.line_break() + + for idx, arg := range node.Args { + if idx > 0 { + self.line_break() + } + + self.Visit(arg) + if idx < len(node.Args)-1 { + self.push(",", " ") + } + } + + self.push(")") + }) + + }) } } @@ -783,6 +934,8 @@ func (self *Visitor) visitSelectExpression(node *_SelectExpression) { // No trailing , in the last element. if idx < len(node.Expressions)-1 { self.push(",", " ") + + // We want each expression on its own line. self.line_break() } } @@ -796,12 +949,34 @@ func (self *Visitor) visitSelect(node *_Select) { } self.push("SELECT ") - self.push_indent() if node.SelectExpression != nil { - self.Visit(node.SelectExpression) + // We need to make a choice here: + // 1. Put expression after SELECT + // 2. Break line and put expression near the start of line. + self.abTest( + node.SelectExpression, + + // Render after the SELECT + func(self *Visitor, node interface{}) { + self.push_indent() + defer self.pop_indent() + + self.Visit(node) + }, + + // Start on a new line. + func(self *Visitor, node interface{}) { + self.new_line(2) + defer self.pop_indent() + + self.indent_in() + defer self.pop_indent() + + self.push(" ") + self.Visit(node) + }) } - self.pop_indent() if node.From != nil { self.line_break() @@ -819,8 +994,9 @@ func (self *Visitor) visitSelect(node *_Select) { self.line_break() self.push("GROUP BY ") self.push_indent() + defer self.pop_indent() + self.Visit(node.GroupBy) - self.pop_indent() } if node.OrderBy != nil { @@ -838,20 +1014,40 @@ func (self *Visitor) visitSelect(node *_Select) { } } +// At any point the following invariants hold: + +// 1. self.pos represents the current cursor position. It must be at +// the end of the current line. +// 2. self.max_width must be the equal to the length of self.max_line +func (self *Visitor) checkInvariant() { + // self.pos must line up with the current line offset. + current_line := self.current_line() + if len(current_line) != self.pos || + len(self.max_line) != self.max_width { + utils.DebugPrint("ERROR: Invariant failed!\n") + } +} + +// Push fragments into the fragment queue. func (self *Visitor) push(fragments ...string) { for _, i := range fragments { self.Fragments = append(self.Fragments, i) self.pos += len(i) + + // Line has overrun the max width, break the line. if self.max_width < self.pos { self.max_width = self.pos self.max_line = self.current_line() } + self.checkInvariant() } } func (self *Visitor) pop() { if len(self.Fragments) > 0 { + last_fragment := self.Fragments[len(self.Fragments)-1] self.Fragments = self.Fragments[:len(self.Fragments)-1] + self.pos -= len(last_fragment) } } @@ -898,74 +1094,93 @@ func (self *Visitor) visitVQL(node *VQL) { } } -func FormatToString(scope types.Scope, node interface{}) string { - visitor := NewVisitor(scope, ToStringOptions) - visitor.Visit(node) - return visitor.ToString() -} +// Try to format the node both ways and select the better one (based on is_better()) +func (self *Visitor) abTest( + node interface{}, + a func(self *Visitor, node interface{}), + b func(self *Visitor, node interface{}), +) { -func doesArgListFitInOneLine(self *Visitor, args []*_Args) ( - result *Visitor, longest_arg int, does_it_fit bool) { + visitor_a := self.copy() + visitor_a.opts.test = true + a(visitor_a, node) - // It is not going to fit on the line at all - if self.pos > self.opts.MaxWidthThreshold { - return self, self.pos, false + if len(visitor_a.indents) != len(self.indents) { + utils.DebugPrint("Unbalanced indents") } - // make a copy of the visitor and try to write all the args on it. - result = self.copy() - result.opts.BreakLines = false - - // Write all the args on the one line - result.push("(") - for idx, arg := range args { - start := result.pos - result.Visit(arg) - if idx < len(args)-1 { - result.push(",", " ") - } - arg_len := result.pos - start - if arg_len > longest_arg { - longest_arg = arg_len - } + // When not in reformat mode we really dont care which option we + // choose. + if !self.opts.BreakLines { + self.merge(visitor_a) + return } - result.push(")") - // Check if the width exceeds the recommended size - does_it_fit = !result.has_comments && - result.max_width < self.opts.MaxWidthThreshold && - len(result.line_breaks) == len(self.line_breaks) + visitor_b := self.copy() + visitor_b.opts.test = true + + b(visitor_b, node) - // Comments need to take the entire line. - if result.has_comments { - longest_arg = self.opts.MaxWidthThreshold + if len(visitor_b.indents) != len(self.indents) { + utils.DebugPrint("Unbalanced indents") } - return result, longest_arg, does_it_fit -} + /* + fmt.Printf("Performing abTest on %v\n", self.ToString()) -func doesNodeFitInOneLine(self *Visitor, node interface{}) ( - result *Visitor, longest_line int, does_it_fit bool) { + fmt.Printf("Visitor_a:\n%v\n", visitor_a.ToString()) - // We already overflow it can not fit. - if self.pos > self.opts.MaxWidthThreshold { - return self, self.pos, false + fmt.Printf("Visitor_b:\n%v\n", visitor_b.ToString()) + */ + if visitor_a.is_better(visitor_b) { + self.merge(visitor_b) + } else { + self.merge(visitor_a) } - // make a copy of the visitor and try to write all the args on it. - result = self.copy() - result.opts.BreakLines = false - result.opts.ArgsOnNewLine = false - result.Visit(node) + // fmt.Printf("Selected:\n%v\n", self.ToString()) +} + +// Is the other visitor better than this one? +func (self *Visitor) is_better(other *Visitor) bool { + // Most important priority is to ensure we dont exceed the max + // width by much. + /* + fmt.Printf("self %v (%v lines)\n%v\n - other %v (%v lines)\n%v\n", + self.max_width, len(self.line_breaks), self.max_line, + other.max_width, len(other.line_breaks), other.max_line) + */ + max_width := self.opts.MaxWidthThreshold + + // If the other formatting exceeds the max line number but we dont + // then reject it. + if other.max_width > max_width && self.max_width < max_width { + return false + } + + // If we are too wide but the other is not then the other is + // better. + if self.max_width > max_width && other.max_width < max_width { + return true + } - does_it_fit = !result.has_comments && - result.max_width < self.opts.MaxWidthThreshold && - len(result.line_breaks) == len(self.line_breaks) + // If both widths are ok, the best one is the one with less lines. + if self.max_width < max_width && + other.max_width < max_width && + len(other.line_breaks) > len(self.line_breaks) { + return false + } - // Comments need to take the entire line. - if result.has_comments { - result.max_width = self.opts.MaxWidthThreshold + // If the other is wider then it is not better + if other.max_width > self.max_width { + return false } - return result, result.max_width, does_it_fit + return true +} + +func FormatToString(scope types.Scope, node interface{}) string { + visitor := NewVisitor(scope, ToStringOptions) + visitor.Visit(node) + return visitor.ToString() }