From 81873b30f061056710a7417d7aafa4f091f6e1f9 Mon Sep 17 00:00:00 2001 From: "Mathias R. Jessen" Date: Sat, 28 Oct 2017 10:33:43 +0200 Subject: [PATCH 1/3] Add tests for -Split operator Right-to-Left mode Modified existing test that assumes Max-substrings = 0 when negative. Added tests for scriptblock/predicate-based splitting (regular and right-to-left). --- .../Operators/SplitOperator.Tests.ps1 | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/test/powershell/Language/Operators/SplitOperator.Tests.ps1 b/test/powershell/Language/Operators/SplitOperator.Tests.ps1 index e132043838d..63f55f9d879 100644 --- a/test/powershell/Language/Operators/SplitOperator.Tests.ps1 +++ b/test/powershell/Language/Operators/SplitOperator.Tests.ps1 @@ -30,13 +30,18 @@ Describe "Split Operator" -Tags CI { $res[1] | Should Be "b" $res[2] | Should Be "c" $res[3] | Should Be "d" + } + + It "Binary split operator works with negative substring limit" { + $res = "a b c d" -split " ", -3 + $res.count | Should Be 3 + $res[0] | Should Be "a b" + $res[1] | Should Be "c" + $res[2] | Should Be "d" $res = "a b c d" -split " ", -1 - $res.count | Should Be 4 - $res[0] | Should Be "a" - $res[1] | Should Be "b" - $res[2] | Should Be "c" - $res[3] | Should Be "d" + $res.count | Should Be 1 + $res[0] | Should Be "a b c d" } It "Binary split operator can works with freeform delimiter" { @@ -103,6 +108,37 @@ Describe "Split Operator" -Tags CI { $res[2] | Should Be "::d" } + It "Binary split operator works with script block and substring limit" { + $res = "a::b::c::d" -split {$_ -eq "b" -or $_ -eq "C"}, 2 + $res.count | Should Be 2 + $res[0] | Should Be "a::" + $res[1] | Should Be "::c::d" + } + + It "Binary split operator works with script block and substring limit on arrays" { + $res = "a::b::c::d","e::f::g::h" -split {$_ -eq "b" -or $_ -eq "c" -or $_ -eq "f" -or $_ -eq "g"}, 2 + $res.count | Should Be 4 + $res[0] | Should Be "a::" + $res[1] | Should Be "::c::d" + 
$res[2] | Should Be "e::" + $res[3] | Should Be "::g::h" + } + + It "Binary split operator works with script block and negative substring limit" { + $res = "a::b::c::d" -split {$_ -eq "b" -or $_ -eq "C"}, -2 + $res.count | Should Be 2 + $res[0] | Should Be "a::b::" + $res[1] | Should Be "::d" + } + + It "Binary split operator works with script block and negative substring limit on arrays" { + $res = "a::b::c::d","e::f::g::h" -split {$_ -eq "b" -or $_ -eq "c" -or $_ -eq "f" -or $_ -eq "g"}, -2 + $res.count | Should Be 4 + $res[0] | Should Be "a::b::" + $res[1] | Should Be "::d" + $res[2] | Should Be "e::f::" + $res[3] | Should Be "::h" + } } Context "Binary split operator options" { From 5e5e3e2f3c643cef73cb21968c58f4f3fc0f5a6c Mon Sep 17 00:00:00 2001 From: "Mathias R. Jessen" Date: Sat, 28 Oct 2017 11:01:20 +0200 Subject: [PATCH 2/3] Add Right-To-Left splitting to SplitWithPattern() To allow RTL splitting when supplying negative Max-substrings arguments to -split, we set the RightToLeft RegexOption and convert limit to an absolute value --- .../engine/lang/parserutils.cs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/System.Management.Automation/engine/lang/parserutils.cs b/src/System.Management.Automation/engine/lang/parserutils.cs index 1be5c02a347..0ceeb18b716 100644 --- a/src/System.Management.Automation/engine/lang/parserutils.cs +++ b/src/System.Management.Automation/engine/lang/parserutils.cs @@ -728,20 +728,22 @@ private static object SplitWithPattern(ExecutionContext context, IScriptExtent e separatorPattern = Regex.Escape(separatorPattern); } + RegexOptions regexOptions = parseRegexOptions(options); + if (limit < 0) { - // Regex only allows 0 to signify "no limit", whereas - // we allow any integer <= 0. - limit = 0; + // If user-supplied limit is negative we + // interpret it as "split Right-to-Left". 
+ regexOptions |= RegexOptions.RightToLeft; + limit = System.Math.Abs(limit); } - RegexOptions regexOptions = parseRegexOptions(options); Regex regex = NewRegex(separatorPattern, regexOptions); List results = new List(); foreach (string item in content) { - string[] split = regex.Split(item, limit, 0); + string[] split = regex.Split(item, limit); ExtendList(results, split); } From cf47352d71d0e78d39fc634a2d1e659c1030a6ef Mon Sep 17 00:00:00 2001 From: "Mathias R. Jessen" Date: Sat, 28 Oct 2017 11:13:01 +0200 Subject: [PATCH 3/3] Add Right-To-Left splitting to SplitWithPredicate() Additionally changed the chunk buffer from StringBuilder to List - we don't use any StringBuilder specific functionality and List allows us to reverse the chunk and avoid Insert(0, item) The individual chunk lists have been changed to pre-allocate the expected needed capacity to avoid unnecessary resizing of the underlying array --- .../engine/lang/parserutils.cs | 91 +++++++++++++++---- 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/src/System.Management.Automation/engine/lang/parserutils.cs b/src/System.Management.Automation/engine/lang/parserutils.cs index 0ceeb18b716..208352d46a3 100644 --- a/src/System.Management.Automation/engine/lang/parserutils.cs +++ b/src/System.Management.Automation/engine/lang/parserutils.cs @@ -632,22 +632,54 @@ private static object SplitOperatorImpl(ExecutionContext context, IScriptExtent private static object SplitWithPredicate(ExecutionContext context, IScriptExtent errorPosition, IEnumerable content, ScriptBlock predicate, int limit) { - List results = new List(); - foreach (string item in content) + // If user supplied a negative Max-substrings argument, + // we employ Right-to-Left splitting instead + bool rightToLeft = limit < 0; + + limit = System.Math.Abs(limit); + + if (limit == 1) { - List split = new List(); + // Don't bother with looking for any delimiters, + // just return the original string(s). 
+ return new List(content); + } + + List results = new List(); + + List buf = new List(); - if (limit == 1) + foreach (string item in content) + { + List split; + if(limit == 0) + { + // No limit specified + // set capacity to item.Length + 1 + // this covers worst common case (predicate == {$true}) + split = new List(item.Length + 1); + } + else { - // Don't bother with looking for any delimiters, - // just return the original string. - results.Add(item); - continue; + // Limit was specified by the user + // instantiate list with maximum needed capacity + split = new List(limit); } + + // Clear char buffer + buf.Clear(); - StringBuilder buf = new StringBuilder(); - for (int strIndex = 0; strIndex < item.Length; strIndex++) + int strIndex = 0; + for (int cursor = 0; cursor < item.Length; cursor++) { + if(rightToLeft) + { + strIndex = item.Length - 1 - cursor; + } + else + { + strIndex = cursor; + } object isDelimChar = predicate.DoInvokeReturnAsIs( useLocalScope: true, errorHandlingBehavior: ScriptBlock.ErrorHandlingBehavior.WriteToExternalErrorPipe, @@ -657,8 +689,12 @@ private static object SplitWithPredicate(ExecutionContext context, IScriptExtent args: new object[] { item, strIndex }); if (LanguagePrimitives.IsTrue(isDelimChar)) { - split.Add(buf.ToString()); - buf = new StringBuilder(); + if(rightToLeft) + { + buf.Reverse(); + } + split.Add(string.Concat(buf)); + buf.Clear(); if (limit > 0 && split.Count >= (limit - 1)) { @@ -667,9 +703,16 @@ private static object SplitWithPredicate(ExecutionContext context, IScriptExtent // and add it as the last item, otherwise // add an empty string if there was // a delimiter at the end. 
- if ((strIndex + 1) < item.Length) + if ((cursor + 1) < item.Length) { - split.Add(item.Substring(strIndex + 1)); + if(rightToLeft) + { + split.Add(item.Substring(0, strIndex)); + } + else + { + split.Add(item.Substring(strIndex + 1)); + } } else { @@ -681,24 +724,34 @@ private static object SplitWithPredicate(ExecutionContext context, IScriptExtent // If this delimiter is at the end of the string, // add an empty string to denote the item "after" // it. - if (strIndex == (item.Length - 1)) + if (cursor == (item.Length - 1)) { split.Add(""); } } else { - buf.Append(item[strIndex]); + buf.Add(item[strIndex]); } } // Add any remainder, if we're under the limit. - if (buf.Length > 0 && - (limit <= 0 || split.Count < limit)) + if (buf.Count > 0 && + (limit == 0 || split.Count < limit)) { - split.Add(buf.ToString()); + if(rightToLeft) + { + buf.Reverse(); + } + split.Add(string.Concat(buf)); } + if(rightToLeft) + { + // We want to preserve the order from the + // original string (always output chunks left-to-right) + split.Reverse(); + } ExtendList(results, split); }