AsSpan over Substring




Date Added (UTC):

23 Jun 2024 @ 22:12

Date Updated (UTC):

23 Jun 2024 @ 22:14


.NET Version(s):

.NET 8

Tag(s):

#Strings


Added By:
Profile Image

Sri Lanka    
Tech Enthusiast | .Net | C# | Azure | AWS

Benchmark Results:





Benchmark Code:



using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Columns;
using BenchmarkDotNet.Mathematics;
using BenchmarkDotNet.Order;
using System.Buffers;

namespace Demo.Dotnet8;


/// <summary>
/// Benchmarks three ways of looking for the characters a, b, c, x, y and z in
/// <see cref="Text"/>: <c>string.IndexOfAny(char[])</c>, and the span-based
/// <c>IndexOfAny</c> / <c>ContainsAnyExcept</c> overloads that take a
/// <see cref="SearchValues{T}"/>.
/// </summary>
/// <remarks>
/// The class name presumably refers to analyzer rule CA1870 — confirm against the rule documentation.
/// </remarks>
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[RankColumn(NumeralSystem.Arabic)]
[HideColumns(Column.RatioSD)]
[MarkdownExporter]
public class CA1870
{
    /// <summary>Search set created once in a static field and reused by the span-based benchmarks.</summary>
    private static readonly SearchValues<char> s_myValues = SearchValues.Create("abcxyz");

    /// <summary>The same six characters as a per-instance array, consumed by <see cref="IndexOfMyValues"/>.</summary>
    private readonly char[] MyValues = { 'a', 'b', 'c', 'x', 'y', 'z' };

    /// <summary>Input string under test; BenchmarkDotNet assigns each <c>Params</c> value in turn.</summary>
    [Params("qazwsxecc", "qazzedcrvvtbb")]
    public string Text { get; set; } = string.Empty;

    /// <summary>Public parameterless constructor; performs no work of its own.</summary>
    public CA1870() { }

    /// <summary>Index of the first character of <see cref="Text"/> found in the <c>char[]</c> set.</summary>
    /// <returns>The zero-based index of the first match, or -1 when there is none.</returns>
    [Benchmark]
    public int IndexOfMyValues() => Text.IndexOfAny(MyValues);

    /// <summary>Same search as <see cref="IndexOfMyValues"/>, run over a span with the cached search set.</summary>
    /// <returns>The zero-based index of the first match, or -1 when there is none.</returns>
    [Benchmark]
    public int IndexOfMyValuesSearchValue() => Text.AsSpan().IndexOfAny(s_myValues);

    /// <summary>Checks that <see cref="Text"/> holds no character outside the cached search set.</summary>
    /// <returns><c>true</c> when every character belongs to the set; otherwise <c>false</c>.</returns>
    [Benchmark]
    public bool ContainsOnlyMyValuesSearchValue() => !Text.AsSpan().ContainsAnyExcept(s_myValues);
}

// .NET 8
// Lowered C# for the array-based benchmark: calls IndexOfAny on the Text string,
// passing the MyValues char array, and returns the resulting index.
public int IndexOfMyValues()
{
    return Text.IndexOfAny(MyValues);
}
// .NET 8
// Lowered C# for the SearchValues-based benchmark: the extension-method calls are
// spelled out as static calls on MemoryExtensions. AsSpan(Text) yields the span that
// IndexOfAny then searches using the s_myValues search set.
public int IndexOfMyValuesSearchValue()
{
    return MemoryExtensions.IndexOfAny(MemoryExtensions.AsSpan(Text), s_myValues);
}
// .NET 8
// Lowered C# for the "contains only" benchmark: ContainsAnyExcept is invoked as a static
// call on MemoryExtensions over AsSpan(Text) with s_myValues, and its result is negated,
// so the method returns true when ContainsAnyExcept reports false.
public bool ContainsOnlyMyValuesSearchValue()
{
    return !MemoryExtensions.ContainsAnyExcept(MemoryExtensions.AsSpan(Text), s_myValues);
}

// .NET 8
// IL for IndexOfMyValues: pushes the Text string and the MyValues array, then calls
// String.IndexOfAny(char[]) and returns its int32 result.
.method public hidebysig 
    instance int32 IndexOfMyValues () cil managed 
{
    // [Benchmark] attribute. The blob carries the constructor's (int32, string) arguments:
    // 0x26 (38) and the one-character string "_" — presumably the caller's source line and
    // file path; confirm against BenchmarkAttribute's constructor.
    .custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
        01 00 26 00 00 00 01 5f 00 00
    )
    // Method begins at RVA 0x208b
    // Code size 18 (0x12)
    .maxstack 8

    // sequence point: (line 41, col 9) to (line 41, col 42) in _
    // this.Text (property getter call) becomes the receiver of IndexOfAny
    IL_0000: ldarg.0
    IL_0001: call instance string Demo.Dotnet8.CA1870::get_Text()
    // this.MyValues is an instance field, so `this` is loaded again before ldfld
    IL_0006: ldarg.0
    IL_0007: ldfld char[] Demo.Dotnet8.CA1870::MyValues
    IL_000c: callvirt instance int32 [System.Runtime]System.String::IndexOfAny(char[])
    IL_0011: ret
}
// .NET 8
// IL for IndexOfMyValuesSearchValue: converts Text to a ReadOnlySpan<char>, loads the static
// s_myValues field, and calls MemoryExtensions.IndexOfAny<char>(span, SearchValues<char>).
.method public hidebysig 
    instance int32 IndexOfMyValuesSearchValue () cil managed 
{
    // [Benchmark] attribute. The blob carries the constructor's (int32, string) arguments:
    // 0x2c (44) and the one-character string "_" — presumably the caller's source line and
    // file path; confirm against BenchmarkAttribute's constructor.
    .custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
        01 00 2c 00 00 00 01 5f 00 00
    )
    // Method begins at RVA 0x209e
    // Code size 22 (0x16)
    .maxstack 8

    // sequence point: (line 47, col 9) to (line 47, col 53) in _
    // this.Text -> MemoryExtensions.AsSpan(string); both are plain `call`s, AsSpan being static
    IL_0000: ldarg.0
    IL_0001: call instance string Demo.Dotnet8.CA1870::get_Text()
    IL_0006: call valuetype [System.Runtime]System.ReadOnlySpan`1<char> [System.Memory]System.MemoryExtensions::AsSpan(string)
    // s_myValues is static, so it is loaded with ldsfld and no `this` is needed
    IL_000b: ldsfld class [System.Runtime]System.Buffers.SearchValues`1<char> Demo.Dotnet8.CA1870::s_myValues
    IL_0010: call int32 [System.Memory]System.MemoryExtensions::IndexOfAny<char>(valuetype [System.Runtime]System.ReadOnlySpan`1<!!0>, class [System.Runtime]System.Buffers.SearchValues`1<!!0>)
    IL_0015: ret
}
// .NET 8
// IL for ContainsOnlyMyValuesSearchValue: converts Text to a ReadOnlySpan<char>, calls
// MemoryExtensions.ContainsAnyExcept<char> with the static s_myValues field, then negates
// the bool result by comparing it with 0.
.method public hidebysig 
    instance bool ContainsOnlyMyValuesSearchValue () cil managed 
{
    // [Benchmark] attribute. The blob carries the constructor's (int32, string) arguments:
    // 0x32 (50) and the one-character string "_" — presumably the caller's source line and
    // file path; confirm against BenchmarkAttribute's constructor.
    .custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
        01 00 32 00 00 00 01 5f 00 00
    )
    // Method begins at RVA 0x20b5
    // Code size 25 (0x19)
    .maxstack 8

    // sequence point: (line 53, col 9) to (line 53, col 61) in _
    // this.Text -> MemoryExtensions.AsSpan(string)
    IL_0000: ldarg.0
    IL_0001: call instance string Demo.Dotnet8.CA1870::get_Text()
    IL_0006: call valuetype [System.Runtime]System.ReadOnlySpan`1<char> [System.Memory]System.MemoryExtensions::AsSpan(string)
    IL_000b: ldsfld class [System.Runtime]System.Buffers.SearchValues`1<char> Demo.Dotnet8.CA1870::s_myValues
    IL_0010: call bool [System.Memory]System.MemoryExtensions::ContainsAnyExcept<char>(valuetype [System.Runtime]System.ReadOnlySpan`1<!!0>, class [System.Runtime]System.Buffers.SearchValues`1<!!0>)
    // `result == 0` is how the C# `!` operator is emitted here
    IL_0015: ldc.i4.0
    IL_0016: ceq
    IL_0018: ret
}

// .NET 8 Jit Asm Code unavailable due to errors:
Type Demo.Dotnet8.CA1870 has a static constructor, which is not supported by SharpLab JIT decompiler.


Benchmark Description:


Using `Substring` allocates a new `string` object on the heap and involves a full copy of the extracted text. String manipulation can be a performance bottleneck, especially when dealing with many small, short-lived strings, because of the memory allocation and garbage collection involved. The issue becomes more pronounced when working with large substrings. To address this, C# and .NET introduced the `Span<T>` and `ReadOnlySpan<T>` types, which allow efficient manipulation of character data without unnecessary copying. Note that the benchmark code on this page does not call `Substring` itself: it uses `AsSpan()` to search a string with a cached `SearchValues<char>` instance and compares that with `string.IndexOfAny(char[])`.

The provided benchmark code is designed to measure and compare the performance of different methods for searching within strings in .NET. The benchmarks are set up using the BenchmarkDotNet library, a popular .NET library for benchmarking code performance. The code targets .NET 8: `SearchValues<T>` was introduced in that release, and the namespace (`Demo.Dotnet8`) says the same. `ReadOnlySpan<T>`, which the span-based benchmarks also rely on, has been available since .NET Core 2.1.

### General Setup

- **BenchmarkDotNet Attributes**: The code uses several BenchmarkDotNet attributes to configure the benchmarking process:
  - `Orderer(SummaryOrderPolicy.FastestToSlowest)`: Results will be ordered from the fastest to the slowest.
  - `RankColumn(NumeralSystem.Arabic)`: Adds a column showing the rank of each benchmark method, written in Arabic numerals.
  - `HideColumns(Column.RatioSD)`: Hides the `RatioSD` column (the standard deviation of the ratio to the baseline), keeping the summary focused on average performance.
  - `MarkdownExporter`: Exports the results in Markdown format for easy reading and sharing.
- **Parameters (`Params`)**: Two different strings are used as input for the benchmark methods to test performance across different scenarios.
- **Setup**: The benchmark class initializes a `char` array `MyValues` and a `SearchValues<char>` instance `s_myValues` with the same set of characters. These are used to search within the provided text strings.

### Benchmark Methods

#### 1. `IndexOfMyValues()`

- **Purpose**: This method measures the performance of searching for any of a set of characters within a string using the `IndexOfAny` method.
- **Performance Aspect**: It tests how quickly .NET can find the first occurrence of any character from a small set within a string.
- **Importance**: Understanding the performance of `IndexOfAny` is crucial for scenarios where you need to validate or parse strings based on the presence of specific characters.
- **Expected Results**: The performance will depend on the length of the string and the position of the first matching character. Shorter strings or matches near the start of the string should be faster.

#### 2. `IndexOfMyValuesSearchValue()`

- **Purpose**: This benchmark aims to measure the performance of the `IndexOfAny` method when using a `SearchValues<T>` instance, which is a more optimized way to search for values.
- **Performance Aspect**: It evaluates the efficiency improvements that can be achieved by using `SearchValues<T>` over a simple array of values.
- **Importance**: This method is significant for applications that frequently perform searches within large texts or require optimized search operations.
- **Expected Results**: This method is expected to be faster than using a simple array, especially for longer texts or when the searched characters are spread out, due to the optimized search algorithms in `SearchValues<T>`.

#### 3. `ContainsOnlyMyValuesSearchValue()`

- **Purpose**: To test the performance of checking if a string contains only specific characters using `ContainsAnyExcept` with `SearchValues<T>`.
- **Performance Aspect**: This method assesses how efficiently a string can be scanned to ensure it doesn't contain any characters outside a given set.
- **Importance**: This is crucial for validating strings against a whitelist of characters, ensuring data integrity and security in user inputs or file processing.
- **Expected Results**: The performance here will largely depend on the string's length and the position of the first character outside the set. Strings that fail early (contain an invalid character near the beginning) let the scan stop sooner, whereas strings that contain only the specified characters must be scanned to the end. Both `Params` inputs start with `q`, which is not in the set, so in this benchmark the answer is known from the first character.

### Summary

Each benchmark method is designed to test a specific aspect of string searching or validation performance in .NET, providing insights into the efficiency of different approaches. By analyzing the results, developers can make informed decisions about which methods to use in their applications based on the specific requirements of their scenarios, such as the need for speed or the frequency of search operations.


Benchmark Comments: