1


Performance Comparison between Regular Expressions vs Custom Parser in .NET 8




Date Added (UTC):

11 May 2024 @ 19:14

Date Updated (UTC):

11 May 2024 @ 19:14


.NET Version(s):

.NET 8

Tag(s):

#Regex


Added By:
Profile Image

rkonit    Twitter
India    
I write code, architect applications, or lead a team of developers and often have final say over the tools and platforms we use.

Benchmark Results:





Benchmark Code:



using System.Text.RegularExpressions;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

/// <summary>
/// BenchmarkDotNet benchmark that validates one fixed e-mail string in two ways:
/// with a regular expression and with a hand-written character scan.
/// </summary>
/// <remarks>
/// NOTE(review): the two validators are not equivalent. The regex requires three
/// non-empty <c>\w+</c> runs separated by exactly one '@' and one '.', while the
/// parser also accepts '-', several dots, and empty parts (for example "@." passes
/// the parser but not the regex). Both agree on the sample input used here.
/// </remarks>
public class RegexVsParserBenchmark
{
    // Input shared by both benchmark methods; assigned in Setup().
    private string _email;

    /// <summary>
    /// Assigns the sample address that both benchmark methods validate.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _email = "test@example.com";
    }

    /// <summary>
    /// Validates <c>_email</c> against the pattern <c>^(\w+)\@(\w+)\.(\w+)$</c>.
    /// </summary>
    /// <returns><c>true</c> when the whole string matches the pattern.</returns>
    [Benchmark]
    public bool ValidateWithRegex()
    {
        string pattern = @"^(\w+)\@(\w+)\.(\w+)$";
        // A new Regex is constructed on every invocation, so the measured cost
        // includes construction from the pattern as well as the match itself.
        Regex regex = new Regex(pattern);
        return regex.IsMatch(_email);
    }
    
    /// <summary>
    /// Validates <c>_email</c> with a single pass over its characters.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the string is non-empty, contains exactly one '@', contains
    /// at least one '.' somewhere after an '@', and every other character is a
    /// letter, digit, '-' or '_'; otherwise <c>false</c>.
    /// </returns>
    [Benchmark]
    public bool ValidateWithParser()
    {
        if (string.IsNullOrEmpty(_email))
        {
            return false;
        }

        var atCount = 0;      // number of '@' characters seen so far
        var atFound = false;  // true once any '@' has been seen
        var dotFound = false; // true once a '.' has been seen after an '@'

        foreach (var ch in _email)
        {
            if (ch == '@')
            {
                atCount++;
                atFound = true;
            }
            else if (ch == '.')
            {
                // A '.' before the '@' is allowed but does not count as the domain dot.
                if (atFound)
                    dotFound = true;
            }
            else if (!char.IsLetterOrDigit(ch) && ch != '-' && ch != '_')
            {
                // Any other character rejects the input immediately.
                return false;
            }
        }
        
        // Exactly one '@' and at least one '.' after it are required.
        if( atCount != 1 || !dotFound)
        {
            return false;
        }

        return true;
    }
}

// .NET 8
// Decompiled form of ValidateWithRegex: the pattern local and the Regex local are
// folded into one expression. A Regex is still constructed on every call.
public bool ValidateWithRegex()
{
    return new Regex("^(\\w+)\\@(\\w+)\\.(\\w+)$").IsMatch(_email);
}
// .NET 8
// Decompiled form of ValidateWithParser: the foreach over the string is shown as an
// index-based while loop over a local copy of _email.
public bool ValidateWithParser()
{
    if (string.IsNullOrEmpty(_email))
    {
        return false;
    }
    int num = 0;        // count of '@' characters (atCount in the IL locals)
    bool flag = false;  // an '@' has been seen (atFound)
    bool flag2 = false; // a '.' has been seen after an '@' (dotFound)
    string email = _email;
    int num2 = 0;       // current character index
    while (num2 < email.Length)
    {
        char c = email[num2];
        if (c == '@')
        {
            num++;
            flag = true;
        }
        else if (c == '.')
        {
            if (flag)
            {
                flag2 = true;
            }
        }
        else if (!char.IsLetterOrDigit(c) && c != '-' && c != '_')
        {
            // Character outside letters, digits, '-', '_': reject.
            return false;
        }
        num2++;
    }
    // Requires exactly one '@' and at least one '.' after it.
    if (num != 1 || !flag2)
    {
        return false;
    }
    return true;
}

// .NET 8
// IL for ValidateWithRegex: constructs a Regex from the pattern literal and returns
// the result of IsMatch on the _email field.
.method public hidebysig 
    instance bool ValidateWithRegex () cil managed 
{
    // Attribute blob below: prolog 01 00, int32 0x1a (26), 1-character string "_", 0 named arguments.
    // NOTE(review): presumably the source line and file captured for [Benchmark] - confirm against BenchmarkDotNet.
    .custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
        01 00 1a 00 00 00 01 5f 00 00
    )
    // Method begins at RVA 0x205d
    // Code size 22 (0x16)
    .maxstack 8

    // Push the pattern string literal.
    // sequence point: (line 29, col 9) to (line 29, col 51) in _
    IL_0000: ldstr "^(\\w+)\\@(\\w+)\\.(\\w+)$"
    // Construct a new Regex from the pattern (done on every call).
    // sequence point: (line 30, col 9) to (line 30, col 42) in _
    IL_0005: newobj instance void [System.Text.RegularExpressions]System.Text.RegularExpressions.Regex::.ctor(string)
    // Load this._email and call IsMatch on the new Regex; its bool result is returned.
    // sequence point: (line 31, col 9) to (line 31, col 38) in _
    IL_000a: ldarg.0
    IL_000b: ldfld string RegexVsParserBenchmark::_email
    IL_0010: callvirt instance bool [System.Text.RegularExpressions]System.Text.RegularExpressions.Regex::IsMatch(string)
    IL_0015: ret
}
// .NET 8
// IL for ValidateWithParser: rejects null/empty input, scans _email character by
// character, and returns true only for exactly one '@' plus a '.' seen after an '@'.
.method public hidebysig 
    instance bool ValidateWithParser () cil managed 
{
    // Attribute blob below: prolog 01 00, int32 0x22 (34), 1-character string "_", 0 named arguments.
    // NOTE(review): presumably the source line and file captured for [Benchmark] - confirm against BenchmarkDotNet.
    .custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
        01 00 22 00 00 00 01 5f 00 00
    )
    // Method begins at RVA 0x2074
    // Code size 120 (0x78)
    .maxstack 2
    // Locals 3 and 4 are compiler-generated: the string being iterated and the loop index.
    .locals init (
        [0] int32 atCount,
        [1] bool atFound,
        [2] bool dotFound,
        [3] string,
        [4] int32,
        [5] char ch
    )

    // if (string.IsNullOrEmpty(_email)) return false;
    // sequence point: (line 37, col 9) to (line 37, col 42) in _
    IL_0000: ldarg.0
    IL_0001: ldfld string RegexVsParserBenchmark::_email
    IL_0006: call bool [System.Runtime]System.String::IsNullOrEmpty(string)
    IL_000b: brfalse.s IL_000f

    // sequence point: (line 39, col 13) to (line 39, col 26) in _
    IL_000d: ldc.i4.0
    IL_000e: ret

    // atCount = 0
    // sequence point: (line 42, col 9) to (line 42, col 25) in _
    IL_000f: ldc.i4.0
    IL_0010: stloc.0
    // atFound = false
    // sequence point: (line 43, col 9) to (line 43, col 29) in _
    IL_0011: ldc.i4.0
    IL_0012: stloc.1
    // dotFound = false
    // sequence point: (line 44, col 9) to (line 44, col 30) in _
    IL_0013: ldc.i4.0
    IL_0014: stloc.2
    // Copy _email into local 3 and set the index (local 4) to 0.
    // sequence point: (line 46, col 28) to (line 46, col 34) in _
    IL_0015: ldarg.0
    IL_0016: ldfld string RegexVsParserBenchmark::_email
    IL_001b: stloc.3
    IL_001c: ldc.i4.0
    IL_001d: stloc.s 4
    // Jump to the loop condition first.
    // sequence point: hidden
    IL_001f: br.s IL_0063
    // loop start (head: IL_0063)
        // ch = string[index]
        // sequence point: (line 46, col 18) to (line 46, col 24) in _
        IL_0021: ldloc.3
        IL_0022: ldloc.s 4
        IL_0024: callvirt instance char [System.Runtime]System.String::get_Chars(int32)
        IL_0029: stloc.s 5
        // if (ch == '@')   (64 is '@')
        // sequence point: (line 48, col 13) to (line 48, col 27) in _
        IL_002b: ldloc.s 5
        IL_002d: ldc.i4.s 64
        IL_002f: bne.un.s IL_0039

        // atCount++
        // sequence point: (line 50, col 17) to (line 50, col 27) in _
        IL_0031: ldloc.0
        IL_0032: ldc.i4.1
        IL_0033: add
        IL_0034: stloc.0
        // atFound = true
        // sequence point: (line 51, col 17) to (line 51, col 32) in _
        IL_0035: ldc.i4.1
        IL_0036: stloc.1
        // sequence point: hidden
        IL_0037: br.s IL_005d

        // else if (ch == '.')   (46 is '.')
        // sequence point: (line 53, col 18) to (line 53, col 32) in _
        IL_0039: ldloc.s 5
        IL_003b: ldc.i4.s 46
        IL_003d: bne.un.s IL_0046

        // if (atFound) ...
        // sequence point: (line 55, col 17) to (line 55, col 29) in _
        IL_003f: ldloc.1
        IL_0040: brfalse.s IL_005d

        // dotFound = true
        // sequence point: (line 56, col 21) to (line 56, col 37) in _
        IL_0042: ldc.i4.1
        IL_0043: stloc.2
        // sequence point: hidden
        IL_0044: br.s IL_005d

        // else if (!char.IsLetterOrDigit(ch) && ch != '-' && ch != '_')   (45 is '-', 95 is '_')
        // sequence point: (line 58, col 18) to (line 58, col 74) in _
        IL_0046: ldloc.s 5
        IL_0048: call bool [System.Runtime]System.Char::IsLetterOrDigit(char)
        IL_004d: brtrue.s IL_005d

        IL_004f: ldloc.s 5
        IL_0051: ldc.i4.s 45
        IL_0053: beq.s IL_005d

        IL_0055: ldloc.s 5
        IL_0057: ldc.i4.s 95
        IL_0059: beq.s IL_005d

        // Disallowed character: return false.
        // sequence point: (line 60, col 17) to (line 60, col 30) in _
        IL_005b: ldc.i4.0
        IL_005c: ret

        // index++
        // sequence point: hidden
        IL_005d: ldloc.s 4
        IL_005f: ldc.i4.1
        IL_0060: add
        IL_0061: stloc.s 4

        // Loop condition: continue while index < string.Length.
        // sequence point: (line 46, col 25) to (line 46, col 27) in _
        IL_0063: ldloc.s 4
        IL_0065: ldloc.3
        IL_0066: callvirt instance int32 [System.Runtime]System.String::get_Length()
        IL_006b: blt.s IL_0021
    // end loop

    // if (atCount != 1 || !dotFound) return false;
    // sequence point: (line 64, col 9) to (line 64, col 39) in _
    IL_006d: ldloc.0
    IL_006e: ldc.i4.1
    IL_006f: bne.un.s IL_0074

    IL_0071: ldloc.2
    IL_0072: brtrue.s IL_0076

    // sequence point: (line 66, col 13) to (line 66, col 26) in _
    IL_0074: ldc.i4.0
    IL_0075: ret

    // return true
    // sequence point: (line 69, col 9) to (line 69, col 21) in _
    IL_0076: ldc.i4.1
    IL_0077: ret
}

// .NET 8 (X64)
; JIT output for ValidateWithRegex. Call targets are raw addresses, so the notes on
; what each call does are inferred from the IL and should be confirmed.
ValidateWithRegex()
    ; Prologue: save rsi/rbx and reserve 0x28 bytes of stack.
    L0000: push rsi
    L0001: push rbx
    L0002: sub rsp, 0x28
    ; Keep the incoming rcx (this) in rbx across the calls.
    L0006: mov rbx, rcx
    ; NOTE(review): presumably allocates the Regex object (type handle in rcx) - confirm.
    L0009: mov rcx, 0x7ffcf3117100
    L0013: call 0x00007ffd5256ae10
    ; rsi = object returned by the call above.
    L0018: mov rsi, rax
    ; NOTE(review): presumably loads the pattern string reference into rdx - confirm.
    L001b: mov rdx, 0x299698654d0
    L0025: mov rdx, [rdx]
    ; NOTE(review): presumably the Regex constructor call with the new object in rcx;
    ; r8d is zeroed as a third argument - confirm which overload is targeted.
    L0028: mov rcx, rsi
    L002b: xor r8d, r8d
    L002e: call qword ptr [0x7ffcf3106f70]
    ; rdx = field at this+8 (the _email field, matching the ldfld in the IL).
    L0034: mov rdx, [rbx+8]
    L0038: mov rcx, rsi
    ; Epilogue, then a tail jump instead of call+ret.
    ; NOTE(review): the jump target is presumably Regex.IsMatch(string) - confirm.
    L003b: add rsp, 0x28
    L003f: pop rbx
    L0040: pop rsi
    L0041: jmp qword ptr [0x7ffcf3107468]
// .NET 8 (X64)
; JIT output for ValidateWithParser. Register roles, read off the instructions below:
;   esi = '@' count, edi = "'@' seen" flag, ebp = "'.' after '@'" flag,
;   r14 = string being scanned, r15d = index, ebx = length, r13d = current character.
ValidateWithParser()
    ; Prologue: save the registers used below and reserve 0x20 bytes of stack.
    L0000: push r15
    L0002: push r14
    L0004: push r13
    L0006: push rdi
    L0007: push rsi
    L0008: push rbp
    L0009: push rbx
    L000a: sub rsp, 0x20
    ; rax = field at this+8 (_email); if it is not null, go check its length.
    L000e: mov rax, [rcx+8]
    L0012: test rax, rax
    L0015: jne short L0028
    ; Null or empty string: return 0 (false).
    L0017: xor eax, eax
    L0019: add rsp, 0x20
    L001d: pop rbx
    L001e: pop rbp
    L001f: pop rsi
    L0020: pop rdi
    L0021: pop r13
    L0023: pop r14
    L0025: pop r15
    L0027: ret
    ; ebx = 32-bit value at string+8, used as the length; zero length returns false.
    L0028: mov ebx, [rax+8]
    L002b: test ebx, ebx
    L002d: je short L0017
    ; Zero the '@' count and both flags.
    L002f: xor esi, esi
    L0031: xor edi, edi
    L0033: xor ebp, ebp
    ; r14 = _email (read again for the loop), r15d = index 0.
    L0035: mov r14, [rcx+8]
    L0039: xor r15d, r15d
    ; Skip the loop when the length is not positive.
    L003c: test ebx, ebx
    L003e: jle short L00ab
    ; Loop body: r13d = 16-bit character at string + index*2 + 0xc.
    L0040: mov eax, r15d
    L0043: movzx r13d, word ptr [r14+rax*2+0xc]
    ; 0x40 is '@': increment the count and set the "'@' seen" flag.
    L0049: cmp r13d, 0x40
    L004d: jne short L0058
    L004f: inc esi
    L0051: mov edi, 1
    L0056: jmp short L00a3
    ; 0x2e is '.': set the dot flag only if an '@' has already been seen.
    L0058: cmp r13d, 0x2e
    L005c: jne short L0069
    L005e: test edi, edi
    L0060: je short L00a3
    L0062: mov ebp, 1
    L0067: jmp short L00a3
    ; Characters >= 0x100 take the call-based path at L00ea.
    L0069: cmp r13d, 0x100
    L0070: jae short L00ea
    ; NOTE(review): presumably the inlined char.IsLetterOrDigit - a per-character table
    ; byte is masked to 5 bits, turned into a single bit, and tested against 0x11f
    ; (bits 0-4 and 8). Confirm against the runtime source.
    L0072: mov eax, r13d
    L0075: mov rcx, 0x7ffd51932c28
    L007f: movzx edx, byte ptr [rax+rcx]
    L0083: and edx, 0x1f
    L0086: mov eax, 1
    L008b: shlx eax, eax, edx
    L0090: test eax, 0x11f
    L0095: jne short L00a3
    ; Not a letter or digit: still accepted if it is '-' (0x2d) or '_' (0x5f).
    L0097: cmp r13d, 0x2d
    L009b: je short L00a3
    L009d: cmp r13d, 0x5f
    L00a1: jne short L00c5
    ; Advance the index and loop while length > index.
    L00a3: inc r15d
    L00a6: cmp ebx, r15d
    L00a9: jg short L0040
    ; After the loop: true only if the '@' count is 1 and the dot flag is set.
    L00ab: cmp esi, 1
    L00ae: jne short L00b4
    L00b0: test ebp, ebp
    L00b2: jne short L00d6
    ; Final check failed: return 0 (false).
    L00b4: xor eax, eax
    L00b6: add rsp, 0x20
    L00ba: pop rbx
    L00bb: pop rbp
    L00bc: pop rsi
    L00bd: pop rdi
    L00be: pop r13
    L00c0: pop r14
    L00c2: pop r15
    L00c4: ret
    ; Disallowed character found inside the loop: return 0 (false).
    L00c5: xor eax, eax
    L00c7: add rsp, 0x20
    L00cb: pop rbx
    L00cc: pop rbp
    L00cd: pop rsi
    L00ce: pop rdi
    L00cf: pop r13
    L00d1: pop r14
    L00d3: pop r15
    L00d5: ret
    ; All checks passed: return 1 (true).
    L00d6: mov eax, 1
    L00db: add rsp, 0x20
    L00df: pop rbx
    L00e0: pop rbp
    L00e1: pop rsi
    L00e2: pop rdi
    L00e3: pop r13
    L00e5: pop r14
    L00e7: pop r15
    L00e9: ret
    ; Path for characters >= 0x100: call a helper with the character in ecx, then
    ; rejoin the bit test at L0086 with its result in edx.
    ; NOTE(review): presumably a Unicode category lookup - confirm.
    L00ea: mov ecx, r13d
    L00ed: call qword ptr [0x7ffcf3dc7d98]
    L00f3: mov edx, eax
    L00f5: jmp short L0086


Benchmark Description:


The provided C# code snippet is a benchmark test that compares the performance of two methods for validating email addresses. The two methods are ValidateWithRegex and ValidateWithParser. The ValidateWithRegex method uses a regular expression to validate the email address, while the ValidateWithParser method uses a custom parser to validate the email address.

The provided benchmark code is designed to compare two different methods for validating email addresses in C#: one using regular expressions (Regex) and the other using a custom parser method. This comparison is crucial for understanding the performance implications of using Regex versus manual string parsing for common validation tasks. The benchmarks are run using the BenchmarkDotNet library, a powerful tool for benchmarking .NET code. This benchmark targets .NET 8 (as listed at the top of this page); BenchmarkDotNet itself supports a wide range of .NET versions, including .NET Core and .NET Framework.

### General Setup

- **GlobalSetup (`Setup` method):** This method initializes the benchmark by setting up the test data. Here, it initializes an email string (`_email`) with a value of `"test@example.com"`. This setup runs once before the benchmarks start, ensuring that each benchmark method operates on the same input data.

### Benchmark Methods

#### 1. `ValidateWithRegex`

- **Purpose:** This method tests the performance of using a regular expression to validate an email address. Regular expressions provide a powerful and concise way to validate strings, but they can be computationally expensive, especially for complex patterns or large input strings.
- **Performance Aspect:** It measures the time and resources required to construct a `Regex` from the pattern and match it against an input string; the `Regex` is created anew on every invocation, so construction cost is part of the measurement. This benchmark is important because it highlights the efficiency of Regex operations in .NET, which can vary significantly based on the complexity of the pattern and the input size.
- **Expected Insights:** Running this benchmark should give insights into how quickly and efficiently .NET can handle regular expression matching for a relatively simple pattern. High execution times might indicate that for simple validation tasks, Regex might not be the most performance-efficient choice.

#### 2. `ValidateWithParser`

- **Purpose:** This method tests the performance of manually parsing the email string to validate its format. This approach checks the string character by character to ensure it meets the criteria for a valid email address (contains exactly one '@' character, at least one '.' character after '@', and otherwise only contains letters, digits, '-', or '_').
- **Performance Aspect:** It measures the efficiency of manually parsing a string without using regular expressions. This method is designed to test how well a hand-written parser performs, focusing on CPU time and memory allocation compared to the Regex approach.
- **Expected Insights:** The results from this benchmark can provide insights into whether a custom parsing approach can outperform regular expressions in terms of speed and resource usage for simple validation tasks. A lower execution time and less memory allocation would suggest that manual parsing is more efficient for this specific case.

### Conclusion

By comparing the performance of `ValidateWithRegex` and `ValidateWithParser`, developers can make informed decisions about which method to use for email validation in their projects. The benchmarks aim to highlight the trade-offs between readability and maintainability of Regex versus the potential performance gains of a custom parser. The results will depend on various factors, including the .NET runtime version, the complexity of the validation logic, and the specifics of the input data. Understanding these benchmarks can help developers optimize their applications for better performance.


Benchmark Comments: