1
Performance Comparison between Regular Expressions vs Custom Parser in .NET 8
Date Added (UTC):
11 May 2024 @ 19:14
Date Updated (UTC): 11 May 2024 @ 19:14
.NET Version(s): Tag(s):
Added By:
I write code, architect applications, or lead a team of developers and often have final say over the tools and platforms we use.
Benchmark Results:
Benchmark Code:
Originally imported from :
https://gist.github.com/rkonit/c49b70fe1a9cf0d6e75fef2eb978572c on 11 May 2024 @ 19:14 (UTC).
The original benchmark may have changed.
https://gist.github.com/rkonit/c49b70fe1a9cf0d6e75fef2eb978572c
The original benchmark may have changed.
using System.Text.RegularExpressions;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
/// <summary>
/// BenchmarkDotNet benchmark that validates the same e-mail string two ways:
/// with a regular expression and with a hand-written character scan.
/// </summary>
public class RegexVsParserBenchmark
{
    // Input shared by both benchmark methods; assigned in Setup.
    private string _email;

    /// <summary>
    /// Assigns the sample address that both benchmarks validate.
    /// </summary>
    [GlobalSetup]
    public void Setup() => _email = "test@example.com";

    /// <summary>
    /// Validates the e-mail with a regular expression.
    /// </summary>
    /// <returns><c>true</c> when the whole string matches the pattern.</returns>
    [Benchmark]
    public bool ValidateWithRegex()
    {
        // A new Regex instance is created on every invocation, so its
        // construction cost is part of what this benchmark measures.
        return new Regex(@"^(\w+)\@(\w+)\.(\w+)$").IsMatch(_email);
    }

    /// <summary>
    /// Validates the e-mail by scanning it one character at a time.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the string is non-empty, contains exactly one '@',
    /// has at least one '.' somewhere after an '@', and every other character
    /// is a letter, a digit, '-' or '_'.
    /// </returns>
    [Benchmark]
    public bool ValidateWithParser()
    {
        if (string.IsNullOrEmpty(_email))
        {
            return false;
        }

        string text = _email;
        int atSigns = 0;
        bool dotAfterAt = false;

        for (int i = 0; i < text.Length; i++)
        {
            char current = text[i];
            switch (current)
            {
                case '@':
                    atSigns++;
                    break;

                case '.':
                    // A dot only counts once at least one '@' has been seen.
                    if (atSigns > 0)
                    {
                        dotAfterAt = true;
                    }
                    break;

                case '-':
                case '_':
                    break;

                default:
                    if (!char.IsLetterOrDigit(current))
                    {
                        return false;
                    }
                    break;
            }
        }

        return atSigns == 1 && dotAfterAt;
    }
}
Powered by SharpLab
// .NET 8
public bool ValidateWithRegex()
{
    // Builds a new Regex on each call, then matches the _email field against it.
    Regex regex = new Regex("^(\\w+)\\@(\\w+)\\.(\\w+)$");
    return regex.IsMatch(_email);
}
// .NET 8
public bool ValidateWithParser()
{
    // Null or empty input is rejected before any scanning.
    if (string.IsNullOrEmpty(_email))
    {
        return false;
    }

    string text = _email;
    int atSigns = 0;
    bool dotAfterAt = false;

    for (int i = 0; i < text.Length; i++)
    {
        char current = text[i];

        if (current == '@')
        {
            atSigns++;
            continue;
        }

        if (current == '.')
        {
            // A dot is only recorded once an '@' has already been seen.
            if (atSigns > 0)
            {
                dotAfterAt = true;
            }
            continue;
        }

        if (char.IsLetterOrDigit(current) || current == '-' || current == '_')
        {
            continue;
        }

        // Any other character makes the input invalid.
        return false;
    }

    // Valid only with exactly one '@' and a '.' after it.
    return atSigns == 1 && dotAfterAt;
}
Powered by SharpLab
// .NET 8
// IL for ValidateWithRegex: constructs a new Regex from the pattern literal
// and returns the result of IsMatch on the _email field.
.method public hidebysig
instance bool ValidateWithRegex () cil managed
{
// [Benchmark] attribute. Blob layout: prolog 01 00, int32 0x1a (26),
// a 1-character string "_" (0x5f), then 00 00 = no named arguments.
// NOTE(review): the int32/string look like compiler-supplied caller line
// number and file path - confirm against BenchmarkAttribute's constructor.
.custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
01 00 1a 00 00 00 01 5f 00 00
)
// Method begins at RVA 0x205d
// Code size 22 (0x16)
.maxstack 8
// Push the regex pattern literal.
// sequence point: (line 29, col 9) to (line 29, col 51) in _
IL_0000: ldstr "^(\\w+)\\@(\\w+)\\.(\\w+)$"
// Construct a new Regex instance from the pattern on every call.
// sequence point: (line 30, col 9) to (line 30, col 42) in _
IL_0005: newobj instance void [System.Text.RegularExpressions]System.Text.RegularExpressions.Regex::.ctor(string)
// Load this._email and call IsMatch on the new Regex; the bool result is
// left on the stack and returned.
// sequence point: (line 31, col 9) to (line 31, col 38) in _
IL_000a: ldarg.0
IL_000b: ldfld string RegexVsParserBenchmark::_email
IL_0010: callvirt instance bool [System.Text.RegularExpressions]System.Text.RegularExpressions.Regex::IsMatch(string)
IL_0015: ret
}
// .NET 8
// IL for ValidateWithParser: scans _email character by character and returns
// true only when it holds exactly one '@', a '.' after an '@', and otherwise
// only letters, digits, '-' or '_'.
.method public hidebysig
instance bool ValidateWithParser () cil managed
{
// [Benchmark] attribute. Blob layout: prolog 01 00, int32 0x22 (34),
// a 1-character string "_" (0x5f), then 00 00 = no named arguments.
// NOTE(review): the int32/string look like compiler-supplied caller line
// number and file path - confirm against BenchmarkAttribute's constructor.
.custom instance void [BenchmarkDotNet.Annotations]BenchmarkDotNet.Attributes.BenchmarkAttribute::.ctor(int32, string) = (
01 00 22 00 00 00 01 5f 00 00
)
// Method begins at RVA 0x2074
// Code size 120 (0x78)
.maxstack 2
// Locals 0-2 and 5 carry source names. Unnamed slot 3 holds the string being
// iterated (a copy of _email) and unnamed slot 4 holds the loop index.
.locals init (
[0] int32 atCount,
[1] bool atFound,
[2] bool dotFound,
[3] string,
[4] int32,
[5] char ch
)
// Guard: return false when _email is null or empty.
// sequence point: (line 37, col 9) to (line 37, col 42) in _
IL_0000: ldarg.0
IL_0001: ldfld string RegexVsParserBenchmark::_email
IL_0006: call bool [System.Runtime]System.String::IsNullOrEmpty(string)
IL_000b: brfalse.s IL_000f
// sequence point: (line 39, col 13) to (line 39, col 26) in _
IL_000d: ldc.i4.0
IL_000e: ret
// atCount = 0
// sequence point: (line 42, col 9) to (line 42, col 25) in _
IL_000f: ldc.i4.0
IL_0010: stloc.0
// atFound = false
// sequence point: (line 43, col 9) to (line 43, col 29) in _
IL_0011: ldc.i4.0
IL_0012: stloc.1
// dotFound = false
// sequence point: (line 44, col 9) to (line 44, col 30) in _
IL_0013: ldc.i4.0
IL_0014: stloc.2
// Loop setup: copy _email into local 3 and zero the index in local 4.
// sequence point: (line 46, col 28) to (line 46, col 34) in _
IL_0015: ldarg.0
IL_0016: ldfld string RegexVsParserBenchmark::_email
IL_001b: stloc.3
IL_001c: ldc.i4.0
IL_001d: stloc.s 4
// Jump to the loop condition first, so a zero-length loop never runs the body.
// sequence point: hidden
IL_001f: br.s IL_0063
// loop start (head: IL_0063)
// ch = string[index]
// sequence point: (line 46, col 18) to (line 46, col 24) in _
IL_0021: ldloc.3
IL_0022: ldloc.s 4
IL_0024: callvirt instance char [System.Runtime]System.String::get_Chars(int32)
IL_0029: stloc.s 5
// if (ch == '@')   -- 64 is the code for '@'
// sequence point: (line 48, col 13) to (line 48, col 27) in _
IL_002b: ldloc.s 5
IL_002d: ldc.i4.s 64
IL_002f: bne.un.s IL_0039
// atCount++
// sequence point: (line 50, col 17) to (line 50, col 27) in _
IL_0031: ldloc.0
IL_0032: ldc.i4.1
IL_0033: add
IL_0034: stloc.0
// atFound = true
// sequence point: (line 51, col 17) to (line 51, col 32) in _
IL_0035: ldc.i4.1
IL_0036: stloc.1
// sequence point: hidden
IL_0037: br.s IL_005d
// else if (ch == '.')   -- 46 is the code for '.'
// sequence point: (line 53, col 18) to (line 53, col 32) in _
IL_0039: ldloc.s 5
IL_003b: ldc.i4.s 46
IL_003d: bne.un.s IL_0046
// A dot is only recorded when atFound is already true.
// sequence point: (line 55, col 17) to (line 55, col 29) in _
IL_003f: ldloc.1
IL_0040: brfalse.s IL_005d
// dotFound = true
// sequence point: (line 56, col 21) to (line 56, col 37) in _
IL_0042: ldc.i4.1
IL_0043: stloc.2
// sequence point: hidden
IL_0044: br.s IL_005d
// Otherwise the character is accepted if it is a letter or digit,
// '-' (45) or '_' (95); any of these skips to the index increment.
// sequence point: (line 58, col 18) to (line 58, col 74) in _
IL_0046: ldloc.s 5
IL_0048: call bool [System.Runtime]System.Char::IsLetterOrDigit(char)
IL_004d: brtrue.s IL_005d
IL_004f: ldloc.s 5
IL_0051: ldc.i4.s 45
IL_0053: beq.s IL_005d
IL_0055: ldloc.s 5
IL_0057: ldc.i4.s 95
IL_0059: beq.s IL_005d
// Disallowed character: return false immediately.
// sequence point: (line 60, col 17) to (line 60, col 30) in _
IL_005b: ldc.i4.0
IL_005c: ret
// index++
// sequence point: hidden
IL_005d: ldloc.s 4
IL_005f: ldc.i4.1
IL_0060: add
IL_0061: stloc.s 4
// Loop condition: continue while index < string.Length.
// sequence point: (line 46, col 25) to (line 46, col 27) in _
IL_0063: ldloc.s 4
IL_0065: ldloc.3
IL_0066: callvirt instance int32 [System.Runtime]System.String::get_Length()
IL_006b: blt.s IL_0021
// end loop
// Final check: fall through to "return false" unless atCount == 1 and
// dotFound is true.
// sequence point: (line 64, col 9) to (line 64, col 39) in _
IL_006d: ldloc.0
IL_006e: ldc.i4.1
IL_006f: bne.un.s IL_0074
IL_0071: ldloc.2
IL_0072: brtrue.s IL_0076
// return false
// sequence point: (line 66, col 13) to (line 66, col 26) in _
IL_0074: ldc.i4.0
IL_0075: ret
// return true
// sequence point: (line 69, col 9) to (line 69, col 21) in _
IL_0076: ldc.i4.1
IL_0077: ret
}
Powered by SharpLab
|
|
Benchmark Description:
The provided C# code snippet is a benchmark test that compares the performance of two methods for validating email addresses. The two methods are ValidateWithRegex and ValidateWithParser. The ValidateWithRegex method uses a regular expression to validate the email address, while the ValidateWithParser method uses a custom parser to validate the email address.
The provided benchmark code is designed to compare two different methods for validating email addresses in C#: one using regular expressions (Regex) and the other using a custom parser method. This comparison is crucial for understanding the performance implications of using Regex versus manual string parsing for common validation tasks. The benchmarks are run using the BenchmarkDotNet library, a powerful tool for benchmarking .NET code. The benchmark targets .NET 8, as stated in the title and in the decompiled output above; BenchmarkDotNet itself supports a wide range of .NET versions, including .NET Core and .NET Framework.
### General Setup
- **GlobalSetup (`Setup` method):** This method initializes the benchmark by setting up the test data. Here, it initializes an email string (`_email`) with a value of `"test@example.com"`. This setup runs once before the benchmarks start, ensuring that each benchmark method operates on the same input data.
### Benchmark Methods
#### 1. `ValidateWithRegex`
- **Purpose:** This method tests the performance of using a regular expression to validate an email address. Regular expressions provide a powerful and concise way to validate strings, but they can be computationally expensive, especially for complex patterns or large input strings.
- **Performance Aspect:** It measures the time and resources required to construct a new `Regex` instance from the pattern on every invocation (no `RegexOptions.Compiled` is specified) and then match it against an input string. This benchmark is important because it highlights the efficiency of Regex operations in .NET, which can vary significantly based on the complexity of the pattern and the input size.
- **Expected Insights:** Running this benchmark should give insights into how quickly and efficiently .NET can handle regular expression matching for a relatively simple pattern. High execution times might indicate that for simple validation tasks, Regex might not be the most performance-efficient choice.
#### 2. `ValidateWithParser`
- **Purpose:** This method tests the performance of manually parsing the email string to validate its format. This approach checks the string character by character to ensure it meets the criteria for a valid email address (contains exactly one '@' character, at least one '.' character after '@', and only contains letters, digits, '-', or '_').
- **Performance Aspect:** It measures the efficiency of manually parsing a string without using regular expressions. This method is designed to test how well a hand-written parser performs, focusing on CPU time and memory allocation compared to the Regex approach.
- **Expected Insights:** The results from this benchmark can provide insights into whether a custom parsing approach can outperform regular expressions in terms of speed and resource usage for simple validation tasks. A lower execution time and less memory allocation would suggest that manual parsing is more efficient for this specific case.
### Conclusion
By comparing the performance of `ValidateWithRegex` and `ValidateWithParser`, developers can make informed decisions about which method to use for email validation in their projects. The benchmarks aim to highlight the trade-offs between readability and maintainability of Regex versus the potential performance gains of a custom parser. The results will depend on various factors, including the .NET runtime version, the complexity of the validation logic, and the specifics of the input data. Understanding these benchmarks can help developers optimize their applications for better performance.