Benchmarking different ways of calculating average of a column in Entity Framework 8




Date Added (UTC):

08 Apr 2024 @ 02:33

Date Updated (UTC):

12 Apr 2024 @ 02:01


.NET Version(s):

.NET 8

Tag(s):

#DBOperations #EntityFramework


Added By:
Profile Image

Blog   
Ireland    
.NET Developer and tech lead from Ireland!

Benchmark Results:





Benchmark Code:



using System;
using System.Linq;
using BenchmarkDotNet.Attributes;
using Microsoft.EntityFrameworkCore;

[MemoryDiagnoser]
[ReturnValueValidator] 
public class AverageBlogRanking
{
    [Params(1000)]
    public int NumBlogs; // number of records to write [once], and read [each pass]

    [GlobalSetup]
    public void Setup()
    {
        using var context = new BloggingContext();
        context.Database.EnsureDeleted();
        context.Database.EnsureCreated();
        context.SeedData(NumBlogs);
    }

    [Benchmark]
    public double LoadEntitiesNoTracking()
    {
        var sum = 0;
        var count = 0;
        using var ctx = new BloggingContext();
        foreach (var blog in ctx.Blogs.AsNoTracking())
        {
            sum += blog.Rating;
            count++;
        }

        return (double)sum / count;
    }

    [Benchmark]
    public double ProjectOnlyRanking()
    {
        var sum = 0;
        var count = 0;
        using var ctx = new BloggingContext();
        foreach (var rating in ctx.Blogs.Select(b => b.Rating))
        {
            sum += rating;
            count++;
        }

        return (double)sum / count;
    }

    [Benchmark(Baseline = true)]
    public double CalculateInDatabase()
    {
        using var ctx = new BloggingContext();
        return ctx.Blogs.Average(b => b.Rating);
    }

    public class BloggingContext : DbContext
    {
        public DbSet<Blog> Blogs { get; set; }

        protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
            => optionsBuilder.UseSqlServer(@"Server=localhost;Database=Blogging;Trusted_Connection=True;TrustServerCertificate=true");

        public void SeedData(int numblogs)
        {
            Blogs.AddRange(
                Enumerable.Range(0, numblogs).Select(
                    i => new Blog
                    {
                        Name = $"Blog{i}",
                        Url = $"blog{i}.blogs.net",
                        CreationTime = new DateTime(2020, 1, 1),
                        Rating = i % 5
                    }));
            SaveChanges();
        }
    }

    public class Blog
    {
        public int BlogId { get; set; }
        public string Name { get; set; }
        public string Url { get; set; }
        public DateTime CreationTime { get; set; }
        public int Rating { get; set; }
    }
}

// .NET 8 Lowered C# Code unavailable due to errors:
error CS0234: The type or namespace name 'EntityFrameworkCore' does not exist in the namespace 'Microsoft' (are you missing an assembly reference?)
error CS0246: The type or namespace name 'DbContext' could not be found (are you missing a using directive or an assembly reference?)
error CS0246: The type or namespace name 'DbSet<>' could not be found (are you missing a using directive or an assembly reference?)
error CS0246: The type or namespace name 'DbContextOptionsBuilder' could not be found (are you missing a using directive or an assembly reference?)
error CS1674: 'AverageBlogRanking.BloggingContext': type used in a using statement must be implicitly convertible to 'System.IDisposable'.
error CS1061: 'AverageBlogRanking.BloggingContext' does not contain a definition for 'Database' and no accessible extension method 'Database' accepting a first argument of type 'AverageBlogRanking.BloggingContext' could be found (are you missing a using directive or an assembly reference?)
error CS0103: The name 'SaveChanges' does not exist in the current context

// .NET 8 IL Code unavailable due to errors:
error CS0234: The type or namespace name 'EntityFrameworkCore' does not exist in the namespace 'Microsoft' (are you missing an assembly reference?)
error CS0246: The type or namespace name 'DbContext' could not be found (are you missing a using directive or an assembly reference?)
error CS0246: The type or namespace name 'DbSet<>' could not be found (are you missing a using directive or an assembly reference?)
error CS0246: The type or namespace name 'DbContextOptionsBuilder' could not be found (are you missing a using directive or an assembly reference?)
error CS1674: 'AverageBlogRanking.BloggingContext': type used in a using statement must be implicitly convertible to 'System.IDisposable'.
error CS1061: 'AverageBlogRanking.BloggingContext' does not contain a definition for 'Database' and no accessible extension method 'Database' accepting a first argument of type 'AverageBlogRanking.BloggingContext' could be found (are you missing a using directive or an assembly reference?)
error CS0103: The name 'SaveChanges' does not exist in the current context

// .NET 8 Jit Asm Code unavailable due to errors:
error CS0234: The type or namespace name 'EntityFrameworkCore' does not exist in the namespace 'Microsoft' (are you missing an assembly reference?)
error CS0246: The type or namespace name 'DbContext' could not be found (are you missing a using directive or an assembly reference?)
error CS0246: The type or namespace name 'DbSet<>' could not be found (are you missing a using directive or an assembly reference?)
error CS0246: The type or namespace name 'DbContextOptionsBuilder' could not be found (are you missing a using directive or an assembly reference?)
error CS1674: 'AverageBlogRanking.BloggingContext': type used in a using statement must be implicitly convertible to 'System.IDisposable'.
error CS1061: 'AverageBlogRanking.BloggingContext' does not contain a definition for 'Database' and no accessible extension method 'Database' accepting a first argument of type 'AverageBlogRanking.BloggingContext' could be found (are you missing a using directive or an assembly reference?)
error CS0103: The name 'SaveChanges' does not exist in the current context


Benchmark Description:


For this benchmark at least it looks like offloading the calculation to the DB is the fastest and most memory efficient approach.

This benchmark setup is designed to measure and compare different methods of calculating the average rating of blogs in a database using Entity Framework Core (EF Core) in a .NET environment. The setup involves a test database (Blogging) and a specific number of blog records (controlled by the `NumBlogs` parameter) to perform the operations on. The benchmarks are run using BenchmarkDotNet, a powerful .NET library for benchmarking, with memory diagnostics enabled to measure the memory usage of each method. ### General Setup - **.NET Version**: The exact .NET version isn't specified, but given the use of EF Core and BenchmarkDotNet, it's likely targeting .NET Core 3.1 or .NET 5/6. - **Database**: SQL Server, as indicated by the connection string in `OnConfiguring`. - **EF Core Version**: Not specified, but the code suggests a relatively recent version that supports `AsNoTracking()` and LINQ queries. - **Configuration**: The `NumBlogs` parameter is set to 1000, meaning the benchmarks will operate on a dataset of 1000 blog records. The `GlobalSetup` method ensures a fresh database for each run by deleting and recreating the database, then seeding it with data. ### Benchmark Methods #### 1. `LoadEntitiesNoTracking()` **Purpose**: This method tests the performance of loading full entities without tracking changes to them (`AsNoTracking()`) and manually calculating the average rating. **Rationale**: Tracking changes in EF Core can add overhead because the context needs to keep track of the entities' states. By disabling tracking, this method aims to reduce overhead when the entities are only read from the database. This benchmark measures how efficiently we can load entities, iterate over them, and perform calculations in memory. **Expected Insights**: This method should provide insights into the memory usage and speed of processing entities without the overhead of change tracking. It's expected to be faster and use less memory than tracking entities but slower than calculating the average directly in the database due to the manual iteration and calculation in .NET. #### 2. `ProjectOnlyRanking()` **Purpose**: This method evaluates the performance of projecting only the necessary data (in this case, the `Rating` field) and calculating the average rating in memory. **Rationale**: By selecting only the required field, this approach aims to reduce the amount of data retrieved from the database, potentially lowering memory usage and increasing speed compared to loading full entities. **Expected Insights**: This benchmark is designed to show the benefits of projecting only the needed data. It should use less memory and possibly be faster than loading full entities, as less data is being transferred and processed. However, it still involves manual calculation in memory, which might not be as efficient as database-side calculations. #### 3. `CalculateInDatabase()` **Purpose**: This method measures the performance of calculating the average rating directly in the database. **Rationale**: Databases are optimized for operations like aggregations (e.g., calculating averages), so performing the calculation on the database side should be more efficient than retrieving data and calculating in the application. **Expected Insights**: As the baseline method, this approach is expected to be the most efficient in terms of speed and memory usage. It demonstrates the performance benefits of leveraging database capabilities for data processing tasks. This method should provide a benchmark for comparing the overhead of retrieving data and processing it in the application. ### Conclusion Each method is designed to test a different aspect of data retrieval and processing with EF Core, from loading full entities to projecting only necessary data, to leveraging database capabilities for calculations. By comparing these methods, one can gain insights into the trade-offs between data processing in the application versus the database, the impact of change tracking on performance, and the efficiency of projecting data. These insights can guide optimization strategies for .NET applications that rely on EF Core for data access.


Benchmark Comments: