Creating a serializable fixed-size char array in F#

I am dealing with a very large amount of data I need to load / save to disk where speed is the key.

I wrote this code:

// load from cache
/// Reads the whole file at [filespec] and reinterprets its raw bytes as an
/// array of blittable structs of type 'a.
/// Any trailing bytes that do not fill a complete 'a are dropped by Cast.
let loadFromCacheAsync<'a when 'a: (new: unit -> 'a) and 'a: struct and 'a :> ValueType> filespec =
    async {
        let! bytes = File.ReadAllBytesAsync(filespec) |> Async.AwaitTask
        // MemoryMarshal.Cast accepts a Span over the managed array directly,
        // so no pinning ('fixed'), no NativePtr and no #nowarn "9" is needed.
        return MemoryMarshal.Cast<byte, 'a>(Span<byte>(bytes)).ToArray()
    }

// save to cache
/// Writes [data] to [filespec] as raw bytes. 'a must be unmanaged so the
/// array contents can be reinterpreted byte-for-byte.
let saveToCacheAsync<'a when 'a: unmanaged> filespec (data: 'a array) =
    Directory.CreateDirectory cacheFolder |> ignore
    // MemoryMarshal.AsBytes reinterprets the struct span as bytes without
    // pinning; ToArray copies the bytes out before the async write starts,
    // so no pointer or span escapes into the awaited task.
    // (The original 'nativeSpan' name was misleading — after ToArray() the
    // value is a plain byte[], not a span.)
    let payload = MemoryMarshal.AsBytes(Span<'a>(data)).ToArray()
    File.WriteAllBytesAsync(filespec, payload) |> Async.AwaitTask

and it requires the data structures to be unmanaged. For example, I have:

[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
// Blittable trade record with an explicit, fixed layout so arrays of it can
// be reinterpreted as raw bytes for the cache round-trip.
// Offsets assume Timestamp (DateTime) occupies 8 bytes — consistent with
// Price starting at offset 8.
type ShortTradeData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        [<FieldOffset(08)>]    Price:     double
        [<FieldOffset(16)>]    Quantity:  double
        [<FieldOffset(24)>]    Direction: int
    }

or

[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
// Blittable OHLC candle record; same explicit-layout scheme as
// ShortTradeData so it can be serialized via byte reinterpretation.
type ShortCandleData =
    {
        [<FieldOffset(00)>] Timestamp:  DateTime
        [<FieldOffset(08)>] Open:       double
        [<FieldOffset(16)>] High:       double
        [<FieldOffset(24)>] Low:        double
        [<FieldOffset(32)>] Close:      double
    }

etc...

I'm now facing a case where I need to store a string. I know the maximum length of the strings, but I'm trying to find out how I can do this with unmanaged types.

I'm wondering if I could do something like this (for 256 bytes):

[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
// Proposed layout: Text is declared as a single char, but the jump from
// offset 8 to offset 264 leaves a 256-byte hole intended as inline string
// storage, to be written through a pointer to Text.
// NOTE(review): a .NET char is 2 bytes (UTF-16), so 256 reserved bytes hold
// only 128 chars — confirm whether the intent was 256 chars (512 bytes).
type TestData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        [<FieldOffset(08)>]    Text:      char
        [<FieldOffset(264)>]   Dummy:     int
    }

Would it be safe then to get a pointer to Text, cast it to a char array, read / write what I want in it and then save / load it as needed?

Or am I asking for some random troubles at some point?

As a side question, any way to speed up the loadFromCache function is very welcome too :)

Edit:

I came up with this for now. It converts a list of complex event objects into something serializable. The line:

let bytes = Pipeline.serializeBinary event

turns the original event data into a byte array.

Then I create the struct that will hold the binary stream, write the length, create a span representing the struct and copy the bytes. Then I marshal the span into the struct type (ShortEventData).

I can't use Marshal.Copy since I can't specify a destination offset, so I have to copy the bytes with a loop. But there has to be a better way.

And I think, there has to be a better way for everything else in this as well :D Any suggestion would help, I just don't really like this solution.

[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
// Fixed-size blittable event: the gap between Event (offset 8) and Length
// (offset 1032) reserves 1024 bytes of inline payload that is filled in
// through a pinned pointer rather than through a named field.
type ShortEventData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        [<FieldOffset(08)>]    Event:     byte
        [<FieldOffset(1032)>]  Length:    int
    }

events
|> List.map (fun event ->
        let bytes = Pipeline.serializeBinary event
        let serializableEvent : DataCache.ShortEventData =
            {
                Timestamp = event.GetTimestamp()
                Event     = byte 0
                Length    = bytes.Length
            }
        use ptr = fixed [|serializableEvent|]
        // BUG FIX: the span must cover exactly ONE struct (sizeStruct bytes).
        // The original used serializableEvent.Length * sizeStruct, which
        // overruns the pinned single-element array — undefined behavior.
        let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeStruct)
        // Copy the payload into the 1024-byte hole at offset 8 in one call
        // instead of a byte-by-byte loop (Slice provides the offset).
        bytes.CopyTo(nativeSpan.Slice(8, bytes.Length))
        // Read the struct straight out of the span — no ToArray() allocation.
        MemoryMarshal.Cast<byte, DataCache.ShortEventData>(nativeSpan).[0]
    )

Edit:

Adding benchmarks for different serialization models:

open System
open System.IO
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open MBrace.FsPickler
open Microsoft.FSharp.NativeInterop
open Newtonsoft.Json

#nowarn "9"



[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
// 16-byte benchmark payload (two ints + one double) with explicit layout so
// it is safe to reinterpret as raw bytes.
type TestStruct =
    {
        [<FieldOffset(00)>] SomeValue:       int
        [<FieldOffset(04)>] AnotherValue:    int
        [<FieldOffset(08)>] YetAnotherValue: double
    }
    
    // Deterministic factory: with a seeded Random, the three draws happen in
    // field-declaration order, so the produced data is reproducible.
    static member MakeOne(r: Random) =
        {
            SomeValue       = r.Next()
            AnotherValue    = r.Next()
            YetAnotherValue = r.NextDouble()
        }
        
        
        
[<MemoryDiagnoser>]
type Benchmarks () =        
    // 1000 structs built from a fixed seed so every serializer benchmarks
    // against identical data.
    let testData =
        let random = Random(1000)
        Array.init 1000 (fun _ -> TestStruct.MakeOne(random))

    
    // Serializer instances are created once here, outside the measured code.
    // FSPickler
    let FSPicklerSerializer = FsPickler.CreateBinarySerializer()

    // APEX
    let ApexSettings = Apex.Serialization.Settings().MarkSerializable(typeof<TestStruct>)
    let ApexBinarySerializer = Apex.Serialization.Binary.Create(ApexSettings)
        

    [<Benchmark>]
    member _.Thomas() =  // thomas' save to disk
        // Pin the struct array and copy its raw bytes out in one span copy.
        let sizeStruct = sizeof<TestStruct>
        use ptr = fixed testData
        Span<byte>(NativePtr.toVoidPtr ptr, testData.Length * sizeStruct).ToArray()

    [<Benchmark>]
    member _.Newtonsoft() =
        JsonConvert.SerializeObject(testData)

    [<Benchmark>]
    member _.FSPickler() =
        FSPicklerSerializer.Pickle testData
        
    [<Benchmark>]
    member _.Apex() =
        // FIX: bind the IDisposable stream with 'use' so it is disposed at
        // the end of the call (the original 'let ... = new MemoryStream()'
        // never disposed it and triggers compiler warning FS0760).
        use outputStream = new MemoryStream()
        ApexBinarySerializer.Write(testData, outputStream)        
        
    
[<EntryPoint>]
let main _ =
    // Run the benchmark suite; the runner prints the report itself, so the
    // summary object is not needed.
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
|     Method |         Mean |        Error |       StdDev |    Gen 0 |   Gen 1 |   Gen 2 | Allocated |
|----------- |-------------:|-------------:|-------------:|---------:|--------:|--------:|----------:|
|     Thomas |     878.4 ns |     11.74 ns |     10.41 ns |   2.5444 |  0.1411 |       - |     16 KB |
| Newtonsoft | 880,641.2 ns | 16,346.50 ns | 15,290.52 ns | 103.5156 | 79.1016 | 48.8281 |    508 KB |
|  FSPickler |  71,786.6 ns |  1,373.89 ns |  1,349.35 ns |  13.6719 |  2.0752 |       - |     84 KB |
|       Apex |   1,088.8 ns |     20.59 ns |     22.03 ns |   2.6093 |  0.0725 |       - |     16 KB |

It looks like Apex is very close to what I did, but it's probably a lot more flexible and more optimized, so it could make sense to switch to it, UNLESS what I have can be a lot more optimized.

I have to see how @JL0PD's excellent comments can improve the speed.



Solution 1:[1]

Out of interest I took the lambda at the end of your question and tested three similar implementations and ran it on Benchmark.Net.

  • Reference - as you have shown
  • Mutable Struct - as I might have done it with a mutable struct
  • Record - using a plain old dumb record

See the results for yourself. Plain old dumb record is the fastest (though only marginally faster than my attempt and ~10x faster than your example). Write dumb code first. Benchmark it. Then try to improve.

#nowarn "9"

open System
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open Microsoft.FSharp.NativeInterop

// Plain-record alternative: the payload is an ordinary byte[] reference, so
// no explicit layout or pinning is involved (the type is not blittable).
type ShortEventDataRec =
    {
        Timestamp: DateTime
        Event:     byte[]
        Length:    int
    }

[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
// Copy of the questioner's struct: the gap between Event (offset 8) and
// Length (offset 1032) reserves 1024 bytes of inline payload.
type ShortEventData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        [<FieldOffset(08)>]    Event:     byte
        [<FieldOffset(1032)>]  Length:    int
    }

[<StructLayout(LayoutKind.Explicit)>]
// Same layout as ShortEventData but with mutable fields, so a span cast to
// this type can be filled in place instead of pinned and poked via pointer.
type MutableShortEventData =
    struct
        [<FieldOffset(00)>]    val mutable Timestamp: DateTime
        [<FieldOffset(08)>]    val mutable Event:     byte
        [<FieldOffset(1032)>]  val mutable Length:    int
    end 

[<MemoryDiagnoser>]
type Benchmarks () =

    // 1 KiB payload and a fixed timestamp shared by all three variants.
    let event  = 
        Array.init 1024 (fun i -> byte (i % 256))
    let time = DateTime.Now
    let sizeStruct = sizeof<ShortEventData>


    [<Benchmark>]
    member __.Reference() =
        // The questioner's approach: pin a one-element struct array and poke
        // the payload into it byte by byte through the span.
        let bytes = event
        let serializableEvent =
            {
                ShortEventData.Timestamp = time
                Event     = byte 0
                Length    = bytes.Length
            }
        use ptr = fixed [|serializableEvent|]
        let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeStruct)
        for i = 0 to bytes.Length - 1 do
            nativeSpan.[8 + i] <- bytes.[i]

        MemoryMarshal.Cast<byte, ShortEventData>(nativeSpan).[0]


    [<Benchmark>]
    member __.MutableStruct() =
        // View an uninitialized byte array as the struct and fill it in place
        // — no pinning and no per-byte loop.
        let bytes = event

        let targetBytes = GC.AllocateUninitializedArray(sizeStruct)
        let targetSpan = Span(targetBytes)
        let targetStruct = MemoryMarshal.Cast<_, MutableShortEventData>(targetSpan)

        targetStruct.[0].Timestamp <- time
        // FIX: CopyTo returns unit; the original bound that unit to an
        // unused 'targetEvent' name, which was misleading dead code.
        bytes.CopyTo(targetSpan.Slice(8, 1024))
        targetStruct.[0].Length <- event.Length

        targetStruct.[0]


    [<Benchmark>]
    member __.Record() =
        // Plain record: just copy the payload into a fresh byte array.
        let bytes = event
        let serializableEvent =
            {
                ShortEventDataRec.Timestamp = time
                Event     = 
                    let eventBytes = GC.AllocateUninitializedArray(bytes.Length)
                    System.Array.Copy(bytes, eventBytes, bytes.Length)
                    eventBytes
                Length    = bytes.Length
            }
        serializableEvent
    
[<EntryPoint>]
let main _ =
    // Run the benchmark suite; the report is printed by the runner, so the
    // returned summary can be discarded.
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
|        Method |      Mean |    Error |   StdDev |  Gen 0 |  Gen 1 | Allocated |
|-------------- |----------:|---------:|---------:|-------:|-------:|----------:|
|     Reference | 526.88 ns | 6.318 ns | 5.909 ns | 0.0629 |      - |      1 KB |
| MutableStruct |  49.50 ns | 0.966 ns | 1.074 ns | 0.0636 |      - |      1 KB |
|        Record |  42.73 ns | 0.672 ns | 0.628 ns | 0.0650 | 0.0002 |      1 KB |

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 tranquillity