Creating a serializable fixed-size char array in F#
I am dealing with a very large amount of data I need to load / save to disk where speed is the key.
I wrote this code:
// load from cache
// Reads the whole file into memory and reinterprets the raw bytes as an
// array of structs 'a via MemoryMarshal.Cast (zero-copy view, then one
// copy via ToArray). 'a must be a value type with no managed references.
let loadFromCacheAsync<'a when 'a: (new: unit -> 'a) and 'a: struct and 'a :> ValueType> filespec =
async {
// asynchronously read the entire file into a managed byte[]
let! bytes = File.ReadAllBytesAsync(filespec) |> Async.AwaitTask
let result =
// pin the array so the GC cannot move it while we hold a raw pointer;
// the pin is released when pBytes goes out of scope (after ToArray copies)
use pBytes = fixed bytes
let sourceSpan = Span<byte>(NativePtr.toVoidPtr pBytes, bytes.Length)
// NOTE(review): if bytes.Length is not an exact multiple of sizeof<'a>,
// MemoryMarshal.Cast silently drops the trailing remainder — confirm
// files are only ever produced by the matching saveToCacheAsync.
MemoryMarshal.Cast<byte, 'a>(sourceSpan).ToArray()
return result
}
// save to cache
// Pins the struct array, reinterprets its storage as bytes, copies them
// into a managed byte[], and writes that to disk asynchronously.
let saveToCacheAsync<'a when 'a: unmanaged> filespec (data: 'a array) =
// cacheFolder is defined elsewhere in this file; ensure it exists first
Directory.CreateDirectory cacheFolder |> ignore
let sizeStruct = sizeof<'a>
// pin data so the pointer below stays valid for the span's lifetime
use ptr = fixed data
// ToArray copies out of the pinned region immediately, so releasing the
// pin before the asynchronous write completes is safe
let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, data.Length * sizeStruct).ToArray()
File.WriteAllBytesAsync(filespec, nativeSpan) |> Async.AwaitTask
and it requires the data structures to be unmanaged. For example, I have:
// Fixed-layout trade record intended for raw byte serialization.
// Explicit offsets make the on-disk layout independent of compiler field
// ordering; each 8-byte field is packed directly after the previous one
// (the offsets show DateTime occupies 8 bytes here).
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortTradeData =
{
[<FieldOffset(00)>] Timestamp: DateTime
[<FieldOffset(08)>] Price: double
[<FieldOffset(16)>] Quantity: double
[<FieldOffset(24)>] Direction: int
}
or
// Fixed-layout OHLC candle record for raw byte serialization.
// Same packing scheme as ShortTradeData: explicit, contiguous 8-byte
// fields so the byte image is stable across builds.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortCandleData =
{
[<FieldOffset(00)>] Timestamp: DateTime
[<FieldOffset(08)>] Open: double
[<FieldOffset(16)>] High: double
[<FieldOffset(24)>] Low: double
[<FieldOffset(32)>] Close: double
}
etc...
I'm now facing a case where I need to store a string. I know the max length of the strings, but I'm trying to find out how I can do this with unmanaged types.
I'm wondering if I could do something like this (for 256 bytes):
// Proposed layout from the question: the gap between offset 8 and offset
// 264 reserves 256 bytes of inline text storage, with Dummy pinning the
// struct's end.
// NOTE(review): a .NET char is 2 bytes (UTF-16), so 256 bytes holds only
// 128 chars — confirm whether 256 chars (512 bytes) was intended. Also,
// 'Text: char' only names the first character; the rest of the reserved
// region is unnamed and reachable only via pointer arithmetic.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type TestData =
{
[<FieldOffset(00)>] Timestamp: DateTime
[<FieldOffset(08)>] Text: char
[<FieldOffset(264)>] Dummy: int
}
Would it be safe then to get a pointer to Text, cast it to a char array, read / write what I want in it and then save / load it as needed?
Or am I asking for some random troubles at some point?
As a side question, any way to speed up the loadFromCache function is very welcome too :)
Edit:
I came up with this for now. It converts a list of complex event objects into something serializable. The line:
let bytes = Pipeline.serializeBinary event
turns the original event data into a byte array.
Then I create the struct that will hold the binary stream, write the length, create a span representing the struct and copy the bytes. Then I marshal the span into the struct type (ShortEventData).
I can't use Marshal copy since I can't put a destination offset, so I have to copy the bytes with a loop. But there has to be a better way.
And I think, there has to be a better way for everything else in this as well :D Any suggestion would help, I just don't really like this solution.
// Fixed-layout event record: 8-byte Timestamp, then a 1024-byte payload
// region starting at offset 8 (only its first byte is declared, as
// Event), and the actual payload Length stored at offset 1032.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
{
[<FieldOffset(00)>] Timestamp: DateTime
[<FieldOffset(08)>] Event: byte
[<FieldOffset(1032)>] Length: int
}
// Convert each event into a fixed-layout ShortEventData by serializing
// it to bytes and copying the bytes into the struct's 1024-byte payload
// region (offset 8). Pipeline.serializeBinary and DataCache come from
// elsewhere in the project.
events
|> List.map (fun event ->
    let bytes = Pipeline.serializeBinary event
    let serializableEvent : DataCache.ShortEventData =
        {
            Timestamp = event.GetTimestamp()
            Event = byte 0
            Length = bytes.Length
        }
    // pin a one-element array so we can view the struct's bytes in place
    use ptr = fixed [|serializableEvent|]
    // BUG FIX: the span must cover exactly ONE struct (sizeStruct bytes).
    // The original used serializableEvent.Length * sizeStruct — payload
    // length times struct size — producing a wildly out-of-bounds span
    // over memory the pin does not own (undefined behavior on write).
    let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeStruct)
    // block-copy the payload into the Event region instead of a byte loop
    bytes.AsSpan().CopyTo(nativeSpan.Slice(8, bytes.Length))
    MemoryMarshal.Cast<byte, DataCache.ShortEventData>(nativeSpan).ToArray()[0]
)
Edit:
Adding benchmarks for different serialization models:
open System
open System.IO
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open MBrace.FsPickler
open Microsoft.FSharp.NativeInterop
open Newtonsoft.Json
#nowarn "9"
// 16-byte benchmark payload struct with explicit, contiguous offsets.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type TestStruct =
{
[<FieldOffset(00)>] SomeValue: int
[<FieldOffset(04)>] AnotherValue: int
[<FieldOffset(08)>] YetAnotherValue: double
}
// Build one struct from the supplied RNG. Field initializers run in
// declaration order, so the Random call sequence is deterministic for a
// fixed seed.
static member MakeOne(r: Random) =
{
SomeValue = r.Next()
AnotherValue = r.Next()
YetAnotherValue = r.NextDouble()
}
// Compares four serialization strategies over identical data.
// Serializer instances are built once in the constructor so only the
// serialization itself is measured.
[<MemoryDiagnoser>]
type Benchmarks () =
// 1000 structs from a fixed seed so every run and every serializer
// sees the same input
let testData =
let random = Random(1000)
Array.init 1000 (fun _ -> TestStruct.MakeOne(random))
// inits, outside of the benchmarks
// FSPickler
let FSPicklerSerializer = FsPickler.CreateBinarySerializer()
// APEX
let ApexSettings = Apex.Serialization.Settings().MarkSerializable(typeof<TestStruct>)
let ApexBinarySerializer = Apex.Serialization.Binary.Create(ApexSettings)
// pin + reinterpret the struct array as bytes, single copy via ToArray
[<Benchmark>]
member _.Thomas() = // thomas' save to disk
let sizeStruct = sizeof<TestStruct>
use ptr = fixed testData
Span<byte>(NativePtr.toVoidPtr ptr, testData.Length * sizeStruct).ToArray()
[<Benchmark>]
member _.Newtonsoft() =
JsonConvert.SerializeObject(testData)
[<Benchmark>]
member _.FSPickler() =
FSPicklerSerializer.Pickle testData
// NOTE(review): the MemoryStream is never disposed ('use' would add
// cleanup overhead to the measurement, but harmless for MemoryStream)
[<Benchmark>]
member _.Apex() =
let outputStream = new MemoryStream()
ApexBinarySerializer.Write(testData, outputStream)
// Entry point: run every [<Benchmark>] member via BenchmarkDotNet,
// discard the summary, and exit with code 0.
[<EntryPoint>]
let main _ =
let _ = BenchmarkRunner.Run<Benchmarks>()
0
| Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated |
|----------- |-------------:|-------------:|-------------:|---------:|--------:|--------:|----------:|
| Thomas | 878.4 ns | 11.74 ns | 10.41 ns | 2.5444 | 0.1411 | - | 16 KB |
| Newtonsoft | 880,641.2 ns | 16,346.50 ns | 15,290.52 ns | 103.5156 | 79.1016 | 48.8281 | 508 KB |
| FSPickler | 71,786.6 ns | 1,373.89 ns | 1,349.35 ns | 13.6719 | 2.0752 | - | 84 KB |
| Apex | 1,088.8 ns | 20.59 ns | 22.03 ns | 2.6093 | 0.0725 | - | 16 KB |
It looks like Apex is very close to what I did, but it's probably a lot more flexible and more optimized, so it could make sense to switch to it, UNLESS what I have can be a lot more optimized.
I have to see how @JL0PD's excellent comments can improve the speed.
Solution 1:[1]
Out of interest I took the lambda at the end of your question and tested three similar implementations and ran it on Benchmark.Net.
- Reference — as you have shown
- Mutable Struct — as I might have done it with a mutable struct
- Record — using a plain old dumb record
See the results for yourself. Plain old dumb record is the fastest (though only marginally faster than my attempt and ~10x faster than your example). Write dumb code first. Benchmark it. Then try to improve.
#nowarn "9"
open System
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open Microsoft.FSharp.NativeInterop
// Plain-record alternative: the payload is an ordinary byte[] reference
// rather than an inline fixed-size region, so no explicit layout or
// pointer work is needed.
type ShortEventDataRec =
{
Timestamp: DateTime
Event: byte[]
Length: int
}
// Same fixed layout as the question's struct: 1024-byte payload region
// between the Timestamp (offset 0) and Length (offset 1032); only the
// payload's first byte is declared, as Event.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
{
[<FieldOffset(00)>] Timestamp: DateTime
[<FieldOffset(08)>] Event: byte
[<FieldOffset(1032)>] Length: int
}
// Same explicit layout as ShortEventData, but with mutable fields so it
// can be written in place through a MemoryMarshal.Cast view of a byte
// buffer.
[<StructLayout(LayoutKind.Explicit)>]
type MutableShortEventData =
struct
[<FieldOffset(00)>] val mutable Timestamp: DateTime
[<FieldOffset(08)>] val mutable Event: byte
[<FieldOffset(1032)>] val mutable Length: int
end
// Benchmarks three ways of packing a 1 KiB payload next to a timestamp
// and length: byte-by-byte through a pinned span, in place through a
// mutable-struct view, and as a plain record.
[<MemoryDiagnoser>]
type Benchmarks () =
    // 1 KiB deterministic payload shared by every benchmark iteration
    let event =
        Array.init 1024 (fun i -> byte (i % 256))
    let time = DateTime.Now
    let sizeStruct = sizeof<ShortEventData>

    // Original approach: pin a one-element struct array and poke the
    // payload into the pinned memory one byte at a time.
    [<Benchmark>]
    member __.Reference() =
        let bytes = event
        let serializableEvent =
            {
                ShortEventData.Timestamp = time
                Event = byte 0
                Length = bytes.Length
            }
        use ptr = fixed [|serializableEvent|]
        let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeStruct)
        for i = 0 to bytes.Length - 1 do
            nativeSpan.[8 + i] <- bytes.[i]
        MemoryMarshal.Cast<byte, ShortEventData>(nativeSpan).[0]

    // Mutable-struct approach: allocate an uninitialized buffer, view it
    // as the struct, and write the fields and payload directly.
    [<Benchmark>]
    member __.MutableStruct() =
        let bytes = event
        let targetBytes = GC.AllocateUninitializedArray(sizeStruct)
        let targetSpan = Span(targetBytes)
        let targetStruct = MemoryMarshal.Cast<_, MutableShortEventData>(targetSpan)
        targetStruct.[0].Timestamp <- time
        // FIX: CopyTo returns unit; the original bound that unit to a
        // misleadingly named 'targetEvent' local. Call it as a statement.
        bytes.CopyTo(targetSpan.Slice(8, 1024))
        targetStruct.[0].Length <- event.Length
        targetStruct.[0]

    // Plain-record approach: copy the payload into a fresh array and
    // build an ordinary record around it.
    [<Benchmark>]
    member __.Record() =
        let bytes = event
        let serializableEvent =
            {
                ShortEventDataRec.Timestamp = time
                Event =
                    let eventBytes = GC.AllocateUninitializedArray(bytes.Length)
                    System.Array.Copy(bytes, eventBytes, bytes.Length)
                    eventBytes
                Length = bytes.Length
            }
        serializableEvent
// Entry point: run every [<Benchmark>] member via BenchmarkDotNet,
// discard the summary, and exit with code 0.
[<EntryPoint>]
let main _ =
let _ = BenchmarkRunner.Run<Benchmarks>()
0
| Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Allocated |
|---|---|---|---|---|---|---|
| Reference | 526.88 ns | 6.318 ns | 5.909 ns | 0.0629 | - | 1 KB |
| MutableStruct | 49.50 ns | 0.966 ns | 1.074 ns | 0.0636 | - | 1 KB |
| Record | 42.73 ns | 0.672 ns | 0.628 ns | 0.0650 | 0.0002 | 1 KB |
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | tranquillity |
