'In C#, How can I use MongoDb's Aggregation/Lookup to make this join more efficient?

We have a microservice which caches Salesforce data to mitigate Salesforce limits. While this cache doesn't even attempt to mimic the nuanced/complicated data security policies of Salesforce, we do have some logic to secure fields using roles.

We wish to be join records from multiple SObjects (Tables), each of which are stored in separate Collections once they are in MongoDb. And in fact, I've implemented a solution for this, but (not surprisingly) the performance leaves much to be desired as the join takes place in C# instead of MongoDb.

The relevant part of the solution looks like this:

    public async Task<(List<JObject> records, int totalCount)> FindRecords(GenericQueryParameters parameters, CancellationToken cancellationToken)
    {
        (List<BsonDocument> resultRecords, int count) = await FindMongoRecords(parameters, cancellationToken);
        List<BsonDocument> joinedResults = await Join(resultRecords, parameters.ExtractedJoinCriteria, cancellationToken);
        List<JObject> jObjectRecords = resultRecords.Select(ToJObject)
            .ToList();
        return (jObjectRecords, (int)count);
    }

    private async Task<(List<BsonDocument> records, int totalCount)> FindMongoRecords(GenericQueryParameters parameters, CancellationToken cancellationToken)
    {
        ISObjectConfigurable config = await _sObjectConfigurationManager.Get(parameters.SObjectName);

        IMongoCollection<BsonDocument> collection = GetReadCollection(config);
        FilterDefinition<BsonDocument> filters = BuildFilters(config, parameters);
        IFindFluent<BsonDocument, BsonDocument> filteredCollection = collection.Find(filters);

        IFindFluent<BsonDocument, BsonDocument> sortedCollection = (parameters.SortTransition == Transitions.Ascending)
            ? filteredCollection.SortBy(x => x[parameters.SortField])
            : filteredCollection.SortByDescending(x => x[parameters.SortField]);

        List<BsonDocument> resultRecords = await sortedCollection
            .Skip((parameters.PageNumber - 1) * parameters.PageSize)
            .Limit(parameters.PageSize)
            .Project(GetReadableFields(config))
            .ToListAsync(cancellationToken);
        long count = await collection.CountDocumentsAsync(filters, cancellationToken: cancellationToken);
        return (resultRecords, (int)count);
    }

    private async Task<List<BsonDocument>> Join(List<BsonDocument> resultRecords, List<JoinCriteria> joinCriteria, CancellationToken cancellationToken)
    {
        foreach (JoinCriteria joinCriterium in joinCriteria)
        {
            HashSet<string> targets = resultRecords.Select(x => x[joinCriterium.ParentFieldName])
                .Select(x => $"\"{x}\"")
                .ToHashSet();

            GenericQueryParameters childQueryParameters = new()
            {
                SObjectName = joinCriterium.ChildSObjectName,
                PageSize = 50,
                Filters = new List<string> {
                    $"{joinCriterium.ChildFieldName} IN {{{string.Join(",", targets)}}}"
                }
            };
            (List<BsonDocument> allChildRecords, int _) = await FindMongoRecords(childQueryParameters, cancellationToken);
            Dictionary<BsonValue, List<BsonDocument>> childRecordsByChildField = allChildRecords
                .GroupBy(x => x[joinCriterium.ChildFieldName], x => x)
                .ToDictionary(group => group.Key, group => group.ToList());

            foreach (BsonDocument resultRecord in resultRecords)
            {
                BsonValue parentFieldValue = resultRecord[joinCriterium.ParentFieldName];
                resultRecord[joinCriterium.Collection] = childRecordsByChildField.TryGetValue(parentFieldValue, out List<BsonDocument> childRecords)
                    ? ToBsonDocumentArray(childRecords)
                    : new BsonArray();
            }
        }

        return resultRecords;
    }

    private static BsonArray ToBsonDocumentArray(List<BsonDocument> childRecords)
    {
        BsonArray array = new();
        foreach (BsonDocument childRecord in childRecords)
        {
            _ = array.Add(childRecord);
        }

        return array;
    }

    private ProjectionDefinition<BsonDocument, BsonDocument> GetReadableFields(ISObjectConfigurable config)
    {
        ProjectionDefinitionBuilder<BsonDocument> projectionDefinitionBuilder = Builders<BsonDocument>.Projection;
        IEnumerable<ProjectionDefinition<BsonDocument>> projectionDefinitions = _oAuthRoleValidator.IsAdmin()
            ? new List<ProjectionDefinition<BsonDocument>>()
            : CreateSecureProjection(config, projectionDefinitionBuilder);

        return projectionDefinitionBuilder.Combine(projectionDefinitions);
    }

    private IEnumerable<ProjectionDefinition<BsonDocument>> CreateSecureProjection(
            ISObjectConfigurable config,
            ProjectionDefinitionBuilder<BsonDocument> projectionDefinitionBuilder
        )
    {
        IEnumerable<string> hiddenFields = config.SObjectFields.Where(field => !_oAuthRoleValidator.UserCanReadField(config.FieldConfigByName, field));
        IEnumerable<string> hiddenShadows = hiddenFields.Select(x => "_" + x);
        return hiddenFields.Concat(hiddenShadows)
            .Select(field => projectionDefinitionBuilder.Exclude(field));
    }

GenericQueryParameters looks like this:

public class GenericQueryParameters
{
    private static readonly Regex GenericJoinRegex = new(@"(\w+)\s*:(\w+).(\w+)\s*==\s*(\w+)", RegexOptions.Compiled);

    [JsonProperty(PropertyName = "sObjectName")]
    public string SObjectName { get; set; }

    [JsonProperty(PropertyName = "filters")]
    public List<string> Filters { get; set; } = new List<string>();

    [JsonProperty(PropertyName = "joins")]
    public List<string> JoinCriteria { get; set; } = new List<string>();

    [JsonIgnore]
    [SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "This is a hidden backing field.")]
    private List<JoinCriteria> _extractedJoinCriteria { get; set; }

    [JsonIgnore]
    public List<JoinCriteria> ExtractedJoinCriteria
    {
        get
        {
            if (_extractedJoinCriteria == null)
            {
                _extractedJoinCriteria = JoinCriteria.Select(x => ExtractCriterium(x))
                    .ToList();
            }

            return _extractedJoinCriteria;
        }
    }

    [JsonProperty(PropertyName = "pageNumber")]
    public int PageNumber { get; set; } = 1;

    private const int MaxPageSize = 50;
    private int _pageSize = 10;

    [JsonProperty(PropertyName = "pageSize")]
    public int PageSize
    {
        get => _pageSize;
        set => _pageSize = (value > MaxPageSize)
            ? MaxPageSize
            : value;
    }

    [JsonProperty(PropertyName = "sortField")]
    public string SortField { get; set; } = "_CreatedDate";

    [JsonProperty(PropertyName = "sortTransition")]
    public Transitions SortTransition { get; set; } = Transitions.Ascending;

    [JsonProperty(PropertyName = "includeDeleted")]
    public bool IncludeDeleted { get; set; } = false;

    [JsonProperty(PropertyName = "syncNow")]
    public bool SynchronizeFirst { get; set; } = false;

    [JsonProperty(PropertyName = "transformationTemplateName")]
    public string TransformationTemplateName { get; set; }

    private static JoinCriteria ExtractCriterium(string joinCriteriumString)
    {
        Match match = GenericJoinRegex.Match(joinCriteriumString);
        return match.Success
            ? new()
            {
                Collection = match.Groups[1].Value,
                ChildSObjectName = match.Groups[2].Value,
                ChildFieldName = match.Groups[3].Value,
                ParentFieldName = match.Groups[4].Value
            }
            : throw new MalformedDatabaseJoinException($"The filter '{joinCriteriumString}' could not be parsed.");
    }

and JoinCriteria looks like:

public class JoinCriteria
{
    public string Collection { get; init; }
    public string ChildSObjectName { get; init; }
    public string ChildFieldName { get; init; }
    public string ParentFieldName { get; init; }
}

As you can see, in the present solution, each SObject/Collection is queried separately, data is then redacted to conform the the consumer's permissions, and then finally the data is assembled for return to the consumer.

How can I refactor this solution to perform the join within MongoDb?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source