'How to compare and identify similarities between customer orders on the same list
I'm stuck trying to solve the below excel problem.
I have a list of customer orders that I'm trying to compare with each other to get a percentage total. The percentage should identify the similarities between each order set based on the item type. So for example, the total number of orders under Matt would be compared with the total number of orders under Jennifer and Walter. There should be two seperate percentage totals for each customer. So in the example with Matt the percentage total would identify how similar Matts order is to Jennifer's order and there would be another percentage total that would identify how similar Matts order is to Walter's order.
I'm not sure how to tackle this problem and have been pulling my hair out in the research process to find a solution. I have tried approaching it from a one-to-many/many-to-many relationship but am not sure how it would work with a single list.
Any help or guidance would be greatly appreciated.
Solution 1:[1]
I thought this might be an interesting challenge in Power Query to generate the Jaccard-Tanimoto coefficient for the different comparisons.
Algorithm
- Group the table by Customer
- Generate a List of the items for each customre
- Using a recursive custom function, generate a List of all possible Combinations given the number of customers.
- This List contains all possible number combinations using Pairs of numbers where the numbers represent the Index into the unique List of customers. So for a customer count of 3, we would generate {0,1}{0,2}{1,2}
- We then use that List of Indexes to generate both our coefficient and the corresponding list of customer comparisons.
- For the coefficient, I used the formula
Nc/(Na+Nb-Nc)whereNcis the count of the Union of the two lists, andNaandNbis the count of each of the two lists.
Custom Function
Enter as a blank query and Rename fnCombo
//generate all possible combinations of numbers from min to max
(min as number, max as number)=>
let
nums = {min..max},
x = List.Generate(
()=>[a={nums{0},nums{1}}, idx=0],
each [idx] < List.Count(nums)-1,
each [a={nums{0}, nums{[idx]+2}}, idx=[idx]+1],
each [a]
),
result = if (min+1)=max then x else List.Combine({x,fnCombo(min+1,max)})
in
result
M Code
let
Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
#"Changed Type" = Table.TransformColumnTypes(Source,{{"Customer Order", type text}, {"Item Type", type text}}),
//group by customer
#"Grouped Rows" = Table.Group(#"Changed Type", {"Customer Order"}, {
{"Attributes", each List.Distinct([Item Type]), type list}
}),
//Create separate columns for each comparison
similarities=
let
customers=#"Grouped Rows"[Customer Order],
attrib = #"Grouped Rows"[Attributes],
indices = fnCombo(0,List.Count(customers)-1),
//calculate Jaccard/Tanimoto Coefficient
tanimotos =
List.Generate(
()=>[x=
let
Nc= List.Count(List.Intersect({attrib{indices{0}{0}},attrib{indices{0}{1}}})),
Na= List.Count(attrib{indices{0}{0}}),
Nb= List.Count(attrib{indices{0}{1}})
in Nc/(Na+Nb-Nc), idx=0],
each [idx]<List.Count(indices),
each [x=
let
Nc= List.Count(List.Intersect({attrib{indices{[idx]+1}{0}},attrib{indices{[idx]+1}{1}}})),
Na= List.Count(attrib{indices{[idx]+1}{0}}),
Nb= List.Count(attrib{indices{[idx]+1}{1}})
in Nc/(Na+Nb-Nc), idx=[idx]+1],
each [x]),
//calculate matching list of customers being compared
compares = List.Generate(
()=>[x=customers{indices{0}{0}} & " - " & customers{indices{0}{1}}, idx=0],
each [idx]<List.Count(indices),
each [x=customers{indices{[idx]+1}{0}} & " - " & customers{indices{[idx]+1}{1}}, idx=[idx]+1],
each [x]
)
in
{compares} & {tanimotos},
//combine the two lists into a new table for output
results = Table.FromColumns(
similarities,
type table[Customer Pairs=text, Similarity=number])
in results
Solution 2:[2]
Sort the data by Item Type first. Assumes data is sheet named Sheet1 in columns A and B.
Option Explicit
Sub analyse()
Dim ws As Worksheet
Dim i As Long, n As Long, lastrow As Long
Dim dict As Object, ar, item As String, cust As String
Dim x As Long, y As Long, r As Long, pc As Single
Dim t0 As Single: t0 = Timer
Set dict = CreateObject("Scripting.Dictionary")
Set ws = Sheets("Sheet1")
With ws
lastrow = .Cells(.Rows.Count, 2).End(xlUp).Row
For r = 2 To lastrow
cust = Trim(.Cells(r, 1))
item = Trim(.Cells(r, 2))
' increment count for customer
dict(cust) = dict(cust) + 1
' if next line another item create report
If .Cells(r + 1, 2) <> item Then
n = Sheets.Count
Set ws = Sheets.Add(after:=Sheets(n))
ws.Name = item
ar = dict.keys
' columns
For x = 0 To UBound(ar)
' row 1 namee
ws.Cells(1, x + 2) = ar(x)
' rows
For y = 0 To UBound(ar)
' avoid comparing with self
If y <> x Then
' percent of total
pc = dict(ar(x)) / (dict(ar(x)) + dict(ar(y)))
ws.Cells(x + 2, y + 2) = Format(pc, "0%")
End If
' column 1 names
If y = 0 Then
ws.Cells(x + 2, 1) = ar(x)
End If
Next
Next
dict.RemoveAll
i = i + 1
End If
Next
End With
MsgBox i & " items analysed", vbInformation, Format(Timer - t0, "0.0 secs")
End Sub
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | |
| Solution 2 | CDP1802 |


