'Finding the best supersets in a list based on intersections
I have a file including lines as follows,
finalInjectionList is input file: [0, 2, 3] [0, 2, 3, 4] [0, 3] [1, 2, 4] [2, 3] [2, 3, 4]
Here [0, 2, 3, 4] and [1, 2, 4] are the best supersets for my problem and I want to write them to an outputfile. Because those are supersets of some other elements and NOT subsets of any line.
my code:
import ast
import itertools
def get_data(filename):
with open(filename, 'r') as fi:
data = fi.readlines()
return data
def get_ast_set(line):
return set(ast.literal_eval(line))
def check_infile(datafile, savefile):
list1 = [get_ast_set(row) for row in get_data(datafile)]
print(list1)
outlist = []
#for i in range(len(list1)):
for a, b in itertools.combinations(list1, 2):
if a.issuperset(b):
with open(savefile, 'a') as fo:
fo.writelines(str(a))
if __name__ == "__main__":
datafile = str("./finalInjectionList")
savefile = str("./filteredSets" )
check_infile(datafile, savefile)
My code writes all supersets, e.g {2, 3, 4} also. But {0, 2, 3, 4} covers {2, 3, 4} already, so I do not want to write {2, 3, 4} to output file.
Is there any suggestion?
Solution 1:[1]
Solved by modifying routine check_infile
import ast
import itertools
# A union by rank and path compression based
# program to detect cycle in a graph
from collections import defaultdict
def findparent(d, node):
"""Goes through chain of parents, until we reach node which is its own parent
Meaning, no node has it has a subset"""
if d[node] == node:
return node
else:
return findparent(d, d[node])
def get_data(filename):
with open(filename, 'r') as fi:
data = fi.readlines()
return data
def get_ast_set(line):
return set(ast.literal_eval(line))
def check_infile(datafile, savefile):
"""Find minimum number of supersets as follows:
1) identify superset of each set
2) Go through superset chains (findparents) to find set of nodes which are supersets (roots) """
list1 = [get_ast_set(row) for row in get_data(datafile)]
print(list1)
outlist = []
n = len(list1)
# Initially each node is its own parent (i.e. include self as superset)
# Here parent means superset
parents = {u:u for u in range(n)}
for u in range(n):
a = list1[u]
for v in range(u+1, n):
b = list1[v]
if a.issuperset(b):
parents[v] = u # index u is superset of v
elif b.issuperset(a):
parents[u] = v # index v is superset of u
# Print root nodes
roots = set()
for u in range(n):
roots.add(findparent(parents, u))
with open(savefile, 'w') as fo:
for i in roots:
fo.write(str(list1[i]))
fo.write('\n')
if __name__ == "__main__":
datafile = str("./finalInjectionList.txt")
savefile = str("./filteredSets.txt" )
check_infile(datafile, savefile)
Test File (finalInjectionList.txt)
[0, 2, 3]
[0, 2, 3, 4]
[0, 3]
[1, 2, 4]
[2, 3]
[2, 3, 4]
Output File (filteredSets.txt)
{0, 2, 3, 4}
{1, 2, 4}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |
