-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmakegroups.py
58 lines (45 loc) · 1.87 KB
/
makegroups.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Replicates the make group tool in Alteryx"""
# Import necessary modules
import networkx as nx
import pandas as pd
from itertools import repeat
from networkx.algorithms import community
def makegroups(df):
    """
    Replicates the functionality of the make group tool in Alteryx.

    Builds an undirected graph whose edges are the rows of ``df`` and
    labels every linked key with the group it ends up in.

    Args:
        df: A pandas DataFrame with exactly two columns; each row is one
            pair of linked keys. The frame is read by column position and
            is not modified.

    Returns:
        A DataFrame with the columns ``Group`` and ``key`` holding one row
        per member of every group found (an empty frame when there are no
        groups), or ``None`` when an error occurred. Errors are printed,
        not raised.
    """
    try:
        if df.shape[1] != 2:
            raise ValueError(
                "expected a dataframe with two columns, got %d" % df.shape[1]
            )
        # Read the two columns by position instead of renaming them, so the
        # caller's dataframe keeps its own column labels.
        left_keys = df.iloc[:, 0].tolist()
        right_keys = df.iloc[:, 1].tolist()
        # Creating an empty graph
        G = nx.Graph()
        # Adding nodes from both columns (effectively a merge of the two
        # lists) before the edges keeps the node insertion order stable.
        G.add_nodes_from(left_keys)
        G.add_nodes_from(right_keys)
        # Each input row is one edge between its two keys
        G.add_edges_from(zip(left_keys, right_keys))
        # Generates groups
        # TODO check that size of smallest clique, here 2 is dynamic
        groups = [list(members) for members in community.k_clique_communities(G, 2)]
        # One (group name, key) row per member, for however many groups were
        # found. The group name is the first member of the group.
        # NOTE(review): the communities are sets, so which member comes
        # first is not an ordering guarantee - confirm whether a
        # deterministic label (e.g. the smallest key) is wanted.
        rows = [(members[0], key) for members in groups for key in members]
        # Built in one step; DataFrame.append was removed in pandas 2.0.
        result = pd.DataFrame(rows, columns=['Group', 'key'])
        print("Success: Groups created")
        return result
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print("Error: Function - makegroups:", e)
        return None