
Group   Attribute

Cheese  Dairy
Cheese  Food
Cheese  Curd
Cow     Dairy
Cow     Food
Cow     Animal
Cow     Hair
Cow     Stomachs
Yogurt  Dairy
Yogurt  Food
Yogurt  Curd
Yogurt  Fruity


Group   TotalCount   LikeGroup   CommonWords  PCT

Cheese  3            Yogurt      3            100.0
Cow     5            Cheese      2            40.0
Yogurt  4            Cheese      3            75.0




# Attribute lists for the groups being compared.
Yogurt = ['Dairy', 'Food', 'Curd', 'Fruity']
Cheese = ['Dairy', 'Food', 'Curd']
Cow = ['Dairy', 'Food', 'Animal', 'Hair', 'Stomachs']

# Fraction of Yogurt's attributes that the other group also has.
# float() forces true division; a plain `/` between two ints truncates
# to 0 on Python 2.
Yogurt_Cheese = len(set(Yogurt) & set(Cheese)) / float(len(Yogurt))  # 3 / 4 = 0.75

Yogurt_Cow = len(set(Yogurt) & set(Cow)) / float(len(Yogurt))  # 2 / 4 = 0.5




import pandas as pd
from itertools import permutations

# Sample data: one row per (Group, Attribute) pair.
# NOTE: 'cow' is listed with only 'dairy' and 'food' in this sample, so its
# TotalCount below is 2.
df = pd.DataFrame(
    data=[
        ['cheese', 'dairy'],
        ['cheese', 'food'],
        ['cheese', 'curd'],
        ['cow', 'dairy'],
        ['cow', 'food'],
        ['yogurt', 'dairy'],
        ['yogurt', 'food'],
        ['yogurt', 'curd'],
        ['yogurt', 'fruity'],
    ],
    columns=['Group', 'Attribute'],
)

# Group -> number of attribute rows (the TotalCount). Selecting the column
# before counting yields a flat {group: count} dict directly, which avoids
# indexing into dict.values() (not subscriptable on Python 3).
count_dct = df.groupby('Group')['Attribute'].count().to_dict()

unique_grp = df['Group'].unique()  # distinct groups, in order of appearance
unique_atr = df['Attribute'].unique()  # distinct attributes

# Every ordered (Group, LikeGroup) pair of distinct groups. Order matters
# because the percentage is relative to the first group's TotalCount.
combos = list(permutations(unique_grp, 2))
comp_df = pd.DataFrame(data=combos, columns=['Group', 'LikeGroup'])
comp_df['CommonWords'] = 0

for atr in unique_atr:
    # Groups that carry this attribute.
    temp_df = df[df['Attribute'] == atr]

    # Each ordered pair of those groups shares `atr`, so bump its counter.
    for comb in permutations(temp_df['Group'], 2):
        is_pair = (comp_df['Group'] == comb[0]) & (comp_df['LikeGroup'] == comb[1])
        comp_df.loc[is_pair, 'CommonWords'] += 1

# Attach each row's TotalCount by looking up its Group in count_dct.
comp_df['TotalCount'] = comp_df['Group'].map(count_dct)

# Percentage of Group's attributes also present in LikeGroup, rounded to a
# whole number.
comp_df['PCT'] = (comp_df['CommonWords'] * 100.0 / comp_df['TotalCount']).round()


    Group LikeGroup  CommonWords  TotalCount  PCT
0  cheese       cow            2           3   67
1  cheese    yogurt            3           3  100
2     cow    cheese            2           2  100
3     cow    yogurt            2           2  100
4  yogurt    cheese            3           4   75
5  yogurt       cow            2           4   50


10-15 23:30