"""Build a FreeMind mind map from a del.icio.us account's tags and posts.

The script downloads the account's recent posts and its tag list, works out
for every pair of tags whether one contains the other, clusters tags by a
similarity score, and writes the resulting tree to ``MINDMAP_PATH``.

Run it as a script; importing the module only defines the functions.
"""
import base64
import sys
from xml.dom import minidom
from xml.sax.saxutils import escape

try:
    import urllib2
except ImportError:  # Python 3 exposes the same Request/urlopen API here
    import urllib.request as urllib2

user = ''  # write here your username
passwd = ''  # write here your password

# Similarity thresholds applied level by level when clustering tags.
distances_constant = [0.333333, 0.4, 0.5, 1]

# Alternative output location used previously: user + '2.mm'
MINDMAP_PATH = '/home/pietrosp/public_html/maps/delicious/clusterdelimind.mm'

# NOTE(review): every markup literal in the file this was recovered from had
# been reduced to a bare '\n' (so each '%' formatting call raised TypeError).
# The literals below are a reconstruction as FreeMind <node> elements; the
# attribute names, their order and the map version are assumptions - confirm
# against a known-good copy of the script or a generated .mm file.
MAP_OPEN = '<map version="0.8.0">\n'
ROOT_NODE = '<node TEXT="%s" LINK="http://del.icio.us/%s">\n'
TAG_NODE = '<node LINK="http://del.icio.us/%s/%s" TEXT="%s (%d,%d)">\n'
POST_NODE = '<node LINK="%s" TEXT="%s"/>\n'
LABELLED_GROUP = '<node TEXT="%s">\n'
PLAIN_GROUP = '<node TEXT="">\n'
NODE_CLOSE = '</node>\n'
MAP_CLOSE = '</node>\n</map>\n'

# Shared state read by the functions below; main() fills these in.
posts = []        # <post> DOM elements whose "tag" attribute is "+a+ +b+ ..."
postsbytag = {}   # tag name -> list of posts carrying that tag
all_dict = {}     # all_dict[row][column] -> relation code (see compareTags)
mindmap = None    # output file object, open only while main() is writing


###########################################################################
def _xml_attr(value):
    "Returns value escaped for use inside a double-quoted XML attribute"
    # Escape markup characters first and only then introduce character
    # references, so the '&' of '&#NNN;' is never escaped a second time.
    escaped = escape(value, {'"': '&quot;'})
    return escaped.encode('ascii', 'xmlcharrefreplace').decode('ascii')


def _has_tag(post, category):
    "True when the post's rewritten tag attribute contains '+category+'"
    return ('+' + category + '+') in post.getAttribute("tag")


def _write_post(post):
    "Writes one bookmark as a leaf node linking to its href"
    mindmap.write(POST_NODE % (_xml_attr(post.getAttribute("href")),
                               _xml_attr(post.getAttribute("description"))))


###########################################################################
def auth_open(url):
    "Opens url with HTTP Basic authentication built from user and passwd"
    request = urllib2.Request(url)
    credentials = '%s:%s' % (user, passwd)
    if not isinstance(credentials, bytes):
        credentials = credentials.encode('utf-8')
    # b64encode never inserts line breaks, unlike the legacy encodestring
    # which wraps long input and would corrupt the header.
    token = base64.b64encode(credentials)
    if not isinstance(token, str):
        token = token.decode('ascii')
    request.add_header("Authorization", "Basic %s" % token)
    return urllib2.urlopen(request)


###########################################################################
def compareTags(category_column, category_row):
    """Compares two tags over the global posts list.

    Returns 'I' when every post carrying one tag also carries the other,
    '>' when every post of the column tag also has the row tag (but not the
    reverse), '<' for the opposite inclusion, 'D' when no post has both,
    'B' when no post carries either tag, and otherwise the number of posts
    that carry both tags (partial overlap).
    """
    column_in_row = 1  # stays true iff every post in column is also in row
    row_in_column = 1  # stays true iff every post in row is also in column
    intersection = 0   # number of posts carrying both tags
    for post in posts:
        in_column = _has_tag(post, category_column)
        in_row = _has_tag(post, category_row)
        if in_column and in_row:
            intersection = intersection + 1
        elif in_column:
            column_in_row = 0
        elif in_row:
            row_in_column = 0
        # otherwise the post has nothing to do with these two tags
    if intersection == 0 and column_in_row == 1 and row_in_column == 1:
        # B for BUG! With an empty Tag all the empty tags go 'B'
        return 'B'
    if intersection == 0:
        return 'D'
    if column_in_row == 1 and row_in_column == 1:
        return 'I'
    if column_in_row == 1:
        return '>'
    if row_in_column == 1:
        return '<'
    return intersection


###########################################################################
def GetInverseRelation(relation):
    "Given a relation gives the inverse: '>'->'<' '<'->'>', anything else is returned unchanged"
    if relation == '<':
        return '>'
    if relation == '>':
        return '<'
    return relation


def GetSubTags(category):
    "Given a Tag returns the list of tags directly under it"
    below = [name for name, relation in all_dict[category].items()
             if relation == '>']
    # Keep only direct children: drop any tag that sits under another tag of
    # the same list.  A new list is built because removing from the list
    # being iterated skips the element after each removal.
    return [s for s in below
            if not any(all_dict[s][t] == '<' for t in below)]


def GetEquivalentTags(category):
    "Given a Tag returns the list of tags related to it by 'I' (itself included when present)"
    return [name for name, relation in all_dict[category].items()
            if relation == 'I']


def GetSimilarTags(tag, DistanceTable, distance, Tags):
    "Returns the tags of Tags whose DistanceTable score with tag is at least distance"
    return [othertag for othertag in Tags
            if DistanceTable[tag][othertag] >= distance]


###########################################################################
def InsertSimilarTagsAll(tag, DistanceTable, d, tags):
    """Recursively writes the tags similar to tag, one level per score.

    Each recursion step raises the threshold to the smallest table value
    above d found among the similar tags.  Returns the list of tags that
    were written.
    """
    SimilarTags = GetSimilarTags(tag, DistanceTable, d, tags)
    if len(SimilarTags) <= 1:
        InsertMetaTag(tag)
        return [tag]
    wrap = len(tags) > len(SimilarTags)
    if wrap:
        # Use PLAIN_GROUP instead to hide the numbers on the branches.
        mindmap.write(LABELLED_GROUP % d)
    SimTagsDone = []
    # Does not depend on the loop variable, so it is computed once.
    Distances = FindDistancesInTable(DistanceTable, SimilarTags, d)
    for simtag in SimilarTags:
        if simtag in SimTagsDone:
            continue
        if len(Distances) == 0:
            InsertMetaTag(simtag)
        else:
            SimTagsDone = SimTagsDone + InsertSimilarTagsAll(
                simtag, DistanceTable, min(Distances), SimilarTags)
    if wrap:
        mindmap.write(NODE_CLOSE)
    return SimilarTags


def InsertSimilarTags(tag, DistanceTable, distances, tags):
    "Inserts the tags that are similar and returns a list of the inserted tags"
    d = distances[0]
    SimilarTags = GetSimilarTags(tag, DistanceTable, d, tags)
    if len(SimilarTags) <= 1:
        InsertMetaTag(tag)
        return [tag]
    wrap = len(tags) > len(SimilarTags)
    if wrap:
        # Use LABELLED_GROUP % d instead to show the threshold on the branch.
        mindmap.write(PLAIN_GROUP)
    SimTagsDone = []
    for simtag in SimilarTags:
        if simtag in SimTagsDone:
            continue
        if len(distances) <= 1:
            InsertMetaTag(simtag)
        else:
            SimTagsDone = SimTagsDone + InsertSimilarTags(
                simtag, DistanceTable, distances[1:], SimilarTags)
    if wrap:
        mindmap.write(NODE_CLOSE)
    return SimilarTags


def InsertSimilarTagsOne(tag, DistanceTable, d, tags):
    "Inserts the tags that are at least as similar as d and returns a list of the inserted tags"
    SimilarTags = GetSimilarTags(tag, DistanceTable, d, tags)
    if len(SimilarTags) <= 1:
        InsertMetaTag(tag)
        return [tag]
    wrap = len(tags) > len(SimilarTags)
    if wrap:
        mindmap.write(LABELLED_GROUP % d)
    for simtag in SimilarTags:
        InsertMetaTag(simtag)
    if wrap:
        mindmap.write(NODE_CLOSE)
    return SimilarTags


###########################################################################
def Min(A, B):
    "returns the minimum of two values"
    if A < B:
        return A
    return B


###########################################################################
def MakeDistanceTable(Tags):
    "returns the non_transitive table: table[row][column] = DistanceTagTag(row, column)"
    table = {}
    for row in Tags:
        one_row = {}
        for column in Tags:
            one_row[column] = DistanceTagTag(row, column)
        table[row] = one_row
    sys.stdout.write("table distances done ")
    return table


def MakeTableTransitive(Table):
    """Raises entries of Table in place until Table[a][c] >= Min(Table[a][b], Table[b][c]).

    Passes are repeated until one makes no change; the number of changes is
    printed after every pass.  Returns the same Table object.
    """
    Tags = list(Table.keys())
    changes = 1
    while changes > 0:
        changes = 0
        for a in Tags:          # the row
            for c in Tags:      # the column
                best = Table[a][c]
                for b in Tags:  # the corner
                    minabc = Min(Table[a][b], Table[b][c])
                    # Compare with the running best so a later corner can
                    # never overwrite a larger value found earlier.
                    if minabc > best:
                        best = minabc
                        Table[a][c] = best
                        changes = changes + 1
        print("changes= %d" % changes)
    return Table


def FindDistancesInTable(Table, Tags, PreviousMin=1):
    "takes a table and returns a list of all the values among a set of tags that are above PreviousMin"
    return [Table[tagA][tagB]
            for tagA in Tags
            for tagB in Tags
            if Table[tagA][tagB] > PreviousMin]


def DistanceTagTag(TagA, TagB):
    """Returns the similarity score between Tag A and Tag B.

    1 for identical or nested tags, 0 for disjoint ones; for a partial
    overlap, the average of the shared fraction of each tag's posts.
    """
    relation = all_dict[TagA][TagB]
    if relation in ('I', '<', '>'):
        return 1
    if relation in ('D', 'B'):
        # 'B' (neither tag has any post) cannot be converted to a number, so
        # it is scored like a disjoint pair instead of raising ValueError.
        return 0
    dA = float(relation) / len(postsbytag[TagA])
    dB = float(relation) / len(postsbytag[TagB])
    return (dA + dB) / 2  # I average them


###########################################################################
def InsertMetaTag(category):
    "Insert the Tag in the mindmap and internal tags if necessary for one level"
    subcategories = GetSubTags(category)
    # A tag with no post among the downloaded ones has no postsbytag entry.
    own_posts = postsbytag.get(category, [])
    mindmap.write(TAG_NODE % (_xml_attr(user), _xml_attr(category),
                              _xml_attr(category), len(own_posts),
                              len(subcategories)))
    if subcategories:
        if len(subcategories) > 1:
            DistanceTable = MakeTableTransitive(
                MakeDistanceTable(subcategories))
            # NOTE(review): this literal and its twin after the loop may have
            # been a wrapper node whose markup was lost; left as found.
            mindmap.write('\n')
            SubTagsDone = []
            for subcategory in subcategories:
                if subcategory in SubTagsDone:
                    continue
                # InsertSimilarTagsAll(subcategory, DistanceTable, 0.000001,
                # subcategories) is the alternative clustering.
                SubTagsDone = SubTagsDone + InsertSimilarTags(
                    subcategory, DistanceTable, distances_constant,
                    subcategories)
            mindmap.write('\n')
        else:  # len(subcategories) == 1
            InsertMetaTag(subcategories[0])
        # Posts already present in a subcategory must not appear again here.
        posts_to_add = [post for post in own_posts
                        if not any(_has_tag(post, sub)
                                   for sub in subcategories)]
    else:
        posts_to_add = own_posts
    # Several leftover posts next to subcategories are set apart as a group.
    grouped = len(posts_to_add) > 1 and len(subcategories) > 0
    if grouped:
        # NOTE(review): probably a wrapper node originally; left as found.
        mindmap.write('\n')
    for post in posts_to_add:
        _write_post(post)
    if grouped:
        mindmap.write('\n')
    mindmap.write(NODE_CLOSE)


###########################################################################
###########################################################################
def _load_account():
    "Downloads posts and tags, fills posts and returns the list of tag names"
    recent_post_data = auth_open(
        'http://del.icio.us/api/posts/recent?count=1000').read()
    tag_data = auth_open('http://del.icio.us/api/tags/get?').read()
    found = minidom.parseString(
        recent_post_data).documentElement.getElementsByTagName("post")
    xdoc = minidom.parseString(tag_data).documentElement
    del posts[:]
    for post in found:
        # "a b" becomes "+a+ +b+" so that '+name+' only matches whole tags.
        rewritten = post.getAttribute("tag").replace(" ", "+ +")
        post.setAttribute("tag", "+" + rewritten + "+")
        posts.append(post)
    return [tag.getAttribute("tag")
            for tag in xdoc.getElementsByTagName("tag")]


def _index_posts_by_tag(tag_names):
    "Fills postsbytag with the posts carrying each tag"
    postsbytag.clear()
    for post in posts:
        for category in tag_names:
            if _has_tag(post, category):
                postsbytag.setdefault(category, []).append(post)


def _build_relation_table(tag_names):
    "Fills all_dict with the relation of every pair of tags"
    all_dict.clear()
    for category_row in tag_names:
        one_dict = {}
        for category_column in tag_names:
            if category_column == category_row:
                one_dict[category_column] = 'I'
            elif category_column in all_dict:
                # The mirrored pair was computed already: reuse it.
                one_dict[category_column] = GetInverseRelation(
                    all_dict[category_column][category_row])
            else:
                one_dict[category_column] = compareTags(
                    category_column, category_row)
        all_dict[category_row] = one_dict


def main():
    "Fetches the account data and writes the clustered mind map"
    global mindmap
    tag_names = _load_account()
    _index_posts_by_tag(tag_names)
    _build_relation_table(tag_names)

    print("Number tags %d" % len(tag_names))
    # Main tags are the ones that are not under any other tag.
    MainTags = [category for category in tag_names
                if '<' not in all_dict[category].values()]
    print("main tags: " + " ".join(MainTags)
          + " Number Main tags %d" % len(MainTags))

    DistanceTable = MakeTableTransitive(MakeDistanceTable(MainTags))

    # Opened only once all data is in hand, so a failed download does not
    # leave a truncated map behind, and always closed afterwards.
    mindmap = open(MINDMAP_PATH, 'w')
    try:
        mindmap.write(MAP_OPEN)
        mindmap.write('\n')
        mindmap.write(ROOT_NODE % (_xml_attr(user), _xml_attr(user)))
        TagsDone = []
        for tag in MainTags:
            if tag in TagsDone:
                continue
            # InsertSimilarTagsAll(tag, DistanceTable, 0.000001, MainTags)
            # is the alternative clustering.
            TagsDone = TagsDone + InsertSimilarTags(
                tag, DistanceTable, distances_constant, MainTags)
        mindmap.write(MAP_CLOSE)
    finally:
        mindmap.close()


if __name__ == '__main__':
    main()