all (414)
# 6030048821 (16.12) 1 (2021-02-26 21:58)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both inputs are treated as sets, so a word repeated inside one list is
    counted once (the previous version counted every repeat in ``words_1``
    toward the intersection).

    Returns:
        ``len(A & B) / len(A | B)`` as a float, or ``0.0`` when both inputs
        are empty instead of dividing by zero.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union = set_1 | set_2
    if not union:
        return 0.0
    return len(set_1 & set_2) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank every tweet by Jaccard similarity to the query; keep the best *n*.

    Returns a list of ``[tweet_id, coefficient]`` pairs ordered by descending
    coefficient, ties resolved by ascending tweet id. Tweets with a zero
    coefficient are not filtered out.
    """
    scored = [
        [position, jaccard(norm_tweets[position], norm_query)]
        for position in range(len(norm_tweets))
    ]
    ranked = sorted(scored, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its text wrapped to *print_width*.

    Output is a blank line, ``#<id> (<coef rounded to 2 places>)``, then the
    content split on single spaces and greedily packed into lines that start
    with a two-space indent.

    A word that cannot fit on a line by itself is printed alone on its own
    line; the previous loop never advanced past such a word and spun forever.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(' '):
        candidate = line_words + [word]
        # Only wrap when the line already holds something, so every word is
        # guaranteed to be placed and the loop always makes progress.
        if line_words and len(indent + ' '.join(candidate)) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words = candidate
    print(indent + ' '.join(line_words))

#--------------------------------------------
# 6030239321 (13.76) 2 (2021-03-01 22:40)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (shared words / all distinct words).

    The inputs are converted to sets first: the earlier length-based formula
    gave wrong results whenever either list contained a repeated word, and
    divided by zero when both lists were empty.

    Returns:
        A float in ``[0, 1]``; ``0.0`` for two empty inputs.
    """
    first, second = set(words_1), set(words_2)
    shared = len(first & second)
    total = len(first) + len(second) - shared  # size of the union
    return shared / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the *n* tweets most similar to the query.

    Each tweet is scored with ``jaccard`` against ``norm_query``. The result
    is a list of ``[tweet_id, coefficient]`` pairs (``tweet_id`` is the index
    into ``norm_tweets``), sorted by descending coefficient and then by
    ascending id.

    The previous version appended the first tweet twice, compared a word list
    against a float (``TypeError``) once more than ``n`` candidates existed,
    and returned unsorted word lists instead of ids.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        scored.append([tweet_id, jaccard(tweet_words, norm_query)])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped to *print_width* columns.

    Output is a blank line, ``#<id> (<coef rounded to 2 places>)``, then the
    content packed greedily into lines indented by two spaces.

    Fixes over the previous version: the header had a stray leading space and
    no parentheses around the coefficient, and the last content line was left
    without a terminating newline.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    indent = "  "
    line = None  # None until the first word has been placed
    for word in tweet_content.split(' '):
        if line is None:
            line = indent + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = indent + word
    print(line)


#--------------------------------------------
# 6030380021 (12.13) 3 (2021-03-01 23:56)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    kept = []
    for entry in words:
        if entry in kept:
            continue
        kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return (distinct shared words) / (distinct words overall).

    Duplicates inside either list are counted once. Raises
    ``ZeroDivisionError`` when both inputs are empty.
    """
    # Distinct words of words_1 that also occur in words_2.
    shared = []
    for word in words_1:
        if word not in shared and word in words_2:
            shared.append(word)

    # Distinct words across both lists.
    combined = []
    for source in (words_1, words_2):
        for word in source:
            if word in combined:
                continue
            combined.append(word)

    return len(shared) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the *n* tweets most similar to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    The previous version never sorted, so it simply returned the first ``n``
    tweets regardless of their similarity.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped to *print_width* columns.

    Output is a blank line, ``#<id> (<coef rounded to 2 places>)``, then the
    content packed greedily into two-space-indented lines.

    The previous version decided where to wrap from absolute character
    offsets (``print_width * n - 2``), so the words appended after a wrap
    could push a line past ``print_width``. Line length is now measured per
    line.
    """
    lines = []
    current = None  # None until the first word has been placed
    for word in tweet_content.split(" "):
        if current is None:
            current = "  " + word
        elif len(current) + 1 + len(word) > print_width:
            lines.append(current)
            current = "  " + word
        else:
            current += " " + word
    lines.append(current)
    header = "#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")"
    print("\n" + header + "\n" + "\n".join(lines))

#--------------------------------------------
# 6030924521 (15.15) 4 (2021-03-01 16:55)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Changes from the previous version:
      * the result is no longer rounded to two decimals here; rounding inside
        the metric created artificial ties when ranking, and rounding for
        display is the printer's job;
      * repeated words in ``words_1`` no longer inflate the intersection;
      * two empty inputs return ``0.0`` instead of raising.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    return len(set_1 & set_2) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    The previous version returned ``[]`` as soon as fewer than ``n`` tweets
    had a positive coefficient, throwing away the matches already found, and
    raised ``ValueError`` from ``max()`` when there were no tweets at all.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print ``#<id> (<coef>)`` followed by the content wrapped to *print_width*.

    The content is split on single spaces (empty pieces are ignored) and
    packed greedily into lines indented by two spaces. Nothing is printed
    after the header when the content has no words.

    The previous version located the wrap position with ``A.index(word)``,
    which finds an *earlier* copy of a repeated word and so re-printed text or
    looped forever; it also looped forever on a word longer than the line and
    could index into an empty list after the final wrap.
    """
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = [piece for piece in tweet_content.split(' ') if piece]
    line = ''
    for word in words:
        if not line:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    if line:
        print(line)

#--------------------------------------------
# 6130097621 (14.89) 5 (2021-03-01 22:56)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous version built the union in two length-dependent branches;
    when both lists had the same length *both* branches ran and the second
    list was added twice, shrinking the result (e.g. ``['a','b']`` vs
    ``['b','c']`` gave 1/5 instead of 1/3). Repeated words were also counted
    once per matching pair.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first = set(words_1)
    shared = first.intersection(words_2)
    combined = first.union(words_2)
    if not combined:
        return 0.0
    return len(shared) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    This replaces a hand-rolled selection that raised ``ValueError`` /
    ``IndexError`` whenever fewer than ``n`` tweets matched, and whose bubble
    sort was capped at 50 passes.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    return sorted(matches, key=lambda pair: (-pair[1], pair[0]))[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped to *print_width* columns.

    Output is two blank lines (``print("\\n")``, kept from the original),
    ``#<id> (<coef rounded to 2 places>)``, then the whitespace-separated
    words packed greedily into two-space-indented lines. Nothing follows the
    header when the content has no words.

    The previous loop dropped any word too long for a line (and then never
    printed the final line), and emitted a stray indent-only line when the
    last word filled a line exactly.
    """
    print("\n")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")

    indent = "  "
    line_words = []
    for word in tweet_content.split():
        # Wrap only when the line already holds a word, so every word is
        # printed even if it is wider than the line.
        if line_words and len(indent + " ".join(line_words + [word])) > print_width:
            print(indent + " ".join(line_words))
            line_words = []
        line_words.append(word)
    if line_words:
        print(indent + " ".join(line_words))




#--------------------------------------------
# 6130917221 (9.98) 6 (2021-03-01 23:19)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    kept = []
    for entry in words:
        if entry in kept:
            continue
        kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return (shared distinct words) / (all distinct words) of two lists.

    Both inputs are de-duplicated with ``get_unique`` first. Raises
    ``ZeroDivisionError`` when both inputs are empty.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)

    shared = sum(1 for word in first if word in second)

    # Distinct words across both de-duplicated lists.
    merged = []
    for word in first + second:
        if word in merged:
            continue
        merged.append(word)

    return shared / len(merged)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the *n* tweets most similar to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    The previous version sorted ``[-id, coefficient]`` pairs, so the id was
    the primary key and the coefficient never influenced the ranking: the
    first ``n`` tweets were always returned (and ``n == 0`` returned all of
    them through ``z[-0:]``).
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped to *print_width* columns.

    Output is a blank line, ``#<id> (<coef rounded to 2 places>)``, then the
    content (split on single spaces) packed greedily into two-space-indented
    lines.

    The previous version printed every wrapped line *while* scanning and only
    emitted the header afterwards, so all but the last content line appeared
    above the header. Lines are now collected first and printed after it.
    """
    words = tweet_content.split(' ')
    lines = []
    current = '  ' + words[0]  # str.split(' ') always yields at least one item
    for word in words[1:]:
        if len(current) + 1 + len(word) > print_width:
            lines.append(current)
            current = '  ' + word
        else:
            current += ' ' + word
    lines.append(current)

    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    for line in lines:
        print(line)

#--------------------------------------------
# 6130924621 (7.65) 7 (2021-03-01 23:58)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists as a float.

    The previous version returned ``round(x / y)``, which rounds to the
    nearest *integer*, so every result collapsed to 0 or 1. The exact ratio
    is now returned. Two empty inputs give ``0.0`` instead of raising.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union = set_1 | set_2
    if not union:
        return 0.0
    return len(set_1 & set_2) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the *n* tweets most similar to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    Fixes over the previous version: the result list was created inside the
    scoring loop (``NameError`` for an empty tweet list), ``n`` larger than
    the number of tweets walked into negative indices and repeated entries,
    and the whole list was re-sorted after every insertion.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        scored.append([tweet_id, jaccard(tweet_words, norm_query)])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped to *print_width* columns.

    Output is a blank line, ``#<id> (<coef rounded to 2 places>)``, then the
    content (split on single spaces) packed greedily into two-space-indented
    lines.

    The header is now printed first; previously every wrapped line except the
    last one was printed before the header.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    line_words = []
    for word in tweet_content.split(' '):
        # A line that is still empty always accepts the word.
        if line_words and len('  ' + ' '.join(line_words + [word])) > print_width:
            print('  ' + ' '.join(line_words))
            line_words = []
        line_words.append(word)
    print('  ' + ' '.join(line_words))
#--------------------------------------------
# 6230041021 (17.00) 8 (2021-02-28 23:52)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Each list is de-duplicated with ``get_unique``; the union size is derived
    by inclusion-exclusion. Raises ``ZeroDivisionError`` when both inputs are
    empty.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = len([word for word in first if word in second])
    return shared / (len(first) + len(second) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    The previous version indexed ``range(n)`` into the match list and raised
    ``IndexError`` whenever fewer than ``n`` tweets matched; slicing returns
    however many matches exist.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width*.
    """
    print(' ')
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = '  '
    for word in tweet_content.split(' '):
        piece = str(word)
        if len(current) + len(piece) > print_width:
            lines.append(current)
            current = '  '
        current += piece + ' '
    # The pending line always starts with the indent, so it is always emitted.
    lines.append(current)
    for line in lines:
        print(line)

#--------------------------------------------
# 6230092021 (18.01) 9 (2021-02-28 12:54)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets. The previous element-by-element counting
    counted a repeated word once per occurrence in both the numerator and the
    denominator, and divided by zero when both lists were empty.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first, second = set(words_1), set(words_2)
    common = len(first & second)
    only_first = len(first - second)
    only_second = len(second - first)
    total = common + only_first + only_second
    return common / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Returns ``[tweet_id, coefficient]`` pairs ordered by descending
    coefficient and, for equal coefficients, ascending tweet id. Tweets with
    a zero coefficient are included.
    """
    ranked = sorted(
        ([tweet_id, jaccard(norm_tweets[tweet_id], norm_query)]
         for tweet_id in range(len(norm_tweets))),
        key=lambda entry: (-entry[1], entry[0]),
    )
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width*.
    """
    print()
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = '  '
    for word in tweet_content.split(' '):
        if len(current) + len(word) > print_width:
            lines.append(current)
            current = '  '
        current += word + ' '
    lines.append(current)
    print('\n'.join(lines))
#--------------------------------------------
# 6230131921 (18.01) 10 (2021-03-01 21:23)

def get_unique(words):
    """Return the distinct items of *words* in order of first appearance.

    The stray ``words.sort`` expression was removed: it referenced the method
    without calling it, so it did nothing except make the function fail on
    inputs that have no ``sort`` attribute (e.g. tuples).
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words

#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous version sorted the concatenated lists and counted adjacent
    equal pairs as "shared", which is only right when neither list contains
    a repeated word, and it raised ``IndexError`` on two empty lists.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    distinct = set_1 | set_2
    if not distinct:
        return 0.0
    shared = set_1 & set_2
    return len(shared) / len(distinct)

#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Returns ``[tweet_id, coefficient]`` pairs ordered by descending
    coefficient, ties broken by ascending tweet id. Tweets with a zero
    coefficient are included.

    The list is now sorted once after scoring; previously it was re-sorted
    after every single append.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]

#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width*.
    """
    print()
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = "  "
    for word in tweet_content.split(" "):
        if len(current) + len(word) > print_width:
            lines.append(current)
            current = "  "
        current += word + " "
    # The pending line always begins with the indent, so it is always printed.
    lines.append(current)
    for line in lines:
        print(line)

#--------------------------------------------
# 6230133121 (20.00) 11 (2021-03-01 16:54)

def get_unique(words):
    """Return the distinct items of *words* in order of first appearance.

    The previous version ran a second loop over ``words`` for every word only
    to test ``e == d``, which is already true for the word itself; that made
    the function needlessly cubic. A single pass gives the same result.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous version appended one entry per matching *pair*, so a word
    occurring twice in each list counted four times toward the intersection.
    Inputs are now treated as sets.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first, second = set(words_1), set(words_2)
    everything = first | second
    if not everything:
        return 0.0
    return len(first & second) / len(everything)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    Tweet ids now come from ``enumerate``. The previous
    ``norm_tweets.index(e)`` lookup returned the position of the *first*
    equal tweet, so two tweets with identical words shared one id (and each
    lookup cost a linear scan).
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are joined by single spaces behind a
    two-space indent; a line is flushed when adding the next word would make
    it longer than *print_width*.
    """
    print(" ")
    header = '#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')'
    print(header)
    line = ' '  # one space; the separator before the first word completes the indent
    for word in tweet_content.split(' '):
        extended = line + ' ' + word
        if len(extended) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = extended
    print(line)

#--------------------------------------------
# 6230153721 (17.95) 12 (2021-03-01 18:38)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets: previously every repeat of a shared word in
    ``words_1`` was added to the intersection, and two empty lists caused a
    division by zero.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union = set_1 | set_2
    if not union:
        return 0.0
    return len(set_1 & set_2) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Each tweet is de-duplicated with ``get_unique`` and scored with
    ``jaccard(norm_query, tweet)``. Returns ``[tweet_id, coefficient]`` pairs
    ordered by descending coefficient, ties broken by ascending tweet id.
    Tweets with a zero coefficient are included.

    This replaces a hand-maintained bounded list plus a tie-reordering pass
    with one sort producing the same ordering; the old code also raised
    ``IndexError`` for ``n == 0``.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(norm_query, get_unique(tweet_words))
        scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    The content is split on any whitespace; words are joined by single spaces
    behind a two-space indent and a line is flushed when the next word would
    push it past *print_width*. Nothing follows the header when the content
    has no words.
    """
    print()
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    words = tweet_content.split()
    final_index = len(words) - 1
    line = ' '  # one space; the separator before the first word completes the indent
    for position, word in enumerate(words):
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
        if position == final_index:
            print(line)

#--------------------------------------------
# 6230154321 (18.01) 13 (2021-03-01 20:27)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets: previously each repeat of a shared word in
    ``words_1`` was counted toward the intersection, and two empty lists
    caused a division by zero.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first = set(words_1)
    shared = first.intersection(words_2)
    combined = first.union(words_2)
    return len(shared) / len(combined) if combined else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Each tweet is de-duplicated with ``get_unique`` before scoring. Returns
    ``[tweet_id, coefficient]`` pairs ordered by descending coefficient, ties
    broken by ascending tweet id. Tweets with a zero coefficient are
    included.

    Fixes over the previous version: ``e[-n:]`` returned *every* tweet when
    ``n == 0``, and the list was re-sorted after each append.
    """
    scored = [
        [tweet_id, jaccard(get_unique(tweet_words), norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when ``line + word`` would be
    longer than *print_width*.
    """
    print(' ')
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = '  '
    for word in tweet_content.split(' '):
        if len(current) + len(word) > print_width:
            lines.append(current)
            current = '  '
        current += word + ' '
    lines.append(current)
    print('\n'.join(lines))
#--------------------------------------------
# 6230444321 (16.94) 14 (2021-03-01 21:39)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets. The previous running count started from
    ``len(words_1)`` and added one per unmatched entry of ``words_2``, so
    repeated words inflated both the shared count and the union, and two
    empty lists divided by zero.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first, second = set(words_1), set(words_2)
    same = len(first & second)
    total = len(first) + len(second - first)  # |A| plus what only B adds
    return same / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs sorted by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    Fixes over the previous version: ``range(n)`` indexing raised
    ``IndexError`` when fewer than ``n`` tweets matched, and ``jaccard`` was
    evaluated twice for every matching tweet.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    The content is split on any whitespace; words are joined by single spaces
    behind a two-space indent and wrapped so that no line holding more than
    one word exceeds *print_width*.

    The final line is now always printed. Previously it was skipped when an
    identical line had already been produced (``message not in content``),
    which silently lost the end of tweets with repeated text. A first word
    wider than the line no longer produces a whitespace-only line either.
    """
    print("\n#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    lines = []
    message = " "  # one space; the separator before the first word completes the indent
    for word in tweet_content.split():
        candidate = message + " " + word
        # message == " " means no word has been placed yet: always accept it.
        if len(candidate) <= print_width or message == " ":
            message = candidate
        else:
            lines.append(message)
            message = "  " + word
    lines.append(message)
    for line in lines:
        print(line)


#--------------------------------------------
# 6230585121 (9.33) 15 (2021-03-01 23:57)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    kept = []
    for entry in words:
        if entry in kept:
            continue
        kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous version counted every repeat found while scanning the
    concatenated lists as a shared word, so a word repeated inside a single
    list was treated as common to both. The intersection is now computed
    between the two inputs as sets.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    distinct = set_1 | set_2
    if not distinct:
        return 0.0
    return len(set_1 & set_2) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Returns ``[tweet_id, coefficient]`` pairs ordered by descending
    coefficient, ties broken by ascending tweet id. ``tweet_id`` is the
    zero-based index into ``norm_tweets``, matching the other implementations
    in this file. Tweets with a zero coefficient are included.

    Fixes over the previous version: ids were off by one (``i + 1``), the
    function raised ``IndexError`` when no two tweets tied, and the tie
    "repair" spliced all tied entries into one place, breaking the order
    whenever more than one group of ties existed.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    The content is wrapped with ``textwrap`` so that each line, including its
    two-space indent, fits in *print_width* columns; words are never split,
    so a word wider than the line gets a line to itself. The previous version
    ignored ``print_width`` and printed the content on one line.
    """
    import textwrap  # local import: the file has no top-level import block

    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    wrapped = textwrap.wrap(
        tweet_content,
        width=print_width,
        initial_indent='  ',
        subsequent_indent='  ',
        break_long_words=False,
        break_on_hyphens=False,
    )
    # textwrap returns [] for blank content; keep printing the bare indent
    # line the original produced in that case.
    for line in wrapped or ['  ']:
        print(line)

#--------------------------------------------
# 6231004021 (20.00) 16 (2021-02-27 13:06)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated with ``get_unique``; the result is the count
    of words present in both divided by the count of distinct words overall.
    Raises ``ZeroDivisionError`` when both inputs are empty.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = [word for word in first if word in second]
    everything = get_unique(words_1 + words_2)
    return len(shared) / len(everything)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a positive similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs ordered by descending Jaccard
    coefficient; tweets with equal coefficients keep ascending id order
    because the sort is stable.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        score = jaccard(tweet_words, norm_query)
        if not score > 0:
            continue
        matches.append([tweet_id, score])

    # Stable sort on the negated score: ties stay in insertion (id) order.
    matches.sort(key=lambda pair: -pair[1])
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are written after a two-space indent, each
    followed by a space. A new indented line is started when the word plus
    the characters already on the line would exceed ``print_width - 2``.
    """
    print()
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))

    budget = print_width - 2  # columns available after the indent
    pieces = ['  ']
    used = 0
    for word in tweet_content.split(' '):
        if used + len(word) > budget:
            pieces.append('\n  ')
            used = 0
        pieces.append(word + ' ')
        used += len(word) + 1
    print(''.join(pieces))


    # --------------------------------------------
# 6231008621 (19.48) 17 (2021-03-01 20:55)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous version derived the intersection size as
    ``len(words_1) + len(words_2) - len(union)``, which only holds when
    neither list contains a repeated word; it also divided by zero for two
    empty lists. Both inputs are now reduced to sets first.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    n_union = len(set_1 | set_2)
    if n_union == 0:
        return 0.0
    n_intersect = len(set_1) + len(set_2) - n_union
    return n_intersect / n_union
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs ordered by descending Jaccard
    coefficient, ties broken by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        score = jaccard(tweet_words, norm_query)
        if score == 0:
            continue
        scored.append([tweet_id, score])
    return sorted(scored, key=lambda pair: (-pair[1], pair[0]))[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    The coefficient is shown with exactly two decimals (``%.2f``). The
    content is split on single spaces and packed greedily into lines indented
    by two spaces.

    Fixes over the previous version: every line started with three spaces
    (the indent plus the word separator), and the wrap test ignored that
    separator so lines could be wider than ``print_width``.
    """
    print()
    print("#%d (%.2f)" % (tweet_id, round(jc_coef, 2)))
    indent = "  "
    line_words = []
    for word in tweet_content.split(" "):
        candidate = indent + " ".join(line_words + [word])
        # Wrap only when the line already holds a word, so a single over-long
        # word is still printed on its own line.
        if line_words and len(candidate) > print_width:
            print(indent + " ".join(line_words))
            line_words = []
        line_words.append(word)
    print(indent + " ".join(line_words))


#--------------------------------------------
# 6231012021 (20.00) 18 (2021-02-28 22:34)

def get_unique(words):
    """Return the distinct items of *words*, keeping first-seen order."""
    kept = []
    for entry in words:
        if entry in kept:
            continue
        kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets. The previous element-by-element counting
    counted a repeated word once per occurrence, and two empty lists caused a
    division by zero.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first, second = set(words_1), set(words_2)
    shared = len(first & second)
    total = shared + len(first - second) + len(second - first)
    if total == 0:
        return 0.0
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a non-zero similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs ordered by descending Jaccard
    coefficient, ties broken by ascending tweet id.

    ``jaccard`` is now evaluated once per tweet (it used to run twice for
    every match), and the negate/sort/negate passes are replaced by a single
    keyed sort with the same ordering.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width*.
    """
    print(' ')
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = "  "
    for word in tweet_content.split(" "):
        if len(current) + len(word) > print_width:
            lines.append(current)
            current = "  "
        current += word + " "
    lines.append(current)
    for line in lines:
        print(line)
#--------------------------------------------
# 6231019521 (20.00) 19 (2021-03-01 17:58)

def get_unique(words):
    """Return the distinct items of *words* in sorted order.

    The caller's list is left untouched. The previous version sorted
    ``words`` in place and inserted a ``' '`` sentinel into it, so every call
    modified the argument, and a genuine ``' '`` entry was dropped because it
    matched the sentinel.
    """
    ordered = sorted(words)
    unique_words = []
    for word in ordered:
        # In sorted order, duplicates are adjacent.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets. Previously every repeat of a shared word in
    ``words_2`` was counted toward the intersection, a ``' '`` word was lost
    to the sort sentinel, and two empty lists caused a division by zero.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union = set_1 | set_2
    if not union:
        return 0.0
    return len(set_1 & set_2) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to *n* tweets with a positive similarity to the query.

    Produces ``[tweet_id, coefficient]`` pairs ordered by descending Jaccard
    coefficient, ties broken by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        score = jaccard(tweet_words, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    ranked = sorted(matches, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width*.
    """
    print()
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = '  '
    for word in tweet_content.split(' '):
        if len(current) + len(word) > print_width:
            lines.append(current)
            current = '  '
        current += word + ' '
    lines.append(current)
    print('\n'.join(lines))

#--------------------------------------------
# 6231205921 (14.00) 20 (2021-03-01 20:20)

def get_unique(words):
    """Return the distinct items of *words* in sorted order.

    Works on a sorted copy so the caller's list is no longer reordered in
    place, and drops adjacent duplicates in one pass instead of calling
    ``list.remove`` (a linear scan) for each duplicate.
    """
    ordered = sorted(words)
    return [
        word
        for position, word in enumerate(ordered)
        if position == 0 or word != ordered[position - 1]
    ]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are treated as sets: the previous length-based formula was wrong
    whenever either list contained a repeated word, and two empty lists
    caused a division by zero.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    first, second = set(words_1), set(words_2)
    shared = len(first & second)
    total = len(first) + len(second) - shared  # size of the union
    return shared / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Returns ``[tweet_id, coefficient]`` pairs ordered by descending
    coefficient, ties broken by ascending tweet id. Tweets with a zero
    coefficient are included.

    The previous conversion loop rebound ``x`` to a single ``[id, coef]``
    pair on each pass, so the function ended up returning a slice of the
    lowest-ranked tweet's pair instead of a list of pairs.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space; a line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width*.
    """
    print()
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    lines = []
    current = "  "
    for word in tweet_content.split(" "):
        fits = len(current) + len(word) <= print_width
        if not fits:
            lines.append(current)
            current = "  "
        current = current + word + " "
    lines.append(current)
    print("\n".join(lines))



#--------------------------------------------
# 6231207121 (17.00) 21 (2021-02-28 02:13)

def get_unique(words):
    """Collect each distinct word once, in order of first appearance."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous version chose which list to scan by comparing the lists
    themselves (``words_1 >= words_2`` is a lexicographic comparison, not a
    size check), which made the result depend on word order whenever a list
    held repeated words. The intersection is symmetric, so the inputs are now
    simply reduced to sets.

    Returns:
        ``len(A & B) / len(A | B)`` as a float; ``0.0`` for two empty inputs.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    n_same = len(set_1 & set_2)
    n_union = len(set_1) + len(set_2) - n_same
    if n_union == 0:
        return 0.0
    return n_same / n_union
def top_n_similarity(norm_tweets, norm_query, n):
    """Rank all tweets by Jaccard similarity to the query; keep the first *n*.

    Returns ``[tweet_id, coefficient]`` pairs ordered by descending
    coefficient, ties broken by ascending tweet id. Tweets with a zero
    coefficient are included.

    The result is now taken with a slice; indexing ``range(n)`` raised
    ``IndexError`` when ``n`` exceeded the number of tweets.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Words (split on single spaces) are appended to a two-space-indented line,
    each followed by a space. A line is flushed when the next word would make
    ``len(line) + len(word)`` exceed *print_width* (the same test the
    original expressed with a separate running character counter).
    """
    print("")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")

    lines = []
    current = "  "
    for word in tweet_content.split(" "):
        if len(current) + len(word) > print_width:
            lines.append(current)
            current = "  "
        current += word + " "
    lines.append(current)

    for line in lines:
        print(line)

#--------------------------------------------
# 6231213921 (16.94) 22 (2021-03-01 23:55)

def get_unique( words ):
    """Return the distinct items of `words` in order of first appearance."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct += [word]
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` counts the entries of `words_1` that also occur in `words_2`.
    """
    shared = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2)
    return shared / (total - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs, best match first.

    Scores come from the module-level `jaccard`.  Ordering is descending
    score, then ascending tweet index.  Indices come from `enumerate`, so
    tweets with identical content keep their own index (``list.index``
    would map every duplicate to the first occurrence).
    """
    scored = [
        [jaccard(tweet, norm_query), -tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # Storing the negated index makes a descending sort break ties by the
    # smaller tweet index.
    scored.sort(reverse=True)
    # Slicing tolerates n larger than the number of tweets.
    return [[-neg_id, score] for score, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its whitespace-split words, wrapped.

    A word joins the current line when line + space + word is at most
    `print_width` characters; otherwise the line is printed and the word
    starts a new two-space-indented line.  Nothing is printed after the
    header when the content has no words.
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")

    words = tweet_content.split()
    line = ' '
    for word in words:
        if len(line) + len(word) + 1 > print_width:
            print(line)
            line = '  ' + word
        else:
            line += ' ' + word
    if words:
        print(line)

#--------------------------------------------
# 6231214521 (18.50) 23 (2021-03-01 23:27)

def get_unique( words ):
    """Return `words` without repeats, preserving first-seen order."""
    kept = []
    for candidate in words:
        already_seen = candidate in kept
        if not already_seen:
            kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` is the number of entries of `words_1` found in `words_2`.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    denominator = len(words_1) + len(words_2) - shared
    return shared / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the best `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from the module-level `jaccard`, computed once per tweet.
    Results are ordered by descending score, ties by ascending index.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([tweet_id, score])
    # One sort with a composite key replaces the previous chain of sorts
    # and the dead loop that referenced an undefined name.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Wrapped lines start with a single space and every word is followed by
    one space.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = " "
    for word in tweet_content.split(" "):
        if len(line) + len(word) > print_width:
            print(line)
            line = " "
        line += word + " "
    print(line)








#--------------------------------------------------------
# 6231220221 (14.73) 24 (2021-03-01 23:55)

def get_unique( words ):
    """Return the distinct entries of `words`, first occurrence first."""
    seen = []
    for entry in words:
        if entry in seen:
            continue
        seen.append(entry)
    return seen
def jaccard(words_1, words_2):
    """Return matches / (len(words_1) + len(words_2) - matches).

    `matches` counts the entries of `words_1` that occur in `words_2`.
    """
    matches = sum(1 for word in words_1 if word in words_2)
    return matches / (len(words_1) + len(words_2) - matches)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs, best match first.

    Scores come from the module-level `jaccard`; ordering is descending
    score, then ascending tweet index.  Each tweet keeps its own index even
    when another tweet has identical content, and asking for more results
    than there are tweets returns all of them instead of raising.
    """
    ranked = []
    for position, tweet in enumerate(norm_tweets):
        ranked.append([jaccard(tweet, norm_query), -position])
    # Negated positions make the descending sort prefer smaller indices.
    ranked.sort(reverse=True)
    top_n = []
    for score, neg_position in ranked[:max(n, 0)]:
        top_n.append([-neg_position, score])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its whitespace-split words, wrapped.

    A word joins the current line only while line + space + word stays
    strictly below `print_width`; otherwise it starts a new line indented
    by two spaces.  Nothing follows the header when there are no words.
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")

    words = tweet_content.split()
    lines = []
    current = ' '
    for word in words:
        if len(current) + len(word) + 1 < print_width:
            current += ' ' + word
        else:
            lines.append(current)
            current = '  ' + word
    if words:
        lines.append(current)

    for text in lines:
        print(text)

#--------------------------------------------
# 6231222521 (18.01) 25 (2021-03-01 15:36)

def get_unique( words ):
    """Return the unique items of `words` in first-seen order."""
    unique = []
    for word in words:
        if word in unique:
            continue
        unique += [word]
    return unique
def jaccard(words_1, words_2):
    """Return len(shared) / len(distinct words of both lists).

    `shared` holds every entry of `words_1` that also occurs in `words_2`
    (repeats in `words_1` are counted each time).
    """
    shared = [word for word in words_1 if word in words_2]
    combined = []
    for word in words_1 + words_2:
        if word not in combined:
            combined.append(word)
    return len(shared) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_index, score]`` pairs ranked by similarity.

    Scores come from `jaccard`; order is descending score, then ascending
    index.  The result is cut at ``min(len(norm_tweets), n)`` entries.
    """
    total = len(norm_tweets)
    ranked = []
    for tweet_id in range(total):
        ranked.append([-jaccard(norm_tweets[tweet_id], norm_query), tweet_id])
    ranked.sort()
    result = []
    for neg_score, tweet_id in ranked[:min(total, n)]:
        result.append([tweet_id, -neg_score])
    return result
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    The first word always opens the first line; later words wrap when
    they would push the line past `print_width`.  Lines are indented by
    two spaces and every word is followed by one space.
    """
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    words = tweet_content.split(" ")
    line = "  " + words[0] + " "
    for word in words[1:]:
        if len(line) + len(word) > print_width:
            print(line)
            line = "  "
        line += word + " "
    print(line)

#--------------------------------------------
# 6231223121 (15.55) 26 (2021-02-27 20:46)

def get_unique( words ):
    """Return the distinct items of `words` in sorted order.

    The caller's list is left untouched: sorting is done on a copy rather
    than in place.

    Args:
        words: list of mutually comparable items.

    Returns:
        A new sorted list with duplicates removed.
    """
    unique_words = []
    for word in sorted(words):
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / distinct for two word lists.

    `shared` counts entries of `words_1` found in `words_2`; `distinct` is
    the number of different words in the two lists combined, found by
    scanning the sorted concatenation for value changes.
    """
    merged = sorted(words_1 + words_2)
    # Keep each word that differs from its successor, plus the last one.
    distinct = [word for word, nxt in zip(merged, merged[1:]) if word != nxt]
    distinct.append(merged[-1])
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs, best match first.

    Scores come from the module-level `jaccard` and are returned exactly as
    computed.  The earlier ``round(1 - score, 2)`` round trip rounded every
    score to two decimals, which both lost precision in the result and
    could reorder tweets whose scores differ past the second decimal.
    Ordering is descending score, then ascending tweet index.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Words (split on single spaces) are packed greedily into lines indented
    by two spaces, joined by single spaces, so that no line exceeds
    `print_width`.  A single word wider than the limit is printed on a
    line of its own; previously such a word made the loop spin forever.
    """
    words = tweet_content.split(' ')
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line_words = []
    used = 1  # with the per-word +1 below this equals the printed length
    for word in words:
        needed = len(word) + 1
        # Only wrap when the line already holds something, so an over-long
        # word is still emitted.
        if line_words and used + needed > print_width:
            print('  ' + ' '.join(line_words))
            line_words = []
            used = 1
        line_words.append(word)
        used += needed
    print('  ' + ' '.join(line_words))

#--------------------------------------------
# 6231224821 (0.00) 27 (2021-03-01 23:58)

def get_unique( words ):
    """Return the distinct items of `words` in first-seen order.

    The argument is used as given: the function no longer overwrites it
    with text read from stdin, prints nothing, and keeps a deterministic
    order (a plain ``set`` does not).

    Args:
        words: list of hashable words.

    Returns:
        A new list without duplicates.
    """
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(words))
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The arguments are used as given instead of being replaced by lines read
    from stdin.

    Args:
        words_1: first list of hashable words.
        words_2: second list of hashable words.

    Returns:
        len(intersection) / len(union) of the two word sets, or 0.0 when
        both are empty.
    """
    first = set(words_1)
    second = set(words_2)
    union_size = len(first.union(second))
    if union_size == 0:
        return 0.0
    return len(first.intersection(second)) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_index, score]`` pairs ranked by similarity.

    Scores come from the module-level `jaccard`; order is descending score,
    then ascending index.  The result is cut at ``min(len(norm_tweets), n)``
    entries.
    """
    k = len(norm_tweets)
    ranked = [[-jaccard(norm_tweets[i], norm_query), i] for i in range(k)]
    ranked.sort()
    # Undo the negation and put the index first (the old code iterated an
    # undefined name here and raised NameError).
    ranked = [[tweet_id, -neg_score] for neg_score, tweet_id in ranked]
    return ranked[:min(k, n)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Output is a blank line, ``#<id> (<coef rounded to 2 places>)``, then
    the words in lines indented by two spaces, each word followed by one
    space.  Header spacing and indent match the other `show_tweet`
    implementations in this file.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = "  "
    # Iterate the words themselves; the old code indexed the word count
    # (an int), which raised TypeError on the first word.
    for word in tweet_content.split(" "):
        if len(line) + len(word) > print_width:
            print(line)
            line = "  "
        line += word + " "
    print(line)

#--------------------------------------------
# 6231510221 (18.01) 28 (2021-02-28 11:37)

def get_unique( words ):
    """Return `words` with repeats dropped, first occurrence kept."""
    collected = []
    for word in words:
        if word in collected:
            continue
        collected.append(word)
    return collected
def jaccard(words_1, words_2):
    """Return (repeats in the concatenation) / (distinct words).

    The concatenated list is de-duplicated with `get_unique`; the number
    of entries removed is taken as the overlap.
    """
    combined = words_1 + words_2
    distinct_count = len(get_unique(combined))
    repeated_count = len(combined) - distinct_count
    return repeated_count / distinct_count
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` ``[tweet_index, score]`` pairs by score.

    Scores come from `jaccard`.  The sort is stable, so equal scores stay
    in tweet order.
    """
    pairs = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    ranked = sorted(pairs, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Lines are indented by two spaces, each word is followed by one space,
    and a trailing line holding only the indent is not printed.
    """
    print()
    print("#" + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = "  "
    for word in tweet_content.split(' '):
        if len(current) + len(word) > print_width:
            print(current)
            current = "  "
        current = current + word + ' '
    if len(current) > 2:
        print(current)
#--------------------------------------------
# 6231511921 (18.01) 29 (2021-03-01 10:11)

def get_unique( words ):
    """Return the distinct items of `words` in first-seen order.

    Fixes two defects of the previous neighbour-comparison version: it
    returned an empty list whenever `words` held only one distinct value
    (e.g. ``['a']`` or ``['a', 'a']``), and it sorted the caller's list in
    place.

    Args:
        words: list of words.

    Returns:
        A new list without duplicates; `words` itself is not modified.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return common / (len(words_1) + len(words_2) - common).

    `common` counts the entries of `words_1` present in `words_2`.
    """
    common = len([word for word in words_1 if word in words_2])
    union_size = (len(words_1) + len(words_2)) - common
    return common / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` ``[tweet_index, score]`` pairs by score.

    Scores come from `jaccard`; the stable descending sort keeps equal
    scores in tweet order.
    """
    scored = []
    for index, tweet in enumerate(norm_tweets):
        scored.append([index, jaccard(tweet, norm_query)])
    scored = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Lines are indented by two spaces and each word is followed by one
    space.  The last line is skipped when it is exactly two characters
    long.
    """
    print("")
    print("#" + str(tweet_id) + " " + '(' + str(round(jc_coef, 2)) + ')')
    line = "  "
    for word in tweet_content.split(' '):
        too_long = (len(line) + len(word)) > print_width
        if too_long:
            print(line)
            line = "  "
        line += word + " "
    if len(line) != 2:
        print(line)

#--------------------------------------------
# 6231707621 (20.00) 30 (2021-02-26 13:59)

def get_unique( words ):
    """Return the distinct items of `words` in sorted order.

    Works on a sorted copy, so the caller's list keeps its original order
    (the previous version sorted it in place).

    Args:
        words: list of mutually comparable items.

    Returns:
        A new sorted list without duplicates; empty for empty input.
    """
    ordered = sorted(words)
    # In sorted order a value is the last of its run when it differs from
    # its successor; the final element always closes a run.
    unique_words = [cur for cur, nxt in zip(ordered, ordered[1:]) if cur != nxt]
    if ordered:
        unique_words.append(ordered[-1])
    return unique_words
def jaccard(words_1, words_2):
    """Return overlap / distinct for two word lists.

    Each list is de-duplicated with `get_unique`, the results are joined,
    and the join is de-duplicated again; the entries removed by that last
    step are the overlap.
    """
    merged = get_unique(words_1)
    second = get_unique(words_2)
    merged.extend(second)
    distinct = get_unique(merged)
    overlap = len(merged) - len(distinct)
    return overlap / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from `jaccard`.  Order is descending score, then ascending
    tweet index (the index is stored negated while sorting).
    """
    scored = []
    for index, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([coef, -index])
    # Ascending sort, then read the last n entries back to front.
    best = sorted(scored)[-1:-n-1:-1]
    return [[int(-1 * neg_index), coef] for coef, neg_index in best]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Every word but the last is appended to a pending line, which is
    printed as soon as the following word would no longer fit.  The last
    word is then printed with whatever is pending, or on separate
    unindented lines when it does not fit.
    """
    words = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    pending = ''
    for current, following in zip(words, words[1:]):
        pending += ' ' + current
        if print_width - 1 - len(pending) <= len(following):
            print(' ' + pending)
            pending = ''
    last = words[-1]
    if len(pending) + len(last) + 1 < print_width:
        print(' ' + pending + ' ' + last)
    else:
        print(pending)
        print(last)




#--------------------------------------------
# 6231709921 (13.33) 31 (2021-02-26 23:36)

def get_unique( words ):
    """Return the distinct items of `words`, first occurrence first."""
    picked = []
    for word in words:
        if word not in picked:
            picked += [word]
    return picked
def jaccard(words_1, words_2):
    """Return shared / distinct for two word lists.

    `shared` counts entries of `words_1` found in `words_2`; `distinct` is
    the size of `get_unique` applied to the concatenation.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    distinct = len(get_unique(words_1 + words_2))
    return shared / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs, best match first.

    Scores come from the module-level `jaccard`.  Ordering is descending
    score, ties broken by ascending tweet index.

    The previous hand-written grouping loop never terminated when fewer
    than `n` entries could be collected (for example a single tweet) and
    advanced its scan position incorrectly; one keyed sort replaces it.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Words (split on single spaces) accumulate in a pending line that is
    printed, stripped and indented by two spaces, once the next word would
    make the line reach past `print_width`.  A word that is too wide on
    its own gets a line to itself; previously it made the loop spin
    forever printing empty lines.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    pending = ''
    for word in tweet_content.split(' '):
        current = pending.strip()
        # Wrap only when something is pending, so every word is consumed.
        if current and len(current) + 2 + len(word) >= print_width:
            print('  ' + current)
            pending = ''
        pending += word + ' '
    print('  ' + pending.strip())

#--------------------------------------------
# 6330170421 (18.01) 32 (2021-03-01 01:58)

def get_unique( words ):
    """Return the distinct items of `words` in order of first appearance."""
    unique = []
    for word in words:
        if word in unique:
            continue
        unique.append(word)
    return unique
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two lists' distinct words.

    Both lists are de-duplicated first, so repeated words count once.
    """
    uniq_1, uniq_2 = [], []
    for source, target in ((words_1, uniq_1), (words_2, uniq_2)):
        for word in source:
            if word not in target:
                target.append(word)

    common = [word for word in uniq_1 if word in uniq_2]
    # Union size by inclusion-exclusion on the de-duplicated lists.
    union_size = len(uniq_1) + len(uniq_2) - len(common)
    return len(common) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` ``[tweet_index, score]`` pairs by similarity.

    Scores come from `jaccard` (converted to float).  Order is descending
    score, then ascending tweet index.
    """
    ranked = sorted(
        [-1 * float(jaccard(tweet, norm_query)), index]
        for index, tweet in enumerate(norm_tweets)
    )
    return [[index, -1 * float(neg_score)] for neg_score, index in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    A line holding one space precedes the header.  Words are joined by
    single spaces and printed with a two-space indent; a word starts a new
    line when joined text plus the word reaches ``print_width - 2``.
    """
    words = tweet_content.split(' ')
    print(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    pending = []
    for word in words:
        joined = ' '.join(pending)
        if len(joined) + len(word) < (print_width - 2):
            pending.append(word)
        else:
            print('  ' + joined)
            pending = [word]

    if pending:
        print('  ' + ' '.join(pending))




#--------------------------------------------
# 6330171021 (18.01) 33 (2021-03-01 14:18)

def get_unique( words ):
    """Return `words` without repeats, preserving first-seen order."""
    output = []
    for word in words:
        if word in output:
            continue
        output.append(word)
    return output
def jaccard(words_1, words_2):
    """Return intersect / (len(words_1) + len(words_2) - intersect).

    `intersect` counts the entries of `words_1` that occur in `words_2`.

    Returns:
        The coefficient as a float, or 0 when the denominator is zero
        (both lists empty).  The zero check used to fall through to the
        division and raise ZeroDivisionError.
    """
    intersect = len([x for x in words_1 if x in words_2])
    union = len(words_1) + len(words_2) - intersect
    if union == 0:
        return 0
    return intersect / union
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` ``[tweet_index, score]`` pairs by similarity.

    Scores come from the module-level `jaccard` (query passed first).
    Order is descending score, then ascending tweet index.  The list is
    sorted once after all scores are collected rather than after every
    append.
    """
    scored = [
        [-1 * jaccard(norm_query, tweet), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort()
    return [[tweet_id, -1 * neg_score] for neg_score, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Each word is added to the pending text with a leading space; the text
    is printed behind one more space (a two-space indent in total) when
    the next word would push it past ``print_width - 2``.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    pending = ''
    for word in tweet_content.split(' '):
        if len(pending) + len(word) > print_width - 2:
            print(' ' + pending)
            pending = ''
        pending += ' ' + str(word)
    print(' ' + pending)

#--------------------------------------------
# 6330172721 (20.00) 34 (2021-02-27 19:03)

def get_unique( words ):
    """Return the distinct items of `words`, first occurrence first."""
    found = []
    for word in words:
        if word in found:
            continue
        found.append(word)
    return found
def jaccard(words_1, words_2):
    """Return repeated / distinct for two word lists.

    `repeated` counts entries of `words_1` that occur in `words_2`;
    `distinct` is the number of different words across both lists.
    """
    repeated = sum(1 for word in words_1 if word in words_2)
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    return repeated / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from `jaccard`; the stable descending sort keeps equal
    scores in tweet order.
    """
    candidates = []
    for index, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            candidates.append([index, score])
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return candidates[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Words are joined by single spaces.  The first line starts from a
    one-space placeholder (giving a two-space indent once joined); later
    lines start with the indent glued to their first word.
    """
    print("\n" + '#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    parts = [" "]
    for word in tweet_content.split(' '):
        candidate = parts + [word]
        if len(" ".join(candidate)) > print_width:
            print(" ".join(parts))
            parts = ["  " + word]
        else:
            parts = candidate
    print(" ".join(parts))



#--------------------------------------------
# 6330173321 (19.68) 35 (2021-02-27 02:12)

def get_unique( words ):
    """Return the distinct items of `words` in first-seen order.

    The input list is only read.  The previous version popped every element
    off `words`, leaving the caller's list empty after the call.

    Args:
        words: list of words.

    Returns:
        A new list without duplicates.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` counts the entries of `words_1` that occur in `words_2`.
    """
    shared = len([word for word in words_1 if word in words_2])
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from `jaccard`.  Order is descending score, then ascending
    tweet index (scores are negated while sorting).
    """
    negated = []
    for index, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            negated.append([-score, index])
    negated.sort()
    ranked = [[index, -neg_score] for neg_score, index in negated]
    return ranked[0:n:]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its text wrapped to `print_width`.

    Output is a line holding one space, ``#<id> (<coef rounded to 2
    places>)``, then the words (split on single spaces) joined by spaces
    in lines indented by two spaces.  The final line is always printed,
    even when it holds no words.
    """
    re = round(jc_coef,2)
    print(" ")
    print("#"+str(tweet_id)+" ("+str(re)+")")
    content = tweet_content.split(' ')
    line = []   # words collected for the line being built
    long = 0    # running width estimate of the current line (after the indent)
    g = 0       # 20 after a word was placed on the current line, 0 right after a flush
    for i in content:
        if long+len(i) <= print_width-2:
            # Word fits: keep it and count it plus one separating space.
            line.append(i)
            long += len(i)+1
            g = 20
        else:
            if line != []:
                # Flush the words gathered so far.
                print("  "+" ".join(line))
                line = []
            elif g==0:
                # Nothing pending and the previous step was a flush:
                # print this word on a line of its own.
                line.append(i)
                print("  "+" ".join(line))
                line.remove(i)
            long = 0
            g = 0
            if long+len(i) > print_width-2:
                # NOTE(review): when a non-empty line was just flushed above,
                # a word wider than print_width-2 reaches this `continue`
                # without having been printed or stored -- confirm intended.
                long = 0
                continue
            else:
                # Start the next line with this word.
                long += 2
                line.append(i)
                long += len(i)-1
                g = 20
    print("  "+" ".join(line))


#--------------------------------------------
# 6330174021 (19.15) 36 (2021-02-28 13:46)

def get_unique(words):
    """Return the distinct items of `words` in first-seen order."""
    uniques = []
    for item in words:
        if item in uniques:
            continue
        uniques.append(item)
    return uniques
def jaccard(words_1, words_2):
    """Return shared / total for two word lists.

    The shorter list is scanned against the longer one (ties keep
    `words_1` as the scanned list).  `shared` counts scanned entries found
    in the longer list; `total` is the longer list's length plus the
    scanned entries that were not found.
    """
    if len(words_1) > len(words_2):
        shorter, longer = words_2, words_1
    else:
        shorter, longer = words_1, words_2
    shared = 0
    extra = 0
    for word in shorter:
        if word in longer:
            shared += 1
        else:
            extra += 1
    return shared / (len(longer) + extra)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with non-zero score.

    Scores come from `jaccard`; the stable descending sort keeps equal
    scores in tweet order.
    """
    scored = []
    for index, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            scored.append([index, score])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Words are joined by single spaces behind a two-space indent.  The
    first word always opens the first line; the width estimate is the sum
    of word lengths (an empty word counts as 1) plus the word count.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    current = [words[0]]
    chars = len(words[0])
    for word in words[1:]:
        if chars + len(current) + len(word) + 2 > print_width:
            print('  ' + ' '.join(current))
            current = []
            chars = 0
        current.append(word)
        if word != '':
            chars += len(word)
        else:
            chars += 1
    print('  ' + ' '.join(current))

#--------------------------------------------
# 6330176221 (18.01) 37 (2021-03-01 13:47)

def get_unique( words ):
    """Return the distinct entries of `words`, first occurrence first."""
    result = []
    for word in words:
        if word not in result:
            result += [word]
    return result
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two lists' distinct words.

    Both lists are de-duplicated first, so repeated words count once.
    """
    def distinct(sequence):
        # Order-preserving de-duplication using list membership.
        out = []
        for item in sequence:
            if item not in out:
                out.append(item)
        return out

    first = distinct(words_1)
    second = distinct(words_2)
    shared = sum(1 for item in first if item in second)
    return shared / (len(first) + len(second) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` ``[tweet_index, score]`` pairs by similarity.

    Scores come from `jaccard`.  Order is descending score, then ascending
    tweet index (the index is stored negated while sorting).
    """
    ranked = sorted(
        [jaccard(tweet, norm_query), -index]
        for index, tweet in enumerate(norm_tweets)
    )
    ranked.reverse()
    restored = [[-neg_index, score] for score, neg_index in ranked]
    return restored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    A line holding one space precedes the header.  Words are joined by
    single spaces behind a two-space indent; a word starts a new line when
    joined text plus the word reaches ``print_width - 2``.  The last line
    is always printed.
    """
    words = tweet_content.split(' ')
    print(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    pending = []
    for word in words:
        joined = ' '.join(pending)
        if len(joined) + len(word) < (print_width - 2):
            pending.append(word)
            continue
        print('  ' + joined)
        pending = [word]
    print('  ' + ' '.join(pending))

#--------------------------------------------
# 6330177921 (20.00) 38 (2021-02-27 03:07)

def get_unique( words ):
    """Return `words` with repeats removed, first occurrence kept."""
    unique = []
    for word in words:
        if word in unique:
            continue
        unique.append(word)
    return unique
def jaccard(words_1, words_2):
    """Return shared / distinct for two word lists.

    `shared` counts entries of `words_1` found in `words_2`; `distinct` is
    the number of different words in the concatenation of both lists.
    """
    shared = len([word for word in words_1 if word in words_2])
    distinct = []
    for word in words_1 + words_2:
        if word in distinct:
            continue
        distinct.append(word)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from the module-level `jaccard`.  Order is descending
    score, then ascending tweet index.  Asking for more results than there
    are tweets returns what is available instead of raising IndexError.
    """
    scored = [
        [jaccard(tweet, norm_query), -tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # Negated indices make the descending sort prefer smaller indices.
    scored.sort(reverse=True)
    top_n = []
    for score, neg_id in scored[:max(n, 0)]:
        if score <= 0:
            # Sorted by score, so everything after this is zero as well.
            break
        top_n.append([-neg_id, score])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    Lines are indented by two spaces and every word is followed by one
    space; the last line is always printed.
    """
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    indent = "  "
    line = indent
    for word in tweet_content.split(" "):
        if len(line) + len(word) > print_width:
            print(line)
            line = indent
        line = line + word + " "
    print(line)

#--------------------------------------------
# 6330178521 (18.47) 39 (2021-03-01 00:58)

def get_unique( words ):
    """Return the distinct items of `words` in first-seen order."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen += [word]
    return seen
def jaccard(words_1, words_2):
    """Return similar / (len(words_1) + len(words_2) - similar).

    `similar` counts the entries of `words_1` that occur in `words_2`.
    """
    similar = sum(1 for word in words_1 if word in words_2)
    combined_length = len(words_1) + len(words_2)
    return similar / (combined_length - similar)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from `jaccard`.  Order is descending score, then ascending
    tweet index (the index is stored negated while sorting).
    """
    scored = []
    for index, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([score, -index])
    scored.sort()
    scored.reverse()
    ranked = [[neg_index * -1, score] for score, neg_index in scored]
    return ranked[:n:]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its whitespace-split words, wrapped.

    Each word is prefixed with a space; the first word of the output gets
    one extra space so every line is indented by two.  A word that would
    push the tracked width past ``print_width - 1`` starts a new line.
    """
    words = tweet_content.split()

    print()
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    used = 0
    line = ''
    for word in words:
        piece = ' ' + str(word)
        if used + len(piece) > (print_width - 1):
            print(line)
            line = '  ' + word
            used = len(line)
            continue
        if used == 0:
            line += ' ' + piece
        else:
            line += piece
        used += len(piece)
    if line:
        print(line)

#--------------------------------------------
# 6330179121 (15.02) 40 (2021-02-27 22:27)

def get_unique( words ):
    """Return the distinct items of `words`, first occurrence first."""
    chosen = []
    for word in words:
        if word in chosen:
            continue
        chosen.append(word)
    return chosen
def jaccard(words_1, words_2):
    """Return len(shared) / len(distinct) for two word lists.

    `shared` holds every entry of `words_1` found in `words_2`; `distinct`
    holds each different word across both lists once.
    """
    shared = [word for word in words_1 if word in words_2]
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` ``[tweet_index, score]`` pairs by similarity.

    Scores come from the module-level `jaccard`.  Order is descending
    score, ties broken by ascending tweet index.  A single keyed sort
    replaces the manual grouping of equal scores, and an empty tweet list
    now yields ``[]`` instead of raising IndexError.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its whitespace-split words, wrapped.

    Lines are indented by two spaces with words joined by single spaces.
    A word joins the current line while ``len(line) <= print_width -
    len(word)`` (the same rule as before).  Every word is now printed: an
    over-long final word used to be dropped, and a word exactly
    `print_width` long made the loop spin forever.
    """
    words = tweet_content.split()
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    has_words = False
    for word in words:
        # Wrap only when the line already holds a word, so a word that can
        # never fit still gets a line of its own.
        if has_words and len(line) > print_width - len(word):
            print(line)
            line = ' '
            has_words = False
        line = line + ' ' + word
        has_words = True
    if has_words:
        print(line)



#--------------------------------------------
# 6330180721 (17.00) 41 (2021-02-26 11:05)

def get_unique(words):
    """Return `words` without repeats, preserving first-seen order."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1,words_2):
    """Return shared / total over the distinct words of both lists.

    Each list is de-duplicated with `get_unique`; `shared` counts the
    second list's distinct words found in the first, and `total` is the
    size of `get_unique` applied to both distinct lists joined.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = len([word for word in second if word in first])
    everything = get_unique(first + second)
    return shared / len(everything)
def top_n_similarity(norm_tweets,norm_query,n):
    """Return up to `n` ``[tweet_index, score]`` pairs, best match first.

    Scores come from the module-level `jaccard`.  Order is descending
    score, ties broken by ascending tweet index.  The result is shorter
    than `n` when there are fewer tweets; the earlier version raised
    IndexError in that case and on an empty tweet list.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to `print_width`.

    All words but the last are followed by one space.  The last word is
    printed without a trailing space, on the current line when it fits
    and on a new two-space-indented line otherwise.
    """
    words = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = '  '
    for word in words[:-1]:
        if len(line + word) > print_width:
            print(line)
            line = '  '
        line += word + ' '
    final = words[-1]
    if len(line + final) > print_width:
        print(line)
        print('  ' + final)
    else:
        print(line + final)
#--------------------------------------------
# 6330181321 (20.00) 42 (2021-03-01 16:14)

def get_unique( words ):
    """Return the distinct items of `words` in first-seen order."""
    result = []
    for word in words:
        if word not in result:
            result += [word]
    return result
def jaccard(words_1, words_2):
    """Return shared / union over the distinct words of both lists.

    Both lists are de-duplicated with `get_unique`; the union size is
    derived by inclusion-exclusion.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = sum(1 for word in first if word in second)
    union_size = len(first) + len(second) - shared
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` ``[tweet_index, score]`` pairs with score > 0.

    Scores come from `jaccard`.  Order is descending score; tweets with
    equal scores are listed by ascending index.
    """
    positive = []
    for index, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            positive.append([index, score])
    # One keyed sort gives the same order as sorting by score descending
    # and then bubbling equal-score neighbours into index order.
    positive.sort(key=lambda pair: (-pair[1], pair[0]))
    return positive[:n:]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Each text line carries a two-space indent that counts toward the width.
    A word wider than the line is printed alone on its own line.
    """
    print('  ')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '  # becomes "  word ..." once the first word is appended
    for word in tweet_content.split(' '):
        # Only flush a line that already holds at least one word.
        if len(line) > 1 and len(line) + 1 + len(word) > print_width:
            print(line)
            line = ' '
        line += ' ' + word
    print(line)





#--------------------------------------------
# 6330182021 (14.12) 43 (2021-03-01 09:52)

def get_unique(words):
    """Return each distinct word once, keeping first-occurrence order."""
    items = list(words)
    # Keep an item only if nothing before it compares equal.
    return [item for pos, item in enumerate(items) if item not in items[:pos]]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The shared-word count is taken from a real set intersection, so
    repeated words inside one list no longer inflate it. Two empty lists
    yield 0.0 instead of raising ZeroDivisionError.
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to ``norm_query``.

    Each result is ``[tweet_id, coefficient]``; results are ordered by
    descending coefficient, then ascending tweet id. Tweets sharing no word
    with the query (coefficient 0) are left out, matching the other
    implementations in this file.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)  # computed once per tweet
        if coef > 0:
            keyed.append([coef, -tweet_id])
    keyed.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces (counted toward the width) and carry no
    trailing space. A word wider than the line is printed on its own line,
    so the loop always terminates.
    """
    print(" ")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    indent = "  "
    current = []
    for word in tweet_content.split(' '):
        if current and len(indent + " ".join(current + [word])) > print_width:
            print(indent + " ".join(current))
            current = []
        current.append(word)
    print(indent + " ".join(current))


#--------------------------------------------
# 6330183621 (16.94) 44 (2021-02-25 23:42)

def get_unique(words):
    """Collect the words of ``words`` without repeats, in original order."""
    result = []
    for candidate in words:
        if candidate not in result:
            result += [candidate]
    return result
def jaccard(words_1, words_2):
    """Return |A ∩ B| / |A ∪ B| for the distinct words of two lists.

    The intersection is symmetric, so the former "longer list first"
    branching is unnecessary. Two empty lists yield 0.0 instead of raising
    ZeroDivisionError.
    """
    distinct_1 = set(words_1)
    distinct_2 = set(words_2)
    union_size = len(distinct_1 | distinct_2)
    if union_size == 0:
        return 0.0
    return len(distinct_1 & distinct_2) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only tweets with a positive Jaccard coefficient are ranked. Ties are
    ordered by ascending tweet id. When fewer than ``n`` tweets match, the
    shorter list is returned (previously negative indexing wrapped around
    or raised IndexError).
    """
    ranked = sorted(
        (-jaccard(tweet_words, norm_query), tweet_id)
        for tweet_id, tweet_words in enumerate(norm_tweets)
    )
    positive = [[tweet_id, -neg] for neg, tweet_id in ranked if neg < 0]
    return positive[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Words are split on any whitespace. Each text line is indented by two
    spaces, which count toward the width. Single-word tweets are printed,
    every word is width-checked before it is appended, and an empty tweet
    prints only the header.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    current = []
    for word in tweet_content.split():
        if current and len(indent + ' '.join(current + [word])) > print_width:
            print(indent + ' '.join(current))
            current = []
        current.append(word)
    if current:
        print(indent + ' '.join(current))

#--------------------------------------------
# 6330184221 (20.00) 45 (2021-03-01 14:38)

def get_unique(words):
    """Return the words of ``words`` with later repeats dropped."""
    distinct = []
    for word in words:
        if distinct.count(word) == 0:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return shared-word count divided by the size of the combined list.

    The combined size is every entry of ``words_1`` plus the entries of
    ``words_2`` that do not occur in ``words_1``.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    only_second = sum(1 for word in words_2 if word not in words_1)
    return shared / (len(words_1) + only_second)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only tweets with a positive Jaccard coefficient are included; ties are
    ordered by ascending tweet id. The coefficient is computed once per
    tweet.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header, then the text wrapped to ``print_width``.

    Each printed line is one leading space plus the accumulated
    " word word ..." text, giving a two-space indent.
    """
    print("")
    print("#" + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    width = int(print_width)
    line = ""
    for word in tweet_content.split(' '):
        # +2 covers the separating space and the extra indent space.
        if len(line) + len(word) + 2 > width:
            print(" " + line)
            line = ""
        line += " " + word

    if line:
        print(" " + line)



#--------------------------------------------
# 6330185921 (20.00) 46 (2021-02-27 19:21)

def get_unique(words):
    """Return the distinct words of ``words`` in sorted order.

    The input list is left untouched; previously it was sorted in place and
    had a sentinel string appended to it.
    """
    return sorted(set(words))
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    Distinct words are counted with sets rather than a sort-and-sentinel
    scan. Two empty lists yield 0.0 instead of raising ZeroDivisionError.
    """
    first = set(words_1)
    second = set(words_2)
    union_size = len(first | second)
    if union_size == 0:
        return 0.0
    return len(first & second) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Tweets with coefficient 0 are skipped; ties are ordered by ascending
    tweet id. The list is sorted once and sliced once.
    """
    matches = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    matches = [pair for pair in matches if pair[1] > 0]
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. The
    final line is flushed after the loop, so a word that merely equals the
    last word no longer triggers an early, duplicated print.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        # len(line) > 1 means the line already holds a word.
        if len(line) > 1 and len(line) + len(word) + 1 > print_width:
            print(line)
            line = ' '
        line = line + ' ' + word
    print(line)




#--------------------------------------------
# 6330186521 (17.75) 47 (2021-03-01 22:30)

def get_unique(words):
    """Return the distinct entries of ``words`` in first-seen order."""
    unique_words = []
    for entry in words:
        if entry in unique_words:
            continue
        unique_words.append(entry)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared distinct words divided by all distinct words."""
    distinct_1 = get_unique(words_1)
    distinct_2 = get_unique(words_2)
    union_size = len(get_unique(words_1 + words_2))
    shared = [word for word in distinct_1 if word in distinct_2]
    return len(shared) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Every tweet is scored before the list is cut to ``n``; previously the
    scan stopped after the first ``n`` matches, so better matches later in
    the list were missed. Tweet ids come from ``enumerate`` so identical
    tweets keep their own ids. Ties are ordered by ascending tweet id.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. A word
    wider than the line is printed whole on its own line instead of being
    truncated and repeated.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    room = print_width - 2  # columns left after the indent
    current = []
    for word in tweet_content.split(' '):
        if current and len(' '.join(current + [word])) > room:
            print('  ' + ' '.join(current))
            current = []
        current.append(word)
    print('  ' + ' '.join(current))

#--------------------------------------------
# 6330187121 (16.67) 48 (2021-02-27 23:05)

def get_unique(words):
    """Return each distinct word once, keeping first-occurrence order."""
    items = list(words)
    return [item for pos, item in enumerate(items) if item not in items[:pos]]
#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` is the number of entries of ``words_1`` found in ``words_2``.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    combined = len(words_1) + len(words_2) - shared
    return shared / combined
#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Tweets with coefficient 0 are filtered out before ranking, instead of
    being removed from the list while it is iterated (which skipped
    entries). Ties are ordered by ascending tweet id.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([-coef, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg] for neg, tweet_id in keyed[:n]]
#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. A word
    wider than the line is printed alone; previously that case (and an
    empty tweet) raised IndexError, and one branch called the non-existent
    ``str.remove``.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    room = print_width - 2
    current = []
    for word in tweet_content.split(' '):
        if current and len(' '.join(current + [word])) > room:
            print('  ' + ' '.join(current))
            current = []
        current.append(word)
    print('  ' + ' '.join(current))

#--------------------------------------------
# 6330188821 (16.00) 49 (2021-03-01 23:44)

def get_unique(words):
    """Collect the words of ``words`` without repeats, in original order."""
    result = []
    for candidate in words:
        if candidate not in result:
            result += [candidate]
    return result
def jaccard(words_1, words_2):
    """Return the share of common words among all words of both lists."""
    common = len([word for word in words_1 if word in words_2])
    return common / (len(words_1) + len(words_2) - common)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept; ties go to the smaller tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        score = jaccard(tweet_words, norm_query)
        if score > 0:
            ranked.append([tweet_id, score])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. The
    wrap test now uses the real length of the pending line; previously the
    length was stored in an unused variable, so text never wrapped.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        # +2: one separating space plus the extra indent space added on print.
        if line and len(line) + len(word) + 2 > print_width:
            print(' ' + line)
            line = ''
        line += ' ' + word
    if line:
        print(' ' + line)

#--------------------------------------------
# 6330189421 (18.44) 50 (2021-02-28 23:28)

def get_unique(words):
    """Return the words of ``words`` with later repeats dropped."""
    distinct = []
    for word in words:
        if distinct.count(word) == 0:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return (# distinct words in both lists) / (# distinct words overall)."""
    union = []
    for source in (words_1, words_2):
        for word in source:
            if word not in union:
                union.append(word)
    shared = [word for word in union if word in words_1 and word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only tweets with a positive coefficient that rank within the first
    ``n`` positions are returned; ties are ordered by ascending tweet id.
    Asking for more results than there are tweets returns what exists
    instead of raising IndexError.
    """
    keyed = sorted(
        [-jaccard(tweet_words, norm_query), tweet_id]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    )
    return [[tweet_id, -neg] for neg, tweet_id in keyed[:max(n, 0)] if -neg > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header, then the text wrapped to ``print_width``.

    Words are split on any whitespace; each line starts with two spaces.
    """
    print(" ")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    line = " "
    for word in tweet_content.split():
        extended = line + " " + word
        if len(extended) > print_width:
            # Emit the line as it was before this word and start a new one.
            print(line)
            extended = "  " + word
        line = extended
    print(line)
#--------------------------------------------
# 6330190021 (16.12) 51 (2021-03-01 01:19)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order.

    A new list is built; the caller's list is no longer emptied as a side
    effect.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return common-word count over the number of distinct words overall."""
    shared = [word for word in words_1 if word in words_2]
    distinct = get_unique(words_1 + words_2)
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Ties are ordered by ascending tweet id. Tweets sharing no word with the
    query (coefficient 0) are left out, matching the other implementations
    in this file.
    """
    info = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(norm_query, tweet_words)
        if coef > 0:
            info.append([-coef, tweet_id])
    info.sort()
    return [[tweet_id, -neg] for neg, tweet_id in info[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. A word
    wider than the line is printed alone on its own line; previously such a
    word was never consumed and the function looped forever.
    """
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line = " "
    for word in tweet_content.split(" "):
        if len(line) > 1 and len(line + " " + word) > print_width:
            print(line)
            line = " "
        line += " " + word
    print(line)

#--------------------------------------------
# 6330191621 (14.18) 52 (2021-03-01 14:02)

def get_unique(words):
    """Return the distinct words, relying on dict key order (first seen)."""
    return list(dict.fromkeys(words))
def jaccard(words_1, words_2):
    """Return common-word count over the number of distinct words overall."""
    common = [word for word in words_1 if word in words_2]
    every_word = get_unique(words_1 + words_2)
    return len(common) / len(every_word)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Tweets are kept only when their coefficient is positive (the old filter
    tested the tweet id by mistake and mutated the list while iterating).
    A keyed sort orders by descending coefficient, then ascending tweet id,
    which a single adjacent-swap pass could not guarantee for three or more
    tied tweets.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Text is wrapped on word boundaries with a two-space indent that counts
    toward the width, as the other implementations in this file do;
    previously the text was cut every ``print_width`` characters, splitting
    words and omitting the indent.
    """
    print(' ')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    lines = []
    current = []
    for word in tweet_content.split(' '):
        if current and len('  ' + ' '.join(current + [word])) > print_width:
            lines.append('  ' + ' '.join(current))
            current = []
        current.append(word)
    lines.append('  ' + ' '.join(current))
    print('\n'.join(lines))

#--------------------------------------------
# 6330192221 (19.10) 53 (2021-03-01 23:59)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order.

    A single pass is enough; the previous nested loop re-scanned the whole
    prefix for every position without changing the result.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return matching pairs / (len(words_1) + len(words_2) - matching pairs).

    With no matching pair the denominator stays 1, so the result is 0.0.
    """
    matches = sum(1 for left in words_1 for right in words_2 if left == right)
    denominator = len(words_1) + len(words_2) - matches if matches else 1
    return matches / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id. The coefficient is computed once per tweet and the list is
    sorted once, after it has been built.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. The
    last word is width-checked like any other, and a single-word tweet is
    printed instead of raising NameError.
    """
    print("\n" + "#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line = "  "
    used = 2  # columns taken by the pending line
    for word in tweet_content.split(' '):
        if used > 2 and used + 1 + len(word) > print_width:
            print(line)
            line = "  "
            used = 2
        if used > 2:
            line += " "
            used += 1
        line += word
        used += len(word)
    print(line)
    return



#--------------------------------------------
# 6330193921 (18.01) 54 (2021-02-27 12:37)

def get_unique(words):
    """Return the distinct words of ``words`` without modifying the input.

    The result keeps the order the original rotation produced: each word
    appears at the position of its last occurrence.
    """
    items = list(words)
    return [item for pos, item in enumerate(items) if item not in items[pos + 1:]]
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_2`` that occur in ``words_1``.
    """
    shared = len([word for word in words_2 if word in words_1])
    total = len(words_1) + len(words_2)
    return shared / (total - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Ties are ordered by ascending tweet id. Tweets sharing no word with the
    query (coefficient 0) are left out, matching the other implementations
    in this file.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    ranked = sorted(
        (pair for pair in scored if pair[1] > 0),
        key=lambda pair: (-pair[1], pair[0]),
    )
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header, then the text wrapped to ``print_width``.

    Each line starts with two spaces and every word is followed by a space.
    """
    print()
    print('#' + str(tweet_id), "(" + str(round(jc_coef, 2)) + ')')
    indent = "  "
    line = indent
    for word in tweet_content.split(' '):
        if len(line) + len(word) > print_width:
            print(line)
            line = indent
        line += word + " "
    print(line)


#--------------------------------------------
# 6330194521 (17.89) 55 (2021-03-01 01:26)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return common-word count over the number of distinct words overall."""
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(get_unique(words_1 + words_2))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Ties are ordered by ascending tweet id. The list is sorted once instead
    of on every loop iteration. Tweets sharing no word with the query
    (coefficient 0) are left out, matching the other implementations in
    this file.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([-coef, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg] for neg, tweet_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    The header is ``#<id> (<coefficient rounded to 2 places>)``; the leading
    '#' was previously missing. Text lines are indented by two spaces,
    which count toward the width, and carry no trailing space.
    """
    print()
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    current = []
    for word in tweet_content.split(" "):
        if current and len("  " + " ".join(current + [word])) > print_width:
            print("  " + " ".join(current))
            current = []
        current.append(word)
    print("  " + " ".join(current))




#--------------------------------------------
# 6330197421 (17.01) 56 (2021-02-28 13:06)

def get_unique(words):
    """Return each distinct word once, keeping first-occurrence order."""
    items = list(words)
    return [item for pos, item in enumerate(items) if item not in items[:pos]]
def jaccard(words_1, words_2):
    """Return shared words over (shared + only-in-first + only-in-second)."""
    common = sum(1 for word in words_1 if word in words_2)
    only_first = sum(1 for word in words_1 if word not in words_2)
    only_second = sum(1 for word in words_2 if word not in words_1)
    return common / (common + only_first + only_second)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    A keyed sort orders by descending coefficient, then ascending tweet id,
    replacing the nested swap loops that reused the same index variable.
    Tweets sharing no word with the query (coefficient 0) are left out,
    matching the other implementations in this file.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. The
    last line is flushed after the loop, so a word that merely equals the
    final word no longer causes an early duplicate print, and the final
    word is never dropped.
    """
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line_words = []
    for word in tweet_content.split(' '):
        if line_words and len("  " + " ".join(line_words + [word])) > print_width:
            print("  " + " ".join(line_words))
            line_words = []
        line_words.append(word)
    print("  " + " ".join(line_words))

#--------------------------------------------
# 6330198021 (0.00) 57 (2021-03-01 23:53)

def get_unique(words):
    """Collect the words of ``words`` without repeats, in original order."""
    result = []
    for candidate in words:
        if candidate not in result:
            result += [candidate]
    return result
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared) as a float.

    ``shared`` is the number of entries of ``words_1`` found in ``words_2``.
    """
    shared = len([word for word in words_1 if word in words_2])
    combined = len(words_1) + len(words_2) - shared
    return float(shared / combined)
# 6330199721 (17.33) 58 (2021-03-01 22:04)

def get_unique(words):
    """Return the words of ``words`` with later repeats dropped."""
    distinct = []
    for word in words:
        if distinct.count(word) == 0:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return the share of common words among all words of both lists."""
    overlap = sum(1 for word in words_1 if word in words_2)
    return overlap / (len(words_1) + len(words_2) - overlap)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id. The coefficient is computed once per tweet.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Text is wrapped on word boundaries with a two-space indent that counts
    toward the width. A word wider than the line is printed alone;
    previously that branch assigned the remainder to an unused variable and
    looped forever, and text exactly ``print_width`` long raised IndexError.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) > 1 and len(line) + 1 + len(word) > print_width:
            print(line)
            line = ' '
        line += ' ' + word
    print(line)

#--------------------------------------------
# 6330200621 (17.40) 59 (2021-03-01 14:39)

def get_unique(words):
    """Return the distinct words, each at the position of its last occurrence."""
    items = list(words)
    # An item survives only if no equal item follows it.
    return [item for pos, item in enumerate(items) if item not in items[pos + 1:]]
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` is the number of entries of ``words_1`` found in ``words_2``.
    """
    shared = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id. The coefficient is computed once per tweet.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([coef, -tweet_id])
    keyed.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#<id> (<coef>)`` header, then the tweet text.

    The text is consumed in slices of at most ``print_width`` characters and
    each slice is printed behind a two-space indent.
    """
    # NOTE(review): the width is applied to the text before the two-space
    # indent is added, so a printed line can be print_width + 2 characters
    # long; confirm whether the indent should count toward the width.
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    t = tweet_content  # text still to be printed
    n = print_width
    while len(t) != 0 :
        if len(t) <= n:
            # Everything left fits: print it; t[n:] is then empty.
            print('  ' + t[:n])
            t = t[n:]
        else:
            if ' ' not in t[:n]:
                # No space inside the window: print the first token whole.
                print('  ' + t.split()[0])
                t = ' '.join(t.split()[1:]).strip()
            else:
                if t[n] == ' ' or t[n-1] == ' ':
                    # The window ends on a word boundary: cut exactly there.
                    print('  ' + t[:n])
                    t = t[n:].strip()
                else:
                    # The window cuts a word: carry that partial word over
                    # to the next line and print the words before it.
                    s = t[:n].split()
                    t = s[-1].strip() + t[n:]
                    print('  '+' '.join(s[:-1]))



#--------------------------------------------
# 6330201221 (12.94) 60 (2021-03-01 23:20)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists, unrounded.

    The value is shared / (len(words_1) + len(words_2) - shared), where
    ``shared`` counts the entries of ``words_1`` found in ``words_2``.
    The exact value is returned so ranking is not distorted by rounding
    (``show_tweet`` rounds for display). Two empty lists yield 0.0 instead
    of raising UnboundLocalError.
    """
    count_same = sum(1 for word in words_1 if word in words_2)
    total = len(words_1) + len(words_2) - count_same
    if total == 0:
        return 0.0
    return count_same / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept. Ties are ordered by ascending
    tweet id (the plain reverse sort put the larger id first). When fewer
    than ``n`` tweets match, the shorter list is returned instead of
    raising IndexError.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Lines are indented by two spaces, which count toward the width. The
    final word is always printed; previously it was dropped whenever it
    fit on the current line.
    """
    print()
    print('#' + str(tweet_id), "(" + str(round(jc_coef, 2)) + ')')
    line = " "
    for word in tweet_content.split(' '):
        if len(line) > 1 and len(line) + 1 + len(word) > print_width:
            print(line)
            line = " "
        line += " " + word
    print(line)

#--------------------------------------------
# 6330202921 (16.93) 61 (2021-03-01 20:57)

def get_unique(words):
    """Return the distinct words of ``words`` in sorted order.

    A sorted copy is scanned, so the caller's list keeps its order.
    """
    ordered = sorted(words)
    # After sorting, repeats are adjacent: keep an item when it differs
    # from its predecessor.
    return [
        word for pos, word in enumerate(ordered)
        if pos == 0 or word != ordered[pos - 1]
    ]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    The shared-word count comes from a set intersection; deriving it from
    list lengths over-counted whenever a list contained a repeated word.
    Two empty lists yield 0.0 instead of raising ZeroDivisionError.
    """
    first, second = set(words_1), set(words_2)
    merged = first | second
    if not merged:
        return 0.0
    return len(first & second) / len(merged)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept; ties go to the smaller tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        negated = -jaccard(tweet_words, norm_query)
        if negated < 0:
            ranked.append([negated, tweet_id])
    ranked.sort()
    return [[tweet_id, -negated] for negated, tweet_id in ranked[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Text lines are indented by two spaces (counted toward the width) like
    the other implementations in this file, and carry no trailing space.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    current = []
    for word in tweet_content.split(' '):
        if current and len(indent + ' '.join(current + [word])) > print_width:
            print(indent + ' '.join(current))
            current = []
        current.append(word)
    print(indent + ' '.join(current))

#--------------------------------------------
# 6330203521 (19.62) 62 (2021-02-28 21:13)

def get_unique(words):
    """Return the distinct words of ``words`` in sorted order.

    The caller's list is not sorted in place any more.
    """
    return sorted(set(words))
def jaccard(words_1, words_2):
    """Return shared words over the merged word count; 0 if ``words_1`` is empty.

    The merged list is ``words_1`` plus every entry of ``words_2`` that does
    not occur in ``words_1``.
    """
    merged = list(words_1)
    if not merged:
        return 0
    shared = 0
    for word in words_2:
        if word in words_1:
            shared += 1
            continue
        merged.append(word)
    return shared / len(merged)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id. A keyed sort gives the same order as the former hand-written
    bubble sort.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width`` columns.

    Each line is cut at the last space that still fits behind the two-space
    indent. If a word is wider than the line, it is printed whole on its own
    line; previously the rest of the tweet was silently dropped.
    """
    text = tweet_content + " "  # the trailing space terminates the last word
    print("")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    room = print_width - 2
    start = 0
    while start < len(text):
        # rfind returns -1 when there is no space inside the window.
        cut = text.rfind(" ", start, start + room + 1)
        if cut == -1:
            # Over-wide word: cut at its end (a space always exists,
            # because text ends with one).
            cut = text.find(" ", start)
        print("  " + text[start:cut])
        start = cut + 1
#--------------------------------------------
# 6330205821 (18.01) 63 (2021-03-01 13:44)

def get_unique(words):
    """Return each distinct word once, keeping first-occurrence order."""
    items = list(words)
    return [item for pos, item in enumerate(items) if item not in items[:pos]]
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_2`` that occur in ``words_1``.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best match first.

    Ties are ordered by ascending tweet id. Tweets sharing no word with the
    query (coefficient 0) are left out, matching the other implementations
    in this file.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([-coef, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg] for neg, tweet_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    Words are appended to a two-space-indented line, each followed by a
    space, while indent + text + word still fits in print_width.  Wrapped
    lines therefore keep their trailing space; only a final word that fits
    is added without one.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    tokens = tweet_content.split(" ")
    final = len(tokens) - 1
    line = '  '
    for pos, token in enumerate(tokens):
        if len(line) + len(token) <= print_width:
            if pos == final:
                line += token
                print(line)
            else:
                line += token + ' '
        else:
            # Token does not fit: flush the line and start a new one with it.
            print(line)
            line = '  ' + token + ' '
            if pos == final:
                print(line)




#--------------------------------------------
# 6330206421 (14.16) 64 (2021-02-28 13:21)

def get_unique( words ):
    """Return each distinct item of words once, keeping first-seen order."""
    distinct = []
    for candidate in words:
        if distinct.count(candidate) == 0:
            distinct.append(candidate)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |intersection| / |union| of two word lists.

    The exact quotient is returned: rounding here would merge nearby
    coefficients into false ties when tweets are ranked (rounding belongs to
    the display code).  Two empty lists give 0.0 instead of dividing by zero.
    """
    shared = sum(1 for word in words_1 if word in words_2)

    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)

    if not union:
        return 0.0
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, best match first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Tweets with a coefficient of 0 share nothing with the query and are
    excluded from the result.
    """
    matches = []
    for tweet_id in range(len(norm_tweets)):
        coef = jaccard(norm_tweets[tweet_id], norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    return sorted(matches, key=lambda match: (-match[1], match[0]))[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    Content lines start with two spaces and are filled greedily so that a
    line does not exceed print_width.  A word too long to fit even on an
    empty line is printed on a line of its own; previously such a word made
    the function loop forever.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    # One space here plus the joining space before the first word gives the
    # two-space indent.
    line = " "
    for word in tweet_content.split(" "):
        if line != " " and len(line) + len(word) > print_width - 1:
            print(line)
            line = " "
        line = line + " " + word
    print(line)

#--------------------------------------------
# 6330207021 (0.00) 65 (2021-03-01 23:25)

def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The shared-word count is the number of items of words_1 found in
    words_2; the union size is len(words_1) + len(words_2) minus that count
    (this presumes the lists hold no duplicates).  Returns 0.0 when both
    lists are empty.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    union_size = len(words_1) + len(words_2) - shared
    if union_size == 0:
        return 0.0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs for the best-matching tweets.

    The tweet id is the position of the tweet in norm_tweets.  Results are
    ordered by descending coefficient, then ascending id; tweets with a
    coefficient of 0 are left out.
    """
    x = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            # Negated coefficient first so a plain ascending sort ranks
            # the best match first and breaks ties on the smaller id.
            x.append([-coef, tweet_id])
    x.sort()
    top_n = [[tweet_id, -neg] for neg, tweet_id in x[:n]]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    The coefficient is shown rounded to two decimals.  Content lines are
    indented by two spaces and filled greedily with space-separated words so
    that a line does not exceed print_width (a single over-long word still
    gets its own line).
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)


#--------------------------------------------
# 6330208721 (20.00) 66 (2021-02-28 10:42)

def get_unique(words):
    """Return the items of words without repeats, first occurrence kept."""
    result = []
    for word in words:
        if word not in result:
            result += [word]
    return result
def jaccard(words_1, words_2):
    """Return len(intersection) / len(union) for two word lists.

    The intersection keeps every item of words_1 found in words_2 (repeats
    included); the union is the de-duplicated concatenation of both lists.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = [word for word in words_1 if word in words_2]
    union = []
    for word in words_1 + words_2:
        if word in union:
            continue
        union.append(word)
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar first.

    Candidates are sorted as [-jaccard, tweet_id] so that an ascending sort
    ranks by descending coefficient and ascending id; the first n are taken
    and those with a zero coefficient are dropped.
    """
    scored = sorted(
        [-jaccard(tweet, norm_query), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    )
    return [[tweet_id, -neg] for neg, tweet_id in scored[:n] if neg != 0.0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    Words are collected per line; a line is flushed (two-space indent, words
    joined by single spaces) when the next word would push the text past
    print_width - 2 characters.
    """
    room = print_width - 2
    print()
    print('#' + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line_words = []
    used = 0  # text length so far, counting one separator per word
    for word in tweet_content.split(' '):
        if used + len(word) > room:
            print("  " + " ".join(line_words))
            line_words, used = [], 0
        line_words.append(word)
        used += len(word) + 1

    print("  " + " ".join(line_words))

#--------------------------------------------
# 6330209321 (17.00) 67 (2021-03-01 17:31)

def get_unique( words ):
    """Return the distinct items of the sequence words, first occurrence kept."""
    # An item is kept only if it does not occur anywhere before its position.
    return [word for pos, word in enumerate(words) if word not in words[:pos]]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the items of words_1 present in words_2; the
    denominator is the number of distinct items across both lists.  Raises
    ZeroDivisionError when both lists are empty.
    """
    inter = sum(1 for word in words_1 if word in words_2)
    union = []
    for source in (words_1, words_2):
        for word in source:
            if word not in union:
                union.append(word)
    return inter / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a positive coefficient.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet id.  Asking for more results than there are
    tweets simply returns fewer pairs (the former reverse indexing wrapped
    around or raised IndexError in that case).
    """
    jaclist = []
    for i in range(len(norm_tweets)):
        jac = jaccard(norm_tweets[i], norm_query)
        if jac > 0:
            jaclist.append([i, jac])
    jaclist.sort(key=lambda x: (-x[1], x[0]))
    return jaclist[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    The current line starts as a single space; each word is appended with a
    leading space while the line stays within print_width.  When a word does
    not fit, the line is printed and restarted with that word, except that a
    word arriving while nothing is buffered is printed directly.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = " "
    for word in tweet_content.split(" "):
        if len(line) + len(word) + 1 <= print_width:
            line += " " + word
            continue
        if len(line) == 1:
            # Nothing buffered yet: the word gets a line of its own.
            print("  " + word)
        else:
            print(line)
            line = "  " + word
    print(line)









#--------------------------------------------
# 6330210921 (14.01) 68 (2021-03-01 20:01)

def get_unique( words ):
    """Return the items of words with later repeats removed."""
    kept = []
    for word in words:
        is_new = word not in kept
        if is_new:
            kept.append(word)
    return kept
###################################
def jaccard(words_1, words_2):
    """Return the Jaccard similarity coefficient of two word lists.

    Numerator: items of the longer list that occur in the shorter one.
    Denominator: items of words_1 plus the items of words_2 not already
    present.  Neither argument is modified.  Returns 0.0 when both lists are
    empty.
    """
    if len(words_1) < len(words_2):
        longer, shorter = words_2, words_1
    else:
        longer, shorter = words_1, words_2
    top = sum(1 for word in longer if word in shorter)

    # Work on a copy: appending to words_1 itself would grow the caller's list.
    sum_words = list(words_1)
    for word in words_2:
        if word not in sum_words:
            sum_words.append(word)
    bottom = len(sum_words)

    if bottom == 0:
        return 0.0
    return top / bottom
######################################
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    The coefficient of a tweet is the number of its words found in
    norm_query divided by the number of distinct words across the query and
    the tweet.  Results are ordered by descending coefficient, then
    ascending tweet id; tweets sharing no word with the query are omitted.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        top = sum(1 for word in tweet if word in norm_query)
        if top == 0:
            continue
        s_words = []
        for word in list(norm_query) + list(tweet):
            if word not in s_words:
                s_words.append(word)
        ranked.append([tweet_id, top / len(s_words)])
    # One keyed sort gives both the ranking and the tie order; the former
    # adjacent-swap pass could step its index below zero and fail.
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
###########################################################################
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    The line buffer starts as one space; a word is appended with a leading
    space while buffer length + word length stays below print_width,
    otherwise the buffer is printed and restarted as two spaces + word.
    """
    header = "\n#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")"
    print(header)

    line = " "
    for word in tweet_content.split(" "):
        if len(line) + len(word) >= print_width:
            print(line)
            line = "  " + word
        else:
            line = line + " " + word
    print(line)
#################################################################

#--------------------------------------------
# 6330211521 (11.35) 69 (2021-03-01 16:20)

def get_unique( words ):
    """Return the distinct items of words in the order they first occur."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)

    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Each word of words_1 is matched against the not-yet-matched words of
    words_2.  The union size is len(words_1) plus the number of words of
    words_2 left unmatched.  words_2 is not modified.  Returns 0.0 when both
    lists are empty.
    """
    shared = 0
    # Copy: matched words are removed below and the caller's list must survive.
    leftover = list(words_2)
    for word in words_1:
        if word in leftover:
            shared += 1
            leftover.remove(word)
    union_size = len(words_1) + len(leftover)
    if union_size == 0:
        return 0.0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    For every tweet the shared-word count and the union size are computed
    from scratch; norm_query is never modified.  Results are ordered by
    descending coefficient, then ascending tweet id, and tweets sharing no
    word with the query are omitted.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        leftover = list(norm_query)  # per-tweet copy of the query words
        shared = 0
        for word in tweet:
            if word in leftover:
                shared += 1
                leftover.remove(word)
        if shared == 0:
            continue
        ranked.append([tweet_id, shared / (len(tweet) + len(leftover))])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    The coefficient is rounded to two decimals and shown in parentheses.
    Content lines are indented by two spaces and filled greedily so that a
    line does not exceed print_width; a single over-long word still gets its
    own line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    current = []
    used = 2  # width consumed on the current line, indent included
    for word in tweet_content.split(' '):
        if current and used + 1 + len(word) > print_width:
            print('  ' + ' '.join(current))
            current, used = [], 2
        used += len(word) + (1 if current else 0)
        current.append(word)
    print('  ' + ' '.join(current))



#--------------------------------------------
# 6330212121 (16.16) 70 (2021-02-25 22:51)

def get_unique( words ):
    """Return the items of words that do not occur again later in the list.

    Membership is tested on str(item), both against the rest of the list and
    against the items already kept, so for string items each distinct word
    is kept at the position of its last occurrence.
    """
    unique_words = []
    for pos, item in enumerate(words):
        text = str(item)
        if text in words[pos + 1:] or text in unique_words:
            continue
        unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The intersection is taken by scanning the shorter list for items present
    in the other one; the union is get_unique of the concatenated lists.
    The exact quotient is returned: rounding here would merge nearby
    coefficients into false ties when tweets are ranked.
    """
    if len(words_1) <= len(words_2):
        shared = [word for word in words_1 if word in words_2]
    else:
        shared = [word for word in words_2 if word in words_1]

    union = get_unique(words_1 + words_2)
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    Pairs are ordered by descending coefficient; equal coefficients keep
    ascending tweet-id order.  Tweets with a coefficient of 0 are omitted.
    """
    x = []
    for i, tweet in enumerate(norm_tweets):
        jc = jaccard(tweet, norm_query)
        if jc > 0:
            x.append([i, jc])

    # Inline key instead of an external helper, so the function is
    # self-contained; a stable sort keeps ties in ascending-id order.
    x.sort(key=lambda pair: pair[1], reverse=True)
    return x[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the "#id (coef)" header and the wrapped content.

    Each word is appended to a two-space-indented line followed by a space,
    as long as indent + text + word fits in print_width; otherwise the line
    is printed and a new one is started.  Printed lines keep the trailing
    space.
    """
    print(' ')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = '  '
    for word in tweet_content.split(' '):
        if len(line) + len(word) > print_width:
            print(line)
            line = '  '
        line += word + ' '

    print(line)

#--------------------------------------------
# 6330213821 (13.03) 71 (2021-03-01 21:38)

def get_unique( words ):
    """Return every distinct item of words once, in first-seen order."""
    distinct = []
    for word in words:
        if distinct.count(word) == 0:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated with get_unique first; the result is the
    number of words of the first found in the second, divided by the number
    of distinct words across both.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)

    union_size = len(get_unique(first + second))
    shared = sum(1 for word in first if word in second)
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    The coefficient is computed once per tweet.  Pairs are ordered by
    descending coefficient, then ascending tweet id; tweets with a
    coefficient of 0 are omitted.  Fewer than n pairs are returned when
    fewer tweets qualify (indexing a fixed range(n) used to raise
    IndexError).
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    The header has no spaces inside the parentheses.  The content is split
    on whitespace; lines are indented by two spaces and filled greedily so
    that a line does not exceed print_width, with the same width accounting
    on the first line as on every later one.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(float(jc_coef), 2)) + ')')
    # One space here plus the joining space before the first word gives the
    # two-space indent.
    line = ' '
    for word in tweet_content.split():
        if line != ' ' and len(line) + 1 + len(word) > print_width:
            print(line)
            line = ' '
        line += ' ' + word
    print(line)
#--------------------------------------------
# 6330214421 (18.50) 72 (2021-02-28 01:19)

def get_unique( words ):
    """Return the distinct items of the sequence words, first occurrence kept."""
    # Keep an item only when it has not appeared at any earlier position.
    return [word for pos, word in enumerate(words) if word not in words[:pos]]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The intersection is built by scanning words_1 when it is strictly
    shorter, otherwise words_2, for items present in the other list.  The
    union size is len(words_1) + len(words_2) - len(intersection).  Raises
    ZeroDivisionError when both lists are empty.
    """
    if len(words_1) < len(words_2):
        probe, pool = words_1, words_2
    else:
        probe, pool = words_2, words_1
    shared = [word for word in probe if word in pool]
    return len(shared) / (len(words_1) + len(words_2) - len(shared))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Pairs with a coefficient of 0 are filtered out before sorting; removing
    them from the list while iterating over it skipped some of them.
    """
    b = []
    for tweet_id, tweet in enumerate(norm_tweets):
        a = jaccard(tweet, norm_query)
        if a != 0.0:
            b.append([tweet_id, a])
    b.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = b[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a three-space line, the "#id (coef)" header and the wrapped content.

    Words are buffered together with a running length that counts one
    separator per word.  The buffer is printed when that length equals
    print_width - 2, or, once it overshoots, without its last word, which
    then starts the next line.
    """
    print("   ")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")

    limit = print_width - 2
    used = 0
    pending = []
    for word in tweet_content.split(" "):
        used += len(word) + 1
        pending.append(word)
        if used == limit:
            print("  " + " ".join(pending))
            used = 0
            pending = []

        if used - 1 > limit:
            # The newest word overflowed: print the rest and carry it over.
            print("  " + " ".join(pending[:-1]))
            used = len(word) + 1
            pending = [word]

    print("  " + " ".join(pending))
#--------------------------------------------
# 6330215021 (17.95) 73 (2021-03-01 01:43)

def get_unique( words ):
    """Return the distinct items of words in sorted order.

    The input list is left untouched: a sorted copy is scanned and an item
    is kept whenever it differs from the previously kept one.
    """
    unique_words = []
    for word in sorted(words):
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The denominator is the size of get_unique(words_1 + words_2); the
    numerator counts the items of words_1 that occur in words_2.
    """
    union = get_unique(words_1 + words_2)

    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1

    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Tweets with a coefficient of 0 have nothing in common with the query and
    are not included.
    """
    top = []
    for i, tweet in enumerate(norm_tweets):
        jaccard_coef = jaccard(tweet, norm_query)
        if jaccard_coef > 0:
            top.append([i, jaccard_coef])
    top.sort(key=lambda pair: (-pair[1], pair[0]))
    return top[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    Content lines are indented by two spaces and filled greedily so that a
    line does not exceed print_width.  Wrapped lines are printed without
    trailing spaces, and an empty token (from consecutive spaces) at a wrap
    point no longer blanks out the pending line.
    """
    jc_coef = round(jc_coef, 2)
    print("")
    print("#" + str(tweet_id) + " (" + str(jc_coef) + ")")
    indent = "  "
    content = indent
    for e in tweet_content.split(" "):
        if content == indent:
            candidate = content + e
        else:
            candidate = content + " " + e
        if len(candidate) > print_width and content != indent:
            print(content.rstrip())
            content = indent + e
        else:
            # Either it fits, or the line is empty and the word must go here.
            content = candidate
    print(content)

#--------------------------------------------
# 6330216721 (20.00) 74 (2021-02-27 01:47)

def get_unique( words ):
    """Return the items of words without repeats, first occurrence kept."""
    result = []
    for item in words:
        if item not in result:
            result += [item]
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The intersection size is the number of items of words_1 found in
    words_2; the union size is len(words_1) + len(words_2) minus that
    number.  Returns 0 when the union is empty.
    """
    common = 0
    for word in words_1:
        if word in words_2:
            common += 1

    total = len(words_1) + len(words_2) - common
    if total == 0:
        return 0
    return common / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a positive coefficient.

    Pairs are sorted by descending coefficient, then ascending tweet id.
    """
    hits = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:  # similarity must be > 0
            hits.append([tweet_id, score])
    hits.sort(key=lambda hit: (-hit[1], hit[0]))
    return hits[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    The content is split on single spaces.  Tokens are appended to a
    two-space-indented line while it stays within print_width; otherwise the
    line is printed right-stripped and restarted with the token.  Empty
    tokens are skipped while the line holds only the indent, and a final
    line holding only the indent is not printed.
    """
    print()
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))

    indent = "  "
    first, *rest = tweet_content.split(' ')
    line = indent + first
    for token in rest:
        line_is_blank = line == indent
        # Do not pile empty tokens (multi-space runs) onto an empty line.
        if line_is_blank and len(token) == 0:
            continue
        if len(line) + 1 + len(token) > print_width:
            print(line.rstrip())
            line = indent + token
        elif line_is_blank:
            line += token
        else:
            line += " " + token
    if line != indent:
        print(line)


#--------------------------------------------
# 6330217321 (20.00) 75 (2021-02-27 06:32)

def get_unique( words ):
    """Return the distinct items of words in order of first appearance."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the items of words_1 found in words_2.  The
    denominator is the number of distinct items in the sorted concatenation
    of both lists.  Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    merged = sorted(words_1 + words_2)
    # In sorted order, a value is new exactly when it differs from its
    # predecessor.
    distinct = [
        word for pos, word in enumerate(merged)
        if pos == 0 or word != merged[pos - 1]
    ]
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a non-zero coefficient.

    Candidates are keyed as [-jaccard, tweet_id] so that an ascending sort
    yields descending coefficient with ascending id for ties.
    """
    keyed = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            keyed.append([-score, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg] for neg, tweet_id in keyed][0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    Every word is stored with a leading space; a line is printed as one
    space followed by the stored pieces, which yields a two-space indent.
    A line is flushed when adding the next piece would push the piece total
    past print_width - 1.
    """
    print("")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    pieces = []
    used = 0
    for word in tweet_content.split(" "):
        piece = " " + word
        if used + len(piece) > print_width - 1:
            print(" " + "".join(pieces))
            pieces = []
            used = 0
        pieces.append(piece)
        used += len(piece)
    print(" " + "".join(pieces))
#--------------------------------------------
# 6330219621 (20.00) 76 (2021-02-27 13:01)

def get_unique( words ):
    """Return the items of words with later repeats removed."""
    kept = []
    for word in words:
        is_new = word not in kept
        if is_new:
            kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated with get_unique.  Words of the first list
    missing from the second extend the union; the others count as shared.
    """
    left = get_unique(words_1)
    pool = get_unique(words_2)
    # left has no repeats, so testing against the original pool is the same
    # as testing against a pool that grows with the missing words.
    extras = [word for word in left if word not in pool]
    shared = len(left) - len(extras)
    return shared / (len(pool) + len(extras))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a positive coefficient.

    jaccard is called as jaccard(norm_query, tweet).  Candidates are keyed
    as [-jaccard, tweet_id] so that an ascending sort yields descending
    coefficient with ascending id for ties.
    """
    keyed = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(norm_query, tweet)
        if score > 0:
            keyed.append([-score, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg] for neg, tweet_id in keyed][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    The content is split into words on whitespace, while the lengths of the
    runs of ' ' characters are recorded separately so each word can be
    followed by the same number of spaces as in the original text.
    """
    tc = tweet_content.split()
    c = 0
    # s: running width of the current line, f: text of the current line
    s,f = 0,''
    # cn: length (as a string) of each run of spaces that is followed by a
    # non-space character, in order of appearance
    cn = list()
    print('\n#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    for e in tweet_content :
        if e == ' ' :
            c += 1
        elif c != 0 :
            # a run of spaces just ended: store its length and reset
            c = str(c)
            cn.append(c)
            c = 0
    # extra entry so the last word also has a gap value (presumably; a
    # trailing run of spaces is never stored by the loop above)
    cn.append('1')
    # NOTE(review): cn[i] is paired with tc[i]; this looks like it assumes
    # the content does not start with spaces and that words are separated
    # only by ' ' (not tabs/newlines) - otherwise the gaps are shifted or
    # cn[i] may not exist. Confirm against the callers.
    for i in range(len(tc)) :
        s += len(tc[i])
        if s < print_width-1 :
            # word fits: append it followed by its recorded gap
            s += int(cn[i])
            f += str(tc[i])+' '*int(cn[i])
        else :
            # print(' ', f) emits a space, the separator space, then f,
            # i.e. a two-space indent; f keeps its trailing gap spaces
            print(' ',f)
            s = len(tc[i])+int(cn[i])
            f = str(tc[i])+' '*int(cn[i])
    print(' ',f)

#--------------------------------------------
# 6330221821 (18.33) 77 (2021-03-01 02:08)

def get_unique( words ):
    """Return the distinct items of words in sorted order.

    The caller's list is not reordered.  An item is compared with the last
    item kept, never with words[-1], so a list made of one repeated word
    yields that word once instead of an empty list.
    """
    unique_words = []
    for word in sorted(words):
        if not unique_words or word != unique_words[-1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The number of shared words is taken as the number of items that
    get_unique removes from the concatenation of both lists; the union is
    what remains.
    """
    combined = words_1 + words_2
    distinct = get_unique(combined)
    duplicates = len(combined) - len(distinct)
    return duplicates / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a positive coefficient.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    The coefficient is returned exactly as jaccard produced it: sorting on
    1 - jc and converting back with 1 - (1 - jc) does not always reproduce
    the original float (0.1 comes back as 0.09999999999999998).
    """
    a = []
    for e, tweet in enumerate(norm_tweets):
        jc = jaccard(tweet, norm_query)
        if jc > 0:
            a.append([e, jc])
    a.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = a[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    Content lines are indented by two spaces and filled greedily so that a
    line does not exceed print_width.  A word that cannot fit even on an
    empty line is printed on a line of its own; previously such a word was
    never consumed and the function looped forever.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')

    ln = []
    wc = 2  # width used on the current line, indent included
    for word in tweet_content.split(' '):
        needed = len(word) + 1 if ln else len(word)
        if ln and wc + needed > print_width:
            print('  ' + ' '.join(ln))
            ln = []
            wc = 2
            needed = len(word)
        ln.append(word)
        wc += needed
    print('  ' + ' '.join(ln))

#--------------------------------------------
# 6330222421 (13.34) 78 (2021-02-28 12:16)

def get_unique( words ):
    """Return the distinct items of words in sorted order.

    A sorted copy is scanned, so the caller's list keeps its original order.
    """
    ordered = sorted(words)
    # In sorted order a value is new exactly when it differs from the
    # previous one.
    unique_words = [
        word for pos, word in enumerate(ordered)
        if pos == 0 or word != ordered[pos - 1]
    ]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated with get_unique; the union is the
    de-duplicated concatenation, and the intersection size follows from
    |A| + |B| - |A union B|.  The exact quotient is returned (rounding here
    would merge nearby coefficients into false ties when ranking), and two
    empty lists give 0.0 instead of dividing by zero.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    union_size = len(get_unique(first + second))
    if union_size == 0:
        return 0.0
    shared = len(first) + len(second) - union_size
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Tweets with a coefficient of 0 share nothing with the query and are
    excluded.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    The coefficient is shown rounded to two decimals.  The content is split
    on whitespace; lines are indented by two spaces and filled greedily so
    that a line does not exceed print_width.  Every word is printed exactly
    once.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    tw_c = []
    used = 0  # characters of text on the current line, indent excluded
    for word in tweet_content.split():
        extra = len(word) + (1 if tw_c else 0)
        if tw_c and used + extra > print_width - 2:
            print('  ' + ' '.join(tw_c))
            tw_c, used, extra = [], 0, len(word)
        tw_c.append(word)
        used += extra
    print('  ' + ' '.join(tw_c))

#--------------------------------------------
# 6330223021 (20.00) 79 (2021-02-25 23:42)

def get_unique( words ):
    """Return each distinct item of words once, keeping first-seen order."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the items of words_2 found in words_1; the
    denominator is the size of get_unique(words_1 + words_2).
    """
    shared = sum(1 for word in words_2 if word in words_1)
    union = get_unique(words_1 + words_2)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a positive coefficient.

    The coefficient is computed once per tweet.  Candidates are keyed as
    [-jaccard, tweet_id] so that an ascending sort yields descending
    coefficient with ascending id for ties.
    """
    top_reverse = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            top_reverse.append([-coef, tweet_id])
    top_reverse.sort()
    top_n = [[tweet_id, -neg] for neg, tweet_id in top_reverse[:n]]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    Words are buffered while word length + used width + 2 stays within
    print_width; otherwise the buffer is printed and restarted.  A line is
    emitted as print(' ', text), i.e. a two-space indent, with the joined
    text stripped of surrounding whitespace.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line_words = []
    used = 0
    for word in tweet_content.split(' '):
        if len(word) + used + 2 > print_width:
            print(' ', ' '.join(line_words).strip())
            line_words = []
            used = 0
        line_words.append(word)
        used += len(word) + 1
    # split(' ') never yields an empty list, so there is always a final line.
    print(' ', ' '.join(line_words).strip())
#--------------------------------------------
# 6330224721 (14.00) 80 (2021-02-28 01:15)

def get_unique( words ):
    """Return the items of words without repeats, first occurrence kept."""
    result = []
    for item in words:
        if item not in result:
            result += [item]
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts equal pairs between the de-duplicated lists; the
    denominator is the number of distinct words across both lists, replaced
    by 1 when there are none so that empty input yields 0.0.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    up = sum(1 for left in first for right in second if left == right)

    down = len(get_unique(words_1 + words_2))
    if down == 0:
        down = 1
    return up / down
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    jaccard is called as jaccard(norm_query, tweet).  Pairs are lists (not
    tuples), ordered by descending coefficient and then ascending tweet id;
    tweets with a coefficient of 0 are excluded.  Fewer than n pairs are
    returned when fewer tweets qualify (indexing a fixed range(n) used to
    raise IndexError).
    """
    x = []
    for i, tweet in enumerate(norm_tweets):
        coef = jaccard(norm_query, tweet)
        if coef > 0:
            x.append([i, coef])
    x.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = x[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    The whole output is assembled first and printed with a single call.
    Content lines are indented by two spaces; a word moves to a new line
    when current width + 1 + word length would exceed print_width.
    """
    header = "#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")"
    sentence = tweet_content.split(" ")
    parts = ["\n", header, "\n", "  ", sentence[0]]
    width = 2 + len(sentence[0])
    for word in sentence[1:]:
        if width + len(word) + 1 > print_width:
            parts.append("\n  " + word)
            width = 2 + len(word)
        else:
            parts.append(" " + word)
            width += len(word) + 1
    print("".join(parts))

#--------------------------------------------
# 6330225321 (18.01) 81 (2021-03-01 14:38)

def get_unique( words ):
    """Return the distinct items of words in sorted order.

    The caller's list is not reordered.  Each item is compared with the last
    item kept, never with words[-1], so a single-element list (or a list of
    one repeated word) yields that word instead of an empty list.
    """
    unique_words = []
    for word in sorted(words):
        if unique_words and unique_words[-1] == word:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The shared count is found by scanning words_1 when it is strictly
    longer, otherwise words_2, for items present in the other list.  The
    union size is len(words_1) + len(words_2) - shared.  Raises
    ZeroDivisionError when both lists are empty.
    """
    if len(words_1) > len(words_2):
        outer, inner = words_1, words_2
    else:
        outer, inner = words_2, words_1
    hits = sum(1 for word in outer if word in inner)
    return hits / (len(words_1) + len(words_2) - hits)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs, most similar tweet first.

    Pairs are ordered by descending coefficient, then ascending tweet id;
    tweets with a coefficient of 0 are excluded.  The list is sorted once
    after all coefficients are known, and an empty norm_tweets yields an
    empty result.
    """
    a = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jac = jaccard(tweet, norm_query)
        if jac > 0:
            a.append([tweet_id, jac])
    a.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = a[:n]

    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a "#id (coef)" header and the wrapped content.

    A word is appended with a leading space while line length + word length
    stays below print_width; otherwise the line is printed and restarted as
    two spaces + word.  The last line is printed once, after the loop, so a
    final word that also occurs earlier in the text no longer triggers
    premature partial lines.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    b = ' '
    for e in tweet_content.split(' '):
        if len(b) + len(e) < print_width:
            b += ' ' + e
        else:
            print(b)
            b = '  ' + e
    print(b)


#--------------------------------------------
# 6330226021 (20.00) 82 (2021-03-01 00:42)

def get_unique( words ):
    """Return the distinct items of words in order of first appearance."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the items of words_1 found in words_2; the
    denominator is the size of get_unique(words_1 + words_2).
    """
    matches = [word for word in words_1 if word in words_2]
    union = get_unique(words_1 + words_2)
    return len(matches) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, jaccard] pairs with a positive coefficient.

    Candidates are sorted ascending as [jaccard, -tweet_id]; reading the
    last n entries backwards gives descending coefficient with ascending id
    for ties.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([score, -tweet_id])
    scored.sort()
    flipped = [[-neg_id, score] for score, neg_id in scored]
    return flipped[:-n-1:-1]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the "#id (coef)" header and the wrapped content.

    The wrapped text is assembled first: a word placed at column 2 is added
    without a separator, a word that fits is added after one space, and any
    other word starts a new two-space-indented line.
    """
    text = '  '
    col = 2  # width of the line being built, indent included
    for word in tweet_content.split(' '):
        if col == 2:
            text += word
            col += len(word)
        elif col + len(word) + 1 <= print_width:
            text += ' ' + word
            col += len(word) + 1
        else:
            text += '\n  ' + word
            col = 2 + len(word)
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    print(text)

#--------------------------------------------
# 6330227621 (19.10) 83 (2021-03-01 22:37)

def get_unique( words ):
    """Return the distinct items of the sequence words, first occurrence kept."""
    # Keep an item only when no earlier position holds the same value.
    return [word for pos, word in enumerate(words) if word not in words[:pos]]
#............................................................
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The previous formula (len1 + len2 - common) assumed both lists were
    already duplicate-free; the inputs are now converted to sets so the
    result is always intersection size / union size.  Returns 0.0 when both
    lists are empty rather than dividing by zero.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    shared = first.intersection(words_2)
    jaccard_coef = len(shared) / len(union)
    return jaccard_coef
# ...........................................................
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, best match first.

    Only tweets with a positive coefficient are kept.  Ordering is by
    coefficient descending, ties broken by ascending tweet id.  A single
    keyed sort replaces the earlier sentinel-string grouping pass.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
#.............................................................
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, a '#id (score)' header and the wrapped tweet.

    Each content line is indented by two spaces and holds as many words as
    fit in `print_width` columns.  A word longer than the width gets a line
    of its own; the partially built line before it is now flushed first
    (it used to be discarded).
    """
    print('    ')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(' '):
        candidate = ' '.join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + ' '.join(line_words))
#--------------------------------------------
# 6330228221 (18.01) 84 (2021-02-28 23:20)

def get_unique(words):
    """Return each distinct word once, in order of first appearance."""
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The numerator used to count every occurrence in `words_1`, so repeated
    words could give a value above 1; both lists are now reduced to sets.
    Two empty lists yield 0.0 instead of ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Tweets that share nothing with the query (coefficient 0) are no longer
    reported.  Ties on the coefficient are ordered by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, the '#id (score)' header and the wrapped tweet.

    Every word is followed by one space; a new two-space-indented line is
    started when the pending line plus the next word would be longer than
    `print_width`.
    """
    print("                                               ")
    print("#"+str(tweet_id),'('+str(round(jc_coef,2))+')')
    pending = "  "
    for word in tweet_content.split(" "):
        if len(pending) + len(word) > print_width:
            print(pending)
            pending = "  "
        pending += word + " "
    print(pending)
#--------------------------------------------
# 6330229921 (20.00) 85 (2021-02-28 13:46)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order."""
    seen = []
    for token in words:
        if token in seen:
            continue
        seen.append(token)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are converted to sets, so duplicate words no longer distort the
    len1 + len2 - common denominator.  Returns 0.0 when both lists are
    empty instead of raising ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, jaccard] pairs with a positive score.

    Pairs are ordered by coefficient (highest first), then by tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated score so a plain ascending sort gives the ranking.
            ranked.append((-coef, tweet_id))
    ranked.sort()
    ordered = [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked]
    return ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Lines start with two spaces.  A word is appended when it fits within
    `print_width`; otherwise the pending line is printed and the word
    starts a new one.  The last word always triggers the final print.
    """
    print('\n'+'#'+str(tweet_id)+' ('+str(round(jc_coef, 2))+')')
    words = tweet_content.split(' ')
    last_pos = len(words) - 1
    line = '  '
    for pos, word in enumerate(words):
        is_last = pos == last_pos
        if len(line) + len(word) <= print_width:
            line += word
            if is_last:
                print(line)
            elif len(line) < print_width:
                # Separator for the next word, only while there is room.
                line += ' '
        else:
            print(line)
            line = '  ' + word
            if is_last:
                print(line)
            else:
                line += ' '

#--------------------------------------------
# 6330230421 (18.01) 86 (2021-02-26 13:00)

def get_unique(words):
    """Return the items of `words` without repeats, in original order."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct += [item]
    return distinct
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    Uses set operations in place of three hand-written membership passes,
    and returns 0.0 for two empty lists instead of raising
    ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Tweets with a coefficient of 0 are now left out of the result.  Equal
    coefficients are ordered by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Words are joined with single spaces behind a two-space indent; a new
    line begins when the next word would exceed `print_width`.
    """
    words = tweet_content.split(' ')
    print("\n"+"#"+str(tweet_id)+" "+"("+str(round(jc_coef,2))+")")
    current = ' '
    for word in words:
        chunk = ' ' + word
        if len(current) + len(chunk) > print_width:
            print(current)
            current = ' ' + chunk
        else:
            current += chunk
    print(current)

#--------------------------------------------
# 6330232721 (20.00) 87 (2021-03-01 00:56)

def get_unique(words):
    """Return each distinct word once, in order of first appearance."""
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are reduced to sets first, so repeated words cannot skew the
    len1 + len2 - common denominator.  Two empty lists give 0.0 rather than
    a ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, jaccard] pairs with a positive score.

    Ordering: coefficient descending, then tweet id ascending.
    """
    matches = [
        [idx, jaccard(tweet_words, norm_query)]
        for idx, tweet_words in enumerate(norm_tweets)
    ]
    matches = [pair for pair in matches if pair[1] > 0]
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Each word is appended to the pending line first; if that overflows
    `print_width`, the line is printed without the word (its separating
    space stays) and the word starts the next indented line.
    """
    words = tweet_content.split(' ')
    print('\n'+'#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    line = ' '
    for word in words:
        line = line + ' ' + word
        if len(line) <= print_width:
            continue
        print(line[:-len(word)])
        line = '  ' + word
    print(line)
#--------------------------------------------
# 6330233321 (18.05) 88 (2021-03-01 20:03)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order."""
    seen = []
    for token in words:
        if token in seen:
            continue
        seen.append(token)
    return seen
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The shared-word count used to include every repeat in `words_2`, which
    could push the value above 1; both inputs are now treated as sets.
    Two empty lists yield 0.0 instead of ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  One keyed sort replaces the earlier sort / reverse-sort /
    reverse sequence, and the local no longer shadows the builtin `id`.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    The tweet is split on whitespace and packed greedily into lines that
    start with two spaces and are at most `print_width` columns wide (a
    single over-long word still gets its own line).  The loop now visits
    each word exactly once; the old iteration count guessed the number of
    wraps from len(content) // 48, which could raise IndexError or drop
    trailing words.
    """
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef, 2))+')')
    indent = '  '
    line_words = []
    for word in tweet_content.split():
        candidate = ' '.join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    if line_words:
        print(indent + ' '.join(line_words))

#--------------------------------------------
# 6330234021 (14.33) 89 (2021-03-01 17:20)

def get_unique(words):
    """Return the items of `words` without repeats, in original order."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct += [item]
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The intersection and union were built as plain lists, so repeated
    words were counted several times; sets are used instead.  Returns 0.0
    when both lists are empty rather than dividing by zero.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  Asking for more results than there are matches now returns
    the matches that exist instead of raising IndexError, and each
    coefficient is computed once rather than twice.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily up to
    `print_width` columns.  The width test now measures the line being
    built; before, it looked only at the current word's length, so the
    text was effectively never wrapped.
    """
    print(' ')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(' '):
        candidate = ' '.join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + ' '.join(line_words))

#--------------------------------------------
# 6330235621 (18.50) 90 (2021-03-01 03:08)

def get_unique(words):
    """Return each distinct word once, in order of first appearance."""
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    Replaces the copy-and-compare lists (which counted repeated words more
    than once) with set operations.  Two empty lists yield 0.0 instead of
    ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  When fewer than `n` tweets match, the shorter list is
    returned (the index loop used to raise IndexError), and the
    coefficient is computed once per tweet.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, a '#id (score)' header and the wrapped tweet.

    Each word is followed by one space.  The pending line (two-space
    indent included) is printed and restarted when the next word would
    make it longer than `print_width`.
    """
    print(" ")
    print("#"+str(tweet_id)+" "+"("+str(round(jc_coef,2))+")")
    line = "  "
    for word in tweet_content.split(" "):
        if len(line) + len(word) <= print_width:
            line += word + " "
        else:
            print(line)
            line = "  " + word + " "
    print(line)
#--------------------------------------------
# 6330236221 (17.97) 91 (2021-02-28 22:45)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order."""
    seen = []
    for token in words:
        if token in seen:
            continue
        seen.append(token)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are converted to sets so duplicate words cannot distort the
    len1 + len2 - common denominator.  Returns 0.0 when both lists are
    empty instead of raising ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  A single keyed sort replaces the sort-inside-the-loop and
    the negate / re-sort / negate passes; unused locals are gone.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Words are joined with single spaces behind a two-space indent.  A line
    may now be exactly `print_width` columns long; the earlier strict `<`
    comparison wrapped one column too early.
    """
    print("\n"+"#"+str(tweet_id),"("+str(round(jc_coef,2))+")")
    line = " "
    for word in tweet_content.split(" "):
        candidate = line + " " + word
        if len(candidate) <= print_width:
            line = candidate
        else:
            print(line)
            line = "  " + word
    print(line)

















#--------------------------------------------
# 6330238521 (18.01) 92 (2021-03-01 22:45)

def get_unique(words):
    """Return the items of `words` without repeats, in original order."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct += [item]
    return distinct
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The numerator used to count every occurrence in `words_1`, so repeated
    words could give a value above 1; both lists are now reduced to sets.
    Two empty lists yield 0.0 instead of ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Tweets with a coefficient of 0 are now excluded.  Equal coefficients
    are ordered by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Words are joined with single spaces behind a two-space indent; a new
    line begins when the pending line plus the next word reaches
    `print_width` before the joining space is added.
    """
    words = tweet_content.split(' ')
    print("")
    print("#"+str(tweet_id)+" "+"("+str(round(jc_coef,2))+")")
    line = ' '
    for word in words:
        if len(line) + len(word) >= print_width:
            print(line)
            line = '  ' + word
        else:
            line = line + ' ' + word
    print(line)

#--------------------------------------------
# 6330239121 (17.87) 93 (2021-02-28 20:13)

def get_unique(words):
    """Return each distinct word once, in order of first appearance."""
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Set operations replace the index loop over the shorter list, so
    repeated words no longer inflate the result.  Two empty lists give 0.0
    rather than a ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  One keyed sort replaces the reverse sort followed by
    repeated adjacent-swap passes over the whole list.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily up to
    `print_width` columns; each word is emitted exactly once.  The earlier
    version re-scanned the whole word list while a character counter was
    still large and then printed a slice of the raw text, which produced
    duplicated and truncated output.
    """
    print("\n#" + str(tweet_id) + " " + "(" + str(round(jc_coef,2)) + ")")
    indent = "  "
    line_words = []
    for word in tweet_content.split(" "):
        candidate = " ".join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + " ".join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + " ".join(line_words))
#--------------------------------------------
# 6330240721 (19.19) 94 (2021-02-26 22:10)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order.

    The unused `NCT` accumulator (filled on every iteration but never read)
    has been removed; the returned list is unchanged.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The earlier count treated any word seen twice in the concatenation as
    shared, so a word repeated inside one list was wrongly counted as
    common.  Both lists are now reduced to sets.  Two empty lists yield
    0.0 instead of ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  Coefficients are ranked and returned at full precision:
    rounding to two decimals before sorting made distinct scores tie.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    The text is split on whitespace and packed greedily into lines that
    start with two spaces and stay within `print_width` columns.
    """
    print('')
    print('#' + str(tweet_id),'('+str(round(jc_coef,2))+')')
    pending = []
    for word in tweet_content.split():
        if pending and 2 + len(' '.join(pending)) + 1 + len(word) > print_width:
            print('  '+' '.join(pending))
            pending = []
        pending.append(word)
    print('  '+' '.join(pending))

#--------------------------------------------
# 6330241321 (20.00) 95 (2021-03-01 23:59)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order."""
    seen = []
    for token in words:
        if token in seen:
            continue
        seen.append(token)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Inputs are converted to sets so duplicate words cannot distort the
    len1 + len2 - common denominator.  Returns 0.0 when both lists are
    empty instead of raising ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, jaccard] pairs with a positive score.

    Ordering: coefficient descending, then tweet id ascending.
    """
    coefficients = (jaccard(tweet_words, norm_query) for tweet_words in norm_tweets)
    # Negated score lets an ascending sort produce the final ranking.
    ranked = sorted(
        [-coef, idx] for idx, coef in enumerate(coefficients) if coef > 0
    )
    return [[idx, -neg_coef] for neg_coef, idx in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    The pending line holds ' word' pieces and is printed with one more
    leading space, giving a two-space indent; it is flushed when the next
    piece would push the printed line past `print_width`.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef , 2)) + ")")
    line = ""
    for word in tweet_content.split(" "):
        piece = " " + word
        if len(line) + len(piece) > print_width - 1:
            print(" " + line)
            line = piece
        else:
            line += piece
    if line:
        print(" " + line)

#--------------------------------------------
# 6330242021 (11.67) 96 (2021-03-01 17:00)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order.

    The previous loop condition (`1 < i < len(words)` with i starting at
    0) never ran, so a sorted copy still containing every duplicate was
    returned.  Items must be hashable.
    """
    # dict preserves insertion order, so this keeps the first occurrence.
    unique_words = list(dict.fromkeys(words))
    return unique_words
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    Returns 0.0 when both lists are empty; the old code left the result
    variable unassigned in that case and raised UnboundLocalError.  Inputs
    are treated as sets so repeated words are counted once.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept.  Results are ordered by
    coefficient descending and then tweet id ascending.  The earlier code
    re-sorted by id and ran a single swap pass on every iteration, which
    does not yield a full ordering.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily up to
    `print_width` columns.  A word longer than the width is printed on a
    line of its own; previously such a word was never consumed and the
    loop ran forever.  The indent is now two spaces instead of four.
    """
    print('\n'+'#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(' '):
        candidate = ' '.join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + ' '.join(line_words))

#--------------------------------------------
# 6330243621 (19.82) 97 (2021-03-01 23:36)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order.

    The earlier loop started at index 1 and compared neighbours, so it
    returned [] for a one-item list, emitted the first two items in
    swapped order and could drop the first word altogether.  Items must be
    hashable.
    """
    # dict preserves insertion order, so this keeps the first occurrence.
    unique_words = list(dict.fromkeys(words))
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Shared words were stored wrapped in one-element lists, so the
    "already counted" check compared a string with lists and never
    matched; repeated words were counted several times.  Sets are used
    instead.  An empty union yields 0, as before.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, jaccard] pairs with a positive score.

    Ordering: coefficient descending, then tweet id ascending.
    """
    pairs = []
    for idx, tweet_words in enumerate(norm_tweets):
        score = jaccard(tweet_words, norm_query)
        if score > 0:
            pairs.append([idx, score])
    ordered = sorted(pairs, key=lambda pair: (-pair[1], pair[0]))
    return ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily up to
    `print_width` columns.  Two defects are fixed: the overflow word was
    taken off the line with list.remove(), which deletes the first equal
    word rather than the one just added, and every word after an over-long
    word was skipped.
    """
    print()
    print('#'+str(tweet_id), '('+str(round(jc_coef,2))+')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(' '):
        candidate = ' '.join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + ' '.join(line_words))

#--------------------------------------------
# 6330245921 (18.98) 98 (2021-03-01 16:08)

def get_unique(words):
    """Return each distinct word once, in order of first appearance."""
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The numerator used to count every occurrence in `words_1`, so repeated
    words could give a value above 1; both lists are now reduced to sets.
    Two empty lists yield 0.0 instead of ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, jaccard] pairs with a non-zero score.

    Ordering: coefficient descending, then tweet id ascending.
    """
    # Negated score lets an ascending sort produce the final ranking.
    ranked = sorted(
        [-jaccard(tweet_words, norm_query), idx]
        for idx, tweet_words in enumerate(norm_tweets)
    )
    top = [[idx, -neg_coef] for neg_coef, idx in ranked if neg_coef != 0]
    return top[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily up to
    `print_width` columns.  The indent was built and counted toward the
    width but then removed again by strip() when printing; lines are now
    printed with the indent in place.
    """
    print()
    header_score = '(' + str(round(jc_coef, 2)) + ')'
    header_id = '#' + str(tweet_id)
    print(header_id, header_score)
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + len(' ' + word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330246521 (17.92) 99 (2021-02-26 22:11)

def get_unique(words):
    """Return the items of `words` without repeats, in original order."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct += [item]
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The caller's lists are no longer sorted in place, and the coefficient
    is returned at full precision (rounding to two decimals belongs to
    display code and made distinct scores tie).  Two empty lists give 0.0
    instead of ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    The coefficient (shared words / all distinct words) is computed inline
    with sets.  Only positive coefficients are kept; ties are ordered by
    ascending tweet id.  Compared with the earlier version, the input
    lists are no longer sorted in place, scores are not rounded before
    ranking, and no local shadows the `jaccard` function.
    """
    query_set = set(norm_query)
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        tweet_set = set(tweet_words)
        union_size = len(tweet_set | query_set)
        if union_size == 0:
            continue
        coef = len(tweet_set & query_set) / union_size
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Words are packed greedily; each printed line is the two-space indent
    followed by at most `print_width` - 2 characters of text (unless a
    single word is longer).
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef,2)) + ")")
    words = tweet_content.split(' ')
    limit = print_width - 2
    line = words[0]
    for word in words[1:]:
        extended = line + " " + word
        if len(extended) <= limit:
            line = extended
            continue
        print("  " + line)
        line = word
    print("  " + line)

#--------------------------------------------
# 6330247121 (18.22) 100 (2021-03-01 22:12)

def get_unique(words):
    """Return the distinct items of `words`, keeping first-occurrence order."""
    seen = []
    for token in words:
        if token in seen:
            continue
        seen.append(token)
    return seen
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    Both lists are reduced to sets, so a word repeated in `words_1` is
    counted once.  The empty-union case is tested explicitly and returns
    0.0; the bare `except:` that used to cover it would also have hidden
    unrelated errors.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Every zero-score tweet is filtered out (before, zeros were only
    dropped when the best score was itself 0), and an empty tweet list
    returns [] instead of raising IndexError.  Ties are ordered by
    ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily so that
    the whole line, indent included, is at most `print_width` columns; the
    indent was previously left out of the count, letting lines run one
    column over.  The header is printed as '#id (score)' without a space
    after '#', matching the other implementations in this file.
    """
    print()
    print("#{} ({})".format(tweet_id, round(jc_coef,2)))
    indent = "  "
    line_words = []
    for word in tweet_content.split(" "):
        candidate = " ".join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + " ".join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + " ".join(line_words))







#--------------------------------------------
# 6330248821 (15.97) 101 (2021-03-01 23:42)

def get_unique(words):
    """Return the items of `words` without repeats, in original order."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct += [item]
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Duplicates inside either list are ignored.  Two empty lists give 0.0
    instead of raising ZeroDivisionError.
    """
    first = set(words_1)
    union = first.union(words_2)
    if not union:
        return 0.0
    jaccard_coef = len(first.intersection(words_2)) / len(union)
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  The repeated max() selection raised ValueError for an empty
    tweet list, returned zero-score tweets and repeated the same id once
    `n` exceeded the number of tweets; a filter plus one sort avoids all
    three.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, a '#id (score)' header and the wrapped tweet.

    Content lines start with two spaces and are filled greedily up to
    `print_width` columns.  The earlier branch conditions used strict `<`
    and `>` only, so a word that made the line exactly `print_width` wide
    matched no branch and vanished; after a wrap the next word was also
    appended without a separating space.
    """
    print(' ')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(' '):
        candidate = ' '.join(line_words + [word])
        if line_words and len(indent) + len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + ' '.join(line_words))
#--------------------------------------------
# 6330249421 (20.00) 102 (2021-03-01 19:11)

def get_unique(words):
    """Return each distinct word once, in order of first appearance."""
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The numerator used to count every occurrence in `words_2`, so repeated
    words could give a value above 1; both lists are now reduced to sets.
    Two empty lists yield 0.0 instead of ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    jaccard_coef = len(set_1 & set_2) / union_size
    return jaccard_coef
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, jaccard] pairs, most similar first.

    Only positive coefficients are kept; ties are ordered by ascending
    tweet id.  The list is sorted once after it is built, not after every
    append.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    The header is ``#<id> (<coef rounded to 2 decimals>)``; every text line
    is indented by two spaces. Words are split on single spaces.
    """
    pieces = tweet_content.split(" ")
    print(" ")
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    current = []
    for piece in pieces:
        joined = " ".join(current)
        # 3 = two-space indent plus the space before the new word.
        if 3 + len(joined) + len(piece) > print_width:
            print(" ", joined.strip())
            current = [piece]
        else:
            current.append(piece)
    print(" ", " ".join(current).strip())



#--------------------------------------------
# 6330250021 (18.33) 103 (2021-03-01 11:57)

def get_unique(words):
    """Return each distinct item of ``words`` once, in first-seen order."""
    distinct = []
    for candidate in words:
        already_there = candidate in distinct
        if not already_there:
            distinct += [candidate]
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1) + len(words_2) - count``; 0 when both lists are
        empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    # Only two empty lists give a zero denominator.
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Tweets are ranked by descending coefficient, then ascending id; pairs
    with a zero coefficient inside the first ``n`` ranked entries are
    dropped.
    """
    ranked = sorted(
        [-jaccard(words, norm_query), idx]
        for idx, words in enumerate(norm_tweets)
    )
    return [[idx, -negated] for negated, idx in ranked[:n] if negated != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped at spaces.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.

    Each line is cut at the last space that keeps it within the width. A
    word too long to fit is printed whole on its own line.
    """
    print('')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    start = 0
    while start + print_width - 2 < len(tweet_content):
        # Characters that may still go on this line (width minus indent),
        # plus one so a space right after the limit can serve as the cut.
        window = tweet_content[start:start + print_width - 1]
        if ' ' in window:
            cut = start + window.rindex(' ')
        else:
            # No break point in the window: extend to the next space,
            # searched from ``start`` so the index is absolute.
            cut = tweet_content.find(' ', start)
            if cut == -1:
                break  # no space left; the remainder is printed below
        print(' ', tweet_content[start:cut])
        start = cut + 1
    print(' ', tweet_content[start:])

#--------------------------------------------
# 6330251621 (20.00) 104 (2021-02-28 19:42)

def get_unique(words):
    """Return the distinct items of ``words``, keeping first-seen order."""
    kept = []
    for entry in words:
        if entry in kept:
            continue
        kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by the
        number of distinct items across both lists; 0 when both are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    # Test the list itself for emptiness; comparing it with 0 is never true.
    if not union:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    The coefficient is computed inline: words of the tweet found in the
    query, divided by the number of distinct words of tweet and query.

    Returns:
        ``[tweet_id, coef]`` pairs with ``coef > 0``, ordered by descending
        coefficient and then ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        union = []
        for word in tweet_words + norm_query:
            if word not in union:
                union.append(word)
        if not union:
            continue
        shared = sum(1 for word in tweet_words if word in norm_query)
        coef = shared / len(union)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Lines are indented by two spaces; each buffered word is followed by a
    space, so printed lines end with a trailing space. A word longer than
    the usable width is printed alone when the buffer is empty.
    """
    print(' ')
    print('#' + str(int(tweet_id)) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    usable = print_width - 2
    line = ''
    for word in tweet_content.split(' '):
        if not line and len(word) > usable:
            print('  ' + word)
        elif len(word) <= usable and len(line) + len(word) <= usable:
            line += word + ' '
        else:
            print('  ' + line)
            line = word + ' '
    print('  ' + line)

#--------------------------------------------
# 6330252221 (19.48) 105 (2021-03-01 23:22)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items.

    Returns:
        A new sorted list with duplicates removed. ``words`` itself is left
        untouched.
    """
    unique_words = []
    # sorted() works on a copy, so the caller's list keeps its order.
    for word in sorted(words):
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words

#------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        ``shared / (shared + unshared)`` where ``shared`` counts items of
        ``words_1`` found in ``words_2`` and ``unshared`` counts items
        present in only one list. Returns the integer 0 when nothing is
        shared, which includes the case of two empty lists.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    # With nothing shared the result is 0; returning early also avoids the
    # 0/0 division for two empty lists.
    if shared == 0:
        return 0
    unshared = (len(words_1) - shared) + sum(
        1 for word in words_2 if word not in words_1
    )
    return shared / (shared + unshared)

#-------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Only tweets with a positive coefficient are kept; ordering is by
    descending coefficient, then ascending tweet id.
    """
    positives = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            positives.append([idx, coef])
    positives.sort(key=lambda pair: (-pair[1], pair[0]))
    return positives[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Words are buffered; once the buffer no longer fits, everything except
    the most recent word is printed and that word starts the next line.
    Text lines are printed with a one-space indent.
    """
    pieces = tweet_content.split(" ")
    print(" ")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    pending = []
    char_total = 0
    for pos, piece in enumerate(pieces):
        # len(pending) + 1 + char_total is the width test applied to the
        # words buffered so far.
        if len(pending) + 1 + char_total > print_width:
            print(" " + " ".join(pending[:-1]))
            pending = [pending[-1]]
            char_total = len(pieces[pos - 1])
        pending.append(piece)
        char_total += len(piece)
    if len(pending) + 1 + char_total <= print_width:
        print(" " + " ".join(pending))
    else:
        print(" " + " ".join(pending[:-1]))
        print(" " + pending[-1])


#--------------------------------------------
# 6330253921 (18.90) 106 (2021-03-01 01:32)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    result = []
    for item in words:
        # An item already seen earlier in ``words`` is already in
        # ``result``.
        if item not in result:
            result.append(item)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists, rounded to 6 places.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by the
        number of distinct items across both lists, rounded to 6 decimals;
        0 when both lists are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    # Empty union means both inputs are empty: no similarity, no division.
    if not union:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return round(shared / len(union), 6)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a non-zero coefficient,
        ordered by descending coefficient, then ascending tweet id. The
        coefficient is exactly the value ``jaccard`` returned.
    """
    ranked = []
    for tweet_id, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef != 0:
            ranked.append([tweet_id, coef])
    # Sorting with a key keeps the stored coefficient untouched; deriving
    # it back from 1 - (1 - coef) would add floating-point error.
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are separated by single spaces.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.
    """
    words = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    last = len(words) - 1
    for pos, word in enumerate(words):
        line += ' ' + word
        if pos == last:
            print('  ' + line.strip())
            break
        # The printed line is len(line) + 1 columns long (two-space indent,
        # leading space stripped). Adding the next word costs
        # 1 + len(next), so it fits only if
        # len(line) + len(next) <= print_width - 2.
        if len(line) + len(words[pos + 1]) > print_width - 2:
            print('  ' + line.strip())
            line = ''


#--------------------------------------------
# 6330254521 (17.42) 107 (2021-03-01 06:08)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts items of ``words_1`` found in ``words_2``; the
    denominator is ``len(words_1) + len(words_2)`` minus that count. A zero
    denominator yields 0.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    denominator = len(words_1) + len(words_2) - shared
    return shared / denominator if denominator != 0 else 0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a positive coefficient,
        ordered by descending coefficient and then ascending tweet id. An
        empty tweet list, or one without any match, gives ``[]``.
    """
    scored = []
    for tweet_id, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        # Zero-similarity tweets are never results, even when fewer than n
        # tweets match.
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Two blank lines precede the header. Text lines carry a two-space
    indent; every word except the last is followed by a space, so wrapped
    lines end with a trailing space.
    """
    tokens = tweet_content.split(" ")
    print("\n")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    limit = print_width - 2
    pieces = []
    used = 0
    final_pos = len(tokens) - 1
    for pos, token in enumerate(tokens):
        used += len(token)
        fits = used < limit
        if pos != final_pos:
            if fits:
                used += 1
            else:
                print("  " + "".join(pieces))
                pieces = []
                used = len(token) + 1
            pieces += [token, " "]
        elif fits:
            pieces.append(token)
            print("  " + "".join(pieces))
        else:
            print("  " + "".join(pieces))
            print("  " + token)


#--------------------------------------------
# 6330255121 (20.00) 108 (2021-03-01 23:51)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for token in words:
        if token in distinct:
            continue
        distinct.append(token)
    return distinct
#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1) + len(words_2) - count``; 0 when both lists are
        empty.
    """
    dup = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - dup
    # The denominator is zero only when both lists are empty.
    if union_size == 0:
        return 0
    return dup / union_size


#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Tweets with a zero coefficient are skipped. Ordering is by descending
    coefficient, then ascending tweet id.
    """
    ranked = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef != 0:
            ranked.append([-coef, idx])
    ranked.sort()
    return [[idx, -negated] for negated, idx in ranked[:n]]


#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Each word is buffered with a leading space and the buffer is printed
    behind one more space, giving a two-space indent.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width - 1:
            print(' ' + line)
            line = ''
        line += ' ' + word
    if line:
        print(' ' + line)













#--------------------------------------------
# 6330256821 (5.33) 109 (2021-03-01 23:15)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for entry in words:
        if entry in seen:
            continue
        seen.append(entry)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Number of items of ``words_1`` found in ``words_2`` divided by the
        size of the union, taken as the items of ``words_1`` missing from
        ``words_2`` plus all of ``words_2``. Returns 0 when both lists are
        empty.
    """
    # Membership test per word; comparing a word with the whole list can
    # never match.
    inter = [word for word in words_1 if word in words_2]
    only_first = [word for word in words_1 if word not in words_2]
    union_size = len(only_first) + len(words_2)
    if union_size == 0:
        return 0
    return len(inter) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a positive coefficient,
        ordered by descending coefficient and then ascending tweet id.
    """
    top_n = []
    for tweet_id, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are separated by single spaces.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.
    """
    print()
    # str() is required: the id and the rounded coefficient are numbers.
    print('#' + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line_words = []
    line_len = 2  # the two-space indent counts towards the width
    for word in tweet_content.split(' '):
        needed = len(word) + 1 if line_words else len(word)
        if line_words and line_len + needed > print_width:
            print('  ' + ' '.join(line_words))
            line_words = [word]
            line_len = 2 + len(word)
        else:
            line_words.append(word)
            line_len += needed
    print('  ' + ' '.join(line_words))

#--------------------------------------------
# 6330257421 (18.33) 110 (2021-02-28 23:49)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1) + len(words_2) - count``; 0 when both lists are
        empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    # Guard the only zero-denominator case: both lists empty.
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a positive coefficient,
        ordered by descending coefficient and then ascending tweet id.
    """
    top_n = []
    for tweet_id, words in enumerate(norm_tweets):
        # Compute the coefficient once and reuse it for test and storage.
        coef = jaccard(words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped by scanning characters.

    Prints an empty line, then ``#<id> (<coef rounded to 2 decimals>)``,
    then slices of ``tweet_content``, each behind a two-space indent.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text (a string, indexed character by character).
        jc_coef: similarity coefficient.
        print_width: target line width; ``print_width - 2`` is the first
            candidate break index.
    """
    print('')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    # A leading 'RT @POTUS:' (10 characters) is dropped together with the
    # character that follows it.
    if(tweet_content[:10]=='RT @POTUS:'):
        tweet_content=tweet_content[11:]
    # j: index where the current output line starts.
    # i: candidate index at which to break the current line.
    j=0
    i=print_width-2
    while(i<len(tweet_content)):
        k=j
        # Walk i backwards until it sits on a space.
        while(tweet_content[i]!=' '):
            i=i-1
            if(i==-1):
                # No space found back to the start of the string: look
                # forward from j for the next space instead.
                # NOTE(review): the check is against the string start, not
                # against j, and the forward scan has no end-of-string
                # guard -- confirm inputs always contain a space here.
                while(tweet_content[k]!=' '):
                    k=k+1
                break;
        if(k!=j):
            # Forward scan moved k: print up to that space, skip the run of
            # spaces after it, and set the next candidate break.
            print('  '+tweet_content[j:k])
            j=k+1
            while(tweet_content[j]==' '):
                j=j+1
            i=k+print_width
        else:
            # Break at the space found by the backward scan.
            print('  '+tweet_content[j:i])
            j=i
            # Move j back over the spaces that precede the break.
            while(tweet_content[j]==' '):
                j=j-1
            # Right-hand side uses the old values: the new i becomes
            # print_width + j, and the new j becomes the old i + 1.
            i,j=i+print_width-i+j,i+1
    else:
        # Runs when the loop condition becomes false: print what is left.
        print('  '+tweet_content[j:])
# 6330258021 (20.00) 111 (2021-02-25 23:56)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Number of distinct items common to both lists divided by the number
        of distinct items across both lists; 0 when both lists are empty.
    """
    if not words_1 and not words_2:
        return 0
    union = get_unique(words_1 + words_2)
    # Computed once here rather than on every loop iteration.
    unique_2 = get_unique(words_2)
    intersect = [word for word in get_unique(words_1) if word in unique_2]
    return len(intersect) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Only positive coefficients are kept. Tweets are grouped by coefficient
    in descending order; ids are ascending inside a group.
    """
    scored = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            scored.append([idx, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    A blank line and the header ``#<id> (<coef>)`` come first; text lines
    are indented by two spaces.
    """
    print('\n#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = []
    for word in tweet_content.split(' '):
        # 3 = two-space indent plus the space before the new word.
        if len(' '.join(current)) + len(word) + 3 <= print_width:
            current.append(word)
        else:
            print(' ', ' '.join(current).strip())
            current = [word]
    print(' ', ' '.join(current).strip())
#--------------------------------------------
# 6330259721 (20.00) 112 (2021-02-28 23:14)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items.

    Returns:
        A new sorted list with duplicates removed. ``words`` is not
        modified; no sentinel is appended to it and it is not reordered.
    """
    unique_words = []
    # sorted() copies the input, so the caller's list stays as it was.
    for word in sorted(words):
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1) + len(words_2) - count``; 0 when both lists are
        empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    # Zero only when both lists are empty.
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs taken from the first ``n`` ranked
        tweets (descending coefficient, then ascending id), with
        zero-coefficient pairs removed.
    """
    # Negated coefficient first, so a single ascending sort ranks best
    # first. The sort runs once, after every tweet has been scored.
    ranked = sorted(
        [-jaccard(words, norm_query), tweet_id]
        for tweet_id, words in enumerate(norm_tweets)
    )
    top = [[tweet_id, -negated] for negated, tweet_id in ranked[:n]]
    return [pair for pair in top if pair[1] > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Each word is buffered with a leading space and the buffer is printed
    behind one more space, giving a two-space indent.
    """
    print(" ")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = ""
    for word in tweet_content.split(" "):
        if len(line) + len(word) + 1 > print_width - 1:
            print(" " + line)
            line = ""
        line += " " + word
    if len(line) != 0:
        print(" " + line)




#--------------------------------------------
# 6330260221 (16.44) 113 (2021-03-01 00:23)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists, rounded to 2 places.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1) + len(words_2) - count``, rounded to 2 decimals;
        0 when both lists are empty. Nothing is printed.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    # Zero only when both lists are empty.
    if union_size == 0:
        return 0
    return round(shared / union_size, 2)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    The coefficient is computed inline and rounded to 2 decimals: words of
    the tweet found in the query, divided by
    ``len(tweet) + len(query) - shared``.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coef]`` pairs whose rounded coefficient is non-zero,
        ordered by descending coefficient and then ascending tweet id.
    """
    scored = []
    for tweet_id, words in enumerate(norm_tweets):
        shared = sum(1 for word in words if word in norm_query)
        union_size = len(words) + len(norm_query) - shared
        # An empty tweet against an empty query has no similarity.
        coef = round(shared / union_size, 2) if union_size else 0.0
        if coef != 0.0:
            scored.append([tweet_id, coef])
    # One full sort orders ties by id regardless of how many there are.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are separated by single spaces.
        jc_coef: similarity coefficient; shown rounded to 2 decimals
            (a value already rounded is unchanged).
        print_width: maximum line length including the two-space indent.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line_words = []
    line_len = 2  # the two-space indent counts towards the width
    for word in tweet_content.split(' '):
        needed = len(word) + 1 if line_words else len(word)
        if line_words and line_len + needed > print_width:
            print('  ' + ' '.join(line_words))
            line_words = [word]
            line_len = 2 + len(word)
        else:
            line_words.append(word)
            line_len += needed
    # The final buffer is always flushed, so the last word cannot be lost.
    print('  ' + ' '.join(line_words))




#--------------------------------------------
# 6330261921 (20.00) 114 (2021-02-26 21:50)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items.

    Returns:
        A new sorted list with duplicates removed; ``words`` is left
        unmodified.
    """
    unique_words = []
    # Iterate over a sorted copy so the caller's list is not reordered.
    for word in sorted(words):
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by the
        count of items of ``words_1`` missing from ``words_2`` plus
        ``len(words_2)``; 0 when both lists are empty.
    """
    shared = 0
    only_first = 0
    for word in words_1:
        if word in words_2:
            shared += 1
        else:
            only_first += 1
    union_size = only_first + len(words_2)
    # Zero only when both lists are empty.
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Only positive coefficients are kept. The sort is by coefficient,
    descending; tweets with equal coefficients stay in tweet-id order.
    """
    scored = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            scored.append([idx, coef])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Prints a blank line and ``#<id> (<coef rounded to 2 decimals>)``, then
    the words of ``tweet_content`` on lines that start with two spaces.
    The gap between two words on the same line is rebuilt with as many
    spaces as characters were found between them in the source text.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text.
        jc_coef: similarity coefficient.
        print_width: maximum length of a printed line.
    """
    print('\n'+'#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    # a: the part of the text not yet consumed; g: words split on whitespace.
    a=tweet_content
    g=tweet_content.split()
    # NOTE(review): g[0] raises IndexError when tweet_content holds no words.
    t='  '+g[0]
    for i in range(1,len(g)):
        # f: text after the first occurrence of the previous word in a.
        f=a.split(g[i-1],1)[1]
        # b[0]: text before the first occurrence of the current word in f;
        # its length k is used as the number of separating spaces.
        b=f.split(g[i])
        k=len(b[0])
        if len(t+' '*k+g[i])>print_width:
            # The word does not fit: flush the line and start a new one.
            print(t)
            t='  '+g[i]
        else:t+=' '*k+g[i]
        # Drop the previous word and the gap from the remaining text.
        # NOTE(review): presumably a starts at the previous word here --
        # leading whitespace in tweet_content would shift this; confirm.
        a=a[len(g[i-1])+k:]
    print(t)
#--------------------------------------------
# 6330262521 (19.95) 115 (2021-03-01 21:39)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items.

    Returns:
        A new sorted list with duplicates removed. The input list is never
        reordered and is never returned itself, also when it is empty.
    """
    ordered = sorted(words)  # copy: the caller's list keeps its order
    unique_words = []
    for pos, word in enumerate(ordered):
        # Keep the last item of each run of equal neighbours.
        if pos == len(ordered) - 1 or word != ordered[pos + 1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Number of distinct items of ``words_1`` found in ``words_2``
        divided by the number of distinct items across both lists; 0 when
        both lists are empty.
    """
    # Empty union: nothing to compare and nothing to divide by.
    if not words_1 and not words_2:
        return 0
    union = get_unique(words_1 + words_2)
    common = get_unique([word for word in words_1 if word in words_2])
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Tweets with a zero coefficient are skipped. Ordering is by descending
    coefficient, then ascending tweet id.
    """
    ranked = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef != 0:
            ranked.append([-coef, idx])
    ranked.sort()
    return [[idx, -negated] for negated, idx in ranked[:n:]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Two blank lines precede the header. The whole text is assembled into
    one string with embedded newlines and two-space indents, then printed
    once with surrounding whitespace stripped.
    """
    print('\n')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    budget = print_width - 2
    used = 0
    text = ''
    for word in tweet_content.split():
        if used + len(word) > budget:
            text += '\n' + '  '
            used = 0
        text += word + ' '
        used += len(word) + 1
    print('  ' + text.strip())


#--------------------------------------------
# 6330263121 (13.33) 116 (2021-03-01 22:08)

def get_unique(words):
    """Return the distinct items of ``words``.

    An item is kept at its last occurrence, so the result follows the
    order in which the items appear for the final time.
    """
    result = []
    for pos, item in enumerate(words):
        if item in words[pos + 1:]:
            continue
        result.append(item)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_2`` found in ``words_1`` divided by
        ``len(words_1)`` plus the count of items of ``words_2`` missing
        from ``words_1``; 0 when both lists are empty.
    """
    union_size = len(words_1)
    shared = 0
    for word in words_2:
        if word in words_1:
            shared += 1
        else:
            union_size += 1
    # Computed after the loop so an empty words_2 still yields a result.
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Each tweet is de-duplicated with ``get_unique`` before scoring and the
    coefficient is rounded to 20 decimals.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of ranked tweets to consider.

    Returns:
        ``[tweet_id, coef]`` pairs from the first ``n`` ranked tweets
        (descending coefficient, then ascending id) with zero coefficients
        dropped. Fewer than ``n`` tweets, or none at all, is handled.
    """
    ranked = []
    for tweet_id, words in enumerate(norm_tweets):
        coef = round(jaccard(get_unique(words), norm_query), 20)
        ranked.append([-coef, tweet_id])
    ranked.sort()  # once, after all tweets are scored
    top_n = []
    # Slicing tolerates n larger than the number of tweets.
    for negated, tweet_id in ranked[:max(n, 0)]:
        if negated != 0:
            top_n.append([tweet_id, -negated])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are separated by single spaces.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.
    """
    shown_coef = round(jc_coef, 2)
    print("")
    print(f'#{tweet_id} ({shown_coef})')
    line_words = []
    line_len = 2  # the two-space indent counts towards the width
    for word in tweet_content.split(' '):
        # The width test uses the words buffered for the current line, not
        # the total number of words in the tweet.
        needed = len(word) + 1 if line_words else len(word)
        if line_words and line_len + needed > print_width:
            print("  " + " ".join(line_words))
            line_words = [word]
            line_len = 2 + len(word)
        else:
            line_words.append(word)
            line_len += needed
    print("  " + " ".join(line_words))

#--------------------------------------------
# 6330264821 (18.50) 117 (2021-03-01 19:41)

def get_unique(words):
    """Return the distinct words, ordered by length and then alphabetically.

    Args:
        words: list of strings.

    Returns:
        A new list with each distinct word once, sorted by ``(len(word),
        word)``. ``words`` is not modified and an empty input gives ``[]``.
    """
    distinct = []
    # sorted() copies, so the caller's list is neither reordered nor grown.
    for word in sorted(words):
        if not distinct or distinct[-1] != word:
            distinct.append(word)
    distinct.sort(key=lambda word: (len(word), word))
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        ``c / (len(words_1) + len(words_2) - c)`` where ``c`` counts the
        equal pairs between the two lists; 0 when both lists are empty.
    """
    # Two empty lists would give 0/0.
    if not words_1 and not words_2:
        return 0
    matches = 0
    for left in words_1:
        for right in words_2:
            if left == right:
                matches += 1
    return matches / (len(words_1) + len(words_2) - matches)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a positive coefficient,
        ordered by descending coefficient and then ascending tweet id.
        Tweets without a match are skipped individually, so real matches
        are kept even when fewer than ``n`` tweets match.
    """
    top_n = []
    for tweet_id, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are separated by single spaces.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.

    Wrapped lines keep the trailing space that follows each buffered word.
    """
    indent = '  '
    line = indent
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef, 2))+')')
    words = tweet_content.split(' ')
    last = len(words) - 1
    for pos, word in enumerate(words):
        # Flush only a non-empty line; an over-long first word must not
        # produce a blank line.
        if len(line) + len(word) > print_width and line != indent:
            print(line)
            line = indent
        # The last word is recognised by position: an earlier word equal to
        # it must not end the output early.
        if pos == last:
            print(line + word)
        else:
            line += word + ' '

#--------------------------------------------
# 6330265421 (20.00) 118 (2021-03-01 12:16)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1) + len(words_2) - count``; 0 when both lists are
        empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    # Zero only when both lists are empty.
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    Only positive coefficients are kept. Ordering is by descending
    coefficient, then ascending tweet id.
    """
    ranked = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            ranked.append([-coef, idx])
    ranked.sort()
    return [[idx, -negated] for negated, idx in ranked][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Lines start with two spaces and each word is followed by a space, so
    every printed text line ends with a trailing space.
    """
    print('\n#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    current = indent
    for word in tweet_content.split(' '):
        if len(current) + len(word) > print_width:
            print(current)
            current = indent
        current += word + ' '
    print(current)

#--------------------------------------------
# 6330266021 (0.00) 119 (2021-03-01 23:57)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by the
        number of distinct items across both lists; 0 when both are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    # Test the list for emptiness; a list never compares equal to 0.
    if not union:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(union)
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are separated by single spaces.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line_words = []
    line_len = 2  # the two-space indent counts towards the width
    # Work on the words returned by split(); iterating the string itself
    # would walk single characters.
    for word in tweet_content.split(" "):
        needed = len(word) + 1 if line_words else len(word)
        if line_words and line_len + needed > print_width:
            print("  " + " ".join(line_words))
            line_words = [word]
            line_len = 2 + len(word)
        else:
            line_words.append(word)
            line_len += needed
    print("  " + " ".join(line_words))

#--------------------------------------------
# 6330267721 (12.68) 120 (2021-02-27 17:56)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for item in words:
        if item not in unique_words:
            unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by the
        number of distinct items across both lists; 0 when both are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    # Empty union means both inputs are empty.
    if not union:
        return 0
    count = sum(1 for word in words_1 if word in words_2)
    return count / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a positive coefficient,
        ordered by descending coefficient; equal coefficients are ordered
        by ascending tweet id.
    """
    scored = []
    for tweet_id, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # A plain reverse sort on [coef, id] would put the larger id first
    # among ties; the key sorts ids ascending.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: tweet text; words are split on any whitespace.
        jc_coef: similarity coefficient, shown rounded to 2 decimals.
        print_width: maximum line length including the two-space indent.

    A word longer than the available width is printed alone on its line.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line_words = []
    line_len = 2  # the two-space indent counts towards the width
    for word in tweet_content.split():
        needed = len(word) + 1 if line_words else len(word)
        # Every iteration consumes its word, so an over-long word cannot
        # stall the loop.
        if line_words and line_len + needed > print_width:
            print('  ' + ' '.join(line_words))
            line_words = [word]
            line_len = 2 + len(word)
        else:
            line_words.append(word)
            line_len += needed
    print('  ' + ' '.join(line_words))
#--------------------------------------------
# 6330268321 (20.00) 121 (2021-02-28 16:19)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
#--------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by
        ``len(words_1)`` plus the count of items of ``words_2`` missing
        from ``words_1``; 0 when both lists are empty.
    """
    union_size = len(words_1) + sum(
        1 for word in words_2 if word not in words_1
    )
    # Zero only when both lists are empty.
    if union_size == 0:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / union_size
#---------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard_coef]`` pairs, best first.

    A running shortlist is re-sorted and trimmed after every tweet. Entries
    are stored as ``[coef, -tweet_id]`` so the trimmed, reversed slice has
    the highest coefficient first and the lowest id first among ties.
    """
    best = []
    for idx, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            best.append([coef, -idx])
        best.sort()
        best = best[:-n-1:-1]
    return [[-negated_id, coef] for coef, negated_id in best]
#-------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    Each word is buffered with a leading space and the buffer is printed
    behind one more space, giving a two-space indent.
    """
    words = tweet_content.split(" ")
    line = ""
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    for word in words:
        if len(line) + 1 + len(word) > print_width - 1:
            print(" " + line)
            line = ""
        line += " " + word
    print(" " + line)
#health care policy  5
#COVID economic crisis 3
# american president 5
#--------------------------------------------
# 6330269021 (17.95) 122 (2021-02-26 20:59)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Args:
        words_1: first word list.
        words_2: second word list.

    Returns:
        Count of items of ``words_1`` found in ``words_2`` divided by the
        number of distinct items across both lists; 0 when both are empty.
    """
    # Two empty lists have an empty union; avoid dividing by zero.
    if not words_1 and not words_2:
        return 0
    duplicate_words = [word for word in words_1 if word in words_2]
    return len(duplicate_words) / len(get_unique(words_1 + words_2))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs with a positive coefficient,
        ordered by descending coefficient and then ascending tweet id.
    """
    scored = []
    # enumerate gives each tweet its own position; looking the tweet up
    # with list.index would report the first of several identical tweets.
    for tweet_id, words in enumerate(norm_tweets):
        coef = jaccard(words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its text wrapped to ``print_width``.

    The text is assembled from chunks: the first word as is, later words
    with a leading space, and a newline plus two-space indent whenever the
    running character count exceeds ``print_width - 2``.
    """
    print()
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    chunks = []
    used = 0
    for pos, word in enumerate(tweet_content.split()):
        used += len(word) if pos == 0 else 1 + len(word)
        if used > print_width - 2:
            chunks.append('\n  ' + word)
            used = len(word)
        elif pos == 0:
            chunks.append(word)
        else:
            chunks.append(' ' + word)
    print('  ' + ''.join(chunks))
#--------------------------------------------
# 6330270521 (20.00) 123 (2021-03-01 20:56)

def get_unique(words):
    # Keep only the first occurrence of every word, preserving input order.
    kept = []
    for candidate in words:
        already_there = candidate in kept
        if not already_there:
            kept += [candidate]
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_1`` that occur in ``words_2``.
    When the denominator is zero the result is 0.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    total = len(words_1) + len(words_2) - shared
    if total == 0:
        return 0
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Pairs are ordered by descending coefficient, ties by ascending tweet id.
    The list is sorted once after scoring instead of being re-sorted and
    re-sliced on every loop iteration.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    # A non-positive n selects nothing.
    return scored[:n] if n > 0 else []
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped text.

    The text is split on single spaces and packed into two-space-indented
    lines that stay within ``print_width`` characters.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    pending = ''
    for word in tweet_content.split(' '):
        extended = pending + ' ' + word
        if len(extended) > print_width - 1:
            # Flush the current line, then start a new one with this word.
            print(' ' + pending)
            pending = ' ' + word
        else:
            pending = extended
    print(' ' + pending)

#--------------------------------------------
# 6330271121 (20.00) 124 (2021-02-28 10:43)

def get_unique(words):
    """Return ``words`` without repeats, keeping first-occurrence order."""
    result = []
    for entry in words:
        if entry in result:
            continue
        result.append(entry)
    return result
def jaccard(words_1, words_2):
    """Return overlap / combined size for two word lists (0 if both empty).

    ``overlap`` counts entries of ``words_1`` found in ``words_2``; the
    combined list is ``words_1`` plus the entries of ``words_2`` not yet in it.
    """
    combined = list(words_1)
    for token in words_2:
        if token not in combined:
            combined.append(token)
    if not combined:
        return 0
    overlap = len([token for token in words_1 if token in words_2])
    return overlap / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient, ties by ascending tweet id.  Each
    coefficient is computed once (it used to be computed twice per tweet).
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n] if n > 0 else []
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped text.

    Words (split on single spaces) are appended to an indented line until
    the next one would push it past ``print_width``.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    # One leading space here; the separator added with the first word
    # completes the two-space indent.
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + len(word) + 1 > print_width:
            print(line)
            line = '  ' + word
        else:
            line = line + ' ' + word
    print(line)

#--------------------------------------------
# 6330272821 (11.83) 125 (2021-02-28 23:58)

def get_unique(words):
    # Collect every word the first time it is seen; later repeats are skipped.
    seen_once = []
    for token in words:
        if token in seen_once:
            continue
        seen_once.append(token)
    return seen_once
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 rather than a ZeroDivisionError.  Assumes the
    words are hashable (e.g. strings).
    """
    left, right = set(words_1), set(words_2)
    all_words = left | right
    if not all_words:
        return 0.0
    return len(left & right) / len(all_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    The previous version sorted ascending and therefore returned the least
    similar tweets; results are now ordered by descending coefficient with
    ties broken by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the '#id (coef)' header followed by the word-wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide.  A
    word wider than the line is printed alone.  The previous version reset
    its line buffer on every word and never printed the content.
    """
    indent = "  "
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    current = []
    for word in tweet_content.split(" "):
        candidate = current + [word]
        if current and len(indent) + len(" ".join(candidate)) > print_width:
            print(indent + " ".join(current))
            current = [word]
        else:
            current = candidate
    print(indent + " ".join(current))




#--------------------------------------------
# 6330273421 (15.50) 126 (2021-02-27 01:27)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is left untouched, and a
    one-element list now returns that element (it used to return []).
    """
    unique_words = []
    for word in sorted(words):
        # After sorting, equal items are adjacent: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 rather than a ZeroDivisionError.  Assumes the
    words are hashable (e.g. strings).
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    return len(set_1 & set_2) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best first.

    Every tweet is scored once (previously twice) and the pairs are ordered
    by descending coefficient, ties by ascending tweet id.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide (a
    line that fits exactly is kept).  A word wider than the line is printed
    alone.  The stray ``print(print_width)`` debug output is gone.
    """
    indent = '  '
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = []
    for word in tweet_content.split(' '):
        candidate = current + [word]
        if current and len(indent) + len(' '.join(candidate)) > print_width:
            print(indent + ' '.join(current))
            current = [word]
        else:
            current = candidate
    print(indent + ' '.join(current))

#--------------------------------------------
# 6330274021 (20.00) 127 (2021-02-27 16:31)

def get_unique(words):
    """Return the items of ``words`` with repeats removed, order preserved."""
    unique = []
    for word in words:
        if word in unique:
            continue
        unique.append(word)
    return unique
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Repeated words are counted once.  Two empty inputs give 0.0 instead of
    raising ZeroDivisionError.  Assumes the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    everything = first | second
    if not everything:
        return 0.0
    return len(first & second) / len(everything)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient, ties by ascending tweet id.  The
    list is sorted once with a key instead of being re-sorted inside the
    sign-flipping loop.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces only.  The earlier round trip
    through a Thai letter used as a separator is gone, so words containing
    that letter are no longer broken apart.  Lines are indented by two
    spaces and hold as many words as fit in ``print_width`` characters; a
    word wider than the line is printed alone.
    """
    indent = "  "
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = []
    for word in tweet_content.split(' '):
        candidate = current + [word]
        if current and len(indent) + len(' '.join(candidate)) > print_width:
            print(indent + ' '.join(current))
            current = [word]
        else:
            current = candidate
    print(indent + ' '.join(current))


#--------------------------------------------
# 6330275721 (20.00) 128 (2021-02-28 17:50)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so a repeated word cannot inflate the
    numerator, and two empty inputs give 0.0 instead of raising
    ZeroDivisionError.  Assumes the words are hashable (e.g. strings).
    """
    upper = set(words_1) & set(words_2)
    lower = set(words_1) | set(words_2)
    if not lower:
        return 0.0
    return len(upper) / len(lower)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Words (split on single spaces) are gathered in a pending line that is
    printed with a two-space indent as soon as it reaches or would exceed
    ``print_width - 2`` characters.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    limit = print_width - 2
    pending = ''
    for word in tweet_content.split(' '):
        if pending == '':
            # Start of a line: a word that fills the line is printed at once.
            if len(word) >= limit:
                print('  ' + word)
            else:
                pending = word
            continue
        pending += ' '
        size = len(pending) + len(word)
        if size > limit:
            print('  ' + pending)
            pending = word
        elif size == limit:
            print('  ' + pending + word)
            pending = ''
        else:
            pending += word
    if pending != '':
        print('  ' + pending)

#--------------------------------------------
# 6330276321 (18.01) 129 (2021-02-27 01:02)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Uses set operations instead of repeated ``list.count`` calls, so
    repeated words are counted once, and two empty inputs give 0.0 instead
    of raising ZeroDivisionError.  Assumes the words are hashable.
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best first.

    Every tweet is scored; pairs are ordered by descending coefficient and,
    for equal coefficients, by ascending tweet id.
    """
    ranked = sorted(
        ([index, jaccard(tweet, norm_query)] for index, tweet in enumerate(norm_tweets)),
        key=lambda pair: (-pair[1], pair[0]),
    )
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Words (split on single spaces) are collected per line; a line is
    printed with a two-space indent when the next word would push it past
    ``print_width - 2`` characters, and once more after the last word.
    """
    print("")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    words = tweet_content.split(' ')
    last = len(words) - 1
    used = 0      # length of the current line including one trailing separator
    current = []
    for idx, word in enumerate(words):
        if used + len(word) > print_width - 2:
            # Appending would overflow: flush and restart with this word.
            print("  " + " ".join(current))
            current = [word]
            used = len(word)
        else:
            current.append(word)
            used += len(word)
        if idx == last:
            print("  " + " ".join(current))
        used += 1


#--------------------------------------------
# 6330277021 (20.00) 130 (2021-02-27 23:03)

def get_unique(words):
    # Walk the input once and keep each word only the first time it shows up.
    firsts = []
    for word in words:
        is_new = word not in firsts
        if is_new:
            firsts += [word]
    return firsts
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so a repeated word in ``words_1`` no longer
    inflates the numerator, and two empty inputs give 0.0 instead of
    raising ZeroDivisionError.  Assumes the words are hashable.
    """
    first, second = set(words_1), set(words_2)
    down = first | second
    if not down:
        return 0.0
    return len(first & second) / len(down)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient; tweets sharing a coefficient are
    ordered by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    # One keyed sort gives the same order as grouping ties and sorting each
    # group by id.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Words (split on single spaces) accumulate in a pending line that is
    printed with a two-space indent once it reaches or would exceed
    ``print_width - 2`` characters.
    """
    indent = " " * 2
    limit = print_width - 2
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    pending = ""
    for word in tweet_content.split(" "):
        if pending == "":
            # Start of a line: a word that fills the line goes out at once.
            if len(word) >= limit:
                print(indent + word)
            else:
                pending = word
            continue
        pending += " "
        size = len(pending) + len(word)
        if size == limit:
            print(indent + pending + word)
            pending = ""
        elif size > limit:
            print(indent + pending)
            pending = word
        else:
            pending += word
    if pending != "":
        print(indent + pending)



#--------------------------------------------
# 6330278621 (18.01) 131 (2021-02-28 00:13)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 instead of raising ZeroDivisionError.  Assumes
    the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    total = len(first | second)
    if total == 0:
        return 0.0
    return len(first & second) / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best first.

    Every tweet is scored; pairs are ordered by descending coefficient and,
    for equal coefficients, by ascending tweet id.
    """
    pairs = []
    for tweet_id, tweet in enumerate(norm_tweets):
        pairs.append([tweet_id, jaccard(tweet, norm_query)])
    # Reverse-sorting on (coef, -id) puts high coefficients first and, among
    # equal coefficients, low ids first.
    pairs.sort(key=lambda item: (item[1], -item[0]), reverse=True)
    return pairs[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide; a
    word wider than the line is printed alone.  The end of the text is
    detected by position: the old value comparison with the last word
    misfired whenever that word also appeared earlier in the tweet.
    """
    indent = '  '
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = []
    for word in tweet_content.split(' '):
        candidate = current + [word]
        if current and len(indent) + len(' '.join(candidate)) > print_width:
            print(indent + ' '.join(current))
            current = [word]
        else:
            current = candidate
    print(indent + ' '.join(current))
#--------------------------------------------
# 6330279221 (20.00) 132 (2021-03-01 14:36)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy so the caller's list is neither reordered nor
    returned as-is.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 instead of raising ZeroDivisionError.  Assumes
    the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    tot = first | second
    if not tot:
        return 0.0
    return len(first & second) / len(tot)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient, ties by ascending tweet id.  The
    candidates are sorted once after scoring rather than after every append.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Words (split on single spaces) are collected per line; a line is
    flushed when the joined words, the new word and three extra characters
    would exceed ``print_width``.  Each line is printed after two spaces.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = []
    for word in tweet_content.split(' '):
        if len(' '.join(current)) + len(word) + 3 > print_width:
            # print(' ', text) emits one space, the separator, then the text.
            print(' ', ' '.join(current).strip())
            current = [word]
        else:
            current.append(word)
    print(' ', ' '.join(current).strip())


#--------------------------------------------
# 6330280821 (16.80) 133 (2021-03-01 21:48)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy so the caller's list is untouched.  One-element
    and all-identical inputs now return that single item (they used to
    return []).
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Computed with set operations, so it no longer depends on a
    de-duplication helper that collapsed a single shared word to nothing.
    Two empty inputs give 0.0 instead of raising ZeroDivisionError.
    Assumes the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweet, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard != 0.

    Ordered by descending coefficient, ties by ascending tweet id.  The two
    debugging ``print`` calls are removed and each tweet is scored once.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweet):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide.  A
    word wider than the line is printed alone, which removes the endless
    loop the old version entered on such a word.  The final partial line is
    always printed.
    """
    indent = "  "
    print("")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    current = []
    for word in tweet_content.split(' '):
        candidate = current + [word]
        if current and len(indent) + len(" ".join(candidate)) > print_width:
            print(indent + " ".join(current))
            current = [word]
        else:
            current = candidate
    print(indent + " ".join(current))
#--------------------------------------------
# 6330281421 (18.01) 134 (2021-02-26 22:38)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Two empty inputs give 0.0 instead of raising ZeroDivisionError.
    Assumes the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    set_of_words = first | second
    if not set_of_words:
        return 0.0
    return len(first & second) / len(set_of_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best first.

    Every tweet is scored; pairs are ordered by descending coefficient and,
    for equal coefficients, by ascending tweet id.  ``n`` is capped at the
    number of tweets.
    """
    # Storing the negated id lets one descending sort also order ties by
    # ascending id.
    ranked = sorted(
        ([jaccard(tweet, norm_query), -idx] for idx, tweet in enumerate(norm_tweets)),
        reverse=True,
    )
    count = min(n, len(norm_tweets))
    return [[-neg_idx, coef] for coef, neg_idx in ranked[:max(count, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Each word (split on single spaces) is appended together with a trailing
    space; the limit is ``print_width + 1`` so that the trailing space does
    not count against the width.  Lines start with two spaces.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    limit = print_width + 1
    indent = '  '
    line = indent
    for token in tweet_content.split(' '):
        piece = token + ' '
        fits = len(line) + len(piece) <= limit
        if line == indent and not fits:
            # A word too wide even for an empty line is printed on its own.
            print(line + piece)
        elif fits:
            line += piece
        else:
            print(line)
            line = indent + piece
    print(line)
#--------------------------------------------
# 6330282021 (20.00) 135 (2021-02-25 23:20)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy so the caller's list is not reordered.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 instead of raising ZeroDivisionError.  Assumes
    the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    total_words = len(first | second)
    if total_words == 0:
        return 0.0
    return len(first & second) / total_words
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient; tweets sharing a coefficient are
    ordered by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    # One keyed sort gives the same order as slicing the ascending list into
    # equal-coefficient groups and emitting the groups from last to first.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef) ' header and the wrapped tweet.

    Words (split on single spaces) are collected per line; ``used`` tracks
    the width the line would have when printed with its two-space indent.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ') ')
    line_words = []
    used = 1
    for word in tweet_content.split(' '):
        used += len(word) + 1
        if used > print_width:
            # Overflow: flush the line and restart with this word.
            print('  ' + ' '.join(line_words))
            line_words = [word]
            used = len(word) + 2
        else:
            line_words.append(word)
    print('  ' + ' '.join(line_words))

#--------------------------------------------
# 6330283721 (17.57) 136 (2021-02-27 09:19)

def get_unique(words):
    # Keep the first occurrence of each word; the input order is preserved.
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 instead of raising ZeroDivisionError.  The
    unreachable second ``return`` is removed.  Assumes the words are
    hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient, ties by ascending tweet id.  Each
    coefficient is computed once (it used to be computed twice per tweet).
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide; a
    word wider than the line is printed alone.  This replaces width
    arithmetic that mis-wrapped lines and raised IndexError when the tweet
    ended with a space.  The header no longer has a space before '#',
    matching the other implementations in this file.
    """
    indent = "  "
    print("\n#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    current = []
    for word in tweet_content.split(" "):
        candidate = current + [word]
        if current and len(indent) + len(" ".join(candidate)) > print_width:
            print(indent + " ".join(current))
            current = [word]
        else:
            current = candidate
    print(indent + " ".join(current))

#--------------------------------------------
# 6330284321 (20.00) 137 (2021-03-01 20:23)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once, and two
    empty inputs give 0.0 instead of raising ZeroDivisionError.  Assumes
    the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with non-zero jaccard.

    Ordered by descending coefficient, ties by ascending tweet id.  The
    ranked list is sliced rather than indexed, so asking for more results
    than there are tweets no longer raises IndexError.
    """
    ranked = sorted(
        [-jaccard(tweet, norm_query), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    )
    top_n = []
    for negated, tweet_id in ranked[:max(n, 0)]:
        if negated == 0:
            # Sorted order puts zero scores last: nothing useful follows.
            break
        top_n.append([tweet_id, -negated])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide; a
    word wider than the line is printed alone.  The old exact-fit check
    measured the line with the current word appended twice, which broke
    lines early; lines also no longer carry a trailing space.
    """
    indent = "  "
    print("")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    current = []
    for word in tweet_content.split(" "):
        candidate = current + [word]
        if current and len(indent) + len(" ".join(candidate)) > print_width:
            print(indent + " ".join(current))
            current = [word]
        else:
            current = candidate
    print(indent + " ".join(current))

#--------------------------------------------
# 6330286621 (20.00) 138 (2021-02-28 04:44)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Two empty inputs give 0.0 instead of raising ZeroDivisionError.
    Assumes the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient; tweets sharing a coefficient are
    ordered by ascending tweet id.
    """
    scored = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(norm_query, tweet)
        if coef > 0:
            scored.append([idx, coef])
    # Negating the coefficient lets one ascending sort rank high scores
    # first while keeping low ids first among ties.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces and at most ``print_width`` characters wide; a
    word wider than the line is printed alone.  The last line is now
    terminated with a newline, so later output no longer runs onto it.
    """
    indent = '  '
    print('\n#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    current = []
    for word in tweet_content.split(' '):
        candidate = current + [word]
        if current and len(indent) + len(' '.join(candidate)) > print_width:
            print(indent + ' '.join(current))
            current = [word]
        else:
            current = candidate
    print(indent + ' '.join(current))
#--------------------------------------------
# 6330288921 (18.50) 139 (2021-03-01 17:02)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Two empty inputs give 0.0 instead of raising ZeroDivisionError.
    Assumes the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient; the sort is stable, so equal
    coefficients stay in ascending tweet-id order.  An empty list is
    returned when nothing matches (this used to fail on an unbound local).
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Words (split on single spaces) are appended to an indented line until
    the next one would push it past ``print_width``.
    """
    print('\n' + '#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    # One leading space here; the separator added with the first word
    # completes the two-space indent.
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = line + ' ' + word
    print(line)




#--------------------------------------------
# 6330289521 (20.00) 140 (2021-03-01 23:16)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (intersection size / union size).

    Inputs are treated as sets, so repeated words are counted once.  Two
    empty inputs give 0.0; the old version raised IndexError on them.
    Assumes the words are hashable (e.g. strings).
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard != 0.

    Ordered by descending coefficient; tweets sharing a coefficient are
    ordered by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            matches.append([tweet_id, coef])
    # One keyed sort gives the same order as sorting descending and then
    # re-sorting every run of equal coefficients by id.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (coef)' header and the wrapped tweet.

    Each word (split on single spaces) is appended with a trailing space to
    a two-space-indented line; the line is flushed when the next word would
    make it longer than ``print_width``.
    """
    print("\n" + "#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line = "  "
    for word in tweet_content.split(' '):
        if len(line) + len(word) > print_width:
            print(line)
            line = "  " + word + " "
            continue
        line += word + " "
    print(line)
#--------------------------------------------
# 6330290021 (19.68) 141 (2021-02-27 01:50)

def get_unique(words):
    # Keep the first occurrence of each word; the input order is preserved.
    kept = []
    for candidate in words:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return distinct shared words / distinct words overall (0 if none).

    The denominator comes from ``get_unique`` applied to both lists joined
    together; the numerator counts each shared word once.
    """
    union_words = get_unique(words_1 + words_2)
    if len(union_words) <= 0:
        return 0
    shared = []
    for word in words_1:
        if word not in words_2 or word in shared:
            continue
        shared.append(word)
    return len(shared) / len(union_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with jaccard > 0.

    Ordered by descending coefficient; the sort is stable, so equal
    coefficients stay in ascending tweet-id order.  Each coefficient is
    computed once (it used to be computed twice per tweet).
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines are indented by two spaces and filled greedily. A word
    that is wider than the usable width is printed on a line of its own
    instead of being dropped.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    width = print_width - 2  # room left after the two-space indent
    line = []
    for word in tweet_content.split(' '):
        # Flush only a non-empty line, so an over-long word still gets printed.
        if line and len(' '.join(line)) + 1 + len(word) > width:
            print('  ' + ' '.join(line))
            line = []
        line.append(word)
    print('  ' + ' '.join(line))
#--------------------------------------------
# 6330291721 (15.78) 142 (2021-02-27 22:11)

def get_unique( words ):
    """Collect each distinct word once, in the order it first occurs."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct += [word]
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    intersection = sum(1 for word in words_1 if word in words_2)
    union = len(words_1) + len(words_2) - intersection
    if union == 0:
        return 0
    return intersection / union
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Tweets and query are de-duplicated before comparison. Only coefficients
    above zero are kept. Results are ordered by coefficient descending, then
    ``tweet_id`` ascending. Fewer than ``n`` pairs are returned when fewer
    tweets match.
    """
    query_words = get_unique(norm_query)
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(get_unique(tweet), query_words)
        if coef > 0:
            scored.append([tweet_id, coef])
    # One keyed sort gives both the ranking and the tie-break.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a header for the tweet, then its words wrapped by hand.

    The header is a line holding a single space followed by
    ``#<id> (<coefficient rounded to 2>)``. Words are accumulated in a
    buffer and printed whenever the buffer reaches or exceeds ``print_width``.
    """
    # Note: this prints a line containing one space, not an empty line.
    print(" ")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef,2)) + ")")
    t = tweet_content.split(' ')
    # x is the pending line; it always begins with one space.
    x = ' '
    for i in range(len(t)):
        # NOTE(review): when this test fails, t[i] is neither buffered nor printed.
        if len(x)+1 <= print_width:
            x = x + t[i] + ' '
            if len(x) > print_width:
                # Overflow: take the last word back off, print the rest, and
                # restart the buffer with that word.
                x = x.strip().split(' ')
                y = x.pop()
                x = ' '.join(x)
                print('  '+ x)
                x = ' ' + y + ' '
                if i == len(t)-1:
                    # The overflowing word was the final one: print it now.
                    print('  '+ y)
            elif len(x) == print_width:
                # Exact fit: x already starts with a space, so one more
                # space yields the two-space indent.
                print(' '+ x)
                x = ' '
            elif i == len(t)-1:
                # Final word and the buffer still fits: flush what is left.
                print(' '+ x)

#--------------------------------------------
# 6330292321 (19.37) 143 (2021-02-28 21:29)

def get_unique( words ):
    """Return ``words`` with later repeats removed, order preserved."""
    result = []
    for candidate in words:
        already_there = candidate in result
        if not already_there:
            result.append(candidate)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word collections.

    The distinct words of both inputs form the union; the coefficient is
    the share of those that appear in both inputs, or 0 for an empty union.
    """
    union = get_unique(list(words_1) + list(words_2))
    total = len(union)
    if total == 0:
        return 0
    shared = sum(1 for word in union if word in words_1 and word in words_2)
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a header for the tweet, then its text wrapped at spaces.

    The header is an empty line followed by
    ``#<id> (<coefficient rounded to 2>)``. The text is scanned character
    by character and sliced at remembered space positions; each printed
    slice is prefixed with two spaces.
    """
    # A trailing space is appended so the final word is also followed by one.
    p = tweet_content+" "
    print("")
    print("#"+str(tweet_id)+" "+"("+str(round(jc_coef,2))+")")
    # start: index where the current output line begins.
    start = 0
    # end, end1, end2: the three most recent space positions (end2 is the
    # newest). ``end`` is assigned but never read in this function.
    end = 0
    end1 = 0
    end2 = 0
    # Both checks below run for every character index, not only at spaces.
    for i in range(len(p)) :
        if p[i] == ' ':
            end = end1
            end1 = end2
            end2 = i
        #print(i,p[i],start,"\t",end,end1,end2,"\t",end2-start,len(p))
        # The newest space is too far from the line start: print up to the
        # previous space and begin the next line just after it.
        if (end2-start)>(print_width -2):
            print("  "+p[start:end1])
            start = end1+1
        # The newest space is the appended trailing one: print the remainder
        # (the slice includes that trailing space).
        if end2 == len(p)-1:
            print("  "+p[start::])

#--------------------------------------------
# 6330293021 (19.70) 144 (2021-03-01 14:36)

def get_unique( words ):
    """Return the distinct entries of ``words`` in first-appearance order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_2`` found in ``words_1``;
    the denominator is the number of distinct words across both lists.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    if not union:
        return 0
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending. Every match is
    considered, including a lone match and a run of equal coefficients at
    the end of the ranking.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)  # computed once per tweet
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines are indented by two spaces and stripped of surrounding
    whitespace. A word wider than the usable width is printed on its own
    line, and the pending buffer is cleared afterwards so no text repeats.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    width = print_width - 2  # room left after the two-space indent
    lines = []
    current = ''  # pending line; ends with a space when non-empty
    for word in tweet_content.split(' '):
        if len(word) + len(current) <= width:
            current += word + ' '
            continue
        if current:
            lines.append(current.strip())
        if len(word) > width:
            lines.append(word)
            current = ''  # reset so the flushed text is not emitted again
        else:
            current = word + ' '
    if current not in ('', ' '):
        lines.append(current.strip())
    for line in lines:
        print('  ' + line)




#--------------------------------------------
# 6330294621 (12.91) 145 (2021-02-28 22:56)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is left untouched.
    """
    ordered = sorted(words)
    unique_words = []
    for position, word in enumerate(ordered):
        # In sorted order, equal items are adjacent: keep the last of each run.
        if position == len(ordered) - 1 or word != ordered[position + 1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2) - shared
    if total == 0:
        return 0
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Tweets with a zero coefficient are excluded, matching the other
    implementations in this file. Results are ordered by coefficient
    descending, then ``tweet_id`` ascending. Fewer than ``n`` pairs are
    returned when fewer tweets match.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    # Slicing tolerates n larger than the number of matches.
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a header for the tweet, then its text wrapped to ``print_width``.

    The header is an empty line followed by
    ``#<id> (<coefficient rounded to 2>)``. Content lines are prefixed with
    two spaces.
    """

    print()
    print('#'+str(tweet_id),'('+(str(round(jc_coef,2)))+')')
    m = tweet_content.split(' ')
    # Insert a ' ' piece between consecutive words: w0, ' ', w1, ' ', w2, ...
    for i in range(len(m)-1):
        m.insert((2*i)+1,' ')
    # prt holds the pending line. It starts as a list of characters and is
    # rebound to a str after the first wrap; ``+=``, slicing and ''.join
    # work for both types.
    prt = []
    c = 0
    for i in range(len(m)):
        prt += m[i]
        c = len(prt)
        if c > print_width-2:
           # Too long: remove the piece just added, drop one leading space,
           # print the line, and restart the buffer with that piece.
           prt = prt[0:len(prt)-len(m[i])]
           # NOTE(review): prt is empty here when the very first piece alone
           # exceeds the width, so prt[0] raises IndexError.
           if prt[0] == ' ':
              prt = prt[1:]
           print('  '+''.join(prt))
           # This reset has no effect: c is recomputed at the top of the loop.
           c = 0
           prt =m[i]
    # NOTE(review): prt is also empty for an empty tweet_content, in which
    # case prt[0] raises IndexError.
    if prt[0] == ' ':
        prt = prt[1:]
    print('  '+''.join(prt))


#--------------------------------------------
# 6330295221 (18.33) 146 (2021-02-27 23:43)

def get_unique( words ):
    """Return each distinct word once, keeping first-occurrence order."""
    seen = []
    for word in words:
        seen.extend([] if word in seen else [word])
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` found in ``words_2``;
    the denominator is the number of distinct words across both lists.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union = []
    for word in list(words_1) + list(words_2):
        if word not in union:
            union.append(word)
    if not union:
        return 0
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is a line holding a single space followed by
    ``#<id> (<coefficient rounded to 2>)``. Content lines start with two
    spaces. A line that is not filled exactly keeps a trailing space. A
    word wider than the whole line is printed alone, so the loop always
    terminates.
    """
    print(' ')
    print('#' + str(tweet_id) + " " + '(' + str(round(jc_coef, 2)) + ')')
    width = int(print_width)
    indent = '  '
    line = indent
    for word in tweet_content.split(' '):
        # Flush a started line that cannot take this word.
        if line != indent and len(line) + len(word) > width:
            print(line)
            line = indent
        total = len(line) + len(word)
        if total < width:
            line += word + ' '
        elif total == width:
            print(line + word)
            line = indent
        else:
            # The line is empty here: the word can never fit, so print it
            # on its own line instead of retrying forever.
            print(indent + word)
    if line != indent:
        print(line)

#--------------------------------------------
# 6330296921 (17.03) 147 (2021-03-01 23:48)

def get_unique( words ):
    """Return the words of the input with repeats dropped, order kept."""
    collected = []
    for entry in words:
        if entry in collected:
            continue
        collected.append(entry)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = [word for word in words_1 if word in words_2]
    union_size = len(words_1) + len(words_2) - len(shared)
    if union_size == 0:
        return 0
    return len(shared) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    ``tweet_id`` is the position of the tweet in ``norm_tweets``. It is
    taken from the loop index, so tweets with identical content keep their
    own ids. Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines start with two spaces and end with a space after the last
    word. A word is moved to the next line when appending it would push the
    visible text past ``print_width``.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    line = ''  # pending words, each followed by one space
    for word in tweet_content.split(' '):
        # 2 is the indent; check the fit before appending, on every line.
        if line and 2 + len(line) + len(word) > print_width:
            print('  ' + line)
            line = ''
        line += word + ' '
    print('  ' + line)
#--------------------------------------------
# 6330298121 (19.25) 148 (2021-02-27 23:41)

def get_unique( words ):
    """Return the distinct words in the order they first appear."""
    out = []
    for word in words:
        if out.count(word) == 0:
            out.append(word)
    return out
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    total = len(words_1) + len(words_2) - shared
    return shared / total if total else 0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending. Every match is
    ranked, including the lowest-scoring one and a lone match.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # A single keyed sort needs no manual grouping of equal coefficients.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines carry a two-space indent.
    """
    print()
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    current = " "
    used = 0  # length of the line so far; 0 before the first word is placed
    for word in tweet_content.split(" "):
        piece = " " + word
        if used + len(piece) > print_width:
            print(current)
            current = " " + piece
        else:
            current += piece
        used = len(current)
    print(current)
        #--------------------------------------------
# 6330299821 (20.00) 149 (2021-02-26 22:20)

def get_unique( words ):
    """Return the distinct elements of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The coefficient is the number of distinct shared words divided by the
    number of distinct words overall. Returns 0 when ``words_1`` is empty.
    """
    if len(words_1) == 0:
        return 0
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    shared = []
    for word in words_1:
        if word in words_2 and word not in shared:
            shared.append(word)
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Pairs with a coefficient of zero are left out. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    ranked = sorted(
        (pair for pair in scored if pair[1] != 0),
        key=lambda pair: (-pair[1], pair[0]),
    )
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines carry a two-space indent. A line is printed as soon as
    the next word would push it past ``print_width``.
    """
    print("")
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    last = len(words) - 1
    line = " "
    for pos, word in enumerate(words):
        line = line + ' ' + word
        if pos == last:
            print(line)
        elif len(line) + 1 + len(words[pos + 1]) > print_width:
            print(line)
            line = " "

#--------------------------------------------
# 6330300721 (17.00) 150 (2021-02-28 02:19)

def get_unique(words):
    """Return every distinct word once, in order of first appearance."""
    firsts = []
    for word in words:
        if word in firsts:
            continue
        firsts.append(word)
    return firsts
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated first. The coefficient is the number of
    shared words divided by the number of distinct words overall.
    Returns 0 when both lists are empty, rather than failing on an
    unassigned result.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = [word for word in first if word in second]
    union = first + [word for word in second if word not in first]
    if not union:
        return 0
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending, matching the other
    implementations in this file. ``n == 0`` yields an empty list.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    # A leading slice handles n == 0 correctly, unlike a [-n:] tail slice.
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines are indented by two spaces. The final line is always
    printed, including when the tweet consists of a single word.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')

    words = tweet_content.split(' ')
    line = words[0]
    for word in words[1:]:
        candidate = line + ' ' + word
        if len(candidate) <= print_width - 2:
            line = candidate
        else:
            print('  ' + line)
            line = word
    print('  ' + line)


#--------------------------------------------
# 6330301321 (17.72) 151 (2021-03-01 16:02)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is left untouched.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting: compare with the last kept.
        if not unique_words or word != unique_words[-1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists, rounded to 2 places.

    The shared count is taken as the number of repeats in the concatenated
    lists, which equals the intersection size only when each input list is
    free of duplicates. An empty union yields 0.
    """
    merged = sorted(words_1 + words_2)
    union_size = len(get_unique(merged))
    if union_size == 0:
        return round(0, 2)
    shared = len(merged) - union_size
    return round(shared / union_size, 2)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient, highest first; the sort is stable, so equal coefficients
    stay in ascending ``tweet_id`` order.
    """
    result = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jco = jaccard(tweet, norm_query)
        if jco > 0:
            result.append([tweet_id, jco])
    return sorted(result, key=lambda twt: twt[1], reverse=True)[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    The coefficient is rounded here, so the header is well-formed whatever
    precision the caller passes. Content lines are indented by two spaces.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = []
    for word in tweet_content.split(' '):
        line.append(word)
        if len('  ' + ' '.join(line)) > print_width:
            # Print everything but the word that caused the overflow; a
            # lone over-long word stays pending and is printed later.
            if len(line) > 1:
                print('  ' + ' '.join(line[:-1]))
            line = [word]
    print('  ' + ' '.join(line))
#--------------------------------------------
# 6330302021 (12.78) 152 (2021-02-28 20:14)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is left untouched.
    """
    unique_words = []
    previous_kept = False
    for word in sorted(words):
        # After sorting, a repeat can only equal the most recently kept item.
        if previous_kept and word == unique_words[-1]:
            continue
        unique_words.append(word)
        previous_kept = True
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The shared count is the number of equal ``(words_1, words_2)`` pairs,
    and the union size is ``len(words_1) + len(words_2) - shared``. Both are
    exact only when neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    count = sum(1 for left in words_1 for right in words_2 if left == right)
    total = len(words_1) + len(words_2) - count
    if total == 0:
        return 0
    return count / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Every tweet with a zero coefficient is excluded. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending. The function also
    works when no tweet has a zero coefficient.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines start with two spaces and end with a space after the last
    word. The fit check is applied to every word on every line. The end of
    the word list is handled by the loop itself, not by catching an
    exception.
    """
    print()
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    line = ''  # pending words, each followed by one space
    for word in tweet_content.split(' '):
        # len(line) already counts the separating space before ``word``.
        if line and len(line) + len(word) > print_width - 2:
            print('  ' + line)
            line = ''
        line += word + ' '
    print('  ' + line)
#--------------------------------------------
# 6330303621 (17.50) 153 (2021-02-28 11:34)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated first. The coefficient is the number of
    shared words divided by the number of distinct words overall.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = sum(1 for word in second if word in first)
    union_size = len(first) + len(second) - shared
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending. Every match is
    ranked, including the group of tweets sharing the lowest coefficient.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # One keyed sort replaces the manual grouping of equal coefficients.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Each content line starts with two spaces, and every word is followed by
    one space. The running width counts words and their trailing spaces but
    not the indent. A word too wide for an empty line is printed anyway, so
    the loop always terminates.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    used = 0  # characters on the current line, excluding the indent
    for word in tweet_content.split(' '):
        cost = len(word) + 1
        # Break only a line that has content; an empty line takes any word.
        if used and used + cost >= print_width:
            print()
            used = 0
        if used == 0:
            print('  ' + word, end=' ')
        else:
            print(word, end=' ')
        used += cost
    print()

#--------------------------------------------
# 6330304221 (18.01) 154 (2021-02-28 20:56)

def get_unique( words ):
    """Return the distinct words of the input, keeping first-seen order."""
    singles = []
    for item in words:
        if item in singles:
            continue
        singles.append(item)
    return singles
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The coefficient is the number of distinct shared words divided by the
    number of distinct words overall.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    union_words = get_unique(words_1 + words_2)
    if not union_words:
        return 0
    shared_words = get_unique([word for word in words_1 if word in words_2])
    return len(shared_words) / len(union_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Tweets with a zero coefficient are excluded, matching the other
    implementations in this file. Results are ordered by coefficient
    descending, then ``tweet_id`` ascending within every group of equal
    coefficients. An empty ``norm_tweets`` yields an empty list.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(norm_query, tweet)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Words are written one at a time without a newline; a line break and a
    two-space indent are written before a word that would overflow.
    """
    print('\n#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    used = 0
    prefix = '  '  # the first word carries the indent, later ones one space
    for word in tweet_content.split(' '):
        chunk = prefix + word
        used += len(chunk)
        if used > print_width:
            chunk = '  ' + word
            print('\n' + chunk, end='')
            used = len(chunk)
        else:
            print(chunk, end='')
        prefix = ' '
    print()
#--------------------------------------------
# 6330305921 (18.01) 155 (2021-02-27 20:57)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is left untouched.
    """
    ordered = sorted(words)
    unique_words = []
    for index, word in enumerate(ordered):
        # Keep the first item of each run of equal values.
        if index == 0 or word != ordered[index - 1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` found in ``words_2``;
    the denominator is the number of distinct words across both lists.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = [word for word in words_1 if word in words_2]
    # The concatenation is a new list, so the helper never sees the inputs.
    union = get_unique(words_1 + words_2)
    if not union:
        return 0
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Tweets with a zero coefficient are excluded, matching the other
    implementations in this file. Results are ordered by coefficient
    descending, then ``tweet_id`` ascending.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines are indented by two spaces and joined with single spaces.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    pending = []
    for word in tweet_content.split(' '):
        if not pending:
            pending = [word]
            # len(pending) is the number of pending words (1), not the word
            # length, so this only triggers for print_width <= 3.
            if len(pending) + 2 >= print_width:
                print('  ' + pending[0])
                pending = []
            continue
        if len(' '.join(pending)) + len(word) + 3 > print_width:
            print('  ' + ' '.join(pending))
            pending = [word]
        else:
            pending.append(word)
    if pending:
        print('  ' + ' '.join(pending))

#--------------------------------------------
# 6330306521 (14.37) 156 (2021-03-01 22:02)

def get_unique( words ):
    """Return the distinct entries of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_2`` found in ``words_1``;
    the denominator is the number of distinct words across both lists.
    Neither input list is modified. Returns 0 when both lists are empty.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    # Build the union from copies so the caller's words_1 is never extended.
    union = get_unique(list(words_1) + list(words_2))
    if not union:
        return 0
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Tweets with a zero coefficient are excluded, matching the other
    implementations in this file. Results are ordered by coefficient
    descending, then ``tweet_id`` ascending for all ties regardless of list
    size. Fewer than ``n`` pairs are returned when fewer tweets match.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is a line holding a single space followed by
    ``#<id> (<coefficient rounded to 2>)``. Content lines carry a two-space
    indent. Every word is printed: one that is too wide for an empty line
    is placed on a line of its own.
    """
    print(' ')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '  # a single space marks an empty line; words add ' ' + word
    for word in tweet_content.split(' '):
        # Flush only a line that already holds a word.
        if line != ' ' and len(line) + len(word) >= print_width:
            print(line)
            line = ' '
        line += ' ' + word
    print(line)

#--------------------------------------------
# 6330308821 (20.00) 157 (2021-02-27 19:23)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order.

    The input is only read, so the caller's list keeps its contents.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    if not union_size:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending.
    """
    hits = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            hits.append([tweet_id, coef])
    hits.sort(key=lambda pair: (-pair[1], pair[0]))
    return hits[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is a line holding a single space followed by
    ``#<id> (<coefficient rounded to 2>)``. Content lines carry a two-space
    indent; a line cut by wrapping keeps the space that preceded the moved
    word. The last line is emitted exactly once, after all words have been
    placed, even when the final word also occurs earlier in the tweet.
    """
    lines = []
    current = " "
    for word in tweet_content.split(' '):
        current += ' ' + word
        if len(current) > print_width:
            # Take the word back off and start the next line with it.
            lines.append(current[:len(current) - len(word)])
            current = "  " + word
    lines.append(current)
    jc_coef = round(jc_coef, 2)
    print(" ")
    print(f"#{tweet_id} ({jc_coef})")
    print(*lines, sep="\n")
#--------------------------------------------
# 6330309421 (17.60) 158 (2021-03-01 20:37)

def get_unique( words ):
    """Return the distinct words in first-seen order.

    A set tracks what has been seen, so the items must be hashable.
    """
    seen = set()
    unique_words = []
    for word in words:
        if word in seen:
            continue
        seen.add(word)
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The shared count is the size of the set intersection. The union size is
    ``len(words_1) + len(words_2) - shared``, which is exact only when
    neither list contains duplicates.
    Returns 0 when both lists are empty instead of dividing by zero.
    """
    intersect = len(set(words_1).intersection(words_2))
    union = (len(words_1) + len(words_2)) - intersect
    if union == 0:
        return 0
    return float(intersect) / union
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Tweets with a zero coefficient are excluded, matching the other
    implementations in this file. Results are ordered by coefficient
    descending, then ``tweet_id`` ascending. When ``n`` exceeds the number
    of matches, only the matches are returned, with no repeated entries.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jc_co = jaccard(tweet, norm_query)
        if jc_co > 0:
            scored.append([tweet_id, jc_co])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    # A slice cannot wrap around to negative indices the way a counted
    # backwards index loop can.
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    The whole body is assembled first, with embedded newlines and two-space
    indents, and printed in one call. A word that fills a line exactly is
    added without its trailing space.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    used = 2  # width consumed on the current line, indent included
    pieces = ["  "]
    for word in tweet_content.split(" "):
        used += len(word) + 1
        if used - 1 == print_width:
            pieces.append(word)
            used -= 1
        elif used <= print_width:
            pieces.append(word + " ")
        else:
            wrapped = "\n" + "  " + word + " "
            pieces.append(wrapped)
            used = len(wrapped) - 1  # the newline takes no column
    print("".join(pieces))

#--------------------------------------------
# 6330310021 (17.54) 159 (2021-03-01 16:47)

def get_unique( words ):
    """Return each distinct word once, in order of first appearance."""
    picked = []
    for word in words:
        if word not in picked:
            picked += [word]
    return picked
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates. Returns 0
    when that size is zero.
    """
    shared = len([word for word in words_1 if word in words_2])
    denominator = len(words_1) + len(words_2) - shared
    if denominator == 0:
        return 0
    return shared / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs most similar to the query.

    Only coefficients above zero are kept. Results are ordered by
    coefficient descending, then ``tweet_id`` ascending. Fewer than ``n``
    pairs are returned when fewer tweets match.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    # Slicing tolerates n larger than the number of matches.
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its words wrapped to ``print_width``.

    The header is an empty line and ``#<id> (<coefficient rounded to 2>)``.
    Content lines carry a two-space indent. Each line is measured on its
    own, so the width limit is the same for every line of the tweet.
    """
    print("\n#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    lines = []
    current = " "  # a single space marks an empty line; words add ' ' + word
    for word in tweet_content.split(' '):
        if current != " " and len(current) + 1 + len(word) > print_width:
            lines.append(current)
            current = " "
        current += " " + word
    lines.append(current)
    print("\n".join(lines))

#--------------------------------------------
# 6330311621 (20.00) 160 (2021-02-28 23:34)

def get_unique( words ):
    """Return the distinct elements of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The union size is derived as ``len(words_1) + len(words_2) - shared``,
    which is exact only when neither list contains duplicates. Returns 0
    when that size is zero.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    total = len(words_1) + len(words_2) - shared
    return shared / total if total != 0 else 0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Tweets with a non-positive coefficient are ignored; equal coefficients
    are ordered by ascending tweet id.
    """
    scored = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated coefficient so a plain ascending sort ranks best first.
            scored.append((-coef, idx))
    scored.sort()
    ranked = [[idx, -neg_coef] for neg_coef, idx in scored]
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words (split on single spaces) are packed greedily into lines indented by
    two spaces; a word that would bring the line to ``print_width`` characters
    or more without its joining space starts a new line.
    """
    print("")
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    line = " "
    for token in tweet_content.split(' '):
        if len(line) + len(token) >= print_width:
            print(line)
            line = "  " + token
        else:
            line = line + " " + token
    print(line)


#--------------------------------------------
# 6330312221 (20.00) 161 (2021-02-26 16:07)

def get_unique( words ):
    """Return ``words`` with repeats dropped; first occurrences keep their order."""
    result = []
    for candidate in words:
        already_there = candidate in result
        if not already_there:
            result.append(candidate)
    return result
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word lists.

    Both sets are computed over distinct words, so repeated words do not
    inflate the result.  Returns 0 when both lists are empty (previously a
    ZeroDivisionError).
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    if not union:
        return 0
    shared = [word for word in union if word in words_1 and word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Only tweets with a positive coefficient qualify.  Pairs are ordered by
    descending coefficient, then ascending tweet id.  This replaces a manual
    replace-the-minimum selection and bubble sort (which carried an
    unreachable tie branch) with one keyed sort producing the same ranking.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    The text is split on single spaces and wrapped so the words of a line,
    joined by spaces, occupy at most ``print_width - 2`` characters; every
    line is then printed behind a two-space indent.
    """
    budget = print_width - 2
    tokens = tweet_content.split(' ')
    last_pos = len(tokens) - 1
    wrapped = []
    current = ''
    for pos, token in enumerate(tokens):
        if len(current) + len(token) > budget:
            # Drop the trailing separator before closing the line.
            wrapped.append(current[:len(current) - 1])
            current = token
        else:
            current += token
        if pos == last_pos:
            wrapped.append(current)
        else:
            current += ' '
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    for text in wrapped:
        print('  ' + text)
#--------------------------------------------
# 6330313921 (20.00) 162 (2021-03-01 22:44)

def get_unique( words ):
    """Return the values of ``words`` without repeats, in first-seen order."""
    seen = []
    for entry in words:
        if seen.count(entry) == 0:
            seen.append(entry)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``shared`` counts the items of ``words_2`` found in ``words_1``; the
    result is ``shared / (len(words_1) + len(words_2) - shared)``.  Returns 0
    when the denominator is zero (previously a ZeroDivisionError).
    """
    shared = sum(1 for word in words_2 if word in words_1)
    union_size = len(words_1) + len(words_2) - shared
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Tweets are ranked by descending coefficient, then ascending id, and the
    list stops at the first tweet with a zero coefficient.  The ranking is
    sliced rather than indexed, so asking for more tweets than exist no
    longer raises IndexError.
    """
    ranked = sorted(
        [-jaccard(tweet_words, norm_query), tweet_id]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    )
    top_n = []
    for neg_coef, tweet_id in ranked[:max(n, 0)]:
        if neg_coef == 0:
            break
        top_n.append([tweet_id, -neg_coef])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    The whole body is assembled as one string.  Each word is followed by a
    space, except a word that ends exactly at ``print_width``; a word that
    would overflow starts a new two-space-indented line.
    """
    print('\n#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    used = 2  # columns consumed on the current line, indent included
    pieces = ['  ']
    for token in tweet_content.split(" "):
        used += len(token) + 1
        if used - 1 == print_width:
            # Exact fit: omit the trailing space.
            pieces.append(token)
            used -= 1
        elif used <= print_width:
            pieces.append(token + ' ')
        else:
            pieces.append('\n  ' + token + ' ')
            used = len(token) + 3
    print(''.join(pieces))


#--------------------------------------------
# 6330314521 (17.60) 163 (2021-02-27 15:14)

def get_unique( words ):
    """Return each distinct element of ``words`` once, in first-seen order."""
    collected = []
    for element in words:
        if element in collected:
            continue
        collected.append(element)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the items of ``words_1`` present in ``words_2``; the
    denominator is the number of distinct words across both lists.  Returns 0
    when both lists are empty (previously a ZeroDivisionError).
    """
    union = []
    for source in (words_1, words_2):
        for word in source:
            if word not in union:
                union.append(word)
    if not union:
        return 0
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Two defects of the index-walking version are fixed: requesting more
    tweets than exist walked into negative indices (duplicating entries or
    raising IndexError), and tweets with no overlap at all (coefficient 0)
    were reported as matches.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    The body is built as a single string: words carry a trailing space unless
    they end exactly at ``print_width``, and a word that would overflow is
    moved to a fresh two-space-indented line.
    """
    print('\n#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    column = 2
    body = "  "
    for word in tweet_content.split(" "):
        column += len(word) + 1
        if column - 1 == print_width:
            # Word ends exactly at the margin: no trailing space.
            body += word
            column -= 1
        elif column <= print_width:
            body += word + " "
        else:
            body += "\n  " + word + " "
            column = 3 + len(word)
    print(body)



#--------------------------------------------
# 6330315121 (20.00) 164 (2021-03-01 14:49)

def get_unique( words ):
    """Return the distinct items of ``words``, ordered by first appearance."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts items of ``words_1`` that occur in ``words_2``; the
    denominator is the number of distinct words over both lists.  Returns 0
    for two empty lists (previously a ZeroDivisionError).
    """
    all_words = []
    for word in list(words_1) + list(words_2):
        if word not in all_words:
            all_words.append(word)
    if not all_words:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(all_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Only tweets with a positive coefficient are kept; ordering is descending
    coefficient, then ascending tweet id.  Each coefficient is computed once
    and the candidates are sorted once, instead of calling ``jaccard`` twice
    per tweet and re-sorting on every iteration.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated id: descending sort gives ascending ids on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Every word is emitted with a trailing space.  A word is kept on the
    current line while the line, including that trailing space, is at most
    ``print_width + 1`` characters long.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = '  '
    for token in tweet_content.split(' '):
        chunk = token + ' '
        if len(line) + len(chunk) > print_width + 1:
            print(line)
            line = '  '
        line += chunk
    print(line)

#--------------------------------------------
# 6330316821 (20.00) 165 (2021-03-01 22:01)

def get_unique( words ):
    """Return the first occurrence of every value in ``words``, in order."""
    firsts = []
    for value in words:
        if value not in firsts:
            firsts += [value]
    return firsts
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts every item of ``words_1`` that occurs in
    ``words_2``; the denominator is the number of distinct words across both
    lists.  Returns 0 when there are no words at all.
    """
    union = []
    for source in (words_1, words_2):
        for word in source:
            if word not in union:
                union.append(word)
    hits = sum(1 for word in words_1 if word in words_2)
    if len(union) == 0:
        return 0
    return hits / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Tweets with a non-positive coefficient are skipped; equal coefficients
    are ordered by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated id: descending sort gives ascending ids on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    ranked = [[abs(neg_id), coef] for coef, neg_id in scored]
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a whitespace-only line, the ``#id (coef)`` header and the tweet.

    The body is built as one string.  When a word does not fit, the current
    line is padded with spaces up to ``print_width`` before the line break,
    and the word starts a new two-space-indented line.
    """
    print("                                              ")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    rendered = " "
    line_no = 1
    breaks = 0  # newline characters already inside ``rendered``
    for token in tweet_content.split(" "):
        # Total length ``rendered`` may reach once the current line is full.
        limit = line_no * print_width + breaks
        if len(rendered) + len(token) + 1 <= limit:
            rendered += " " + token
        else:
            padding = " " * (limit - len(rendered))
            rendered += padding + "\n" + "  " + token
            breaks += 1
            line_no += 1
    print(rendered)

#--------------------------------------------
# 6330317421 (20.00) 166 (2021-03-01 22:20)

def get_unique( words ):
    """Return the distinct values of ``words`` in order of first appearance."""
    output = []
    for token in words:
        if token in output:
            continue
        output.append(token)
    return output
def jaccard(words_1, words_2):
    """Return |A ∩ B| / |A ∪ B| over the distinct words of both lists.

    The intersection size is obtained by inclusion-exclusion:
    ``|A| + |B| - |A ∪ B|``.  Returns 0 when both lists are empty (previously
    a ZeroDivisionError).  The union is now computed once instead of twice.
    """
    def distinct(items):
        # Order-preserving de-duplication.
        result = []
        for item in items:
            if item not in result:
                result.append(item)
        return result

    set_1 = distinct(words_1)
    set_2 = distinct(words_2)
    union = distinct(set_1 + set_2)
    if not union:
        return 0
    return (len(set_1) + len(set_2) - len(union)) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Only tweets with a positive coefficient are listed.  The coefficient is
    computed once per tweet (it used to be computed twice).
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # list.sort is stable even with reverse=True, so tweets with equal
    # coefficients stay in ascending id order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words are emitted with a trailing space and packed into lines whose text
    (before adding the next word) plus that word is at most
    ``print_width - 2`` long; each line is printed behind a two-space indent.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    tokens = tweet_content.split(' ')
    line = tokens[0] + ' '
    for token in tokens[1:]:
        if len(line) + len(token) > print_width - 2:
            print('  ' + line)
            line = ''
        line += token + ' '
    print('  ' + line)
#--------------------------------------------
# 6330318021 (20.00) 167 (2021-03-01 09:32)

def get_unique( words ):
    """Return the distinct elements of ``words`` in first-occurrence order.

    The input list is left untouched; the previous implementation removed
    every element from the caller's list while de-duplicating.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``shared`` counts items of ``words_1`` that occur in ``words_2``; the
    union size is ``len(words_1) - shared + len(words_2)``.  Neither input is
    modified (the previous version removed the shared words from the caller's
    ``words_1``), and 0 is returned when the union is empty instead of
    raising ZeroDivisionError.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) - shared + len(words_2)
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Tweets with a zero coefficient are excluded.  Pairs are ordered by
    descending coefficient, then ascending tweet id.  One keyed sort replaces
    the manual tie regrouping, which left its result unassigned (and crashed)
    whenever exactly ``n`` tweets matched.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef != 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Each word is prefixed with a space and appended to a running list of
    fragments; when the running column would pass ``print_width`` a line
    break is inserted first.  The fragments are joined and printed once.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    fragments = [' ']
    column = 1
    for token in tweet_content.split(' '):
        spaced = ' ' + token
        column += len(spaced)
        if column > print_width:
            fragments.append('\n ')
            column = len(spaced) + 1
        fragments.append(spaced)
    print(''.join(fragments))

#--------------------------------------------
# 6330319721 (18.33) 168 (2021-03-01 11:50)

def get_unique( words ):
    """Return ``words`` without duplicates, keeping first-occurrence order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``shared`` counts items of ``words_1`` found in ``words_2``; ``unshared``
    counts items present in only one of the lists.  The result is
    ``shared / (shared + unshared)``, or 0 when both lists are empty
    (previously a ZeroDivisionError).
    """
    shared = sum(1 for word in words_1 if word in words_2)
    unshared = sum(1 for word in words_1 if word not in words_2)
    unshared += sum(1 for word in words_2 if word not in words_1)
    total = shared + unshared
    if total == 0:
        return 0
    return shared / total
#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Only tweets with a positive coefficient are listed.  The sort is stable,
    so tweets with equal coefficients remain in ascending id order.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    ranked = sorted(matches, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words (split on single spaces) are packed greedily into lines of at most
    ``print_width`` characters including the two-space indent.  A word too
    long to fit on any line is printed on a line of its own; the previous
    loop never advanced past such a word and ran forever.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        # Only break when the line already holds something, so that an
        # over-long word is still emitted and the loop always progresses.
        if line and len(line) + len(word) >= print_width - 1:
            print(' ' + line)
            line = ''
        line += ' ' + word
    print(' ' + line)
#--------------------------------------------
# 6330320221 (16.67) 169 (2021-02-27 16:48)

def get_unique(words):
    """Return the distinct entries of ``words`` in order of first appearance."""
    distinct = []
    for entry in words:
        if distinct.count(entry) == 0:
            distinct.append(entry)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts items of ``words_1`` that occur in ``words_2``; the
    denominator is the number of distinct words over both lists.  Returns 0
    when both lists are empty (previously a ZeroDivisionError).
    """
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Only tweets with a positive coefficient are listed.  The coefficient is
    computed once per tweet (it used to be computed twice).
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    # Stable sort: equal coefficients keep ascending tweet ids.
    top_n.sort(key=lambda pair: pair[1], reverse=True)
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words (split on single spaces) are packed greedily so that the text of a
    line is at most ``print_width - 2`` characters; each line is printed with
    a two-space indent and no trailing space.  A word that is at least
    ``print_width - 2`` characters long gets a line of its own; the previous
    loop could not place such a word and never terminated.
    """
    budget = print_width - 2
    lines = []
    current = []   # words on the line being built
    used = 0       # length of ' '.join(current)
    for word in tweet_content.split(' '):
        needed = used + 1 + len(word) if current else len(word)
        if current and needed > budget:
            lines.append(' '.join(current))
            current = [word]
            used = len(word)
        else:
            current.append(word)
            used = needed
    lines.append(' '.join(current))
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    print('\n'.join('  ' + line for line in lines))

#--------------------------------------------
# 6330321921 (16.94) 170 (2021-03-01 16:52)

def get_unique( words ):
    """Return the distinct elements of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is neither reordered nor
    extended.  The previous version sorted the input in place, left an '@'
    sentinel appended to it, and lost a word equal to that sentinel.
    """
    unique_words = []
    for word in sorted(words):
        # In sorted order duplicates are adjacent: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``shared`` counts items of ``words_1`` that occur in ``words_2``; the
    result is ``shared / (len(words_1) + len(words_2) - shared)``.  Returns 0
    when the denominator is zero (previously a ZeroDivisionError).
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Tweets that share nothing with the query (coefficient 0) are no longer
    reported, and asking for more tweets than are available returns what
    exists instead of raising IndexError.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated id: descending sort gives ascending ids on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    The content is split on whitespace.  A word is appended to the current
    line unless that would make the line longer than ``print_width``; in that
    case the line is printed and the word starts a new two-space-indented
    line.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for token in tweet_content.split():
        candidate = line + ' ' + token
        if len(candidate) > print_width:
            print(line)
            line = '  ' + token
        else:
            line = candidate
    print(line)

#--------------------------------------------
# 6330322521 (15.00) 171 (2021-03-01 16:15)

def get_unique( words ):
    """Return the distinct elements of ``words`` ordered by last occurrence.

    When a word repeats, its earlier entry is dropped and the word is
    re-appended at the end.
    """
    ordered = []
    for word in words:
        try:
            ordered.remove(word)
        except ValueError:
            pass
        ordered.append(word)
    return ordered
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``shared`` counts items of ``words_2`` found in ``words_1``; ``unshared``
    counts items present in only one list.  The result is
    ``shared / (shared + unshared)``, or 0 when both lists are empty
    (previously a ZeroDivisionError).
    """
    shared = sum(1 for word in words_2 if word in words_1)
    unshared = sum(1 for word in words_2 if word not in words_1)
    unshared += sum(1 for word in words_1 if word not in words_2)
    total = shared + unshared
    if total == 0:
        return 0
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Tweets with a zero coefficient are skipped; equal coefficients are
    ordered by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef != 0:
            # Negated coefficient so an ascending sort ranks best first.
            scored.append((-coef, tweet_id))
    scored.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words (split on single spaces) are packed greedily into lines of at most
    ``print_width`` characters including the two-space indent.  The previous
    body referenced two undefined names and raised NameError on every call;
    its loop also could not advance past a word too long for one line.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        # Break only when the line already holds text, so an over-long word
        # is still printed (on its own line) and the loop always progresses.
        if line and len(line) + len(word) >= print_width - 1:
            print(' ' + line)
            line = ''
        line += ' ' + word
    print(' ' + line)
#--------------------------------------------
# 6330323121 (17.00) 172 (2021-03-01 23:24)

def get_unique( words ):
    """Return the distinct elements of ``words`` in sorted order.

    A sorted copy is used, so the caller's list keeps its original order
    (the previous version sorted it in place).
    """
    unique_words = []
    for word in sorted(words):
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts items of ``words_1`` that occur in ``words_2``; the
    denominator is the number of distinct words over both lists, counted on
    a sorted copy.  Returns 0 when both lists are empty (previously an
    IndexError).
    """
    combined = sorted(words_1 + words_2)
    if not combined:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    # In sorted order a new distinct word shows up whenever neighbours differ.
    distinct = 1 + sum(
        1 for prev, cur in zip(combined, combined[1:]) if prev != cur
    )
    return shared / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Tweets with coefficient 0 are no longer reported, requesting more tweets
    than are available no longer raises IndexError, and the candidates are
    sorted once instead of after every append.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated id: descending sort gives ascending ids on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jaccard_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header and the wrapped tweet.

    Every word is written with a trailing space.  A word joins the current
    line when the line plus the bare word is at most ``print_width`` long;
    otherwise the line is printed and a new indented one is started.
    """
    print(' ')
    print('#{} ({})'.format(tweet_id, round(jaccard_coef, 2)))
    current = '  '
    for token in tweet_content.split(' '):
        if len(current) + len(token) > print_width:
            print(current)
            current = '  '
        current += token + ' '
    print(current)




#--------------------------------------------
# 6330324821 (15.00) 173 (2021-02-28 14:54)

def get_unique( words ):
    """Return the distinct elements of ``words`` in sorted order.

    The input list itself is not modified.
    """
    ordered = sorted(words)
    result = []
    position = 0
    while position < len(ordered):
        if ordered[position] not in result:
            result.append(ordered[position])
        position += 1
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``same`` counts the (position in ``words_1``, position in ``words_2``)
    pairs holding equal words; the result is
    ``same / (len(words_1) + len(words_2) - same)``.  Returns 0 when the
    denominator is zero (previously a ZeroDivisionError).
    """
    same = sum(words_2.count(word) for word in words_1)
    union_size = len(words_1) + len(words_2) - same
    if union_size == 0:
        return 0
    return same / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    Tweets with coefficient 0 are no longer reported, and requesting more
    tweets than are available no longer raises IndexError.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated coefficient so an ascending sort ranks best first.
            scored.append((-coef, tweet_id))
    scored.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header and the wrapped tweet.

    Every word is written with a trailing space.  Words are packed greedily
    so that a line without its trailing space is at most ``print_width``
    wide, indent included.  A word longer than ``print_width - 2`` gets a
    line of its own; the previous loop printed empty lines forever on such a
    word.
    """
    print(' ')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        # Break only when the line already holds text, guaranteeing progress.
        if line and len(word) + 2 + len(line) > print_width:
            print('  ' + line)
            line = ''
        line += word + ' '
    print('  ' + line)
#--------------------------------------------
# 6330325421 (20.00) 174 (2021-03-01 00:12)

def get_unique( words ):
    """Return the distinct elements of ``words`` in first-seen order."""
    singles = []
    for element in words:
        if element in singles:
            continue
        singles.append(element)
    return singles
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts items of ``words_1`` that occur in ``words_2``; the
    denominator is the number of distinct words over both lists.  Returns 0
    when both lists are empty (previously a ZeroDivisionError).
    """
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    Tweets with a zero coefficient are skipped; equal coefficients are
    ordered by ascending tweet id.  An empty ``norm_tweets`` now yields
    ``[]`` (the result used to be assigned only inside the loop, so an empty
    input crashed), and each coefficient is computed once.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef != 0:
            # Negated coefficient so an ascending sort ranks best first.
            scored.append((-coef, tweet_id))
    scored.sort()
    return [[tweet_id, abs(neg_coef)] for neg_coef, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the ``#id (coef)`` header followed by the wrapped tweet.

    Words are written with a trailing space and packed so that the text
    gathered so far plus the next word is at most ``print_width - 2`` long.
    Lines are printed behind a two-space indent; the final line is stripped
    of surrounding whitespace first.
    """
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    budget = print_width - 2
    line = ''
    for token in tweet_content.split(' '):
        if len(line) + len(token) > budget:
            print('  ' + line)
            line = ''
        line += token + ' '
    print('  ' + line.strip())


#--------------------------------------------
# 6330326021 (20.00) 175 (2021-02-26 12:23)

def get_unique( words ):
    """Return every distinct value of ``words`` once, in first-seen order."""
    picked = []
    for text in words:
        if picked.count(text) == 0:
            picked.append(text)
    return picked
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts items of ``words_1`` that occur in ``words_2``; the
    denominator is the number of distinct words over both lists.  Returns 0
    when both lists are empty (previously a ZeroDivisionError).
    """
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0
    number_of_intersect = sum(1 for word in words_1 if word in words_2)
    return number_of_intersect / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Tweets with a zero coefficient are excluded.  Pairs are ordered by
    descending coefficient; among equal coefficients the smaller tweet id
    comes first.  One keyed sort replaces the two-pass lookup, which indexed
    past the end of the list when ``n`` exceeded the number of tweets.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(norm_query, tweet_words)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, then the header and wrapped tweet as one block.

    Words are written with a trailing space; a new two-space-indented line is
    started when the current line plus the bare word would exceed
    ``print_width``.  The assembled block is stripped of leading and trailing
    whitespace before printing.
    """
    rows = ["#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")"]
    current = "  "
    for token in tweet_content.split(" "):
        if len(current) + len(token) > print_width:
            rows.append(current)
            current = "  "
        current += token + " "
    rows.append(current)
    print("")
    print("\n".join(rows).strip())

#--------------------------------------------
# 6330327721 (14.75) 176 (2021-02-27 15:54)

def get_unique( words ):
    """Return the distinct elements of ``words`` in sorted order.

    Works on a sorted copy and needs no sentinel.  The previous version
    sorted the caller's list in place and left a '.' appended to it, returned
    ``['.']`` for an empty input, and dropped a word equal to '.'.
    """
    unique_words = []
    for word in sorted(words):
        # Duplicates are adjacent once sorted: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator (a float) counts items of ``words_1`` that occur in
    ``words_2``; the denominator is the length of ``get_unique`` applied to
    the concatenation of both lists.  Returns 0 when that length is not
    positive.
    """
    combined = words_1 + words_2
    hits = 0.0
    for word in words_1:
        if word in words_2:
            hits += 1
    distinct_count = len(get_unique(combined))
    return hits / distinct_count if distinct_count > 0 else 0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Only tweets with a positive coefficient are listed, ordered by descending
    coefficient and then ascending tweet id.  The result size is limited by
    the number of matching tweets; the previous version limited it by the
    number of non-matching tweets, which either dropped valid results or
    raised IndexError.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated id: descending sort gives ascending ids on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words are written with a trailing space.  ``used`` tracks the columns
    taken on the current line and ``placed`` whether any word has been
    placed yet.  A word that does not fit before anything has been placed is
    printed on its own line at once; later on, a word that does not fit
    flushes the current line and opens the next one.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    tokens = tweet_content.split(' ')
    placed = 0
    used = 2
    line = '  '
    for token in tokens:
        size = len(token)
        used += size
        if used <= print_width:
            line += token + ' '
            used += 1
            placed += 1
        elif placed == 0:
            line += token + ' '
            print(line)
            line = '  '
            used = 2
        else:
            print(line)
            line = '  ' + token + ' '
            used = size + 3
            placed = 1
    print(line)

#--------------------------------------------
# 6330328321 (16.94) 177 (2021-02-27 00:48)

def get_unique(words):
    """Return the distinct elements of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The two lists are concatenated; the numerator is the number of entries
    that are repeats (total minus distinct) and the denominator is the number
    of distinct words.  Returns 0 when both lists are empty (previously a
    ZeroDivisionError).
    """
    combined = list(words_1) + list(words_2)
    distinct = []
    for word in combined:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0
    return (len(combined) - len(distinct)) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs, most similar first.

    Only tweets with a positive coefficient are listed, ordered by descending
    coefficient and then ascending tweet id.  The previous version returned
    the pairs in ascending order of similarity, compared a tweet id against
    a coefficient when resolving ties, and returned every match for
    ``n == 0``.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    The content is split on whitespace and every word is written with a
    trailing space.  A word joins the current line when the line plus the
    bare word is at most ``print_width`` long.
    """
    print(f'\n#{tweet_id} ({round(jc_coef,2)})')
    current = '  '
    for token in tweet_content.split():
        if len(current) + len(token) > print_width:
            print(current)
            current = '  '
        current += token + ' '
    print(current)
#--------------------------------------------
# 6330329021 (20.00) 178 (2021-02-28 02:02)

def get_unique( words ):
    """Return the distinct elements of ``words`` in sorted order.

    A sorted copy is scanned, so the caller's list keeps its original order
    (the previous version sorted it in place).
    """
    unique_words = []
    for word in sorted(words):
        # Duplicates are adjacent once sorted: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``same_words`` counts items of ``words_1`` that occur in ``words_2``; the
    result is ``same_words / (len(words_1) + len(words_2) - same_words)``.
    Returns 0 when the denominator is zero (previously a ZeroDivisionError).
    """
    same_words = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - same_words
    if union_size == 0:
        return 0
    return same_words / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return at most ``n`` ``[tweet_id, jaccard]`` pairs, best match first.

    All tweets are ranked by descending coefficient, then ascending id.  The
    first ``n`` are examined and the list stops at the first one whose
    coefficient is zero.
    """
    ranked = sorted(
        [-jaccard(tweet_words, norm_query), tweet_id]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    )
    result = []
    for neg_coef, tweet_id in ranked[:n]:
        if neg_coef == 0:
            break
        result.append([tweet_id, -neg_coef])
    return result
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Lines are collected in a list.  A word is appended to the last line with
    a joining space unless that would exceed ``print_width``, in which case
    it opens a new two-space-indented line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    rows = [' ']
    for token in tweet_content.split(' '):
        if len(rows[-1]) + len(token) + 1 > print_width:
            rows.append('  ' + token)
        else:
            rows[-1] += ' ' + token
    print('\n'.join(rows))



#--------------------------------------------
# 6330330521 (19.19) 179 (2021-02-28 21:07)

def get_unique( words ):
    """Return the distinct elements of ``words``, first occurrences in order."""
    result = []
    for item in words:
        if item in result:
            continue
        result.append(item)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the (position in ``words_1``, position in
    ``words_2``) pairs holding equal words.  That count is symmetric, so the
    two length-dependent branches of the previous version were redundant.
    The denominator is the number of distinct words over both lists.
    Returns 0 when both lists are empty (previously a ZeroDivisionError).
    """
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0
    count = sum(words_2.count(word) for word in words_1)
    return count / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Only tweets with a positive Jaccard score are kept.  Pairs are ordered
    by descending score and, for equal scores, ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([tweet_id, score])
    # Rank the full list before truncating so that ties are broken by the
    # lowest tweet id instead of whichever ids happened to survive the cut.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a '#id (score)' header followed by the wrapped tweet text.

    Each output line starts with two spaces and every word is followed by a
    single space; a new line starts when the next word would not fit.
    """
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    line = "  "
    for word in tweet_content.split():
        if len(line) + len(word) > print_width:
            print(line)
            line = "  "
        line += word + " "
    print(line)

#--------------------------------------------
# 6330331121 (20.00) 180 (2021-03-01 15:21)

def get_unique(words):
    """Return words with repeated entries removed, keeping first occurrences."""
    collected = []
    for entry in words:
        if collected.count(entry) == 0:
            collected.append(entry)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    Returns 0.0 when both lists are empty rather than dividing by zero.
    """
    same = sum(1 for word in words_1 if word in words_2)
    # |A ∪ B| = |A| + |B| - |A ∩ B|
    union_size = len(words_1) + len(words_2) - same
    if union_size == 0:
        return 0.0
    return same / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive Jaccard score.

    Ordering is by descending score, then ascending tweet id.
    """
    negated = [
        [-jaccard(tweet, norm_query), idx]
        for idx, tweet in enumerate(norm_tweets)
    ]
    # A strictly negative first field means the score itself was positive.
    best = sorted(pair for pair in negated if pair[0] < 0)[:n]
    return [[idx, -neg_score] for neg_score, idx in best]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Words are accumulated with a leading space each; every printed line gets
    one more leading space, giving a two-space indent.
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    budget = print_width - 2
    pending = ""
    used = 1
    for word in tweet_content.split(" "):
        if len(word) + used > budget:
            print(" " + pending)
            pending = ""
        pending += " " + word
        used = len(pending)
    if pending:
        print(" " + pending)

#--------------------------------------------
# 6330332821 (20.00) 181 (2021-02-28 23:28)

def get_unique(words):
    """Return the first occurrence of every distinct item in words."""
    kept = []
    for candidate in words:
        try:
            kept.index(candidate)
        except ValueError:
            # Not collected yet.
            kept.append(candidate)
    return kept
def jaccard(words_1, words_2):
    """Return (entries of words_1 found in words_2) / (distinct words overall)."""
    shared = sum(1 for word in words_1 if word in words_2)
    distinct = get_unique(words_1 + words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return [tweet_id, score] pairs from the n best-ranked tweets.

    Tweets are ranked by descending Jaccard score, then ascending id; ranked
    entries whose score is zero are dropped from the result.
    """
    scores = [jaccard(tweet, norm_query) for tweet in norm_tweets]
    ranked = sorted([-score, idx] for idx, score in enumerate(scores))
    return [[idx, -neg] for neg, idx in ranked[:n] if neg != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content."""
    print('')
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    chunk = ''
    for word in tweet_content.split(' '):
        if len(chunk) + len(word) + 1 >= print_width:
            # Flush the current line; the extra leading space completes
            # the two-space indent.
            print(' ' + chunk)
            chunk = ''
        chunk += ' ' + word
    print(' ' + chunk)

#--------------------------------------------
# 6330333421 (20.00) 182 (2021-02-28 21:11)

def get_unique(words):
    """Return a list holding each distinct word once, in first-seen order."""
    result = []
    for current in words:
        if current not in result:
            result.append(current)
    return result
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word lists.

    Duplicates are ignored on both sides.  Returns 0 when both lists are
    empty.
    """
    union_words = []
    for word in words_1 + words_2:
        if word not in union_words:
            union_words.append(word)

    if not union_words:
        return 0

    common = [word for word in union_words
              if word in words_1 and word in words_2]
    return len(common) / len(union_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive score.

    The sort is stable, so tweets with equal scores stay in ascending id
    order.
    """
    scored = [[idx, jaccard(tweet, norm_query)]
              for idx, tweet in enumerate(norm_tweets)]
    matches = [pair for pair in scored if pair[1] > 0]
    matches.sort(key=lambda pair: pair[1], reverse=True)
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Two columns are reserved for the indent; the remaining width is filled
    greedily with space-separated words.
    """
    print('\n#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    max_chars = print_width - 2
    row = []
    for word in tweet_content.split(' '):
        candidate = row + [word]
        if len(' '.join(candidate)) <= max_chars:
            row = candidate
        else:
            # Joining with a leading ' ' element produces the indent.
            print(' '.join([' '] + row))
            row = [word]
    if row:
        print(' '.join([' '] + row))




#--------------------------------------------
# 6330334021 (16.25) 183 (2021-02-28 15:04)

def get_unique(words):
    """Return words without repeats, preserving first-occurrence order."""
    distinct = []
    for token in words:
        if token not in distinct:
            distinct += [token]
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    Two empty lists give 0.0 instead of a ZeroDivisionError.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    total = len(words_1) + len(words_2) - shared
    return shared / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Every tweet is scored, including the last one.  Only positive scores are
    kept, ordered by descending score then ascending tweet id.  Fewer than n
    pairs are returned when fewer tweets match.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)  # computed once per tweet
        if score != 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    # Slicing never raises, even when there are fewer than n matches.
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Each line starts with two spaces and each word is followed by a space.
    """
    print("")
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    line = '  '
    for word in tweet_content.split(' '):
        if len(line) + len(word) > print_width:
            print(line)
            line = '  '
        line += word + ' '
    print(line)

#--------------------------------------------
# 6330335721 (20.00) 184 (2021-02-27 14:56)

def get_unique(words):
    """Return each distinct element of words once, in first-seen order."""
    picked = []
    for element in words:
        already_there = element in picked
        if already_there:
            continue
        picked.append(element)
    return picked
def jaccard(words_1, words_2):
    """Return (distinct shared words) / (distinct words overall), 0 if none.

    The pool of all words is only filled while iterating words_1, so an
    empty words_1 always gives 0.
    """
    pool = []
    common = []
    for left in words_1:
        pool.append(left)
        pool.extend(words_2)
        if left in words_2:
            common.append(left)
    pool = get_unique(pool)
    common = get_unique(common)
    if len(pool) == 0:
        return 0
    return len(common) / len(pool)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with positive Jaccard score.

    Sorted by descending score; the stable sort keeps equal scores in
    ascending tweet-id order.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    return sorted(matches, key=lambda match: match[1], reverse=True)[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the '#id (score)' header, the wrapped content and a blank line.

    Lines are indented by two spaces and every word is followed by a space.
    """
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    line = "  "
    for word in tweet_content.split(" "):
        if len(line) + len(word) <= print_width:
            line = line + word + " "
        else:
            print(line)
            line = "  " + word + " "
    print(line)
    print()

#--------------------------------------------
# 6330336321 (18.01) 185 (2021-02-28 21:46)

def get_unique(words):
    """Return the distinct words in ascending sorted order.

    The input list is left untouched: a sorted copy is scanned, and a word
    is kept only when it differs from the previously kept one.
    """
    unique_words = []
    for word in sorted(words):  # sorted() copies, so the caller's list keeps its order
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    Returns 0.0 for two empty lists instead of raising ZeroDivisionError.
    """
    same = len([word for word in words_1 if word in words_2])
    union = len(words_1) + len(words_2) - same
    if not union:
        return 0.0
    return same / union
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Tweets that share no word with the query (score 0) are left out, in line
    with the other implementations in this file.  Pairs are ordered by
    descending score, then ascending tweet id.
    """
    top_n = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top_n.append([tweet_id, score])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces and filled greedily so that no
    line exceeds print_width (a single over-long word gets its own line).
    Lines are kept whole and carry no trailing space; empty words produced
    by consecutive spaces no longer erase the line being built.
    """
    print()
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    words = tweet_content.split(' ')
    # str.split(' ') always yields at least one element.
    line = "  " + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)


#--------------------------------------------
# 6330337021 (20.00) 186 (2021-03-01 02:16)

def get_unique(words):
    """Return the distinct entries of words, keeping their first-seen order."""
    uniques = []
    for entry in words:
        if uniques.count(entry):
            continue
        uniques.append(entry)
    return uniques
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient derived from list lengths.

    The union size is the number of distinct words overall; the shared-word
    count is the surplus of the two lengths over that union size.
    """
    union_size = len(get_unique(words_1 + words_2))
    shared = len(words_1) + len(words_2) - union_size
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive score.

    Ordered by descending score, then ascending tweet id.
    """
    ranked = []
    for idx, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            # Negate so an ascending sort puts the best score first.
            ranked.append([-score, idx])
    ranked.sort()
    return [[idx, -neg] for neg, idx in ranked][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content."""
    words = tweet_content.split(" ")
    print("\n" + "#{} ({})".format(tweet_id, round(jc_coef, 2)))
    line = " "
    for word in words:
        if len(line) + len(word) + 1 <= print_width:
            line = line + " " + word
        else:
            print(line)
            line = "  " + word
    print(line)

#--------------------------------------------
# 6330338621 (19.20) 187 (2021-03-01 22:51)

def get_unique(words):
    """Return every distinct word once, in order of first appearance."""
    found = []
    for word in words:
        if word not in found:
            found.append(word)
    return found
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    Returns 0.0 when both lists are empty instead of dividing by zero.
    """
    in_both = 0
    for word in words_1:
        if word in words_2:
            in_both += 1
    # Shared words are counted once in the union.
    in_either = len(words_1) + len(words_2) - in_both
    if in_either == 0:
        return 0.0
    return in_both / in_either
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive score.

    Sorted by descending score; equal scores keep ascending tweet-id order
    because the sort is stable.
    """
    pairs = ([idx, jaccard(tweet, norm_query)]
             for idx, tweet in enumerate(norm_tweets))
    hits = [pair for pair in pairs if pair[1] > 0]
    return sorted(hits, key=lambda pair: pair[1], reverse=True)[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Words are packed greedily behind a two-space indent.  ``used`` counts
    the columns taken by the indent, the words placed so far and one
    separator per word, so a line never exceeds print_width unless a single
    word is too long by itself.
    """
    words = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    used = 2
    row = [' ']  # joining with ' ' turns this into the two-space indent
    for word in words:
        if used + len(word) > print_width:
            print(' '.join(row))
            row = [' ']
            used = 2
        row.append(word)
        # Count the separator after every word, including the first word of
        # a fresh line (previously omitted, letting lines overflow by one).
        used += len(word) + 1
    print(' '.join(row))


#--------------------------------------------
# 6330339221 (18.01) 188 (2021-02-26 20:14)

def get_unique(words):
    """Return the words list with later repeats removed."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    The union size starts at len(words_2) and grows by one for every word of
    words_1 missing from words_2.  Two empty lists give 0.0 rather than a
    ZeroDivisionError.
    """
    shared = 0
    union_size = len(words_2)
    for word in words_1:
        if word in words_2:
            shared += 1
        else:
            union_size += 1
    if union_size == 0:
        return 0.0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Tweets with a zero Jaccard score are excluded, consistent with the other
    implementations in this file.  Ordering is by descending score, then
    ascending tweet id.
    """
    ranked = sorted(
        [-score, tweet_id]
        for tweet_id, score in enumerate(
            jaccard(tweet, norm_query) for tweet in norm_tweets
        )
        if score > 0
    )
    return [[tweet_id, -neg_score] for neg_score, tweet_id in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (score)' header and the wrapped content.

    ``x`` is the line being built.  It switches between a list of words and
    the joined string on every iteration: it is a list when a word is
    appended and a string while its length is measured.
    """
    print()
    print('#'+str(tweet_id),'('+str(round(jc_coef ,2))+')')
    tweet_content = tweet_content.split(' ')
    a = 0  # index of the next word to place
    x = []
    while a < len(tweet_content) :
        # Tentatively add the next word and measure the joined line.
        x.append(tweet_content[a])
        x = ' '.join(x)
        if x == '' :
            x = x+' '
        # Two columns are reserved for the indent that print(' ', x) emits.
        if len(x)>print_width-2 :
            x = x.split(' ')
            if len(x) == 1 :
                # A single word that is too long is printed on its own line.
                x = ' '.join(x)
                print(' ',x)
                a = a+1
                x = []
            else :
                # Drop the word that overflowed and print the rest; ``a`` is
                # not advanced, so that word is retried on the next line.
                x = x[:len(x)-1]
                x = ' '.join(x)
                print(' ',x)
                x = []
        else :
            # The word fits: keep it and turn x back into a list.
            a=a+1
            x = x.split(' ')
    # Print whatever is left; if x is empty this prints just the indent.
    x = ' '.join(x)
    print(' ',x)




#--------------------------------------------
# 6330340821 (20.00) 189 (2021-02-28 00:41)

def get_unique(words):
    """Return the distinct items of words in the order they first occur."""
    unique_words = []
    for item in words:
        if item not in unique_words:
            unique_words += [item]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    The union size is len(words_1) plus the words of words_2 that are not in
    words_1.  Returns 0.0 for two empty lists instead of dividing by zero.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + sum(1 for word in words_2 if word not in words_1)
    if union_size == 0:
        return 0.0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a non-zero score.

    Ordered by descending score, then ascending tweet id.
    """
    ranked = []
    for idx, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            # Negated id: a descending sort then yields ascending ids on ties.
            ranked.append([score, -idx])
    ranked.sort(reverse=True)
    return [[-neg_idx, score] for score, neg_idx in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a line holding one space, a '#id (score)' header and the content.

    The content is written word by word with ``end=" "``, so each word is
    followed by a space.  ``current_width`` tracks the columns used on the
    current line and ``first_word`` marks that the two-space indent still
    has to be written.
    """
    jround=round(jc_coef,2)
    print(' ')
    print('#'+str(tweet_id)+' ('+str(jround)+')')
    text = tweet_content.split(" ")
    current_width = 0
    first_word = True
    for i in text:
        # NOTE(review): for the first word of a line current_width is 0 here,
        # so the two-space indent is not part of this fit check.
        if current_width+len(i) <= print_width:
            if first_word == True:
                print("  ", end="")
                current_width += 2
            print(i, end=" ")
            current_width += len(i)+1
            first_word = False
        else:
            # The word does not fit: end the current line and start a new one.
            print()
            current_width = 0
            first_word = True
            if current_width+len(i) > print_width:
                # The word alone is longer than print_width; give it its own line.
                print("  " + i)
                current_width = 0
                continue
            else:
                print("  ", end="")
                current_width += 2
                print(i, end=" ")
                current_width += len(i)+1
                first_word = False
    # Terminate the last line.
    print()


#--------------------------------------------
# 6330341421 (17.00) 190 (2021-02-28 16:07)

def get_unique(words):
    """Return words with duplicates removed, first occurrences kept."""
    singles = []
    for word in words:
        try:
            singles.index(word)
        except ValueError:
            singles.append(word)
    return singles
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    Returns 0.0 when both lists are empty instead of raising
    ZeroDivisionError.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    denominator = len(words_1) + len(words_2) - shared
    if denominator == 0:
        return 0.0
    # True division already yields a float; no conversion is needed.
    return shared / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Tweets with a zero score are excluded, consistent with the other
    implementations in this file.  Pairs are ordered by descending Jaccard
    score and, on ties, ascending tweet id.  When fewer than n tweets match,
    all matches are returned rather than raising IndexError.
    """
    many = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            many.append([tweet_id, score])
    # One keyed sort replaces the repeated adjacent-swap passes.
    many.sort(key=lambda pair: (-pair[1], pair[0]))
    return many[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces and filled greedily up to
    print_width.  Lines carry no trailing space, and no whitespace-only line
    is emitted when the last word exactly fills a line.
    """
    words = tweet_content.split(' ')
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    # str.split(' ') always returns at least one element.
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)


#--------------------------------------------
# 6330342021 (20.00) 191 (2021-03-01 02:54)

def get_unique(words):
    """Return the distinct words ordered by length, then alphabetically.

    The caller's list is not modified: a sorted copy is scanned and a word
    is kept only when it differs from the previously kept one.
    """
    ordered = sorted(words, key=lambda word: (len(word), word))
    unique_words = []
    for word in ordered:
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts entries of words_1 that occur in words_2; the
    denominator is the number of distinct words across both lists.  Two
    empty lists give 0.0 instead of raising IndexError.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    if not union:
        return 0.0
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return [tweet_id, score] pairs taken from the n best-ranked tweets.

    Tweets are ranked by descending Jaccard score and, on ties, ascending
    tweet id.  This includes the lowest-scoring tie group, which was
    previously left in descending id order.  Ranked entries with a zero
    score are dropped from the result.
    """
    ranked = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return [pair for pair in ranked[:n] if pair[1] != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Every line is indented by two spaces and every word is followed by a
    space; the whole content is emitted with a single print call.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    used = 2
    rows = ['  ']
    for word in tweet_content.split(' '):
        if used + len(word) <= print_width:
            rows[-1] += str(word) + ' '
            used += len(word) + 1
        else:
            rows.append('  ' + str(word) + ' ')
            used = len(word) + 3
    print('\n'.join(rows))




#--------------------------------------------
# 6330343721 (17.00) 192 (2021-03-01 23:43)

def get_unique(words):
    """Return the non-repeating words, in order of first appearance."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    The shared-word count is the number of entries lost when the two lists
    are merged and de-duplicated.  Two empty lists give 0.0 instead of a
    ZeroDivisionError.
    """
    combined = words_1 + words_2
    distinct = []
    for word in combined:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0.0
    return (len(combined) - len(distinct)) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Tweets with a zero Jaccard score are excluded, consistent with the other
    implementations in this file.  Pairs are ordered by descending score,
    then ascending tweet id.  Asking for more tweets than match returns all
    matches instead of raising IndexError.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([tweet_id, score])
    # A single keyed sort handles both the score order and the tie-break.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content."""
    print(" ")
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    words = tweet_content.split(" ")
    limit = int(print_width)
    line = ' '
    for word in words:
        if len(line) + len(word) + 1 > limit:
            print(line)
            line = "  " + word
        else:
            line = line + " " + word
    # Flush the last, partially filled line.
    print(line)

#--------------------------------------------
# 6330344321 (20.00) 193 (2021-03-01 22:48)

def get_unique(words):
    """Return each distinct word once, keeping the order of first occurrence."""
    unique_words = []
    for word in words:
        if unique_words.count(word) == 0:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word lists.

    Duplicates are ignored on both sides.  Two empty lists give 0.0 rather
    than a ZeroDivisionError.
    """
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0.0
    shared = sum(1 for word in distinct if word in words_1 and word in words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive score.

    Ordered by descending score, then ascending tweet id.
    """
    candidates = []
    for idx, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            candidates.append([-score, idx])
    candidates.sort()
    return [[idx, -neg] for neg, idx in candidates[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    ``line`` equal to a single space means nothing has been placed on the
    current line yet; a word too long for an empty line is printed alone.
    """
    print("\n#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    used = 1
    line = ' '
    for word in tweet_content.split(' '):
        used += len(word) + 1
        if used <= print_width:
            line += ' ' + word
        elif line == ' ':
            # Over-long word on an empty line: print it and stay empty.
            print(line + ' ' + word)
            used = 1
        else:
            print(line)
            line = '  ' + word
            used = 2 + len(word)
    if line != ' ':
        print(line)

#--------------------------------------------
# 6330345021 (20.00) 194 (2021-02-27 20:46)

def get_unique(words):
    """Return the distinct words in ascending sorted order.

    Works on a sorted copy so the caller's list is not reordered.
    """
    ordered = sorted(words)
    # Keep the first element and every element that differs from its
    # predecessor; in a sorted sequence that leaves one of each word.
    return [
        word
        for position, word in enumerate(ordered)
        if position == 0 or ordered[position - 1] != word
    ]
def jaccard(words_1, words_2):
    """Return (distinct shared words) / (distinct words overall), 0 if none."""
    union = get_unique(words_1 + words_2)
    shared = []
    for word in words_1:
        if word in words_2 and word not in shared:
            shared.append(word)
    if len(union) == 0:
        return 0
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a non-zero score.

    Ordered by descending score, then ascending tweet id.
    """
    ranked = sorted(
        [jaccard(tweet, norm_query) * -1, idx]
        for idx, tweet in enumerate(norm_tweets)
    )
    hits = [pair for pair in ranked if pair[0] != 0]
    # Never ask for more entries than there are hits.
    limit = len(hits) if n > len(hits) else n
    return [[hits[k][1], hits[k][0] * -1] for k in range(limit)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces and filled greedily up to
    print_width; a word longer than the width gets a line of its own.
    Wrapping depends only on word positions, so repeated words are neither
    dropped nor printed twice.
    """
    tweet_word = tweet_content.split(' ')
    print("\n""#"+str(tweet_id),"("+str(round(jc_coef,2))+")")
    # str.split(' ') always returns at least one element.
    line = "  " + tweet_word[0]
    for word in tweet_word[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)

#--------------------------------------------
# 6330346621 (17.97) 195 (2021-03-01 23:10)

def get_unique(words):
    """Return the distinct elements of words in first-seen order."""
    unique_words = []
    for element in words:
        if element in unique_words:
            continue
        unique_words.append(element)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    The numerator counts equal (word_1, word_2) pairs, which equals the
    intersection size when neither list has duplicates.  Two empty lists
    give 0.0 instead of a ZeroDivisionError.
    """
    matches = sum(1 for left in words_1 for right in words_2 if right == left)
    denominator = len(words_1) + len(words_2) - matches
    if denominator == 0:
        return 0.0
    return matches / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive score.

    Ordered by descending score, then ascending tweet id.
    """
    hits = []
    for idx, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            hits.append([-score, idx])
    hits.sort()
    return [[idx, -neg] for neg, idx in hits][0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces and filled greedily.  A line
    may now be exactly print_width characters long; the former strict
    comparison wrapped one column early.
    """
    print('')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    words = tweet_content.split(' ')
    # str.split(' ') always returns at least one element.
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)

#--------------------------------------------
# 6330347221 (19.20) 196 (2021-03-01 14:38)

def get_unique(words):
    """Return words with repeats removed, first occurrence of each kept."""
    unique_words = []
    for word in words:
        try:
            unique_words.index(word)
        except ValueError:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    The union size is len(words_1) plus the words of words_2 that do not
    appear in words_1.  Two empty lists give 0.0 instead of a
    ZeroDivisionError.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    union_size = len(words_1)
    for word in words_2:
        if word not in words_1:
            union_size += 1
    if union_size == 0:
        return 0.0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs with a positive score.

    Ordered by descending score, then ascending tweet id.
    """
    scored = []
    for idx, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            # Negated id so that a descending sort gives ascending ids on ties.
            scored.append([score, -1 * idx])
    ordered = sorted(scored, reverse=True)
    ordered = [[neg_idx * -1, score] for score, neg_idx in ordered]
    if len(ordered) < n:
        return ordered
    return [ordered[i] for i in range(n)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content.

    Each content line is written piecewise: one space, then every word
    preceded by a space.  ``used`` counts the indent, the words written so
    far and one separator per word, so a line never exceeds print_width
    unless a single word is too long by itself.  The last line ends with a
    trailing space, as before.
    """
    words = tweet_content.split(' ')
    print(' ')
    print('#'+str(tweet_id),'('+str(round(float(jc_coef),2))+')')
    used = 2
    print(' ', end='')
    for word in words:
        if used + len(word) > print_width:
            print()
            print(' ', end='')
            used = 2
        print(' ' + word, end='')
        # Count the separator after every word, including the first word of
        # a fresh line (previously omitted, letting lines overflow by one).
        used += len(word) + 1
    print(' ')


#--------------------------------------------
# 6330348921 (18.37) 197 (2021-02-28 14:56)

def get_unique(words):
    """Return the distinct items of words in the order they first appear."""
    unique_words = []
    for item in words:
        if item not in unique_words:
            unique_words += [item]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts entries of words_2 that occur in words_1; the
    denominator is the number of distinct words across both lists.  The
    result is computed once after counting, so an empty words_2 no longer
    leaves it undefined, and two empty lists give 0.0.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    union_size = len(get_unique(words_1 + words_2))
    if union_size == 0:
        return 0.0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Only positive Jaccard scores are kept.  Pairs are ordered by the exact
    score, descending, then by ascending tweet id.  Sorting on the real
    values replaces the digit-concatenated key, which rounded scores to two
    decimals and broke for large tweet ids.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([tweet_id, score])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the '#id (score)' header followed by the wrapped content.

    Content lines are indented by two spaces and filled greedily up to
    print_width; a word longer than the width gets a line of its own.  The
    pending line is always rebuilt from the current words, so no word is
    lost when consecutive words each force a wrap.
    """
    print('#'+ str(tweet_id) +' ' +'('+ str(round((jc_coef),2)) + ')')
    words = tweet_content.split(' ')
    # str.split(' ') always returns at least one element.
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)

#--------------------------------------------
# 6330349521 (17.00) 198 (2021-03-01 19:56)

def get_unique(words):
    """Return the distinct words in ascending sorted order.

    The caller's list is left intact; the former implementation removed
    every element from it while collecting the unique words.
    """
    unique_words = []
    for word in sorted(words):
        # In sorted order equal words are adjacent, so comparing with the
        # last kept word is enough.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    Returns 0.0 for two empty lists instead of raising ZeroDivisionError.
    """
    num = sum(1 for word in words_1 if word in words_2)
    den = len(words_1) + len(words_2) - num
    return num / den if den != 0 else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, score] pairs for tweets matching the query.

    Tweets with a zero Jaccard score are excluded, consistent with the other
    implementations in this file.  Pairs are ordered by descending score,
    then ascending tweet id.  When fewer than n tweets match, all matches
    are returned instead of raising IndexError.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            # Negated id: a descending sort yields ascending ids on ties.
            ranked.append([score, -tweet_id])
    ranked.sort(reverse=True)
    return [[-neg_id, score] for score, neg_id in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content.

    Content is written piecewise: a two-space indent, then each word
    followed by a space; the final line is closed with one more space.
    """
    print(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    used = 2
    print('  ', end='')
    for word in words:
        fits = used < print_width and len(word) < print_width - used + 1
        if not fits:
            # Start a new indented line.
            used = 2
            print('\n  ', end='')
        print(word + ' ', end='')
        used += len(word) + 1
    print(' ')

#--------------------------------------------
# 6330350021 (18.44) 199 (2021-03-01 02:45)

def get_unique(words):
    """Return each distinct word of ``words`` exactly once.

    Words are ordered by (length, text); the smallest one is placed last
    and the others keep ascending order.
    """
    if words == []:
        return []
    keyed = sorted([len(word), word] for word in words)
    # keep the second element of every adjacent pair that differs
    unique_words = [nxt[1] for cur, nxt in zip(keyed, keyed[1:]) if cur != nxt]
    unique_words.append(keyed[0][1])
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The coefficient is the number of distinct shared words divided by the
    number of distinct words overall.  Repeated words count once, and an
    empty union gives 0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A number between 0 and 1.
    """
    left, right = set(words_1), set(words_2)
    everything = left.union(right)
    if not everything:
        return 0
    return len(left.intersection(right)) / len(everything)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets with a positive similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs ordered by descending
        coefficient, ties by ascending tweet id.  Fewer than ``n`` pairs
        are returned when fewer tweets match.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)  # computed once per tweet
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the ``#<id> (<coef>)`` header and the wrapped text.

    The content is split on whitespace; each output line starts with two
    spaces and every word is followed by one space.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    line = indent
    for word in tweet_content.split():
        # the trailing space of the previous word is part of len(line)
        if len(line) + len(word) <= print_width:
            line += word + ' '
            continue
        print(line)
        line = indent + word + ' '
    print(line)
#--------------------------------------------
# 6330351721 (20.00) 200 (2021-02-26 23:01)

def get_unique(words):
    """Return the distinct items of ``words`` as a new sorted list.

    The caller's list is not modified.

    Args:
        words: list of hashable, mutually comparable items.

    Returns:
        Sorted list with every distinct item appearing once.
    """
    return sorted(set(words))
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Each list is reduced to its distinct words; the result is the size of
    their intersection divided by the size of their union.  Neither input
    list is modified, and two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    unique_1 = set(words_1)
    unique_2 = set(words_2)
    shared = len(unique_1 & unique_2)
    total = len(unique_1) + len(unique_2) - shared  # inclusion-exclusion
    if total == 0:
        return 0.0
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query, skipping zero scores.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: number of leading results to keep (applied as a slice).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs sorted by descending coefficient
        and, for equal coefficients, ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)  # evaluate once per tweet
        if coef != 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the tweet header and the wrapped content.

    Content words are joined by single spaces behind a two-space indent; a
    new line is started whenever adding the next word would exceed
    ``print_width`` characters.
    """
    print(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    lines = [' ']  # first line starts with one space; each word adds ' ' + word
    for word in tweet_content.split(' '):
        extended = lines[-1] + ' ' + word
        if len(extended) > print_width:
            lines.append('  ' + word)
        else:
            lines[-1] = extended
    for line in lines:
        print(line)

#--------------------------------------------
# 6330352321 (17.00) 201 (2021-03-01 23:45)

def get_unique(words):
    """Return the distinct items of ``words`` in sorted order.

    A new list is returned; the argument is not sorted or otherwise
    modified.

    Args:
        words: list of hashable, mutually comparable items.

    Returns:
        Sorted list of the distinct items.
    """
    return sorted(set(words))
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient (shared / total distinct words).

    The inputs are not modified, repeated words count once, and two empty
    lists give 0.0 rather than raising IndexError.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    group_1, group_2 = set(words_1), set(words_2)
    all_words = group_1 | group_2
    if not all_words:
        return 0.0
    return len(group_1 & group_2) / len(all_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets with the highest Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs by descending coefficient, ties
        by ascending tweet id.  When ``n`` exceeds the number of tweets,
        all tweets are returned.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # one sort after scoring instead of re-sorting on every iteration
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#<id> (<coef>)`` header and wrapped text.

    Each content line begins with two spaces and every word carries a
    trailing space; a word is moved to a new line when the current line
    length plus the word length exceeds ``print_width``.
    """
    print(" ")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    indent = "  "
    current = indent
    for word in tweet_content.split(" "):
        chunk = word + " "
        if len(current) + len(word) > print_width:
            print(current)
            current = indent + chunk
            continue
        current += chunk
    print(current)

#--------------------------------------------
# 6330353021 (15.40) 202 (2021-03-01 22:45)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order.

    No placeholder value is written into the data, so every word
    (including any that might equal a marker string) is preserved, and the
    caller's list is left unchanged.

    Args:
        words: list of words.

    Returns:
        A new list with duplicates removed.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Computed on the distinct words of each list as
    ``|intersection| / |union|``; two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    a, b = set(words_1), set(words_2)
    down = len(a | b)
    if down == 0:
        return 0.0
    return len(a & b) / down
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets ranked by Jaccard similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: number of leading results to keep (applied as a slice).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs ordered by descending
        coefficient, then ascending tweet id.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # a single keyed sort replaces two quadratic exchange-sort passes
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content, wrapped by slicing characters.

    Prints a line holding one space, then ``#<id> (<coef>)`` with the
    coefficient rounded to two decimals, then the content in chunks of at
    most ``print_width - 2`` characters, each prefixed by two spaces.
    """
    n= int(print_width)
    print(' ')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    # a: text still to print; k: number of characters of tweet_content consumed
    a = tweet_content
    k = 0
    while a != '' :
       if len(a) <= n-2 :
        # the remainder fits on one line: drop leading spaces, print, stop
        c = a
        # NOTE(review): if c consists only of spaces it becomes '' and c[0]
        # raises IndexError - confirm callers never pass such text
        while c[0] == ' ' :
                c = c[1::]
        print('  '+c)
        break
       else :
        if a[n-2] == ' ' :
            # the character right after the chunk is a space: cut exactly there
            c= a[0:n-2:]
            while c[0] == ' ' :
                c = c[1::]
            print('  '+c[0:n-2:])
            a = a[0:n-2:]
        else :
            # otherwise shorten the chunk until it ends with a space
            a = a[0:n-2:]
            # NOTE(review): a chunk without any space shrinks to '' and
            # a[-1] raises IndexError - confirm intended handling of long words
            while a[-1] != ' ' :
                a= a[0:len(a)-1:]
            c = a
            while c[0] == ' ' :
                c = c[1::]
            print('  '+c)
        # advance past the characters just consumed
        k += len(a)
        a = tweet_content[k::]





#--------------------------------------------
# 6330354621 (18.33) 203 (2021-03-01 00:49)

def get_unique(words):
    """Return the distinct words, ordered by where each one last appears."""
    kept = []
    # walking backwards, the first sighting of a word is its last occurrence
    for word in list(words)[::-1]:
        if word not in kept:
            kept.append(word)
    kept.reverse()
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are treated as sets, so repeated words count once; two
    empty lists give 0.0 instead of raising ZeroDivisionError.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first = set(words_1)
    second = set(words_2)
    combined = first | second
    if not combined:
        return 0.0
    return len(first & second) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Each result is ``[tweet_id, jaccard_coef]``; results are ordered by
    descending coefficient and keep ascending id order among equal scores.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        score = jaccard(tweet_words, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    # stable sort: equal scores stay in ascending tweet-id order
    ranked = sorted(matches, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the tweet header and the content wrapped to width.

    Content lines are indented by two spaces with words separated by one
    space.  A word too long to fit on any line is printed alone on its own
    line, so the function always terminates.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: text to wrap; split on single spaces.
        jc_coef: similarity value, shown rounded to two decimals.
        print_width: maximum line length including the indent.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''  # holds ' word word'; one extra leading space is added on print
    for word in tweet_content.split(' '):
        # flush only when the line already has content, so an over-long
        # word cannot cause an endless retry
        if line and len(line) + len(word) >= print_width - 1:
            print(' ' + line)
            line = ''
        line += ' ' + word
    print(' ' + line)

#--------------------------------------------
# 6330355221 (18.01) 204 (2021-03-01 01:53)

def get_unique(words):
    """Return the words with duplicates dropped, keeping first-seen order."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
#--------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The result is the number of distinct shared words divided by the
    number of distinct words in both lists; two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    left, right = set(words_1), set(words_2)
    total = len(left | right)
    return len(left & right) / total if total else 0.0
#--------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, jaccard_coef]`` pairs by similarity.

    Pairs are ordered by descending coefficient, ties by ascending id.
    """
    coefficients = [jaccard(tweet, norm_query) for tweet in norm_tweets]
    # sorting on the negated coefficient gives descending scores with
    # ascending ids among ties
    keyed = sorted([-coef, tweet_id] for tweet_id, coef in enumerate(coefficients))
    return [[tweet_id, -negated] for negated, tweet_id in keyed][:n]
#--------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, ``#<id> (<coef>)`` and the word-wrapped content.

    Content lines are indented by two spaces; a word starts a new line
    when appending it would push the line past ``print_width``.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    pieces = iter(tweet_content.split(' '))
    line = '  ' + next(pieces)
    for piece in pieces:
        candidate = line + ' ' + piece
        if len(candidate) > print_width:
            print(line)
            line = '  ' + piece
        else:
            line = candidate
    print(line)
#--------------------------------------------
# 6330356921 (18.01) 205 (2021-02-28 23:16)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    result = []
    for candidate in words:
        # count() is zero exactly when the word has not been stored yet
        if result.count(candidate) == 0:
            result.append(candidate)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    A word counts as shared only when it occurs in both lists (a word
    repeated inside a single list is not mistaken for a common word).  Two
    empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    set_1, set_2 = set(words_1), set(words_2)
    all_words = set_1 | set_2
    if not all_words:
        return 0.0
    same_words = set_1 & set_2
    return len(same_words) / len(all_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets ranked by similarity to the query.

    Every tweet is scored with ``jaccard``; the result holds
    ``[tweet_id, coefficient]`` pairs by descending coefficient and, for
    equal coefficients, ascending tweet id.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    ranked = sorted(scored, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the wrapped content.

    Words are written one by one, each followed by a space, behind a
    two-space indent; empty tokens (from consecutive spaces) are written
    as a single space.  The output ends with a line of ten spaces.
    """
    tokens = tweet_content.split(" ")
    print("")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    print('  ', end="")
    column = 2  # characters already written on the current line
    for position, token in enumerate(tokens):
        if token == "":
            print(" ", end="")
            column += 1
            continue
        if column + len(token) <= print_width:
            print(token, end=" ")
        elif position == 0:
            # the very first word is written on the first line regardless
            print(tokens[0], end=" ")
        else:
            column = 2
            print(" ")
            print("  " + token, end=" ")
        column += 1 + len(token)
    print("          ")

#--------------------------------------------
# 6330357521 (18.50) 206 (2021-03-01 22:45)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending order.

    A new list is built; the caller's list keeps its order and content.

    Args:
        words: list of hashable, mutually comparable items.

    Returns:
        Sorted list of the distinct items.
    """
    return sorted(set(words))
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Computed as distinct shared words over distinct words overall.  Two
    empty lists give 0.0, and no built-in name is shadowed.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    every_word = first | second
    if not every_word:
        return 0.0
    return len(first & second) / len(every_word)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets with non-zero similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs by descending coefficient, ties
        by ascending tweet id.  Works when ``n`` is larger than the number
        of tweets.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the ``#<id> (<coef>)`` header and wrapped content.

    The content is split on single spaces and re-joined into lines of at
    most ``print_width`` characters, each starting with two spaces.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')

    first, *rest = tweet_content.split(' ')
    lines = ['  ' + first]
    for word in rest:
        if len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += ' ' + word
        else:
            lines.append('  ' + word)
    for line in lines:
        print(line)

#--------------------------------------------
# 6330358121 (18.50) 207 (2021-02-28 23:29)

def get_unique(words):
    """Return the words with repeats removed, in first-occurrence order."""
    # keep a word only if it does not appear anywhere earlier in the list
    return [
        word
        for position, word in enumerate(words)
        if word not in words[:position]
    ]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Repeated words count once; two empty lists give 0.0 instead of
    raising ZeroDivisionError.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    a, b = set(words_1), set(words_2)
    size = len(a | b)
    if size == 0:
        return 0.0
    return len(a & b) / size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Results are ``[tweet_id, jaccard_coef]`` lists ordered by descending
    coefficient; equal coefficients are ordered by ascending tweet id.
    """
    hits = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            hits.append([tweet_id, coef])
    hits.sort(key=lambda pair: (-pair[1], pair[0]))
    return hits[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, ``#<id> (<coef>)`` and the content in wrapped lines.

    The content is split on single spaces and a ' ' token is inserted
    between consecutive pieces; runs of tokens are then printed behind a
    two-space indent whenever the joined run exceeds ``print_width - 2``.
    """
    print('')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    l=tweet_content.split(' ')
    # interleave a ' ' token between the split pieces
    for k in range(len(l)-1):l.insert(2*k+1,' ')
    j=0 # l is now [s1, ' ', s2, ' ', s3, ...]; j is the start of the current line
    for i in range(1,len(l)+1):
        # g: tokens of the current line without the newest one; p: candidate text
        g=l[j:i-1];h=''.join(g)
        if i==len(l):p=''.join(l[j:])
        else:p=''.join(l[j:i])
        if len(p)>print_width-2:
            # presumably the h comparison skips runs made only of separators
            # - TODO confirm
            if g!=[] and h!=' '*(len(g)//2+1):
                if g[0]==' ':
                    # drop leading separator tokens before printing
                    while g[0]==' ':g.pop(0)
                print('  '+''.join(g));j=i-1
            else:j=i-1
    # print whatever remains after the last wrap
    g=l[j:]
    if g[0]==' ':
        # NOTE(review): g[0] raises IndexError if g becomes empty here - confirm
        while g[0]==' ':g.pop(0)
    print('  '+''.join(g))
#--------------------------------------------
# 6330360321 (15.50) 208 (2021-03-01 06:10)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    position = 0
    while position < len(words):
        candidate = words[position]
        if candidate not in unique_words:
            unique_words.append(candidate)
        position += 1
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The coefficient is ``|A ∩ B| / |A ∪ B|`` on the distinct words of each
    list; two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first = set(words_1)
    second = set(words_2)
    common = len(first & second)
    total = len(first) + len(second) - common  # inclusion-exclusion
    return common / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets with positive similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs ordered by descending
        coefficient, ties by ascending tweet id.  The coefficient is the
        exact value returned by ``jaccard`` (no tie-breaking offset is
        added), and tweets scoring zero are never included.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # ties are resolved through the sort key rather than by perturbing
    # the coefficient itself
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the wrapped content.

    Each content line is printed with a two-space indent and every word is
    followed by a space; the text part of a line is limited by
    ``print_width - 2``.
    """
    words = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    limit = print_width - 2
    buffer = ''
    for word in words:
        if len(buffer) + len(word) <= limit:
            buffer = buffer + word + ' '
        else:
            print('  ' + buffer)
            buffer = word + ' '
    print('  ' + buffer)






#--------------------------------------------
# 6330361021 (18.33) 209 (2021-03-01 02:38)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        try:
            unique_words.index(word)
        except ValueError:
            # not stored yet
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Shared distinct words divided by all distinct words.  Repeated words
    count once and two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    one, two = set(words_1), set(words_2)
    union = one | two
    if not union:
        return 0.0
    same = one & two
    return len(same) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets with non-zero similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs ordered by descending
        coefficient and, for equal coefficients, ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            scored.append([tweet_id, coef])
    # one keyed sort replaces sorting inside the loop and the chain of
    # sign-flipping passes
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the content wrapped to width.

    The content is cut into tokens (words and individual spaces) that are
    appended to a line starting with two spaces.  When a token does not
    fit, the line is printed and a new one is started.  A token that does
    not fit even on a fresh line is placed there anyway, so the function
    always terminates.  Nothing is printed after the header for empty
    content.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: text to wrap.
        jc_coef: similarity value, shown rounded to two decimals.
        print_width: maximum line length including the indent.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ")")

    # split into words and single-space tokens, keeping every space
    tokens = []
    word = ''
    for char in tweet_content:
        if char == ' ':
            if word:
                tokens.append(word)
            tokens.append(' ')
            word = ''
        else:
            word += char
    if word:
        tokens.append(word)

    line = '  '
    for token in tokens:
        if len(line) + len(token) > print_width:
            print(line)
            # a separating space supplies one of the two indent columns
            line = ' ' if token == ' ' else '  '
        line += token
    if tokens:
        print(line)


#--------------------------------------------
# 6330362621 (20.00) 210 (2021-02-28 19:32)

def get_unique(words):
    """Return the distinct items of ``words`` as a new sorted list.

    The argument is not sorted in place.

    Args:
        words: list of hashable, mutually comparable items.

    Returns:
        Sorted list of the distinct items.
    """
    return sorted(set(words))
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Distinct shared words over distinct words overall; two empty lists
    give 0.0.  No built-in name is shadowed.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    left = set(words_1)
    right = set(words_2)
    union_size = len(left | right)
    if union_size == 0:
        return 0.0
    return len(left & right) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets with positive similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results; zero or less gives an empty list.

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs ordered by descending
        coefficient, ties by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # sort once instead of re-sorting the running top list for every tweet
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, ``#<id> (<coef>)`` and the word-wrapped content.

    Lines are indented by two spaces and hold as many space-separated
    words as fit within ``print_width`` characters.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')

    indent = '  '
    words = tweet_content.split(' ')
    line_words = [words[0]]
    line_length = len(indent) + len(words[0])
    for word in words[1:]:
        if line_length + 1 + len(word) > print_width:
            print(indent + ' '.join(line_words))
            line_words = [word]
            line_length = len(indent) + len(word)
        else:
            line_words.append(word)
            line_length += 1 + len(word)
    print(indent + ' '.join(line_words))


# --------------------------------------------
# 6330365521 (14.75) 211 (2021-02-26 23:42)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        # extend by the word itself, or by nothing when already stored
        unique_words += [] if word in unique_words else [word]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are treated as sets; the result is the intersection size
    over the union size, or 0.0 when both lists are empty.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    both = first | second
    if not both:
        return 0.0
    return len(first & second) / len(both)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs by descending coefficient, ties
        by ascending tweet id.  When ``n`` exceeds the number of tweets
        every tweet is returned.
    """
    ranked = sorted(
        ([jaccard(tweet, norm_query), -tweet_id]
         for tweet_id, tweet in enumerate(norm_tweets)),
        reverse=True,
    )
    # slicing tolerates n being larger than the list
    return [[-neg_id, coef] for coef, neg_id in ranked[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the content wrapped to width.

    Every content line is printed with the two-space indent that the
    width calculation reserves, and holds as many space-separated words as
    fit within ``print_width`` characters (indent included).

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: text to wrap; split on single spaces.
        jc_coef: similarity value, shown rounded to two decimals.
        print_width: maximum line length including the indent.
    """
    print('\n' + '#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    line = []
    used = len(indent)
    for word in tweet_content.split(' '):
        # a separating space is only needed after an earlier word
        extra = len(word) + (1 if line else 0)
        if line and used + extra > print_width:
            print(indent + ' '.join(line))
            line = []
            used = len(indent)
            extra = len(word)
        line.append(word)
        used += extra
    print(indent + ' '.join(line))

#--------------------------------------------
# 6330366121 (20.00) 212 (2021-02-28 18:01)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        already_stored = word in unique_words
        if not already_stored:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Distinct shared words divided by distinct words overall; repeated
    words count once and two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    union_count = len(first | second)
    shared_count = len(first & second)
    if union_count == 0:
        return 0.0
    return shared_count / union_count
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: number of leading results to keep (applied as a slice).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs by descending coefficient, ties
        by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)  # one evaluation per tweet
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, ``#<id> (<coef>)`` and the content in wrapped lines.

    The content string is consumed from the front: each pass prints one
    chunk behind a two-space indent and keeps the stripped remainder (with
    one space appended) for the next pass, until nothing is left.
    """
    print()
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    while len(tweet_content) != 0 :
        start = 0
        if ' ' not in tweet_content[:print_width-2]:
          # no space within the first print_width-2 characters: print up to
          # the first space found anywhere in the text
          # NOTE(review): find() returns -1 when the text has no space at
          # all, so the slices below would use index -1 - confirm
          sub = tweet_content[:tweet_content.find(' ')]
          tweet_content = tweet_content[tweet_content.find(' '):].strip() +' '
          print('  '+sub)
        else:
           while True:
              if len(tweet_content) < print_width:
                  # the remainder is shorter than print_width: print it and finish
                  sub = tweet_content
                  tweet_content = ''
                  print('  '+sub)
                  break
              end = tweet_content.find(' ', start) # index 20 -> the 21st character; 20 characters are already kept
              if end > print_width-2:
                  # the next space lies beyond the limit: cut before the
                  # word that starts at `start`
                  sub = tweet_content[:start]
                  tweet_content = tweet_content[start:].strip(' ') + ' '
                  print('  '+sub)
                  break
              else:
                  # the word still fits: continue searching after this space
                  start = end +1



#--------------------------------------------
# 6330367821 (20.00) 213 (2021-03-01 23:16)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Distinct shared words divided by distinct words overall.  Two empty
    lists give 0.0 instead of raising IndexError.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    merged = first | second
    if not merged:
        return 0.0
    return len(first & second) / len(merged)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Results are ``[tweet_id, jaccard_coef]`` lists by descending
    coefficient; equal coefficients are ordered by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    ranked = sorted(matches, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the content wrapped to width.

    Content lines are indented by two spaces and contain space-separated
    words.  The final line is printed exactly once after all words are
    placed, even when an earlier word is identical to the last word.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: text to wrap; split on single spaces.
        jc_coef: similarity value, shown rounded to two decimals.
        print_width: maximum line length including the indent.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    width = print_width - 2  # room left after the two-space indent
    used = 0
    line = []
    for word in tweet_content.split(' '):
        used += len(word)
        if used <= width:
            line.append(word)
            used += 1  # separator before the next word
        else:
            print('  ' + ' '.join(line))
            line = [word]
            used = len(word) + 1
    print('  ' + ' '.join(line))

#--------------------------------------------
# 6330370621 (17.00) 214 (2021-02-25 17:47)

def get_unique(words):
    """Return the words with repeats removed, in first-occurrence order."""
    # a word is kept when no earlier position of the list holds it
    return [
        word
        for index, word in enumerate(words)
        if word not in words[:index]
    ]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Intersection size over union size of the distinct words; two empty
    lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    words_u = first | second
    if not words_u:
        return 0.0
    words_i = first & second
    return len(words_i) / len(words_u)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` tweets with positive similarity to the query.

    Args:
        norm_tweets: list of word lists; the list index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results (non-positive gives an empty list).

    Returns:
        ``[tweet_id, jaccard_coef]`` pairs by descending coefficient, ties
        by ascending tweet id.  Fewer pairs are returned when fewer than
        ``n`` tweets match.
    """
    sim = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            sim.append([tweet_id, coef])
    sim.sort(key=lambda pair: (-pair[1], pair[0]))
    return sim[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the word-wrapped content.

    Lines are indented by two spaces; words are joined with single spaces
    and a line never exceeds ``print_width`` unless one word alone does.
    """
    print()
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    line = []
    used = 2  # indent plus words and separators placed so far
    for token in str(tweet_content).split(' '):
        if used + len(token) <= print_width:
            line.append(token)
            used += len(token) + 1
        else:
            print('  ' + ' '.join(line))
            line = [token]
            used = 2 + len(token) + 1
    print('  ' + ' '.join(line))
#--------------------------------------------
# 6330371221 (18.77) 215 (2021-03-01 16:49)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Each distinct shared word is counted once (not once per matching
    pair), and two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    intersec = len(first & second)
    union = len(first | second)
    if union == 0:
        return 0.0
    return intersec / union
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Each entry is ``[tweet_id, jaccard_coef]``; entries are ordered by
    descending coefficient and ascending tweet id among equal scores.
    """
    hits = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            hits.append([tweet_id, score])
    hits.sort(key=lambda pair: (-pair[1], pair[0]))
    return hits[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the content wrapped to width.

    The content is split on whitespace.  Every line is indented by two
    spaces and filled with space-separated words up to ``print_width``
    characters; the first line uses the same limit as the following ones.

    Args:
        tweet_id: identifier shown after ``#``.
        tweet_content: text to wrap.
        jc_coef: similarity value, shown rounded to two decimals.
        print_width: maximum line length including the indent.
    """
    words = tweet_content.split()
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = []
    used = 2  # two-space indent
    for word in words:
        # the separator is charged only when a word already precedes it
        extra = len(word) + (1 if line else 0)
        if line and used + extra > print_width:
            print('  ' + ' '.join(line))
            line = []
            used = 2
            extra = len(word)
        line.append(word)
        used += extra
    print('  ' + ' '.join(line))
#--------------------------------------------
# 6330372921 (20.00) 216 (2021-02-27 21:54)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        # zero occurrences so far means the word is new
        if unique_words.count(word) == 0:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Distinct shared words divided by distinct words overall, computed with
    set operations; two empty lists give 0.0 instead of raising
    ZeroDivisionError.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Entries are ``[tweet_id, jaccard_coef]`` ordered by descending
    coefficient, then ascending tweet id.
    """
    candidates = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            candidates.append([tweet_id, score])
    ranked = sorted(candidates, key=lambda pair: (-pair[1], pair[0]))
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the content word by word.

    Words are written behind a two-space indent, each followed by one
    space; a new indented line is begun when the next word would pass
    ``print_width``.
    """
    print()
    words = tweet_content.split(' ')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    print(indent, end='')
    column = len(indent)
    for word in words:
        if column + len(word) > print_width:
            # finish the current line and open a new indented one
            print('\n' + indent, end='')
            column = len(indent)
        print(word, end=' ')
        column += len(word) + 1
    print()



#--------------------------------------------
# 6330374121 (20.00) 217 (2021-02-28 19:42)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        try:
            unique_words.index(word)
        except ValueError:
            # first time this word is seen
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are reduced to their distinct words first, so repeated
    words cannot distort the result; two empty lists give 0.0.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    top = len(first & second)
    bottom = len(first) + len(second) - top  # size of the union
    if bottom == 0:
        return 0.0
    return top / bottom
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` tweets with positive similarity to the query.

    Entries are ``[tweet_id, jaccard_coef]`` by descending coefficient;
    equal coefficients are ordered by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jc = jaccard(tweet, norm_query)
        if jc > 0:
            matches.append([tweet_id, jc])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print an empty line, the tweet header and the word-wrapped content.

    Lines carry a two-space indent; the words on a line, joined by single
    spaces, take at most ``print_width - 2`` characters.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    room = print_width - 2
    rows = []       # completed lines, as lists of words
    current = []    # words of the line being filled
    filled = 0      # characters used by ``current`` including separators
    for word in tweet_content.split(' '):
        if filled + len(word) <= room:
            current.append(word)
            filled += len(word) + 1
        else:
            rows.append(current)
            current = [word]
            filled = len(word) + 1
    rows.append(current)
    for row in rows:
        print("  " + " ".join(row))

#--------------------------------------------
# 6330375821 (18.01) 218 (2021-03-01 21:19)

def get_unique(words):
    """Return the distinct words of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Distinct shared words divided by distinct words overall; two empty
    lists give 0.0 instead of raising ZeroDivisionError.

    Args:
        words_1: first list of words.
        words_2: second list of words.

    Returns:
        A float between 0.0 and 1.0.
    """
    first, second = set(words_1), set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs by score.

    Every tweet is scored (zero coefficients included).  The sort is stable,
    so tweets with equal coefficients keep ascending index order.
    """
    scored = [[idx, jaccard(tweet_words, norm_query)]
              for idx, tweet_words in enumerate(norm_tweets)]
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``#id (coef)`` header (preceded by a blank line) and the tweet.

    Words are packed greedily into lines indented by two spaces so that no
    line built from more than one word exceeds ``print_width`` characters.
    """
    header = '\n#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')'
    print(header)
    line = ' '
    for word in tweet_content.split(' '):
        extended = line + ' ' + word
        if len(extended) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = extended
    print(line)


#--------------------------------------------
# 6330376421 (20.00) 219 (2021-02-28 18:11)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    result = []
    for candidate in words:
        if candidate in result:
            continue
        result += [candidate]
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Numerator: entries of ``words_1`` found in ``words_2``.  Denominator:
    length of ``get_unique(words_1 + words_2)``.  Raises ZeroDivisionError
    when both lists are empty.
    """
    shared = [word for word in words_1 if word in words_2]
    universe = get_unique(words_1 + words_2)
    return len(shared) / len(universe)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets with a coefficient greater than zero are returned, ordered
    by descending coefficient and, for equal coefficients, ascending tweet
    index.

    The previous group-and-reverse loop never looked at the last sorted
    entry and appended the final tie group without reversing it, so results
    were wrong whenever every tweet matched the query.  A single keyed sort
    handles all cases.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    # Negating the coefficient gives descending score with ascending id ties.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``#id (coef)`` header (preceded by a blank line) and the tweet.

    Words are accumulated, each followed by a space, into lines indented by
    two spaces.  A line is flushed when the next word would not fit within
    ``print_width``; printed lines keep their trailing space.
    """
    words = tweet_content.split(' ')
    print("\n#"+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    used = 2          # width consumed so far, starting with the indent
    buffer = ''
    for word in words:
        padded = word + ' '
        if used + len(word) > print_width:
            print('  ' + buffer)
            buffer = ''
            used = 2
        used += len(padded)
        buffer += padded
    print("  " + buffer)




#--------------------------------------------
# 6330377021 (15.52) 220 (2021-02-28 23:30)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    collected = []
    for entry in words:
        if entry not in collected:
            collected.append(entry)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The overlap is counted by scanning the strictly shorter list (or
    ``words_2`` when ``words_1`` is not shorter) for entries present in the
    other list.  Raises ZeroDivisionError when both lists are empty.
    """
    if len(words_1) < len(words_2):
        scanned, other = words_1, words_2
    else:
        scanned, other = words_2, words_1
    overlap = sum(1 for word in scanned if word in other)
    return overlap / (len(words_1) + len(words_2) - overlap)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet index.  Every tweet is scored, so zero
    coefficients may appear in the result.

    The result is taken with a slice, so asking for more entries than there
    are tweets returns all of them instead of raising IndexError.
    """
    ranked = sorted(
        [jaccard(tweet_words, norm_query), -idx]
        for idx, tweet_words in enumerate(norm_tweets)
    )
    ranked.reverse()
    # max() keeps a negative n from turning into a from-the-end slice.
    return [[-neg_idx, coef] for coef, neg_idx in ranked[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``#id (coef)`` header (preceded by a blank line) and the tweet.

    The content is split on whitespace and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long.

    The width counter now starts at 1, so the first line is measured the
    same way as later lines.  Previously it started at 2 and then added a
    separator for the first word, wrapping the first line one character
    too early.
    """
    words = tweet_content.split()
    print('\n#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    line = []
    used = 1  # indent of 2, minus the separator the first word does not need
    for word in words:
        used += len(word) + 1
        if used > print_width:
            print("  " + " ".join(line))
            line = []
            used = 2 + len(word)
        line.append(word)
    print("  " + " ".join(line))

#--------------------------------------------
# 6330378721 (19.45) 221 (2021-02-28 18:05)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        # An item taken from ``words`` is always in ``words``; only the
        # "not collected yet" test decides.
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The intersection holds the entries of ``words_1`` found in ``words_2``.
    The union size is that count plus the entries of either list that are
    absent from the other.  Raises ZeroDivisionError when both lists are
    empty.
    """
    shared = [word for word in words_1 if word in words_2]
    total = len(shared)
    total += sum(1 for word in words_1
                 if not (word in words_2 or word in shared))
    total += sum(1 for word in words_2
                 if not (word in words_1 or word in shared))
    return len(shared) / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet index.  When the best coefficient is zero
    (nothing matches) an empty list is returned.

    Fixes relative to the earlier version:
    * the "nothing matches" test looked at the tweet id of the best entry
      rather than its coefficient, so a best match of tweet 0 returned [];
    * an empty tweet list or ``n == 0`` raised instead of returning [].
    """
    scored = [[tweet_id, jaccard(tweet_words, norm_query)]
              for tweet_id, tweet_words in enumerate(norm_tweets)]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    if not top_n or top_n[0][1] == 0:
        return []
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``#id (coef)`` header (preceded by a blank line) and the tweet.

    The content is split on whitespace and packed greedily into lines
    indented by two spaces.  A line is flushed when the next word would not
    fit into the ``print_width - 2`` characters available.

    The final line is printed once, after the loop.  The earlier version
    detected the last word by value, so a word equal to the last one
    appearing earlier flushed the line prematurely and duplicated output.
    """
    print('\n#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')

    line = []
    used = 0  # characters consumed on the current line, separators included
    for word in tweet_content.split():
        if len(word) > (print_width - 2) - used:
            print('  ' + ' '.join(line))
            line = []
            used = 0
        line.append(word)
        used += len(word) + 1
    if line:
        print('  ' + ' '.join(line))


#--------------------------------------------
# 6330379321 (19.38) 222 (2021-02-28 08:27)

def get_unique( words ):
    """Return ``words`` without duplicates, keeping each item's last position.

    For every item that occurs again later in ``words``, its earliest
    remaining copy is removed from the result.
    """
    remaining = list(words)
    for pos, item in enumerate(words[:-1]):
        later = words[pos + 1:]
        if item in later:
            remaining.remove(item)
    return remaining
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the distinct words in two lists.

    The result is ``len(intersection) / len(union)`` over the distinct
    words, or ``0`` when both lists are empty.

    Working on sets avoids the ValueError the earlier remove-based version
    raised when ``words_1`` contained a duplicate that also occurred in
    ``words_2``.  For duplicate-free inputs the result is unchanged.
    Items must be hashable.
    """
    distinct_1 = set(words_1)
    distinct_2 = set(words_2)
    union = distinct_1 | distinct_2
    if not union:
        return 0
    return len(distinct_1 & distinct_2) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets with a coefficient greater than zero are returned, ordered
    by descending coefficient and, for equal coefficients, ascending tweet
    index.

    A single keyed sort replaces the earlier tie-group bookkeeping, which
    never flushed the last group (losing the lowest-scoring matches when
    every tweet matched), removed items from a list while iterating over
    it, and raised IndexError on an empty tweet list.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long.

    The pending line is always printed after the loop.  Previously the last
    word was lost whenever it started a new line, and a one-word tweet
    printed no content at all.
    """
    print('')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)

#--------------------------------------------
# 6330380921 (15.87) 223 (2021-03-01 22:37)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order.

    When ``words`` yields no items the result is ``['']`` (a list holding
    one empty string) rather than an empty list.
    """
    distinct = []
    for item in words:
        if item not in distinct:
            distinct.append(item)
    return distinct if distinct else ['']
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists as a float.

    The overlap is collected by scanning the longer list (``words_1`` on a
    tie) for entries present in the other.  The denominator is the number
    of distinct entries in ``words_1 + words_2``, treated as 1 when both
    lists are empty, so that case returns 0.0.
    """
    if len(words_1) >= len(words_2):
        scanned, other = words_1, words_2
    else:
        scanned, other = words_2, words_1
    shared = [word for word in scanned if word in other]

    merged = []
    for word in words_1 + words_2:
        if word not in merged:
            merged.append(word)
    if not merged:
        merged = ['']
    return float(len(shared) / len(merged))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_id, coefficient]`` pairs for the best-matching tweets.

    Tweets are ranked by descending coefficient (ascending index on ties).
    The first ``n`` ranked entries are taken and those with a coefficient
    of zero or less are then dropped.

    Fixes relative to the earlier version:
    * every tweet is scored (the loop used to stop one short and never
      considered the last tweet);
    * the ranking is sliced, so ``n`` larger than the number of tweets no
      longer raises IndexError.
    """
    ranked = sorted(
        [-jaccard(tweet_words, norm_query), tweet_id]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    )
    leading = [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:max(n, 0)]]
    return [pair for pair in leading if pair[1] > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    The earlier version reset its pending output string to '' after a wrap,
    so a word was silently dropped whenever two consecutive words each
    forced a new line.
    """
    words = tweet_content.split(' ')
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')

    line = []
    used = 2  # the two-space indent
    for word in words:
        needed = len(word) + (1 if line else 0)
        if line and used + needed > print_width:
            print(' '*2 + ' '.join(line))
            line = []
            used = 2
            needed = len(word)
        line.append(word)
        used += needed
    print(' '*2 + ' '.join(line))

#--------------------------------------------
# 6330381521 (18.01) 224 (2021-03-01 02:34)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    A sorted copy is used, so the caller's list is left untouched.  The
    earlier version called ``words.sort()`` and reordered the argument as a
    side effect.
    """
    ordered = sorted(words)
    unique_words = []
    for pos, word in enumerate(ordered):
        # In sorted order duplicates are adjacent: keep the first of each run.
        if pos == 0 or word != ordered[pos - 1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the distinct words in two lists.

    The result is the number of distinct words present in both lists
    divided by the number of distinct words overall; 0.0 when both lists
    are empty.

    The earlier formula ``len(concatenation) - len(unique)`` also counted
    words repeated inside a single list as shared.  For duplicate-free
    inputs the result is unchanged.
    """
    unique_words = []
    for word in words_1 + words_2:
        if word not in unique_words:
            unique_words.append(word)
    if not unique_words:
        return 0.0
    shared = [word for word in unique_words
              if word in words_1 and word in words_2]
    return len(shared) / len(unique_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs by score.

    Every tweet is scored (zero coefficients included).  Pairs are ordered
    by descending coefficient, then ascending tweet index.
    """
    scores = [jaccard(tweet_words, norm_query) for tweet_words in norm_tweets]
    # Sorting on the negated score gives descending score, ascending index.
    keyed = sorted([-score, idx] for idx, score in enumerate(scores))
    return [[idx, -neg_score] for neg_score, idx in keyed][0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces.  Words are appended to the
    current two-space-indented line while the line stays within
    ``print_width``; otherwise the line is printed and a new one started.
    """
    print()
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef , 2)) + ")")

    first, *rest = tweet_content.split(" ")
    line = "  " + first
    for word in rest:
        candidate = line + " " + word
        if len(candidate) > print_width:
            print(line)
            line = "  " + word
        else:
            line = candidate
    print(line)

#--------------------------------------------
# 6330382121 (15.41) 225 (2021-03-01 23:14)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` that occur in
    ``words_2``; the denominator is the combined length minus that count.
    Returns 0.0 when both lists are empty.

    The coefficient is returned at full precision.  It used to be rounded
    to two decimals here, which made different scores compare equal when
    ranking; rounding belongs to display code only.  The tiny epsilon added
    to the denominator is replaced by an explicit empty-input check.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    denominator = len(words_1) + len(words_2) - shared
    if denominator == 0:
        return 0.0
    return shared / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet index.  Every tweet is scored, so zero
    coefficients may appear in the result.

    The ranking is sliced rather than indexed, so ``n`` larger than the
    number of tweets returns all of them instead of raising IndexError.
    """
    ranked = sorted(
        ([jaccard(tweet_words, norm_query), -idx]
         for idx, tweet_words in enumerate(norm_tweets)),
        reverse=True,
    )
    return [[-neg_idx, coef] for coef, neg_idx in ranked[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    This replaces a version that pre-computed the number of output rows
    from the character count.  That estimate could be too small when
    wrapping left unused space, which silently dropped the last words of
    the tweet, and it divided by ``print_width - 2``.  Lines no longer carry
    a trailing space before the line break.
    """
    print()
    print('#'+ str(tweet_id), '(' + str(round(jc_coef,2)) + ')')

    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)



#--------------------------------------------
# 6330384421 (20.00) 226 (2021-03-01 23:19)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` that occur in
    ``words_2``; the denominator is the combined length minus that count.
    Raises ZeroDivisionError when both lists are empty.
    """
    matches = [word for word in words_1 if word in words_2]
    overlap = len(matches)
    union_size = len(words_1) + len(words_2) - overlap
    return overlap / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_id, coefficient]`` pairs for the best-matching tweets.

    All tweets are ranked by descending coefficient (ascending index on
    ties).  The first ``n`` are taken and entries whose coefficient is not
    greater than zero are then discarded.
    """
    ranked = sorted(
        ([jaccard(tweet_words, norm_query), -idx]
         for idx, tweet_words in enumerate(norm_tweets)),
        reverse=True,
    )
    return [[-neg_idx, coef] for coef, neg_idx in ranked[:n] if coef > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces.  Each word is added to the
    current line with a leading space; a line is printed and restarted when
    adding the next word would exceed ``print_width``.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = ' '
        # Together with the initial space this gives the two-space indent.
        line += ' ' + word
    if line != '':
        print(line)
#--------------------------------------------
# 6330386721 (15.00) 227 (2021-03-01 23:26)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for item in words:
        if unique_words.count(item) == 0:
            unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` that occur in
    ``words_2``; the denominator is the combined length minus that count.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets with a coefficient greater than zero are returned, ordered
    by descending coefficient and, for equal coefficients, ascending tweet
    index.

    The ranking is sliced rather than indexed ``n`` times, so asking for
    more results than there are matching tweets returns the matches instead
    of raising IndexError.
    """
    matches = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([-coef, idx])
    matches.sort()
    return [[idx, -neg_coef] for neg_coef, idx in matches[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces.  Words are appended, each
    followed by a space, to a two-space-indented line.  A line that reaches
    exactly ``print_width`` is printed at once; a word that would overflow
    first flushes the pending line.

    A word too long to fit even on an empty line is now printed on a line
    of its own.  Previously such a word was never consumed and the loop
    never terminated.
    """
    words = tweet_content.split(' ')
    print('')
    print('#'+str(tweet_id)+' '+'('+str(round(jc_coef,2))+')')
    line = '  '
    while len(words) > 0:
        candidate = line + words[0]
        if len(candidate) < print_width:
            line = candidate + ' '
            words.pop(0)
        elif len(candidate) == print_width or line == '  ':
            # Exact fit, or an over-long word alone on an empty line.
            print(candidate)
            line = '  '
            words.pop(0)
        else:
            print(line)
            line = '  '
    if line != '  ':
        print(line)
#--------------------------------------------
# 6330387321 (18.33) 228 (2021-03-01 23:12)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts every equal pair ``(x, y)`` with ``x`` from
    ``words_1`` and ``y`` from ``words_2``; the denominator is the combined
    length minus that count.  Raises ZeroDivisionError when that
    denominator is zero (e.g. both lists empty).
    """
    equal_pairs = sum(1 for left in words_1 for right in words_2
                      if left == right)
    combined = len(words_1) + len(words_2)
    return equal_pairs / (combined - equal_pairs)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets whose coefficient is greater than zero are kept.  Pairs are
    ordered by descending coefficient; equal coefficients are ordered by
    ascending tweet index.
    """
    scored = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([coef, idx * (-1)])
    scored.sort(reverse=True)
    return [[neg_idx * (-1), coef] for coef, neg_idx in scored[0:n:1]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    This replaces a two-phase loop that looped forever when no word could
    be added to a line, because the remaining words were never consumed.
    Its initial length check also ignored the two-space indent, so a tweet
    almost as long as ``print_width`` was printed on one over-wide line.
    """
    words = tweet_content.split(' ')
    width = int(print_width)
    print("")
    print("#"+str(tweet_id), "("+str(round(float(jc_coef), 2))+")")
    line = "  " + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)
#--------------------------------------------
# 6330388021 (16.30) 229 (2021-03-01 10:01)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the distinct words in two lists.

    Both lists are de-duplicated with ``get_unique`` first.  The result is
    the number of distinct words of ``words_1`` also present in ``words_2``
    divided by the number of distinct words overall.  Raises
    ZeroDivisionError when both lists are empty.
    """
    left = get_unique( words_1 )
    right = get_unique( words_2 )
    shared = sum(1 for word in left if word in right)
    union_size = len(get_unique( left + right ))
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_id, coefficient]`` pairs for the best-matching tweets.

    All tweets are ranked by descending coefficient (ascending index on
    ties).  The first ``n`` are taken and entries whose coefficient is zero
    or less are then discarded.

    The zero entries are filtered with a comprehension.  The earlier code
    removed them from the list while iterating over it, which skips the
    element after each removal and left zero-score tweets in the result.
    """
    ranked = sorted(
        ([jaccard(tweet_words, norm_query), -idx]
         for idx, tweet_words in enumerate(norm_tweets)),
        reverse=True,
    )
    return [[-neg_idx, coef] for coef, neg_idx in ranked[:n] if coef > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    Fixes relative to the earlier version:
    * a first word at least ``print_width - 2`` characters long was printed
      twice, with a stray one-space line in between;
    * a line of exactly ``print_width`` characters is now allowed (the old
      comparison was strict and wrapped one word early).
    """
    words = tweet_content.split(' ')
    width = int(print_width)
    print('   ')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)


#--------------------------------------------
# 6330389621 (18.33) 230 (2021-03-01 23:21)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Numerator: entries of ``words_1`` found in ``words_2``.  Denominator:
    number of distinct entries in ``words_1 + words_2``.  Raises
    ZeroDivisionError when both lists are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word in union:
            continue
        union.append(word)
    overlap = sum(1 for word in words_1 if word in words_2)
    return overlap / int(len(union))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets whose coefficient is greater than zero are kept.  Pairs are
    ordered by descending coefficient; equal coefficients are ordered by
    ascending tweet index.
    """
    keyed = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([-coef, idx])
    keyed.sort()
    ranked = [[idx, -neg_coef] for neg_coef, idx in keyed]
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces.  Each output line is indented by
    two spaces and built from words followed by one space each, for as long
    as the next word fits within ``print_width``.

    A word that does not fit even on an empty line is now placed on a line
    of its own.  Previously nothing was consumed in that case and the outer
    loop never terminated.
    """
    print(" ")
    print("#"+ str(tweet_id) + " "+"("+ str(round(jc_coef,2))+")")
    words = tweet_content.split(' ')
    while len(words) > 0:
        line = ""
        # An empty line always accepts one word, which guarantees progress.
        while words and (line == "" or 2 + len(words[0]) + len(line) <= print_width):
            line += words.pop(0) + " "
        print("  " + line)

#--------------------------------------------
# 6330391821 (15.00) 231 (2021-03-01 16:15)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        if kept.count(item) == 0:
            kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Numerator: entries of ``words_1`` found in ``words_2``.  Denominator:
    length of ``get_unique`` applied to the entries of ``words_2`` followed
    by those of ``words_1``.  Raises ZeroDivisionError when both lists are
    empty.
    """
    overlap = sum(1 for word in words_1 if word in words_2)
    pooled = [word for word in words_2] + [word for word in words_1]
    return overlap / len(get_unique(pooled))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet index.  Every tweet is scored, so zero
    coefficients may appear in the result.

    A keyed sort replaces the earlier repeated ``list.index`` search with a
    sentinel value.  That search raised IndexError when ``n`` exceeded the
    number of tweets; the result is now simply shorter.
    """
    scored = [[tweet_id, jaccard(tweet_words, norm_query)]
              for tweet_id, tweet_words in enumerate(norm_tweets)]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    Fixes relative to the earlier add-then-strip loop:
    * a word longer than the available width was stripped and retried
      forever (infinite loop);
    * stripping with ``y[:-len(word)]`` erased the whole line when the word
      was an empty string (from consecutive spaces);
    * wrapped lines no longer end with a stray space.
    """
    words = tweet_content.split(" ")
    print("")
    print("#"+str(tweet_id)+" ("+str(round(jc_coef,2))+")")
    line = " "  # with the separator of the first word this forms the indent
    for word in words:
        candidate = line + " " + word
        if len(candidate) > print_width and line != " ":
            print(line)
            candidate = "  " + word
        line = candidate
    print(line)
#--------------------------------------------
# 6330392421 (20.00) 232 (2021-02-26 11:15)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    A pool of distinct words is seeded from ``words_1``.  Each entry of
    ``words_2`` already in the pool counts as shared; any other entry is
    added to the pool.  The result is ``shared / len(pool)``.  Raises
    ZeroDivisionError when both lists are empty.
    """
    pool = []
    for word in words_1:
        if word in pool:
            continue
        pool.append(word)
    shared = 0
    for word in words_2:
        if word in pool:
            shared += 1
        else:
            pool.append(word)
    return shared / len(pool)
#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets whose coefficient is greater than zero are kept.  Pairs are
    ordered by descending coefficient; equal coefficients are ordered by
    ascending tweet index.
    """
    keyed = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated index: larger key means smaller tweet id.
            keyed.append([coef, idx * -1])
    keyed.sort()
    keyed.reverse()
    ranked = [[neg_idx * -1, coef] for coef, neg_idx in keyed]
    return ranked[:n]
#--------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces.  Words are appended, each
    followed by a space, to a two-space-indented line.  A line that reaches
    exactly ``print_width`` is printed immediately without a trailing
    space; a word that would overflow flushes the pending line first.  The
    pending line is always printed at the end.
    """
    print()
    print('#'+str(tweet_id)+' '+'('+str(round(jc_coef,2))+')')
    line = '  '
    for word in tweet_content.split(' '):
        token = str(word)
        total = len(line) + len(word)
        if total > print_width:
            print(line)
            line = '  ' + token + ' '
        elif total == print_width:
            print(line + token)
            line = '  '
        else:
            line = line + token + ' '
    print(line)

#--------------------------------------------
# 6330393021 (16.94) 233 (2021-03-01 20:54)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for item in words:
        if item in seen:
            continue
        seen.append(item)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` that occur in
    ``words_2``; the denominator is the combined length minus that count.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets with a coefficient greater than zero are returned, ordered
    by descending coefficient and, for equal coefficients, ascending tweet
    index.

    The ranking is sliced rather than indexed ``n`` times, so asking for
    more results than there are matching tweets no longer raises
    IndexError.  Each coefficient is also computed once instead of twice.
    """
    matches = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([-coef, idx])
    matches.sort()
    return [[idx, -neg_coef] for neg_coef, idx in matches[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``#id (coef)`` header (preceded by a blank line) and the tweet.

    The content is split on whitespace and packed greedily into lines
    indented by two spaces so that no line built from more than one word
    exceeds ``print_width``.

    The last line is always printed.  The earlier version skipped it when
    an identical line had already been collected, so tweets with repeated
    text lost their final line.
    """
    print("\n#"+str(tweet_id),"("+str(round(jc_coef,2))+")")
    line = " "
    for word in tweet_content.split():
        if len(line + " " + word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)


#--------------------------------------------
# 6330394721 (13.63) 234 (2021-03-01 05:36)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    A sorted copy is used, so the caller's list is left untouched.  The
    earlier version called ``words.sort()`` and reordered the argument as a
    side effect.
    """
    ordered = sorted(words)
    last = len(ordered) - 1
    # In sorted order duplicates are adjacent: keep the last item of each run.
    return [word for pos, word in enumerate(ordered)
            if pos == last or word != ordered[pos + 1]]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` that occur in
    ``words_2``; the denominator is the combined length minus that count.
    Returns ``0`` when the denominator is zero.
    """
    shared = [word for word in words_1 if word in words_2]
    denominator = len(words_1) + len(words_2) - len(shared)
    if denominator == 0:
        return 0
    return len(shared) / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Pairs are ordered by descending coefficient; equal coefficients are
    ordered by ascending tweet index.  Every tweet is scored, so zero
    coefficients may appear in the result.

    Fixes relative to the earlier version:
    * ties were ordered by descending tweet index (a side effect of
      reversing an ascending sort); they now come out in ascending order;
    * ``n`` larger than the number of tweets no longer raises IndexError.
    """
    scored = [[tweet_id, jaccard(tweet_words, norm_query)]
              for tweet_id, tweet_words in enumerate(norm_tweets)]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on whitespace and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    Fixes relative to the earlier version:
    * the last word was never checked against the width, so the final line
      could run past ``print_width``;
    * the blank separator line before the header was missing.
    """
    words = tweet_content.split()
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    line = []
    used = 0  # characters on the current line, excluding the indent
    for word in words:
        needed = len(word) + (1 if line else 0)
        if line and used + needed > print_width - 2:
            print('  ' + ' '.join(line))
            line = []
            used = 0
            needed = len(word)
        line.append(word)
        used += needed
    print('  ' + ' '.join(line))

#--------------------------------------------
# 6330395321 (20.00) 235 (2021-03-01 22:13)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Numerator: entries of ``words_1`` found in ``words_2``.  Denominator:
    number of distinct entries across both lists.  Raises ZeroDivisionError
    when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    pool = []
    for source in (words_1, words_2):
        for word in source:
            if word in pool:
                continue
            pool.append(word)
    return shared / len(pool)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    Only tweets whose coefficient is greater than zero are kept.  Pairs are
    ordered by descending coefficient; equal coefficients are ordered by
    ascending tweet index.
    """
    keyed = []
    for idx, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([coef, -idx])
    ranked = sorted(keyed, reverse=True)
    return [[-neg_idx, coef] for coef, neg_idx in ranked][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on single spaces and packed greedily into lines
    indented by two spaces, none longer than ``print_width`` unless a single
    word is itself too long (such a word gets a line of its own).

    This replaces a string-scanning loop built on ``str.find``.  When the
    text was at least ``print_width`` long but had no space beyond column
    ``print_width - 2``, ``find`` returned -1, the scan restarted at index
    0 and the loop never ended.  A word with no space at all was also cut
    before its last character.  Lines no longer carry a trailing space.
    """
    print()
    print('#'+ str(tweet_id),'('+str(round(jc_coef,2)) +')')
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330396021 (17.95) 236 (2021-03-01 17:43)

def get_unique( words ):
    """Return ``words`` without duplicates, keeping each item's last position.

    The caller's list is left untouched.  The earlier version sorted
    ``words`` in place inside the loop, reordering the argument as a side
    effect (and re-sorting on every iteration).  The returned list is the
    same as before.
    """
    ordered = sorted(words)
    unique_words = list(words)
    # Equal neighbours in sorted order mean one surplus copy: drop the
    # earliest remaining occurrence from the result.
    for left, right in zip(ordered, ordered[1:]):
        if left == right:
            unique_words.remove(left)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the entries of ``words_1`` that occur in
    ``words_2``; the denominator is the combined length minus that count.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = len([word for word in words_1 if word in words_2])
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query,n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs by score.

    Every tweet is scored (zero coefficients included).  Pairs are ordered
    by descending coefficient, then ascending tweet index.

    The result is built outside any loop, so an empty ``norm_tweets`` now
    returns ``[]``.  Previously the result variable was assigned only
    inside a loop body and an empty input raised UnboundLocalError.
    """
    keyed = sorted(
        [-jaccard(tweet_words, norm_query), idx]
        for idx, tweet_words in enumerate(norm_tweets)
    )
    ranked = [[idx, -neg_coef] for neg_coef, idx in keyed]
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    The content is split on whitespace and packed greedily into lines
    indented by two spaces.  When a word overflows ``print_width`` the
    pending line is printed and the word starts the next line.  A word that
    overflows an empty line is printed alone.

    "The line is still empty" is now detected from the line itself.  The
    earlier test compared the word with the first word of the tweet, so a
    later occurrence of that word printed the whole over-wide line instead
    of wrapping.
    """
    print()
    print("#" +str(tweet_id)+" ("+str(round(jc_coef,2))+")")
    line = " "
    for word in tweet_content.split():
        previous = line
        line += " " + str(word)
        if print_width < len(line):
            if previous == " ":
                # Nothing pending: the over-long word goes out on its own.
                print(line)
                line = " "
            else:
                print(previous)
                line = "  " + str(word)
    print(line)



#--------------------------------------------
# 6330398221 (18.50) 237 (2021-03-01 16:14)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    ``words_1 + words_2`` is scanned once: the first sighting of a word
    adds it to the union, and any later sighting marks it as repeated.  The
    result is the number of distinct repeated words divided by the union
    size.  Raises ZeroDivisionError when both lists are empty.
    """
    union = []
    repeated = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
        elif word not in repeated:
            repeated.append(word)
    return len(repeated) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs for the query.

    For each tweet, ``tweet + query`` is scanned once: the first sighting
    of a word adds it to the union and later sightings mark it as repeated.
    The coefficient is distinct repeated words over union size.  Only
    tweets with a non-zero coefficient are returned, ordered by descending
    coefficient and, for equal coefficients, ascending tweet index.

    Fixes relative to the earlier version:
    * the trimming loop popped entries until the length equalled ``n`` or
      reached zero, so fewer than ``n`` matches emptied the whole result;
      a slice is used instead;
    * when a tweet and the query were both empty the coefficient variable
      kept the previous tweet's value (or was unbound for the first tweet).
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        union = []
        repeated = []
        for word in tweet_words + norm_query:
            if word not in union:
                union.append(word)
            elif word not in repeated:
                repeated.append(word)
        if repeated:
            # Negated id so one descending sort yields ascending ids on ties.
            scored.append([len(repeated) / len(union), -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    Every body line starts with two spaces and each word is followed by
    one space; a word that would push the line past ``print_width``
    starts a new line.
    """
    indent = '  '
    pieces = tweet_content.split(' ')
    print('\n#%s (%s)' % (tweet_id, round(jc_coef, 2)))
    line = indent
    for piece in pieces:
        if len(line) + len(piece) > print_width:
            # flush the full line and restart from the indent
            print(line)
            line = indent
        line += piece + ' '
    print(line)


#--------------------------------------------
# 6330399921 (19.95) 238 (2021-02-28 21:45)

def get_unique( words ):
    """Collect each distinct element of ``words`` once, keeping first-seen order."""
    kept = []
    for candidate in words:
        if candidate not in kept:
            kept += [candidate]
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the items of ``words_1`` that also occur in
    ``words_2``. Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a positive coefficient are kept; equal coefficients
    stay in ascending tweet-id order.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    # Stable sort: pairs with equal coefficients keep their id order.
    ranked = sorted(scored, key=lambda pair: -pair[1])
    top_n = []
    for pair in ranked:
        if len(top_n) == n:
            break
        top_n.append(pair)
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet.

    Words are separated by single spaces, every body line is indented by
    two spaces, and a line is broken before a word that would make it
    longer than ``print_width``. Nothing is printed after the header when
    the tweet has no words.
    """
    print('\n#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    line = ''
    for word in tweet_content.split():
        if line and len(line) + 1 + len(word) > print_width:
            print(line)
            line = ''
        # The end of the text is handled after the loop, not by comparing
        # each word with the last one: a repeated word equal to the final
        # word used to be printed early and duplicated.
        line = line + ' ' + word if line else '  ' + word
    if line:
        print(line)


#--------------------------------------------
# 6330400821 (20.00) 239 (2021-02-28 16:05)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order.

    The input list is left untouched; the previous version sorted it in
    place, removed items from it and returned that same list.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return s / (len(words_1) + len(words_2) - s).

    ``s`` counts the items of the shorter list (``words_1`` on equal
    length) that occur in the other list.
    """
    if len(words_1) > len(words_2):
        probe, pool = words_2, words_1
    else:
        probe, pool = words_1, words_2
    s = sum(1 for word in probe if word in pool)
    return s / (len(words_2) + len(words_1) - s)
def top_n_similarity(norm_tweets, norm_query, n) :
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Pairs with a zero coefficient are skipped; the rest are ordered by
    descending coefficient, then ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef != 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines start with two spaces; every word is followed by a space.
    """
    print("\n#{} ({})".format(tweet_id, round(jc_coef, 2)))
    rows = ["  "]
    for word in tweet_content.split(" "):
        if len(rows[-1]) + len(word) > print_width:
            rows.append("  ")  # start a fresh indented line
        rows[-1] += word + " "
    for row in rows:
        print(row)


#--------------------------------------------
# 6330401421 (17.47) 240 (2021-02-28 22:32)

def get_unique( words ):
    """Return ``words`` without repeats, preserving first-seen order."""
    unique_words = []
    for item in words:
        already_seen = item in unique_words
        if not already_seen:
            unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return common / (len(words_1) + extras).

    ``common`` counts items of ``words_1`` found in ``words_2``;
    ``extras`` counts items of ``words_2`` missing from ``words_1``.
    """
    common = len([word for word in words_1 if word in words_2])
    extras = len([word for word in words_2 if word not in words_1])
    return common / (len(words_1) + extras)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the top ``n`` ``[tweet_id, coefficient]`` pairs.

    Tweets with a coefficient of zero are excluded. Results are ordered
    by descending coefficient, then ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        # Compute the coefficient once per tweet (it used to be computed
        # twice: once for the test and once for the stored value).
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    A new line is started when the current line plus two extra columns
    plus the next word would exceed ``print_width``.
    """
    print("\n#%s (%s)" % (tweet_id, round(jc_coef, 2)))
    line = "  "
    for word in tweet_content.split():
        needed = len(line) + 2 + len(word)
        if needed > print_width:
            print(line)
            line = "  "
        line = line + str(word) + " "
    print(line)


#--------------------------------------------
# 6330402021 (20.00) 241 (2021-02-27 20:17)

def get_unique( words ):
    """Build a list holding each distinct word once, in order of first appearance."""
    result = []
    for word in words:
        if word in result:
            continue
        result.append(word)
    return result
def jaccard(words_1, words_2):
    """Return len(shared) / len(union).

    ``shared`` lists the items of ``words_1`` that occur in ``words_2``;
    ``union`` holds every distinct item of both lists.
    """
    union = []
    for word in words_1 + words_2:
        if word in union:
            continue
        union.append(word)
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only positive coefficients are returned; ties are ordered by
    ascending tweet id. A non-positive ``n`` yields an empty list.
    """
    positive = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        jac = jaccard(tweet_words, norm_query)
        if jac > 0:
            positive.append([tweet_id, jac])
    positive.sort(key=lambda pair: (-pair[1], pair[0]))
    if n > 0:
        return positive[:n]
    return []
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Each body line is two spaces followed by as many space-joined words
    as fit in ``print_width - 2`` columns; the first word of a line is
    always placed, even when it is too long.
    """
    print()
    print("#"+str(tweet_id)+" ("+str(round(jc_coef,2))+")")
    words = tweet_content.split(" ")
    position = 0
    while position < len(words):
        row = [words[position]]
        used = len(words[position])
        position += 1
        while position < len(words):
            used += len(words[position]) + 1  # +1 for the joining space
            if used > print_width - 2:
                break
            row.append(words[position])
            position += 1
        print("  " + " ".join(row))

#--------------------------------------------
# 6330403721 (18.98) 242 (2021-02-28 18:17)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    The caller's list is not modified (a sorted copy is used). The first
    sorted item is always kept: the previous version compared it with
    ``words[-1]`` and therefore returned ``[]`` for inputs such as
    ``['a']`` or ``['a', 'a']``.
    """
    unique_words = []
    for word in sorted(words):
        # In sorted order duplicates are adjacent, so comparing with the
        # last kept item is enough.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A & B| / |A | B| of two word lists.

    Duplicates inside either list are ignored and neither input list is
    modified. Returns ``0`` when both lists are empty.
    """
    distinct_1 = []
    for word in words_1:
        if word not in distinct_1:
            distinct_1.append(word)
    distinct_2 = []
    for word in words_2:
        if word not in distinct_2:
            distinct_2.append(word)

    shared = sum(1 for word in distinct_1 if word in distinct_2)
    total = len(distinct_1) + len(distinct_2) - shared
    if total == 0:
        return 0
    # Counting explicitly avoids the old wrap-around comparison of the
    # first sorted word with the last, which gave 0 for identical
    # one-word lists.
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Tweets whose coefficient is zero are skipped; the rest are ordered by
    descending coefficient, then ascending tweet id.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        jc = jaccard(tweet_words, norm_query)
        if jc != 0:
            top_n.append([tweet_id, jc])
    # Sort once after collecting; the list used to be re-sorted on every
    # loop iteration.
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines are not indented. Each word is followed by a space, or by
    a newline when the next word would not fit in the remaining
    ``print_width - 2`` columns. One extra newline ends the output.
    """
    print()
    # A one-space sentinel gives the last real word a "next word".
    words = tweet_content.split(' ') + [' ']
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    pieces = []
    used = 0
    for current, upcoming in zip(words, words[1:]):
        used += len(current) + 1
        if print_width - 2 - used >= len(upcoming):
            pieces.append(current + ' ')
        else:
            pieces.append(current + '\n')
            used = 0
    print(''.join(pieces))


#--------------------------------------------
# 6330404321 (18.01) 243 (2021-03-01 20:25)

def get_unique( words ):
    """Return the distinct items of ``words``.

    When an item repeats, its earlier copy is dropped and the new one is
    appended, so the result follows the order of last appearance.
    """
    unique_words = []
    for item in words:
        if item in unique_words:
            unique_words.remove(item)  # drop the earlier occurrence
        unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return len(shared) / len(distinct words of both lists).

    ``shared`` lists the items of ``words_1`` that occur in ``words_2``.
    """
    distinct = get_unique( words_1+words_2 )
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id.
    """
    scored = [
        [jaccard(norm_query, tweet_words), tweet_id]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[0], pair[1]))
    ranked = [[tweet_id, coef] for coef, tweet_id in scored]
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines are indented by two spaces and words are joined by single
    spaces; a word that does not fit in ``print_width`` opens a new line.
    """
    print()
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    line = " "
    for word in tweet_content.split(" "):
        extended = line + ' ' + word
        if len(extended) <= print_width:
            line = extended
            continue
        print(line)
        line = '  ' + word
    print(line)
#--------------------------------------------
# 6330405021 (18.50) 244 (2021-02-26 00:33)

def get_unique( words ):
    """Return the items of ``words`` with later repeats removed."""
    unique_words = []
    for entry in words:
        if entry in unique_words:
            continue
        unique_words.append(entry)
    return unique_words
def jaccard(words_1, words_2):
    """Return len(shared) / len(combined); 0.0 when both lists are empty.

    ``shared`` lists the items of ``words_1`` found in ``words_2``;
    ``combined`` is ``words_1`` plus the items of ``words_2`` that are
    not in ``words_1``.
    """
    shared = []
    for word in words_1:
        if word in words_2:
            shared.append(word)
    combined = words_1 + [word for word in words_2 if word not in words_1]
    denominator = len(combined)
    if denominator == 0:
        denominator = 1  # avoid dividing by zero for two empty lists
    return len(shared) / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id. When ``n`` exceeds the number
    of tweets every tweet is returned; the previous slice
    ``count[len(count)-n:]`` used a negative start in that case and
    dropped the best results.
    """
    ranked = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    if n <= 0:
        return []
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines are indented by two spaces; words are joined by single
    spaces and wrapped so that a line plus separator plus word stays
    within ``print_width``.
    """
    print(f"\n#{tweet_id} ({round(jc_coef,2)})")
    line = '  '
    for position, word in enumerate(tweet_content.split(' ')):
        if len(line) + len(word) + 1 > print_width:
            print(line)
            line = '  ' + word
            continue
        if position != 0:
            line += ' '  # no separator before the very first word
        line += word
    print(line)

#--------------------------------------------
# 6330406621 (16.25) 245 (2021-03-01 23:28)

def get_unique( words ):
    """Return each distinct item of ``words`` once, in first-seen order."""
    unique_words = []
    for word in words:
        is_new = word not in unique_words
        if is_new:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator is the number of distinct words of ``words_1`` that
    also occur in ``words_2``; the denominator is the number of distinct
    words in both lists together.

    Returns:
        The ratio as a float, or ``0.0`` when both lists are empty (the
        previous version fell off the end and returned ``None``).
    """
    shared = []
    for word in words_1:
        if word in words_2 and word not in shared:
            shared.append(word)
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)

    if not union:
        return 0.0
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Tweets with a zero (or missing) coefficient are skipped. The rest are
    ordered by descending coefficient, then ascending tweet id.

    The previous implementation reversed the sorted list with a slice
    that left out its first element and never appended the final group
    of equal coefficients, so the lowest-ranked matches were lost (a
    single match produced an empty result).
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if not coef:
            # 0 / 0.0, or None if the helper had nothing to compare
            continue
        ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header and the wrapped tweet.

    Each body line is printed as two spaces plus the stripped line text.
    """
    print(" ")
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    pending = ' '
    for word in tweet_content.split(" "):
        if len(pending) + 1 + len(word) > print_width:
            # flush without surrounding whitespace, re-adding the indent
            print("  " + pending.strip())
            pending = "  " + word
        else:
            pending = pending + " " + word
    print("  " + pending.strip())



#--------------------------------------------
# 6330407221 (17.00) 246 (2021-02-27 17:41)

def get_unique( words ):
    """Return the distinct items of ``words``, first occurrence first."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return same / distinct.

    ``same`` counts the items of ``words_1`` that occur in ``words_2``;
    ``distinct`` counts the different items across both lists.
    """
    same = sum(1 for word in words_1 if word in words_2)

    seen = []
    for word in list(words_1) + list(words_2):
        if word not in seen:
            seen.append(word)

    return int(same) / int(len(seen))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id. Asking for more results than
    there are tweets returns all of them instead of raising IndexError.
    """
    ranked = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    if n <= 0:
        return []
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words are grouped into rows that fit in ``print_width - 2`` columns;
    each row is printed as two spaces plus the space-joined words.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    budget = print_width - 2
    finished = []
    current = []
    used = 0
    for word in tweet_content.split(' '):
        if used + len(word) > budget:
            finished.append(current)
            current = []
            used = 0
        current.append(word)
        used += len(word) + 1  # +1 for the joining space
    for row in finished + [current]:
        print('  ' + ' '.join(row))



#--------------------------------------------
# 6330408921 (18.35) 247 (2021-02-26 00:29)

def get_unique( words ):
    """Return the distinct items of ``words`` without modifying ``words``.

    When an item repeats only its last occurrence is kept, which is the
    same result the previous version produced; that version, however,
    sorted the caller's list in place as a side effect.
    """
    unique_words = []
    for word in words:
        if word in unique_words:
            unique_words.remove(word)  # keep only the latest occurrence
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the items of ``words_1`` that occur in ``words_2``.
    """
    shared = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Only positive coefficients are kept. Pairs are ordered by descending
    coefficient; equal coefficients keep ascending tweet-id order because
    the sort is stable.
    """
    top_n = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        # One call per tweet; the value used to be computed twice.
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: pair[1], reverse=True)
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words are grouped into rows printed as two spaces plus the
    space-joined words. ``used`` tracks the row width including one
    trailing separator, so a row fits while ``used <= print_width + 1``.
    """
    rows = []
    current = []
    used = 2  # the two-space indent
    for word in tweet_content.split(' '):
        if word == '':
            used += 1
        used += len(word) + 1
        if used <= print_width + 1:
            current.append(word)
        else:
            rows.append(current)
            current = [word]
            # Count the separator here as well; it was left out after a
            # wrap, which let wrapped rows run one column too wide.
            used = 2 + len(word) + 1
    rows.append(current)
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    for row in rows:
        print('  ' + ' '.join(row))




#--------------------------------------------
# 6330409521 (10.52) 248 (2021-02-28 15:22)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order.

    Empty strings are treated like any other item; the previous version
    seeded the result with ``''`` as a sentinel and so dropped them.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    The numerator counts the items of ``words_1`` that occur in
    ``words_2``; the denominator is the size of ``words_1`` extended with
    the items of ``words_2`` it does not contain.

    Returns:
        The exact ratio as a float (no rounding, so that close scores do
        not become false ties), or ``0.0`` when both lists are empty.
    """
    shared = [word for word in words_1 if word in words_2]
    combined = list(words_1)
    for word in words_2:
        if word not in combined:
            combined.append(word)
    if not combined:
        return 0.0
    return len(shared) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets with a zero coefficient are skipped; the rest are ordered by
    descending coefficient, then ascending tweet id.

    When fewer than ``n`` tweets match, only the matches are returned.
    The previous loop kept popping scores in that case, leaving ``''``
    placeholders in the result and finally raising IndexError.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef != 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    if n <= 0:
        return []
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a dashed rule, a blank line, the header and the wrapped tweet.

    The rule is ``print_width`` dashes. Words are grouped into rows that
    fit in ``print_width - 2`` columns; each row is printed as two spaces
    plus the space-joined words.
    """
    print('-' * print_width)
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    room = print_width - 2
    row = []
    taken = 0
    for token in tweet_content.split(' '):
        if taken + len(token) > room:
            print('  ' + ' '.join(row))
            row = []
            taken = 0
        row.append(token)
        taken += len(token) + 1  # +1 for the joining space
    print('  ' + ' '.join(row))



#--------------------------------------------
# 6330410021 (15.38) 249 (2021-02-27 12:46)

def get_unique( words ):
    """Return the items of ``words`` with repeats removed, order preserved."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return len(hits) / len(distinct words of both lists).

    ``hits`` lists the items of the shorter list (``words_1`` on equal
    length) that occur in the other list.
    """
    if len(words_1) > len(words_2):
        probe, pool = words_2, words_1
    else:
        probe, pool = words_1, words_2
    hits = [word for word in probe if word in pool]

    distinct = get_unique(words_1+words_2)
    return len(hits) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id. Ties of any length are ordered
    by the sort key; the previous pairwise id swapping only handled runs
    of up to four equal scores.
    """
    ranked = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Each body line starts with two spaces and every word is followed by
    one space. A word is added to the current line while
    ``len(line) <= print_width - len(word)``; otherwise a new line is
    started.

    Any number of lines is supported. The previous version had seven
    fixed line buffers, so longer tweets lost their tail, and a word too
    long for an empty line was dropped; such a word now gets a line of
    its own.
    """
    words = tweet_content.split(' ')
    print('')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    lines = ['  ']
    for word in words:
        does_not_fit = len(lines[-1]) > print_width - len(word)
        # Only open a new line if the current one already holds text;
        # otherwise an over-long word would produce empty lines forever.
        if does_not_fit and len(lines[-1]) > 2:
            lines.append('  ')
        lines[-1] += word + ' '
    for line in lines:
        print(line)





#--------------------------------------------
# 6330411721 (17.43) 250 (2021-02-28 15:48)

def get_unique( words ):
    """Return every distinct item of ``words`` once, in first-seen order."""
    collected = []
    for word in words:
        if word not in collected:
            collected += [word]
    return collected
def jaccard(words_1, words_2):
    """Return s / (len(words_1) + len(words_2) - s).

    ``s`` counts the items of the longer list (``words_2`` on equal
    length) that occur in the other list.
    """
    if len(words_1) > len(words_2):
        longer, other = words_1, words_2
    else:
        longer, other = words_2, words_1
    s = len([word for word in longer if word in other])
    return s / (len(words_1) + len(words_2) - s)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        ranked.append([tweet_id, jaccard(tweet_words, norm_query)])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Words and the single spaces between them are handled as separate
    tokens; a token that does not fit in ``print_width`` starts a new
    two-space-indented line (a space token is dropped at a line start).
    """
    print("")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    tokens = []
    for position, word in enumerate(tweet_content.split(' ')):
        if position > 0:
            tokens.append(' ')
        tokens.append(word)
    line = '  '
    for token in tokens:
        if len(line) + len(token) <= print_width:
            line += token
            continue
        print(line)
        line = '  ' if token == ' ' else '  ' + token
    print(line)



#--------------------------------------------
# 6330412321 (18.01) 251 (2021-03-01 17:06)

def get_unique( words ):
    """Return the distinct items of ``words`` in order of first appearance."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (distinct words in both lists) / (distinct words overall).

    Both inputs are de-duplicated with ``get_unique`` first.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    merged = first + second

    union = []
    both = []
    for word in merged:
        if word not in union:
            union.append(word)
        if word in both:
            continue
        if word in first and word in second:
            both.append(word)

    return len(both) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id.
    """
    scored = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    ordered = sorted(scored, key=lambda pair: (-pair[1], pair[0]))
    return ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Each word is appended first; if the line then exceeds
    ``print_width`` the line is printed without that word and the word
    starts the next two-space-indented line.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        line = line + ' ' + word
        if len(line) <= print_width:
            continue
        # cut the overflowing word back off before printing
        print(line[:len(line) - len(word)])
        line = '  ' + word
    print(line)

#--------------------------------------------
# 6330413021 (20.00) 252 (2021-02-28 16:30)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for item in words:
        if item in unique_words:
            continue
        unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return coef / (len(words_1) + len(words_2) - coef).

    ``coef`` counts the distinct words of ``words_1`` that also appear
    among the distinct words of ``words_2``.
    """
    # De-duplicate the second list once, not once per loop iteration.
    distinct_2 = get_unique(words_2)
    coef = 0
    for word in get_unique(words_1):
        if word in distinct_2:
            coef += 1
    div = len(words_1) + len(words_2) - coef
    return coef / div
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Zero coefficients are skipped. Pairs are ordered by descending
    coefficient; equal coefficients keep ascending tweet-id order because
    the sort is stable.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef == 0:
            continue
        matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: pair[1], reverse=True)
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    ``used`` sums word lengths plus one separator each (the indent is not
    counted); a line is broken once ``used`` reaches ``print_width``.
    """
    print(f'\n#{tweet_id} ({round(jc_coef,2)})')

    line = '  '
    used = 0
    for word in tweet_content.split(' '):
        used += len(word) + 1
        if used >= print_width:
            print(line)
            used = len(word) + 1
            line = '  '
        line += word + ' '
    # split(' ') always yields at least one item, so a final line exists.
    print(line)

#--------------------------------------------
# 6330415221 (14.92) 253 (2021-03-01 23:14)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return len(shared) / len(distinct words of both lists).

    ``shared`` lists the items of ``words_1`` that occur in ``words_2``.
    The previous version scanned only the first ``max(len)`` items of the
    concatenated list, so when ``words_2`` was longer some of its words
    were counted as shared a second time.

    Raises ZeroDivisionError when both lists are empty.
    """
    shared = [word for word in words_1 if word in words_2]
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Only positive coefficients are kept. Pairs are ordered by descending
    coefficient; equal coefficients keep ascending tweet-id order because
    the sort is stable.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: pair[1], reverse=True)
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines are indented by two spaces and words are joined by single
    spaces. A line is printed as soon as the next word would make it
    longer than ``print_width``.

    A tweet consisting of a single word is printed too; the previous
    loop started at index 1 and printed no body in that case.
    """
    print()
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    words = tweet_content.split(' ')
    last = len(words) - 1
    line = ' '
    for position, word in enumerate(words):
        line += ' ' + word
        if position == last:
            print(line)
        elif len(line) + len(words[position + 1]) + 1 > print_width:
            # the next word does not fit: flush and start over
            print(line)
            line = ' '
#--------------------------------------------
# 6330416921 (16.94) 254 (2021-02-28 02:20)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (distinct words in both lists) / (distinct words overall).

    Both inputs are de-duplicated with ``get_unique`` first.
    """
    first = get_unique( words_1 )
    second = get_unique( words_2 )

    shared = sum(1 for word in first if word in second)

    # union = first list plus whatever the second list adds
    union = first.copy()
    for word in second:
        if word not in union:
            union.append(word)

    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    All tweets are ranked (zero coefficients included) by descending
    coefficient, then ascending tweet id. Asking for more results than
    there are tweets returns all of them instead of raising IndexError.
    """
    ranked = [
        [tweet_id, jaccard(tweet_words, norm_query)]
        for tweet_id, tweet_words in enumerate(norm_tweets)
    ]
    # One keyed sort replaces the ascending sort, manual reversal and
    # hand-written tie reordering.
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    if n <= 0:
        return []
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines start with two spaces and every word is followed by one
    space. After adding a word, the line is printed if the following word
    would not fit.
    """
    words = tweet_content.split()

    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')

    line = '  '
    last = len(words) - 1
    for position, word in enumerate(words):
        line = line + word + ' '
        if position >= last:
            continue  # no following word to look at
        if len(line) + len(words[position + 1]) >= print_width + 1:
            print(line)
            line = '  '
    print(line)






#--------------------------------------------
# 6330417521 (17.92) 255 (2021-03-01 02:12)

def get_unique( words ):
  """Return the distinct items of ``words`` in first-seen order."""
  unique_words = []
  for word in words:
    if word in unique_words:
      continue
    unique_words.append(word)
  return unique_words
def jaccard(words_1, words_2):
  """Return shared / (len(words_1) + len(words_2) - shared).

  ``shared`` counts the items of ``words_1`` that occur in ``words_2``.
  The exact ratio is returned; rounding it to two decimals here made
  different scores compare equal.

  Raises ZeroDivisionError when both lists are empty.
  """
  shared = 0
  for word in words_1:
    if word in words_2:
      shared += 1
  total = (len(words_1) + len(words_2)) - shared
  return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
  """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

  Zero coefficients are skipped; the rest are ordered by descending
  coefficient, then ascending tweet id.
  """
  scored = []
  for tweet_id, tweet_words in enumerate(norm_tweets):
    coef = jaccard(tweet_words, norm_query)
    if coef != 0:
      scored.append([tweet_id, coef])
  scored.sort(key=lambda pair: (-pair[1], pair[0]))
  return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
  """Print a blank line, the ``#id (coef)`` header and the wrapped tweet.

  The body is assembled as rows and printed with a single call; each
  row starts with two spaces and words are joined by single spaces.
  """
  words = tweet_content.split(' ')
  print()
  print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
  rows = [' ']
  used = 1
  for word in words:
    used += len(word) + 1
    if used > print_width:
      rows.append('  ' + word)
      used = 2 + len(word)
    else:
      rows[-1] += ' ' + word
  print('\n'.join(rows))

#--------------------------------------------
# 6330418121 (19.15) 256 (2021-02-27 17:27)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)

    return unique_words
def jaccard(words_1, words_2):
    """Return (repeated words) / (distinct words) of the two lists combined.

    The count of repeats is ``len(combined) - len(distinct)``, which for
    duplicate-free inputs is the number of shared words.

    ``words_1`` is no longer modified: the previous version aliased it
    and appended every item of ``words_2`` to the caller's list.

    Raises ZeroDivisionError when both lists are empty.
    """
    combined = list(words_1) + list(words_2)
    distinct = []
    for word in combined:
        if word not in distinct:
            distinct.append(word)
    return (len(combined) - len(distinct)) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first ``n`` ``[tweet_id, coefficient]`` pairs.

    Only positive coefficients are kept; pairs are ordered by descending
    coefficient, then ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the ``#id (coef)`` header and the wrapped tweet.

    Body lines start with two spaces and every word is followed by one
    space; a word that would push the line past ``print_width`` starts a
    new line.
    """
    print(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef,2)) + ')')
    line = '  '
    for word in tweet_content.split(' '):
        if len(line) + len(word) <= print_width:
            line += word + ' '
        else:
            print(line)
            # Keep the trailing space after the first word of a new line;
            # it was missing, which glued that word to the next one.
            line = '  ' + word + ' '

    print(line)



#--------------------------------------------
# 6330420321 (20.00) 257 (2021-02-26 21:54)

def get_unique( words ):
    """Collect each distinct item of ``words`` once, keeping first-seen order."""
    distinct = []
    for candidate in words:
        if candidate not in distinct:
            distinct += [candidate]
    return distinct
def jaccard(words_1, words_2):
    """Return (words of words_1 found in words_2) / (distinct words overall)."""
    merged = []
    shared = 0
    for word in words_1:
        shared += word in words_2  # a bool adds as 0 or 1
        merged.append(word)
    merged.extend(words_2)
    return shared / len(get_unique(merged))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Highest coefficient first; ties stay in ascending tweet-index order.
    """
    hits = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            hits.append([idx, coef])
    # hits is already in index order; the stable descending sort keeps that
    # order among equal coefficients.
    return sorted(hits, key=lambda pair: pair[1], reverse=True)[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    Every word is printed, including a lone word and tokens without letters.
    A word too long for the width is printed alone on its own line.
    Returns nothing.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    lines = []
    for word in tweet_content.split(' '):
        if lines and len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += ' ' + word
        else:
            lines.append('  ' + word)
    for line in lines:
        print(line)

#--------------------------------------------
# 6330422621 (17.95) 258 (2021-02-28 23:14)

def get_unique( words ):
    """Return ``words`` with later repeats removed (first occurrence wins)."""
    kept = []
    for entry in words:
        already_there = entry in kept
        if not already_there:
            kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return repeats-in-concatenation divided by distinct-word count.

    With duplicate-free inputs this is the Jaccard coefficient.
    """
    union_size = len(get_unique(words_1 + words_2))
    total_size = len(words_1 + words_2)
    return (total_size - union_size) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    norm_tweets -- list of word lists, one per tweet
    norm_query  -- word list of the query
    n           -- maximum number of results

    Returns a list of ``[tweet_id, jaccard]`` pairs, limited to tweets whose
    coefficient is greater than 0, ordered by descending coefficient and, on
    ties, ascending tweet index.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:  # tweets sharing no word with the query are not results
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    The text is split on single spaces so that runs of spaces inside the
    tweet are reproduced as written.  Returns nothing.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = None
    # split(' ') rather than split(): the latter collapses repeated spaces.
    for word in tweet_content.split(' '):
        if line is None:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330423221 (6.58) 259 (2021-03-01 23:59)

def get_unique( words ):
    """Return the distinct items of ``words`` in order of first appearance."""
    result = []
    for element in words:
        if element in result:
            continue
        result.append(element)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    words_1, words_2 -- lists of words
    Returns a float in [0, 1]; two empty lists give 0.0.  The inputs are not
    modified.
    """
    union = []
    for word in list(words_1) + list(words_2):
        if word not in union:
            union.append(word)
    if not union:
        return 0.0
    shared = [word for word in union if word in words_1 and word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    norm_tweets -- list of word lists, one per tweet
    norm_query  -- word list of the query
    n           -- maximum number of results

    Returns ``[tweet_id, jaccard]`` pairs for tweets with a coefficient
    greater than 0, best first; equal coefficients are ordered by ascending
    tweet index.
    """
    scored = []
    # enumerate gives the true position even when two tweets are identical.
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    Returns nothing.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    lines = []
    for word in tweet_content.split(" "):
        # Extend the current line while the word still fits, else start a
        # new indented line.
        if lines and len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += " " + word
        else:
            lines.append("  " + word)
    for line in lines:
        print(line)





#--------------------------------------------
# 6330424921 (18.01) 260 (2021-02-26 16:02)

def get_unique( words ):
    """Return a new list holding each item of ``words`` once, first-seen order."""
    singles = []
    for token in words:
        if token not in singles:
            singles += [token]
    return singles
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    Each list is expected to contain no internal duplicates, so the overlap
    equals the number of repeats in the merged list.  The arguments are left
    unchanged.  Two empty lists give 0.0.
    """
    merged = list(words_1)  # copy: extending words_1 itself would corrupt the caller's tweet
    merged.extend(words_2)
    unique = []
    for word in merged:
        if word not in unique:
            unique.append(word)
    if not unique:
        return 0.0
    return (len(merged) - len(unique)) / len(unique)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Returns ``[tweet_id, jaccard]`` pairs, best coefficient first and lower
    tweet index first on ties.  Tweets whose coefficient is 0 are excluded,
    so fewer than ``n`` pairs may be returned.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            # Negated id so that a descending sort prefers the lower id.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    Returns nothing.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = None
    for word in tweet_content.split(' '):
        if line is None:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    # Flush once after the loop: testing "word == last word" inside the loop
    # would also fire on an earlier copy of that word.
    print(line)




#--------------------------------------------
# 6330425521 (20.00) 261 (2021-03-01 18:16)

def get_unique( words ):
    """Return the distinct items of ``words`` in order of first appearance.

    Example: ['x', 'y', 'z', 'y', 'xyz', 'z'] -> ['x', 'y', 'z', 'xyz']
    """
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words

#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both inputs are de-duplicated first; the result is the number of shared
    words divided by the number of distinct words overall.
    Example: ['x', 'y', 'z', 'xyz'] vs ['y', 'x', 'w'] -> 0.4
    """
    first = get_unique(words_1)
    second = get_unique(words_2)

    shared = 0
    only_first = 0
    for word in first:
        if word in second:
            shared += 1
        else:
            only_first += 1

    # Union = everything in the second list plus what only the first has.
    return shared / (len(second) + only_first)

#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Ordered by descending coefficient, then ascending tweet index.
    """
    candidates = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            candidates.append([tweet_id, coef])
    ordered = sorted(candidates, key=lambda pair: (-pair[1], pair[0]))
    return ordered[:n]

#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet text.

    The wrapped text is assembled into one string (lines separated by
    newline plus a two-space indent) and printed with a single call.
    """
    chunks = ['  ']
    column = 2  # characters used on the current line, indent included
    for word in tweet_content.split(' '):
        needed = column + len(word) + 1
        if column == 2:
            # Nothing on the line yet: no separating space.
            chunks.append(word)
            column += len(word)
        elif needed <= print_width:
            chunks.append(' ' + word)
            column = needed
        else:
            chunks.append('\n  ' + word)
            column = 2 + len(word)
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    print(''.join(chunks))


#--------------------------------------------
# 6330426121 (20.00) 262 (2021-03-01 03:14)

def get_unique( words ):
    """Return ``words`` with repeats dropped, keeping the first occurrence."""
    firsts = []
    for item in words:
        if item not in firsts:
            firsts += [item]
    return firsts
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared)."""
    shared = sum(word in words_2 for word in words_1)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Ordered by descending coefficient; tweets with the same coefficient are
    ordered by ascending tweet index.
    """
    hits = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            hits.append([idx, coef])
    # One composite key replaces "sort descending, then re-sort each group of
    # equal coefficients by id".
    hits.sort(key=lambda pair: (-pair[1], pair[0]))
    return hits[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown as ``str(round(jc_coef, 2))``
    print_width   -- maximum characters per line, including the 2-space indent

    Lines carry no trailing spaces.  A word longer than the available width
    is printed alone on its own line.  Returns nothing.
    """
    print()
    # str(round(...)) prints 0.5 as "0.5"; a fixed two-decimal format would
    # pad it to "0.50".
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    lines = []
    for word in tweet_content.split(' '):
        if lines and len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += ' ' + word
        else:
            lines.append('  ' + word)
    for line in lines:
        print(line)

#--------------------------------------------
# 6330427821 (13.75) 263 (2021-02-27 19:29)

def get_unique(words):
    """Return the distinct items of ``words`` in order of first appearance.

    The input list is left untouched: it is neither sorted nor extended.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words

#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    norm_tweets -- list of word lists, one per tweet
    norm_query  -- word list of the query
    n           -- maximum number of results

    Returns ``[tweet_id, coefficient]`` pairs for tweets with a non-zero
    coefficient, best first; equal coefficients are ordered by ascending
    tweet index.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jack_card(tweet, norm_query)
        if coef != 0:
            scored.append([tweet_id, coef])
    # Descending coefficient, ascending id on ties (a plain reverse sort
    # would put the larger id first).
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]

#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet text.

    Each word is followed by one space in the line buffer, so printed lines
    end with a space.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = '  '
    for word in tweet_content.split(' '):
        padded = word + ' '
        # The "+ 1" allows for the trailing space that is always present.
        if len(line) + len(padded) > print_width + 1:
            print(line)
            line = '  '
        line += padded
    print(line)
#--------------------------------------------------------
def jack_card(words_1,words_2):
    """Return (distinct words of words_1 found in words_2) / (distinct words overall)."""
    union_size = len(get_unique(words_1 + words_2))
    shared = sum(1 for word in get_unique(words_1) if word in words_2)
    return shared / union_size

#--------------------------------------------------------
# 6330428421 (18.01) 264 (2021-02-28 23:24)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-seen order.

    ``words`` itself is only read, never changed.
    """
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return overlap / union size for two word lists.

    The union size starts at the size of ``words_2`` and grows by one for
    every word of ``words_1`` that ``words_2`` lacks.
    """
    shared = 0
    total = len(words_2)
    for word in words_1:
        if word in words_2:
            shared += 1
        else:
            total += 1
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Returns ``[tweet_id, jaccard]`` pairs, best coefficient first and lower
    tweet index first on ties.  Only tweets with a coefficient above 0 are
    reported, so the result may hold fewer than ``n`` pairs.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jaccard_coef = jaccard(tweet, norm_query)
        if jaccard_coef > 0:
            ranked.append([tweet_id, jaccard_coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    A word longer than the available width is printed alone on its own
    line.  Returns nothing.
    """
    print()  # blank separator line before every tweet
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = None
    for word in tweet_content.split(' '):
        if line is None:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330429021 (18.40) 265 (2021-02-27 21:08)

def get_unique( words ):
    """Return each distinct item of ``words`` once, in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words += [word]
    return unique_words
def jaccard(words_1, words_2):
    """Return common / (len(words_1) + len(words_2) - common)."""
    common = len([word for word in words_1 if word in words_2])
    # Each common word is counted in both lengths, so subtract it once.
    return common / (len(words_1) + len(words_2) - common)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Returns ``[tweet_id, jaccard]`` pairs for tweets with a coefficient above
    0, best first; equal coefficients are ordered by ascending tweet index.
    The coefficient of each tweet is computed once.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            # Negated id so that a descending sort prefers the lower id.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    Every word is printed exactly once.  A word longer than the available
    width is printed alone on its own line.  Returns nothing.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    lines = []
    for word in tweet_content.split(" "):
        # Decide before appending, so a line never has to be "un-built".
        if lines and len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += ' ' + word
        else:
            lines.append('  ' + word)
    for line in lines:
        print(line)
#--------------------------------------------
# 6330430621 (18.33) 266 (2021-03-01 18:11)

def get_unique( words ):
    """Return the items of ``words`` with repeats removed, first-seen order."""
    collected = []
    for word in words:
        if word in collected:
            continue
        collected.append(word)
    return collected
def jaccard(words_1, words_2):
    """Return common / (common + words only in one of the two lists)."""
    common = sum(1 for word in words_1 if word in words_2)
    only_first = sum(1 for word in words_1 if word not in words_2)
    only_second = sum(1 for word in words_2 if word not in words_1)
    return common / (common + only_first + only_second)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with non-zero score.

    Ordered by descending coefficient, then ascending tweet index.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            ranked.append((-coef, tweet_id))
    ranked.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    A word longer than the available width is printed alone on its own
    line, so the function always terminates.  Returns nothing.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = None
    # Every iteration consumes exactly one word, whether or not it fits.
    for word in tweet_content.split(' '):
        if line is None:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)





#--------------------------------------------
# 6330431221 (20.00) 267 (2021-03-01 04:11)

def get_unique( words ):
    """Return a list of the distinct items of ``words``, first occurrence kept."""
    unique_words = []
    for word in words:
        is_new = word not in unique_words
        if is_new:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return overlap / (len(words_1) + len(words_2) - overlap)."""
    overlap = len([word for word in words_1 if word in words_2])
    union_size = len(words_1) + len(words_2) - overlap
    return overlap / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Highest coefficient first; ties stay in ascending tweet-index order.
    """
    matches = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([idx, coef])
    # Stable sort: pairs with equal coefficients keep their index order.
    ordered = sorted(matches, key=lambda pair: pair[1], reverse=True)
    return ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    Exactly one blank line precedes the header.  Returns nothing.
    """
    print()  # print('\n') would emit two blank lines
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    lines = []
    for word in tweet_content.split(' '):
        if lines and len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += ' ' + word
        else:
            lines.append('  ' + word)
    for line in lines:
        print(line)





#--------------------------------------------
# 6330432921 (17.50) 268 (2021-03-01 14:47)

def get_unique( words ):
    """Return the distinct items of ``words`` in order of first appearance."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (shared + words found in only one of the lists)."""
    shared = [word for word in words_1 if word in words_2]
    first_only = [word for word in words_1 if word not in words_2]
    second_only = [word for word in words_2 if word not in words_1]
    return len(shared) / (len(shared) + len(first_only) + len(second_only))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    norm_tweets -- list of word lists, one per tweet
    norm_query  -- word list of the query
    n           -- maximum number of results; may exceed the number of tweets

    Returns ``[tweet_id, jaccard]`` pairs for tweets with a coefficient above
    0, best first; equal coefficients are ordered by ascending tweet index.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        # Filter on the coefficient itself; tweet 0 is a valid result.
        if coef > 0:
            ranked.append([-coef, tweet_id])
    ranked.sort()
    # Slicing copes with n larger than the number of matches.
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    A word with no break point inside the width is printed alone on its own
    line, so the function always terminates.  Returns nothing.
    """
    print("")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line = None
    for word in tweet_content.split(" "):
        if line is None:
            line = "  " + word
        elif len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)
#--------------------------------------------
# 6330433521 (18.33) 269 (2021-03-01 23:25)

def get_unique( words ):
    """Return ``words`` without repeats, keeping the first occurrence of each."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words += [word]
    return unique_words
def jaccard(words_1, words_2):
    """Return matches / (len(words_1) + len(words_2) - matches)."""
    matches = 0
    for candidate in words_1:
        if candidate not in words_2:
            continue
        matches += 1
    return matches / (len(words_1) + len(words_2) - matches)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Ordered by descending coefficient, then ascending tweet index.
    """
    keyed = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            keyed.append((-coef, tweet_id))
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in sorted(keyed)[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    The header starts in column 0 and is preceded by one blank line.  A word
    longer than the available width is printed alone on its own line, so the
    function always terminates.  Returns nothing.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    lines = []
    for word in tweet_content.split(' '):
        if lines and len(lines[-1]) + 1 + len(word) <= print_width:
            lines[-1] += ' ' + word
        else:
            # Covers both "line is full" and "word can never fit".
            lines.append('  ' + word)
    for line in lines:
        print(line)
#--------------------------------------------
# 6330434121 (20.00) 270 (2021-03-01 18:30)

def get_unique( words ):
    """Return each distinct item of ``words`` once, in first-seen order."""
    kept = []
    for word in words:
        if word in kept:
            continue
        kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return hits / (len(words_1) + len(words_2) - hits)."""
    hits = sum(1 for word in words_1 if word in words_2)
    return hits / (len(words_1) + len(words_2) - hits)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Highest coefficient first; ties stay in ascending tweet-index order.
    """
    pairs = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            pairs.append([tweet_id, coef])
    # Stable in-place sort on the negated coefficient: best first, and equal
    # coefficients keep their index order.
    pairs.sort(key=lambda pair: -pair[1])
    return pairs[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a separator line, a ``#id (coef)`` header and the wrapped text.

    Words are appended to the current line while the result still fits in
    ``print_width``; otherwise the line is printed and a new two-space
    indented line is started.
    """
    print(" ")
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    line = " "
    for word in tweet_content.split(' '):
        candidate = line + " " + word
        if len(candidate) <= print_width:
            line = candidate
        else:
            print(line)
            line = "  " + word
    print(line)







#--------------------------------------------
# 6330435821 (20.00) 271 (2021-02-26 01:22)

def get_unique( words ):
    """Return the distinct items of ``words`` in order of first appearance."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words += [word]
    return unique_words
def jaccard(words_1, words_2):
    """Return intersection size divided by union size for two word lists."""
    in_both = [word for word in words_1 if word in words_2]
    intersection_size = len(in_both)
    # Shared words appear in both lengths, so remove one copy of each.
    union_size = len(words_2) + len(words_1) - intersection_size
    return intersection_size / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_id, jaccard]`` pairs taken from the ``n`` best tweets.

    Tweets are ranked by descending coefficient, lower index first on ties;
    entries among the first ``n`` whose coefficient is 0 are left out.
    """
    scored = [
        [jaccard(norm_query, tweet), -tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # The negated id makes the descending sort prefer the lower id on ties.
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:n] if coef != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet text.

    Each text line is indented by two spaces and is broken before the word
    that would push it past ``print_width``.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = ' '
    for word in tweet_content.split(' '):
        if len(current) + len(word) + 1 > print_width:
            print(current)
            current = '  ' + word
            continue
        current = current + ' ' + word
    print(current)


#--------------------------------------------
# 6330436421 (17.00) 272 (2021-02-27 23:07)

def get_unique(words):
    """Return the strings of ``words`` with duplicates removed.

    ``words`` is a list of strings.  Any string that occurs several times in
    ``words`` appears exactly once in the result, at the position of its
    first occurrence.

    Doctest :
        >>> words = ['x', 'y', 'z', 'y', 'xyz', 'z']
        >>> get_unique(words)
        ['x', 'y', 'z', 'xyz']
    """
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard similarity coefficient of two word lists.

    ``words_1`` and ``words_2`` are lists of words, each without internal
    duplicates.  The result is the number of words of ``words_1`` found in
    ``words_2`` divided by the number of distinct words in both lists.

    Doctest :
        >>> words_1 = ['x', 'y', 'z', 'xyz']
        >>> words_2 = ['y', 'x', 'w']
        >>> jaccard(words_1,words_2)
        0.4
    """
    # Intersection: words of the first list that the second list also has.
    overlap = len([word for word in words_1 if word in words_2])
    # Union: every distinct word seen in either list.
    union = []
    for source in (words_1, words_2):
        for word in source:
            if word not in union:
                union.append(word)
    return overlap / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    norm_tweets -- list of word lists [[w00, w01, ...], [w10, w11, ...], ...]
    norm_query  -- list of words
    n           -- integer, maximum number of results

    Returns a list of at most ``n`` two-item lists ``[tweet_id, jaccard]``,
    where ``tweet_id`` is the index of the tweet in ``norm_tweets`` and
    ``jaccard`` is the coefficient of that tweet against ``norm_query``.
    Only tweets with a coefficient greater than 0 are selected; they are
    ranked by descending coefficient and, when coefficients are equal, the
    smaller ``tweet_id`` comes first.
    """
    result_list = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            result_list.append([tweet_id, coef])
    result_list.sort(key=lambda pair: (-pair[1], pair[0]))
    # Slice instead of indexing range(n): fewer than n tweets may match.
    return result_list[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print one tweet in the display format shown in the examples below.

    tweet_id      -- integer index of the tweet
    tweet_content -- string holding the text of the tweet to display
    jc_coef       -- float, the Jaccard coefficient
    print_width   -- integer, number of characters that fit on one line

    Nothing is returned.

    Doctest :
        >>> t = 'I promise you that as president, I will always appeal to the best  in us.'
        >>> show_tweet(1076, t, 0.222222, 40)
        <BLANKLINE>
        #1076 (0.22)
          I promise you that as president, I
          will always appeal to the best  in us.
        >>> show_tweet(1076, t, 0.222222, 30)
        <BLANKLINE>
        #1076 (0.22)
          I promise you that as
          president, I will always
          appeal to the best  in us.
        >>> show_tweet(1076, t, 0.222222, 20)
        <BLANKLINE>
        #1076 (0.22)
          I promise you that
          as president, I
          will always appeal
          to the best  in
          us.
    """
    print()
    print(f"#{tweet_id} ({round(jc_coef, 2)})")
    words = tweet_content.split(" ")
    finished_lines = []
    current = ""
    last_position = len(words) - 1
    for position, word in enumerate(words):
        # "+ 2" accounts for the indent; the buffer's trailing space stands
        # in for the separator before the new word.
        if len(current) + len(word) + 2 <= print_width:
            current += word + " "
        else:
            finished_lines.append(current)
            current = word + " "
        if position == last_position:
            finished_lines.append(current)
    for line in finished_lines:
        # Drop the trailing space that follows the last word of the line.
        print("  " + line[:-1])



#--------------------------------------------
# 6330437021 (20.00) 273 (2021-03-01 16:40)

def get_unique( words ):
    """Return the items of ``words`` without repeats, in first-seen order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return both / (only in words_1 + only in words_2 + both)."""
    only_first = len([word for word in words_1 if word not in words_2])
    only_second = len([word for word in words_2 if word not in words_1])
    both = len([word for word in words_1 if word in words_2])
    return both / (only_first + only_second + both)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, jaccard]`` pairs with positive score.

    Ordered by descending coefficient, then ascending tweet index.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            # Negated id: the descending sort then prefers the lower id.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (coef)`` header and the wrapped tweet text.

    The line buffer holds each word preceded by one space; a second space is
    added when the buffer is printed, giving the two-space indent.
    """
    print()
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    limit = print_width - 1  # one column is reserved for the extra indent space
    line = ""
    for word in tweet_content.split(" "):
        piece = " " + word
        if len(line) + len(piece) > limit:
            print(" " + line)
            line = ""
        line += piece
    if line != "":
        print(" " + line)
#--------------------------------------------
# 6330438721 (19.48) 274 (2021-03-01 22:46)

def get_unique( words ):
    """Return each distinct item of ``words`` once, in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words += [word]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists.

    Both lists are de-duplicated first.  When there are no words at all the
    result is 0.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = sum(1 for word in first if word in second)
    union_size = len(get_unique(first + second))
    if union_size == 0:
        return 0
    return shared / int(union_size)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Returns ``[tweet_id, jaccard]`` pairs for tweets with a coefficient above
    0, best first; equal coefficients are ordered by ascending tweet index.
    The coefficient of each tweet is computed once.
    """
    top_n = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    # A composite key avoids converting to [-coef, id] and back.
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text; it is split on single spaces
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the indent

    Every text line is indented by two spaces.  Returns nothing.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = None
    for word in tweet_content.split(" "):
        if line is None:
            line = "  " + word
        elif len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)

#--------------------------------------------
# 6330439321 (18.44) 275 (2021-02-25 23:42)

def get_unique(words):
    """Return the distinct items of ``words`` in order of first appearance."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists (0 when both are empty)."""
    union = get_unique(words_1 + words_2)
    if not union:
        return 0
    shared = [word for word in union if word in words_1 and word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    norm_tweets -- list of word lists, one per tweet
    norm_query  -- word list of the query
    n           -- maximum number of results

    Returns ``[tweet_id, jaccard]`` pairs for tweets with a non-zero
    coefficient.  The list is always fully ordered: descending coefficient,
    then ascending tweet index, whether or not ``n`` matches were found.
    """
    scored = []
    for idx, tweet in enumerate(norm_tweets):
        jcd = jaccard(tweet, norm_query)
        if jcd != 0:
            scored.append([idx, jcd])
    # Sorting all candidates once keeps the result ordered in every case.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet as a blank line, a header and word-wrapped text.

    tweet_id      -- index of the tweet, shown as ``#id``
    tweet_content -- tweet text
    jc_coef       -- Jaccard coefficient, shown rounded to 2 decimals
    print_width   -- maximum characters per line, including the 2-space indent

    The text is split on single spaces so runs of spaces inside the tweet
    are reproduced as written.  Lines carry no trailing space.  Returns
    nothing.
    """
    output_lines = []
    # split(' ') rather than split(): the latter collapses repeated spaces.
    for word in tweet_content.split(' '):
        if output_lines and len(output_lines[-1]) + 1 + len(word) <= print_width:
            output_lines[-1] += ' ' + word
        else:
            output_lines.append(' ' * 2 + word)

    print('\n#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    for line in output_lines:
        print(line)


# --------------------------------------------
# 6330440921 (17.78) 276 (2021-02-27 00:37)

def get_unique(words):
    """Return the distinct words, keeping the LAST occurrence of each one.

    A word is kept only if it does not appear again later in the list, so
    the result follows the order of final occurrences.
    """
    return [word for pos, word in enumerate(words) if word not in words[pos + 1:]]
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts every element of ``words_1`` that occurs in
    ``words_2`` (duplicates in ``words_1`` are counted each time).
    """
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n tweets most similar to the query.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, jaccard]`` pairs with similarity above zero, ordered by
        descending similarity, ties broken by ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        # Filter non-matches here.  The previous version kept them and then
        # threw away the whole result whenever tweet 0 with score 0 landed
        # inside the first n entries.
        if score > 0:
            ranked.append([tweet_id, score])
    ranked.sort(key=lambda item: (-item[1], item[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped to ``print_width`` columns.

    Output is a blank line, ``#<id> (<score rounded to 2 places>)`` and then
    the content split on single spaces, packed greedily into lines indented
    by two spaces.  A word that is too long to fit is placed alone on its
    own line.
    """
    words = tweet_content.split(" ")
    jc_coef = round(jc_coef, 2)
    print(end = '\n')
    print('#' + str(tweet_id) + ' (' + str(jc_coef) + ')')
    sentence = []
    total = 0
    while total < len(tweet_content):
        if len(words) == 0: break
        line = []
        word = words[0]
        # length_line counts each word plus one separator column.
        length_line = len(word) + 1
        # `or not line` forces at least one word per line; without it an
        # over-long word was never consumed and the outer loop never ended.
        while length_line < print_width or not line:
            words = words[1:]
            line.append(word)
            if len(words) == 0: break
            word = words[0]
            length_line += len(word) + 1
        line = " ".join(line)
        sentence.append(line)
        total += len(line)
    for i in range(len(sentence)):
        print('  ' + sentence[i])

#--------------------------------------------
# 6330441521 (16.36) 277 (2021-03-01 21:02)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word lists.

    Both sets are de-duplicated, so repeated words count once.
    """
    union = []
    for word in [*words_1, *words_2]:
        if word not in union:
            union.append(word)
    common = [word for word in union if word in words_1 and word in words_2]
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.  Fewer than n pairs are
        returned when fewer tweets match.
    """
    top_n = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top_n.append([tweet_id, score])

    # A composite key orders ties of any size.  The earlier three fixed
    # bubble passes only repaired small tie groups, and copying exactly n
    # items raised IndexError when there were fewer matches.
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header followed by its content wrapped into indented lines.

    The first output line is a single space, the second is
    ``#<tweet_id> (<jc_coef rounded to 2 places>)``.  Content words (split on
    any whitespace) are appended to a two-space-indented buffer, each followed
    by one space, and the buffer is printed whenever it fills up.
    """
    x = tweet_content.split()
    print(' ')
    o = '  '
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    for d in x:
        # Flush first if adding this word would exceed the width.
        if len(o)+len(d) > print_width:
            print(o)
            o = '  '
        if len(o) < print_width:
            o += d+' '
        # The trailing space counts toward the length, so a buffer that has
        # reached the width is flushed immediately.
        if len(o) >= print_width:
            print(o)
            o = '  '
    # x is a list, so `x != ' '` is always true, and o always holds at least
    # the two-space indent: the buffer is printed unconditionally, even when
    # it contains no words.
    if len(o) > 0 and x != ' ':
        print(o)

#--------------------------------------------
# 6330443821 (18.01) 278 (2021-03-01 10:14)

def get_unique(words):
    """Return the distinct words in order of first appearance.

    An empty input yields an empty list (the previous version fell through
    and returned None).
    """
    unique_words_lst = []
    for word in words:
        if word not in unique_words_lst:
            unique_words_lst.append(word)
    return unique_words_lst
def jaccard(words_1, words_2):
    """Return matches / (len(words_1) + len(words_2) - matches).

    ``matches`` is the number of entries of ``words_1`` found in ``words_2``.
    """
    matches = len([word for word in words_1 if word in words_2])
    denominator = len(words_1) + len(words_2) - matches
    return matches / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.

    Side effects:
        Still publishes the most recent score in the module-level name
        ``percent``, as the earlier implementation did.
    """
    global percent
    percent = 0

    top_n = []
    for i, tweet in enumerate(norm_tweets):
        try:
            percent = jaccard(norm_query, tweet)
        except ZeroDivisionError:
            # Both word lists empty: nothing to compare, skip this tweet.
            # Any other exception is a real error and now propagates.
            continue
        # Tweets sharing no word with the query are not results.
        if percent > 0:
            top_n.append([i, percent])
    top_n.sort(key=lambda x: (-x[1], x[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped with a two-space indent.

    Words (split on single spaces) are written directly to stdout, each
    followed by one space; ``n`` tracks how many columns remain on the
    current line.
    """
    print("\n", end = '')
    print("#"+str(tweet_id)+" "+"("+str(round(jc_coef, 2))+")")
    n = print_width
    print("  ", end = '')
    for i in range(len(tweet_content.split(' '))):
        # The word stays on the current line only if it is shorter than n-1.
        if len(tweet_content.split(' ')[i]) < n-1:
            print(tweet_content.split(' ')[i], end = ' ')
            n -= (len(tweet_content.split(' ')[i])+1)
        else:
            # Start a new line: newline, indent, the word and a trailing
            # space are all emitted through this one print call.
            print("\n", end = '  '+ tweet_content.split(' ')[i]+ ' ')
            n = print_width
            n -= (len(tweet_content.split(' ')[i])+1)

    # Terminate the last line.
    print("\n", end = '')




#--------------------------------------------
# 6330444421 (19.95) 279 (2021-03-01 18:02)

def get_unique(words):
  """Return the distinct words in order of first appearance."""
  collected = []
  for candidate in words:
    if candidate in collected:
      continue
    collected += [candidate]
  return collected
def jaccard(words_1, words_2):
  """Return overlap / (len(words_1) + len(words_2) - overlap).

  ``overlap`` counts the entries of ``words_1`` that occur in ``words_2``.
  """
  overlap = 0
  for candidate in words_1:
    if candidate not in words_2:
      continue
    overlap += 1
  return overlap / (len(words_1) + len(words_2) - overlap)
def top_n_similarity(norm_tweets, norm_query, n):
  """Return the first n ``[tweet_id, jaccard]`` pairs with non-zero score.

  Pairs are ordered by descending score, then ascending tweet id.
  """
  scored = [[idx, jaccard(tweet, norm_query)] for idx, tweet in enumerate(norm_tweets)]
  matches = [pair for pair in scored if pair[1] != 0]
  ordered = sorted(matches, key=lambda pair: (-pair[1], pair[0]))
  return ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
  """Print a tweet header and stream its words to stdout, wrapping lines.

  ``cursor`` is the column where the next word would start; the value 2
  means a fresh line whose two-space indent has not been printed yet.
  """
  words = tweet_content.split()
  print("\n#{} ({})".format(tweet_id, round(jc_coef, 2)))
  i = 0
  cursor = 2
  while i < len(words):
    if cursor == 2:
      print("  ", end="")
    if cursor + len(words[i]) < print_width:
      # Word fits with room to spare: print it followed by a space.
      print(words[i], end = " ")
      cursor += len(words[i]) + 1
      i += 1
    elif cursor + len(words[i]) == print_width:
      # Word ends exactly at the width: print it and end the line.
      print(words[i])
      i += 1
      cursor = 2
    else:

      cursor = 2
      if len(words[i]) + 2 > print_width:
        # Word can never fit on an indented line; it is printed at the
        # current output position and the line is ended.
        print(words[i])
        i += 1
      else:
        # End the current line; the same word is retried on the next pass.
        print("")



#--------------------------------------------
# 6330445021 (19.19) 280 (2021-02-28 21:57)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    result = []
    for word in words:
        if word in result:
            continue
        result.append(word)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two duplicate-free word lists.

    With no repeated words inside either list, the number of shared words
    is ``len(words_1) + len(words_2) - len(union)``.

    Raises:
        ZeroDivisionError: if both lists are empty.
    """
    all_words = words_1 + words_2
    distinct = len(get_unique(all_words))
    # A single integer division is correctly rounded.  The earlier form,
    # ``len(all_words) / distinct - 1``, subtracted after rounding and lost
    # the low bits (1/3 came out as 0.33333333333333326).
    return (len(all_words) - distinct) / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    top = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jc = jaccard(tweet, norm_query)
        if jc > 0:
            top.append([tweet_id, jc])
    # Sort on the real score.  Storing ``1 - jc`` and converting back added
    # rounding error, and the in-place conversion via ``index()`` could hit an
    # already converted entry (tweets 0 and 1 both scoring 1.0 came out
    # swapped).
    top.sort(key=lambda pair: (-pair[1], pair[0]))
    return top[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    Each content line starts with two spaces; a word joins the current line
    only while the line stays within ``print_width`` columns.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    current = '  '
    for word in tweet_content.split():
        if len(current) == 2:
            # First word of a line is always accepted, whatever its length.
            current += word
            continue
        if len(current) + len(word) >= print_width:
            print(current)
            current = '  ' + word
        else:
            current += ' ' + word
    print(current)
#--------------------------------------------
# 6330446721 (20.00) 281 (2021-02-27 17:06)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept += [item]
    return kept
def jaccard(words_1, words_2):
    """Return (total words - distinct words) / distinct words.

    For duplicate-free inputs this equals |intersection| / |union|.
    """
    combined = words_1 + words_2
    distinct = []
    for word in combined:
        if word in distinct:
            continue
        distinct.append(word)
    return (len(combined) - len(distinct)) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    top_n = []
    # enumerate() gives each tweet its own position.  Looking the id up with
    # norm_tweets.index(e) returned the FIRST equal tweet, so duplicate
    # tweets were all reported under the same id.
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top_n.append([tweet_id, score])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a ``#id (score)`` header and the wrapped content.

    Every word is followed by a space, so printed lines keep a trailing
    space; lines are indented by two spaces.
    """
    print(" ")
    print("#" + str(tweet_id) + " " + "(" + (str(round(jc_coef, 2)) + ")"))
    line = "  "
    for word in tweet_content.split(" "):
        if len(line) + len(word) > print_width:
            print(line)
            line = "  "
        line += word + " "
    print(line)



#--------------------------------------------
# 6330447321 (14.45) 282 (2021-02-28 17:20)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return len(intersection) / len(union) for two word lists.

    The intersection lists every entry of ``words_1`` found in ``words_2``;
    the union is ``words_1`` plus each new word of ``words_2``.
    """
    intersect = [word for word in words_1 if word in words_2]
    union = list(words_1)
    for word in words_2:
        if word in union:
            continue
        union.append(word)
    return len(intersect) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    ranked = []
    for i, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        # Tweets sharing no word with the query used to be returned with a
        # score of 0 whenever fewer than n tweets matched.
        if score > 0:
            ranked.append([i, score])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its content wrapped with a two-space indent.

    Output is a blank line, ``#<id> (<score rounded to 2 places>)`` and the
    content split on single spaces.  A word joins the current line while the
    line, the word and one separator column fit into ``print_width - 2``.
    A word too long for an empty line is placed on a line of its own.
    """
    print("\n#"+str(tweet_id),'('+str(round(jc_coef,2))+')')
    limit = print_width - 2
    pri = ""
    for word in tweet_content.split(' '):
        # Only a non-empty line can be flushed.  Flushing an empty line never
        # made the word fit and the old loop spun forever printing blanks.
        if pri and len(pri) + len(word) + 1 > limit:
            print('  '+pri)
            pri = ""
        pri = pri + ' ' + word if pri else word
    if pri:
        print('  '+pri)

#--------------------------------------------
# 6330448021 (20.00) 283 (2021-03-01 23:46)

# 6330449621 (18.50) 284 (2021-03-01 21:43)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    ordered = []
    for word in words:
        if word in ordered:
            continue
        ordered += [word]
    return ordered
def jaccard(words_1, words_2):
    """Return overlap / (len(words_1) + words of words_2 missing from words_1).

    ``overlap`` counts the entries of ``words_1`` present in ``words_2``.
    """
    overlap_count = sum(1 for word in words_1 if word in words_2)
    extra_count = sum(1 for word in words_2 if word not in words_1)
    return overlap_count / (len(words_1) + extra_count)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    top = []
    for i, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top.append([i, score])
    # Sorting best-first and slicing from the front is safe for any n.  The
    # old tail slice ``top[len(top)-n:]`` used a negative start when n was
    # larger than the list and silently dropped results.
    top.sort(key=lambda pair: (-pair[1], pair[0]))
    return top[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    Lines are indented by two spaces; a word joins the current line while
    line + word + one separator column stays within ``print_width``.
    """
    print('\n#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    line = '  '
    for position, word in enumerate(tweet_content.split(' ')):
        if len(line) + len(word) + 1 > print_width:
            print(line)
            line = '  ' + word
            continue
        # Only the very first word of the content is added without a space.
        separator = '' if position == 0 else ' '
        line += separator + word
    print(line)

#--------------------------------------------
# 6330450121 (20.00) 285 (2021-03-01 09:37)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + words of words_2 missing from words_1).

    Returns the integer 0 when no entry of ``words_1`` occurs in
    ``words_2``, which also covers two empty lists.
    """
    shared = len([word for word in words_1 if word in words_2])
    if shared == 0:
        return 0
    only_second = len([word for word in words_2 if word not in words_1])
    return shared / (only_second + len(words_1))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n ``[tweet_id, jaccard]`` pairs with non-zero score.

    Pairs are ordered by descending score, then ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            matches.append([tweet_id, score])
    # Same order as sorting [-score, id] pairs lexicographically.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    ``output`` holds the current line with a leading space before every
    word; each line is printed with one more space in front, which yields
    the two-space indent.
    """
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef, 2))+')')
    # Split the content on single spaces.
    content=tweet_content.split(' ')
    output=''
    lenn=0
    for x in content:
        l=len(x)
        # lenn is the length of `output`; the +1 is the space placed before
        # the word.  One column is reserved for the extra indent space.
        if (lenn+1+l)<=(print_width-1):
            lenn=len(output)+1+len(x)
            output=output+' '+(x)

        else:
            # Line is full: print it and start a new one with this word.
            print(' '+output)
            output= ''
            lenn = len(x)+1
            output=output+' '+(x)


    # Print whatever remains on the last line.
    if len(output)==0:
        pass
    else:
        print(' '+output)

#--------------------------------------------
# 6330452421 (18.50) 286 (2021-02-26 16:01)

def get_unique(words):
    """Return the distinct words in sorted order.

    The input list is left untouched; the previous version sorted the
    caller's list in place as a side effect.
    """
    unique_words = []
    for word in sorted(words):
        # Equal words are adjacent after sorting, so comparing with the last
        # kept word is enough.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return hits / (len(words_1) + len(words_2) - hits).

    ``hits`` is the number of entries of ``words_1`` found in ``words_2``.
    """
    hits = sum(word in words_2 for word in words_1)
    return hits / (len(words_1) + len(words_2) - hits)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores; an empty list when no
        tweet matches.
    """
    top = []
    for i, tweet in enumerate(norm_tweets):
        jac = jaccard(tweet, norm_query)
        if jac > 0:
            top.append([i, jac])
    # One keyed sort gives the same order as grouping equal scores and
    # sorting each group by id, and it does not index top[0], which raised
    # IndexError when nothing matched.
    top.sort(key=lambda pair: (-pair[1], pair[0]))
    return top[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    ``show`` is the line being built and ``c`` its running length; lines are
    printed stripped and prefixed with two spaces.
    """

    jc = "("+str(round(jc_coef,2))+")"
    print()
    print("#"+str(tweet_id),jc)

    word = tweet_content.split(" ")
    c= len(word[0])
    show = word[0]
    # NOTE(review): `show` and `c` are not reset after this print, so an
    # over-long first word remains in the buffer for the loop below.
    if c >= print_width-2:
        print("  "+show.strip())

    for i in range(1,len(word)):
        # Length of the line if this word is appended after one space.
        c+=(len(word[i])+1)

        if c == print_width-2:
            # Exact fit: print the line.  c is set to -1 so that the next
            # `c += len + 1` equals the bare length of the next word.
            show+=" "+word[i]
            print("  "+show.strip())
            c=-1
            show = ""

        elif c < print_width-2:
            # c equal to the word length means the line was empty (c was -1),
            # so no separating space is needed.
            if c == len(word[i]) :
                show+=word[i]
            else:
                show+=" "+word[i]

        elif c > print_width-2:
            # Word does not fit: flush and start the next line with it.
            print("  "+show.strip())
            c = len(word[i])
            show = word[i]
    print("  "+show.strip())


#--------------------------------------------
# 6330453021 (19.51) 287 (2021-02-27 16:22)

def get_unique(words):
    """Return the distinct words in order of first appearance.

    Writes nothing to stdout; a leftover debugging ``print`` of the result
    was removed.
    """
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return intersection size / union size for two word lists.

    The intersection is counted from the longer list (from ``words_2`` when
    the lengths are equal).  The union is ``words_1`` plus each word of
    ``words_2`` not already present.  Neither argument is modified; the
    previous version sorted both lists in place, reordering the caller's
    data without affecting the result.

    Raises:
        ZeroDivisionError: if both lists are empty.
    """
    if len(words_1) > len(words_2):
        intersec = sum(1 for word in words_1 if word in words_2)
    else:
        intersec = sum(1 for word in words_2 if word in words_1)

    total = list(words_1)
    for word in words_2:
        if word not in total:
            total.append(word)
    return intersec / len(total)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores; an empty list when there
        are no tweets or no matches.
    """
    ranked = []
    # enumerate() keeps ids distinct for identical tweets; index() always
    # returned the first equal tweet.
    for tweet_id, tweet in enumerate(norm_tweets):
        jaccard_num = jaccard(tweet, norm_query)
        # Drop non-matches up front.  The old check only emptied the result
        # when the BEST score was zero, so zero-score tweets could trail real
        # matches, and it indexed [0] on an empty list.
        if jaccard_num > 0:
            ranked.append([tweet_id, jaccard_num])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    The content is split on single spaces only, so words containing ``/``
    (URLs, for instance) stay intact.  Lines are indented by two spaces and
    hold as many words as fit into ``print_width - 2`` columns.
    """
    print('')
    print("#"+str(tweet_id),"("+str(round(jc_coef,2))+")")
    # Previously: replace(' ', '/').split('/'), which also broke every word
    # at its own slashes.
    full = tweet_content.split(' ')
    n = 0          # columns used on the current line, separators included
    p_sent = []    # words of the current line
    sent = []      # completed lines
    for i in full:
        n += len(i)
        if n <= print_width-2 :
            n += 1
            p_sent.append(i)
        else :
            sent.append(p_sent)
            p_sent = [i]
            n = len(i)+1
    for i in sent:
        print("  "+" ".join(i))
    print("  "+" ".join(p_sent))

#--------------------------------------------
# 6330454721 (18.33) 288 (2021-02-28 11:05)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    found = []
    for entry in words:
        if entry in found:
            continue
        found.append(entry)
    return found
def jaccard(words_1, words_2):
    """Return matching pairs / distinct words.

    The numerator counts every (left, right) pair of equal words across the
    two lists; the denominator is the number of distinct words in both.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    pair_matches = sum(1 for left in words_1 for right in words_2 if left == right)
    return pair_matches / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n ``[tweet_id, jaccard]`` pairs with positive score.

    Pairs are ordered by descending score, then ascending tweet id.
    """
    scored = ([idx, jaccard(tweet, norm_query)] for idx, tweet in enumerate(norm_tweets))
    positive = [pair for pair in scored if pair[1] > 0]
    positive.sort(key=lambda pair: (-pair[1], pair[0]))
    return positive[:n:1]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    The content is split on single spaces and packed greedily into lines of
    at most ``print_width - 2`` characters, each printed with a two-space
    indent.  A word longer than that limit is placed on a line of its own.
    """
    print("")
    print("#"+str(tweet_id),"("+str(round(jc_coef,2))+")")
    printwidth = print_width - 2
    word = ""   # current line; every word is stored with a leading space
    for piece in tweet_content.split(' '):
        # Flush only a non-empty line.  With an empty line, flushing cannot
        # make the word fit, and the old while-loop never advanced.
        if word and len(word) + len(piece) > printwidth:
            print("  " + word.strip())
            word = ""
        word += " " + piece

    print("  " + word.strip())

#--------------------------------------------
# 6330455321 (20.00) 289 (2021-02-27 00:40)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    uniques = []
    for word in words:
        if word in uniques:
            continue
        uniques += [word]
    return uniques
def jaccard(words_1, words_2):
    """Return len(shared) / len(distinct) for two word lists.

    ``shared`` lists every entry of ``words_1`` found in ``words_2``;
    ``distinct`` is the de-duplicated combination of both lists.
    """
    shared = [word for word in words_1 if word in words_2]
    distinct = []
    for word in [*words_1, *words_2]:
        if word not in distinct:
            distinct.append(word)
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    top_n = []
    # enumerate() keeps ids distinct for identical tweets; looking the id up
    # with index() reported every duplicate under the first tweet's id.
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        # The old grouping pass dropped the zero-score group implicitly via
        # its [0, 0] sentinel; the filter makes that explicit.
        if score > 0:
            top_n.append([tweet_id, score])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    The content is split on single spaces; each printed line is indented by
    two spaces and holds as many words as fit into ``print_width - 2``.
    """
    print('')
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    limit = print_width - 2
    used = 0
    current = []
    for word in tweet_content.split(' '):
        if used + len(word) <= limit:
            current.append(word)
            used += len(word) + 1
        else:
            print("  " + " ".join(current))
            current = [word]
            used = len(word) + 1
    print("  " + " ".join(current))


#--------------------------------------------
# 6330458221 (20.00) 290 (2021-02-26 15:27)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    unique_word = []
    for word in words:
        if word in unique_word:
            continue
        unique_word.append(word)
    return unique_word
def jaccard(words_1, words_2):
    """Return found / (len(words_1) + len(words_2) - found).

    ``found`` counts the entries of ``words_1`` that equal at least one
    entry of ``words_2``.
    """
    found = sum(
        1 for left in words_1 if any(left == right for right in words_2)
    )
    return found / ((len(words_1) + len(words_2)) - found)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores; an empty list when there
        are no tweets or no matches.
    """
    top_n = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            top_n.append([tweet_id, coef])
    # A single sort selects the same items as repeatedly taking max() (first
    # index wins ties), without rescanning the list n times and without the
    # ValueError that max() raised on an empty tweet list.
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n] if n > 0 else []
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    Words come from ``split()``, but the run of spaces between consecutive
    words is re-created from their positions in ``tweet_content``, so the
    original spacing inside a line is kept.
    """

    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef,2)) + ')')
    ct_sp = tweet_content.split()
    s = ''
    for i in range(len(ct_sp)):
        if i != len(ct_sp)-1:
            # x = spaces between the end of the first occurrence of this word
            # and the first occurrence of the next word in the remaining text.
            # NOTE(review): index() matches substrings, so the gap can be
            # wrong if the next word also occurs earlier in the text.
            x = ' '*((tweet_content.index(ct_sp[i+1]))-((tweet_content.index(ct_sp[i]))+len(ct_sp[i])))
            s += ct_sp[i] + x
            # Drop everything up to the end of the current word.
            tweet_content = tweet_content[(tweet_content.index(ct_sp[i]))+len(ct_sp[i]):]
            # If the next word will not fit, cut the trailing gap and flush.
            if (print_width-2)-len(s) < len(ct_sp[i+1]):
                s = s[:-(len(x))]
                print('  ' + s)
                s = ''
        else:
            s += ct_sp[i]
    print('  ' + s)

#--------------------------------------------
# 6330459921 (20.00) 291 (2021-02-28 23:16)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    result = []
    for word in words:
        if word in result:
            continue
        result.append(word)
    return result
def jaccard(words_1, words_2):
    """Return common / (len(words_1) + len(words_2) - common).

    ``common`` counts the entries of ``words_2`` that occur in ``words_1``.
    """
    common = len([word for word in words_2 if word in words_1])
    combined_length = len(words_1 + words_2)
    return common / (combined_length - common)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n ``[tweet_id, jaccard]`` pairs with positive score.

    Pairs are ordered by descending score, then ascending tweet id.
    """
    candidates = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            candidates.append([tweet_id, score])
    ordered = sorted(candidates, key=lambda pair: (-pair[1], pair[0]))
    return ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    The line buffer stores a space before every word and is printed with one
    more leading space, giving a two-space indent.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ""
    for word in tweet_content.split(" "):
        if len(line) + 1 + len(word) > print_width - 1:
            print(" " + line)
            line = ""
        line += " " + word
    if line:
        print(" " + line)




#--------------------------------------------
# 6330460421 (20.00) 292 (2021-02-27 03:14)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct += [word]
    return distinct
def jaccard(words_1, words_2):
    """Return |intersection| / |union| after de-duplicating both lists.

    Each list is first reduced with ``get_unique``.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    union = []
    union += second
    union += [word for word in first if word not in second]
    shared = [word for word in first if word in second]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    top_n = []
    # Take the id from enumerate(): norm_tweets.index(e) returns the first
    # tweet EQUAL to e, which mislabels tweets with identical word lists.
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top_n.append([tweet_id, score])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a ``#id (score)`` header and the wrapped content.

    The buffer ``n`` starts as a single space and receives a space before
    each word, so the first word ends up behind a two-space indent.
    """
    print(" ")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    tweet_content = tweet_content.split(" ")
    n = " "
    for e in tweet_content:
        n += " " + e
        if len(n) > print_width:
            # Too long once e is added: print the buffer without e (the
            # separating space before e stays) and restart with e.
            # NOTE(review): for an empty e the slice n[:-0] is empty, so an
            # empty line is printed here.
            print(n[:-len(e):])
            n = "  " + e
    print(n)

#--------------------------------------------
# 6330461021 (20.00) 293 (2021-03-01 03:24)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return intersect / (len(words_1) + len(words_2) - intersect).

    ``intersect`` counts the entries of ``words_1`` found in ``words_2``.
    """
    intersect = len(list(filter(lambda word: word in words_2, words_1)))
    union_size = len(words_1) + len(words_2) - intersect
    return intersect / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.
    """
    top_n = []
    for i, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top_n.append([i, score])
    # The composite key orders ties of any size.  The old approach (ascending
    # sort, one swap pass, then reversal) only got pairs right: three tweets
    # with ids 1, 2, 3 and equal scores came out as 1, 3, 2.
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n] if n > 0 else []
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    ``cursor`` tracks the column after the current line (indent included);
    a word that would move it past ``print_width`` starts a new line.
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line_words = []
    cursor = 2
    for word in tweet_content.split(' '):
        if cursor + len(word) > print_width:
            print("  " + " ".join(line_words))
            line_words = [word]
            cursor = len(word) + 3
        else:
            line_words.append(word)
            cursor += len(word) + 1
    print("  " + " ".join(line_words))

#--------------------------------------------
# 6330462721 (20.00) 294 (2021-03-01 23:16)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (total length - distinct count) / distinct count.

    The distinct count comes from ``get_unique`` applied to both lists
    joined together.
    """
    distinct = len(get_unique(words_1 + words_2))
    overlap = len(words_1) + len(words_2) - distinct
    return overlap / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n ``[tweet_id, jaccard]`` pairs with non-zero score.

    Pairs are ordered by descending score, then ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jc = jaccard(norm_query, tweet)
        if jc == 0:
            continue
        scored.append([tweet_id, jc])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n:]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    An empty string is appended to the word list as a sentinel so that the
    last real word is handled inside the loop like every other word.
    """

    print("")
    print("#"+str(tweet_id)+" "+"("+str(round(float(jc_coef),2))+")")
    word = tweet_content.split(" ")+[""]
    x = word[0]
    for i in range(len(word)-1) :
        # Candidate line: the current line plus the next word.
        a = x +" "+word[i+1]
        if len(a) <= print_width-2 :
            x += " "
            x += word[i+1]
            # Last iteration: the appended word is the empty sentinel, so the
            # printed line ends with one space.
            if i == len(word)-2 :
                print("  "+x)
        else :
            # Next word does not fit: print the line and start over with it.
            # On the last iteration the new x is the sentinel and is not
            # printed.
            print("  "+x)
            x = word[i+1]

#--------------------------------------------
# 6330463321 (20.00) 295 (2021-03-01 23:41)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    collected = []
    for word in words:
        if word in collected:
            continue
        collected.append(word)
    return collected
def jaccard(words_1, words_2):
    """Return |intersection| / |union| with duplicates counted once."""
    union = []
    common = []
    for word in [*words_1, *words_2]:
        if word not in union:
            union.append(word)
        in_both = word in words_1 and word in words_2
        if in_both and word not in common:
            common.append(word)
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n ``[tweet_id, jaccard]`` pairs with positive score.

    Pairs are ordered by descending score, then ascending tweet id.
    """
    pairs = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            pairs.append([tweet_id, score])
    # Equivalent to sorting [score, -id] in reverse order.
    ordered = sorted(pairs, key=lambda pair: (-pair[1], pair[0]))
    return [] + ordered[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    ``b`` holds the words of the current line; lines are printed joined by
    single spaces behind a two-space indent.
    """
    print()
    print('#'+str(tweet_id)+' '+'('+str(round(jc_coef,2))+')')
    a = tweet_content.split(' ')
    b = []
    for i in range(len(a)):
        # Length of the current line plus this word plus one separator
        # column (the +1 is applied even when b is empty).
        k = len(' '.join(b))+len(a[i])+1
        if k <= print_width-2:
            b.append(a[i])
        # Flush when the word did not fit, or when this is the last word.
        if k > print_width-2 or i == len(a)-1:
            print('  '+' '.join(b))
            # Last word that did not fit gets its own line.
            # NOTE(review): `a[i] not in b` tests by value, so the last word
            # is skipped if an equal word is already on the flushed line.
            if i == len(a)-1 and a[i] not in b:
                print('  '+a[i])
            b = [a[i]]



#--------------------------------------------
# 6330464021 (17.86) 296 (2021-02-28 23:16)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    result = []
    for word in words:
        if word in result:
            continue
        result.append(word)
    return result
def jaccard(words_1, words_2):
    """Return len(shared) / len(distinct) for two word lists.

    ``shared`` lists every entry of ``words_1`` found in ``words_2``;
    ``distinct`` is the de-duplicated combination of both lists.  The exact
    quotient is returned.  Rounding to two decimals belongs to display code:
    doing it here made different similarities compare equal and distorted
    rankings built on this value.

    Raises:
        ZeroDivisionError: if both lists are empty.
    """
    distinct = []
    for word in [*words_1, *words_2]:
        if word not in distinct:
            distinct.append(word)
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n ``[tweet_id, jaccard]`` pairs with positive score.

    Pairs are ordered by descending score, then ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        jac = jaccard(tweet, norm_query)
        if jac > 0:
            matches.append([tweet_id, jac])
    # Equivalent to sorting [score, -id] in reverse order.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped content.

    The buffer starts as one space and gets a space before each word, which
    produces the two-space indent of the first line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    current = ' '
    for word in tweet_content.split():
        # A buffer of length 1 is the untouched initial buffer: the first
        # word is always accepted.
        if len(current) != 1 and len(current) + len(word) >= print_width:
            print(current)
            current = '  ' + word
        else:
            current += ' ' + word
    print(current)






#--------------------------------------------
# 6330465621 (18.44) 297 (2021-03-01 16:12)

def get_unique(words):
    """Return the distinct words in order of first appearance."""
    kept = []
    for word in words:
        if word in kept:
            continue
        kept += [word]
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_1`` found in ``words_2``.
    """
    shared = 0
    for word in words_1:
        shared += 1 if word in words_2 else 0
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n ``[tweet_id, jaccard]`` pairs, best match first.

    Args:
        norm_tweets: list of normalized word lists; the index is the tweet id.
        norm_query: normalized word list of the query.
        n: maximum number of results.

    Returns:
        Pairs with similarity above zero, ordered by descending similarity
        and ascending tweet id among equal scores.  Fewer than n pairs are
        returned when fewer tweets match.
    """
    top = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            top.append([tweet_id, score])
    top.sort(key=lambda pair: (-pair[1], pair[0]))
    # A plain slice replaces the old selection loop, which read top[i+1] past
    # the end of the list (IndexError whenever there were no more than n
    # matches) and removed elements from the list it was indexing.
    return top[:n] if n > 0 else []
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a '#id (coef)' header and the wrapped tweet.

    Words are accumulated until the joined text reaches print_width - 2
    characters; each emitted line is prefixed with two spaces.
    """
    print(' ')
    print("#" + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split()
    last = len(words) - 1
    room = print_width - 2  # text budget once the two-space indent is taken
    pending = []
    for pos, word in enumerate(words):
        pending.append(word)
        line = ' '.join(pending)
        if pos == last and len(line) <= room:
            print("  " + line)
        elif len(line) == room:
            # Exact fit: flush and start over with an empty line.
            print('  ' + line)
            pending = []
        elif len(line) > room:
            if pos == 0:
                print('  ' + line)
                pending = []
            else:
                # The newest word overflowed: flush what came before it and
                # carry the word over to the next line.
                overflow = pending.pop()
                print('  ' + ' '.join(pending))
                pending = [overflow]
                if pos == last:
                    print('  ' + overflow)




#--------------------------------------------
# 6330466221 (19.10) 298 (2021-03-01 23:50)

def get_unique(words):
    """Return the distinct items of words, keeping first-occurrence order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct += [word]
    return distinct
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word lists.

    Both the union and the intersection are de-duplicated, so repeated
    words in either list do not change the result.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    common = []
    for word in words_1:
        if word in words_2 and word not in common:
            common.append(word)
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Only tweets with a positive jaccard() coefficient are reported, ordered
    by descending coefficient and, for equal coefficients, ascending id.
    When fewer than n tweets match, only the matching ones are returned.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)  # scored once per tweet
        if coef > 0:
            matches.append([tweet_id, coef])
    # list.sort is stable, so ids stay ascending among equal coefficients.
    matches.sort(key=lambda pair: -pair[1])
    return matches[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and wrapped text when the coefficient is shown.

    Nothing is printed when the coefficient rounds (2 decimals) to a value
    that is not positive. Each text line starts with two spaces and every
    word is followed by one space.
    """
    pieces = tweet_content.split(' ')
    shown_coef = round(jc_coef, 2)
    if not shown_coef > 0:
        return
    print()
    print('#' + str(tweet_id), '(' + str(shown_coef) + ')')
    line = '  '
    for piece in pieces:
        if len(line) + len(piece) > print_width:
            print(line)
            line = '  ' + piece + ' '
        else:
            line = line + piece + ' '
    print(line)

#--------------------------------------------
# 6330467921 (20.00) 299 (2021-02-26 21:26)

def get_unique(words):
    """Return the distinct items of words as a new, sorted list.

    The argument itself is left untouched; sorting and de-duplication are
    done on a copy.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting, so comparing with the last
        # kept item is enough to drop repeats.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the share of distinct words that occur in both lists.

    The distinct words come from get_unique() applied to the concatenation
    of the two lists.
    """
    union = get_unique(words_1 + words_2)
    common = [word for word in union if word in words_1 and word in words_2]
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first n [tweet_id, coefficient] pairs, best match first.

    Tweets with a non-positive jaccard() coefficient are skipped; ties are
    ordered by ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            ranked.append((-coef, tweet_id))
    ranked.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Words are taken from the front of the queue one at a time; a line is
    flushed as soon as adding the following word (plus indent and space)
    would exceed print_width. The remaining words are printed at the end.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    queue = tweet_content.split(' ')
    line_words = []
    while queue:
        # Length of the word after the one about to be taken (0 if none).
        next_len = len(queue[1]) if len(queue) != 1 else 0
        if len(' '.join(line_words)) + 2 < print_width:
            line_words.append(queue.pop(0))
            joined = ' '.join(line_words)
            if len(joined) + 3 + next_len > print_width:
                print('  ' + joined)
                line_words = []
    print('  ' + ' '.join(line_words))

#--------------------------------------------
# 6330468521 (17.00) 300 (2021-02-28 02:08)

def get_unique(words):
    """Return the distinct items of words as a new list in sorted order.

    Works on a sorted copy, so the caller's list keeps its original order.
    """
    ordered = sorted(words)
    # After sorting, an item is new exactly when it differs from its
    # predecessor.
    return [
        word
        for pos, word in enumerate(ordered)
        if pos == 0 or word != ordered[pos - 1]
    ]
def jaccard(words_1, words_2):
    """Return the ratio of repeated to distinct words across both lists.

    The two lists are merged and sorted; every item equal to its
    predecessor counts as a repeat, the others as distinct. For
    duplicate-free inputs this is the Jaccard coefficient.
    """
    merged = sorted(list(words_1) + list(words_2))
    distinct = []
    repeats = 0
    for word in merged:
        if distinct and distinct[-1] == word:
            repeats += 1
        else:
            distinct.append(word)
    return repeats / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    For each tweet the words of the tweet and the query are merged and
    sorted; the coefficient is the number of repeated words divided by the
    number of distinct words. Tweets without any repeated word are left
    out. The result is ordered by descending coefficient, ties by ascending
    tweet id, and is shorter than n when fewer tweets match.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        merged = sorted(list(tweet_words) + list(norm_query))
        distinct = []
        repeats = 0
        for word in merged:
            if distinct and distinct[-1] == word:
                repeats += 1
            else:
                distinct.append(word)
        # repeats > 0 also guarantees distinct is non-empty, so the division
        # is safe even for an empty tweet with an empty query.
        if repeats > 0:
            ranked.append([-repeats / len(distinct), tweet_id])
    ranked.sort()
    # Slice instead of indexing n times so a short list is not an error.
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a line of print_width spaces, the header and the wrapped tweet.

    Each text line starts with two spaces and every word is followed by a
    single space; a word that does not fit starts a new line.
    """
    print(" " * print_width)
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = "  "
    for word in tweet_content.split(" "):
        if len(line) + len(word) <= print_width:
            line += word + " "
            continue
        print(line)
        line = "  " + word + " "
    print(line)

#--------------------------------------------
# 6330469121 (19.38) 301 (2021-03-01 03:09)

def get_unique(words):
    """Return the items of words without repeats, first occurrences first."""
    unique_words = []
    for word in words:
        if unique_words.count(word) == 0:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    shared counts the items of words_1 that also appear in words_2; the
    denominator equals the union size when neither list contains repeats.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Only tweets with a positive jaccard() coefficient are kept. Ordering is
    by descending coefficient, then ascending tweet id.
    """
    keyed = []
    for position in range(len(norm_tweets)):
        score = jaccard(norm_tweets[position], norm_query)
        if score > 0:
            keyed.append([-score, position])
    result = []
    for neg_score, position in sorted(keyed):
        result.append([position, -neg_score])
    return result[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    The coefficient is shown rounded to two decimals, separated from the id
    by a single space. Text lines are indented by two spaces, words are
    separated by one space, and a word that would push a line past
    print_width characters (indent included) starts a new line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    # Start with one space; the separator added before the first word
    # completes the two-space indent.
    line = ' '
    for word in tweet_content.split(" "):
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330470721 (19.75) 302 (2021-03-01 11:49)

def get_unique(words):
    """Return each distinct item of words once, in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the two word lists as a float.

    The numerator counts items of words_1 that occur in words_2; the
    denominator is the number of distinct items in both lists together.
    """
    combined = words_1 + words_2
    shared = sum(1 for word in words_1 if word in words_2)
    distinct = []
    for word in combined:
        if word in distinct:
            continue
        distinct.append(word)
    return float(shared / len(distinct))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    tweet_id is the position of the tweet in norm_tweets, so tweets with
    identical word lists still get their own ids. Tweets with a
    non-positive jaccard() coefficient are skipped; the result is ordered
    by descending coefficient, ties by ascending tweet id.
    """
    scored = []
    # enumerate supplies the real position; looking the tweet up by value
    # would return the first equal tweet instead.
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    The coefficient is rounded to two decimals and shown in parentheses.
    Text lines are indented by two spaces; a word that would make the line
    longer than print_width starts a new line.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split():
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = line + ' ' + word
    print(line)







#--------------------------------------------
# 6330471321 (20.00) 303 (2021-02-28 19:47)

def get_unique(words):
    """Return the distinct items of words in order of first appearance."""
    distinct = []
    for item in words:
        if distinct.count(item) == 0:
            distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient based on the count of equal pairs.

    matches counts every (item of words_1, item of words_2) pair that is
    equal. Returns 1 when the combined length equals that count (e.g. both
    lists empty), which would otherwise divide by zero.
    """
    matches = sum(1 for left in words_1 for right in words_2 if left == right)
    combined = len(words_1) + len(words_2)
    if combined == matches:
        return 1
    return matches / (combined - matches)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped. The
    result is ordered by descending coefficient, ties by ascending id.
    """
    hits = []
    # Collect in descending id order so that, after a stable ascending sort
    # and a final reversal, ties come out in ascending id order.
    for tweet_id in reversed(range(len(norm_tweets))):
        score = jaccard(norm_query, norm_tweets[tweet_id])
        if score > 0:
            hits.append([tweet_id, score])
    hits.sort(key=lambda pair: pair[1])
    hits.reverse()
    return hits[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    The header starts at the first column. Text lines are indented by two
    spaces; a word that would make the line longer than print_width starts
    a new line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = line + ' ' + word
    print(line)


#--------------------------------------------
# 6330472021 (18.78) 304 (2021-03-01 23:11)

def get_unique(words):
    """Return the items of words with later repeats removed."""
    result = []
    for word in words:
        if word not in result:
            result.append(word)
    return result
def jaccard(words_1, words_2):
    """Return shared / (combined length - shared) for the two word lists.

    shared is the number of items of words_1 that also occur in words_2.
    """
    shared = len([word for word in words_1 if word in words_2])
    return shared / (len(words_1 + words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped; ordering
    is by descending coefficient, then ascending tweet id.
    """
    coefs = (jaccard(tweet_words, norm_query) for tweet_words in norm_tweets)
    order = sorted(
        [-coef, tweet_id] for tweet_id, coef in enumerate(coefs) if coef > 0
    )
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in order][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    The coefficient is rounded to two decimals. Text lines are indented by
    two spaces, words are separated by one space, and a word that would
    make the line longer than print_width starts a new line. Words are
    always printed in full, even when longer than print_width.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split():
        if len(line) + 1 + len(word) > print_width:
            # Emit the line as built; never slice it, so no text is lost.
            print(line)
            line = '  ' + word
        else:
            line += ' ' + word
    print(line)




#--------------------------------------------
# 6330473621 (15.67) 305 (2021-02-27 23:03)

def get_unique(words):
    """Return the distinct items of words, first occurrence first."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the two word lists, unrounded.

    The numerator counts items of words_1 that occur in words_2; the
    denominator is the number of distinct items in both lists together.
    Rounding for display is left to the caller so that comparisons between
    coefficients stay exact.
    """
    shared = [word for word in words_1 if word in words_2]
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    The coefficient of a tweet is the number of its words found in the
    query divided by the number of distinct words in tweet and query
    together. Tweets sharing no word with the query are left out. The
    result is ordered by descending coefficient, ties by ascending tweet
    id, and is shorter than n when fewer tweets match.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        shared = [word for word in tweet_words if word in norm_query]
        if not shared:
            continue  # coefficient would be 0: not a match
        union = []
        for word in tweet_words + norm_query:
            if word not in union:
                union.append(word)
        ranked.append([tweet_id, len(shared) / len(union)])
    # Sort once, after all tweets are scored.
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Each text line starts with two spaces and every word is followed by a
    single space. A running character count decides when a word no longer
    fits in print_width - 2 characters and must start a new line.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    budget = print_width - 2
    used = 0
    line = '  '
    for word in tweet_content.split(' '):
        used += len(word)
        if used > budget:
            print(line)
            line = '  '
            used = len(word)
        used += 1  # account for the space that follows the word
        line += word + ' '
    print(line)



#--------------------------------------------
# 6330474221 (17.60) 306 (2021-03-01 23:58)

def get_unique(words):
    """Return the items of words once each, in first-seen order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word lists.

    Both sets are de-duplicated, so repeated words in either list do not
    affect the result.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    # Keep a word of words_1 only at its first position, and only when it
    # also occurs in words_2.
    common = [
        word
        for pos, word in enumerate(words_1)
        if word in words_2 and word not in words_1[:pos]
    ]
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Only tweets with a positive jaccard() coefficient are reported, each at
    most once, ordered by descending coefficient and, for equal
    coefficients, ascending tweet id.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    # Stable sort: ids stay ascending among equal coefficients.
    matches.sort(key=lambda pair: -pair[1])
    return matches[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the header and wrapped text of a tweet with a visible score.

    Nothing is printed unless the coefficient, rounded to two decimals, is
    positive. Text lines start with two spaces and every word is followed
    by one space.
    """
    shown_coef = round(jc_coef, 2)
    if shown_coef > 0:
        print()
        print('#' + str(tweet_id) + ' (' + str(shown_coef) + ')')
        line = '  '
        for token in tweet_content.split(' '):
            if len(token) + len(line) > print_width:
                print(line)
                line = '  '
            line += token + ' '
        print(line)





#--------------------------------------------
# 6330475921 (18.01) 307 (2021-03-01 18:14)

def get_unique(words):
    """Return the distinct items of words in order of first appearance."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    The distinct items are obtained with get_unique() on the concatenation
    of both lists.
    """
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(get_unique(words_1 + words_2))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Tweets whose jaccard() coefficient is not positive are excluded, so
    unrelated tweets never pad the result. Ordering is by descending
    coefficient, ties by ascending tweet id.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([-coef, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Text lines are indented by two spaces; a word that would make the line
    longer than print_width starts a new line.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        candidate = line + ' ' + word
        if len(candidate) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = candidate
    print(line)



#--------------------------------------------
# 6330476521 (18.01) 308 (2021-03-01 17:30)

def get_unique(words):
    """Return the items of words without repeats, first occurrences first."""
    kept = []
    for word in words:
        if word not in kept:
            kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    shared is the number of items of words_1 that occur in words_2.
    """
    shared = len([word for word in words_1 if word in words_2])
    union_size = len(words_1) + len(words_2) - shared
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Tweets whose jaccard() coefficient is not positive are excluded.
    Ordering is by descending coefficient, ties by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            # Negated id: a descending sort then prefers smaller ids on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Each text line is printed with a two-space indent and every word is
    followed by one space. A word starts a new line when it, the text so
    far and the indent together exceed print_width.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        if len(word) + len(line) + 2 > print_width:
            print('  ' + line)
            line = ''
        line += word + ' '
    if line:
        print('  ' + line)
#--------------------------------------------
# 6330477121 (20.00) 309 (2021-03-01 01:07)

def get_unique(words):
    """Return the distinct items of words in first-seen order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    shared counts the items of words_1 that also appear in words_2; the
    denominator is the union size when neither list contains repeats.
    """
    shared = len([word for word in words_1 if word in words_2])
    return shared / ((len(words_1) + len(words_2)) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped; ordering
    is by descending coefficient, then ascending tweet id.
    """
    candidates = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            candidates.append([-coef, tweet_id])
    best = sorted(candidates)[:n]
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in best]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a '#id (coef)' header and the wrapped tweet.

    Text lines end up indented by two spaces (one printed here plus the
    separator kept before the first word); a word that would make the line
    longer than print_width starts a new line.
    """
    print(" ")
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width - 1:
            print(' ' + line)
            line = ''
        line += ' ' + word
    if line:
        print(' ' + line)

#--------------------------------------------
# 6330478821 (20.00) 310 (2021-02-28 23:07)

def get_unique(words):
    """Return the distinct items of words, first occurrence first."""
    seen = []
    for word in words:
        if word not in seen:
            seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    The distinct items come from get_unique() on the concatenated lists.
    """
    union = get_unique(words_1 + words_2)
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped; ordering
    is by descending coefficient, then ascending tweet id.
    """
    pairs = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            pairs.append([tweet_id, coef])
    pairs.sort(key=lambda pair: (-pair[1], pair[0]))
    return pairs[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Text lines are indented by two spaces. When a word overflows
    print_width, the line before it is printed together with the
    separating space, and the word starts the next line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        previous = line
        line = previous + ' ' + word
        if len(line) > print_width:
            print(previous + ' ')
            line = '  ' + word
    print(line)



#--------------------------------------------
# 6330481621 (17.95) 311 (2021-03-01 23:40)

def get_unique(words):
    """Return each distinct item of words once, in first-seen order."""
    result = []
    for word in words:
        if word not in result:
            result.append(word)
    return result
def jaccard(words_1, words_2):
    """Return the overlap of the two lists divided by their union size.

    The union size is the number of items get_unique() keeps from the
    concatenated lists; the overlap is the combined length minus that.
    """
    union_size = len(get_unique(words_1 + words_2))
    overlap = len(words_1) + len(words_2) - union_size
    return overlap / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Tweets whose jaccard() coefficient is not positive are excluded.
    Ordering is by descending coefficient, ties by ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a '#id (coef)' header and the wrapped tweet.

    Words are written as they are processed, each followed by a space. A
    new line (indented by two spaces) is started when the next word does
    not fit in the remaining width; a word at least print_width - 2 long
    gets a line of its own.
    """
    words = tweet_content.split()
    room = print_width - 2
    used = 0  # characters already written on the current line, indent excluded
    print(' ')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    for word in words:
        size = len(word)
        if used != 0:
            if size <= room - used:
                print(word + ' ', end='')
                used += 1 + size
                continue
            # Does not fit: close the current line and fall through to the
            # start-of-line handling below.
            print()
            used = 0
        if size >= room:
            print(' ' * 2 + word)
        else:
            print(' ' * 2 + word + ' ', end='')
            used += 1 + size
    print()
#--------------------------------------------
# 6330482221 (19.95) 312 (2021-02-26 22:32)

def get_unique(words):
    """Return the distinct items of words in order of first appearance."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the two word lists.

    Both lists are first reduced with get_unique(); the result is the
    number of reduced words_1 items found in the reduced words_2, divided
    by the number of items get_unique() keeps from both together.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = sum(1 for word in first if word in second)
    return shared / len(get_unique(first + second))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets whose jaccard() coefficient is zero are skipped; ordering is by
    descending coefficient, then ascending tweet id.
    """
    coefs = [jaccard(tweet_words, norm_query) for tweet_words in norm_tweets]
    ranked = sorted(
        [-1 * coef, tweet_id]
        for tweet_id, coef in enumerate(coefs)
        if -1 * coef != 0
    )
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[0:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a '#id (coef)' header and the wrapped tweet.

    Text lines are indented by two spaces. When a word overflows
    print_width, everything before it (including the separating space) is
    printed and the word starts the next line.
    """
    print(" ")
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = " "
    for word in tweet_content.split():
        grown = line + " " + word
        if len(grown) > print_width:
            print(grown[:len(grown) - len(word)])
            grown = "  " + word
        line = grown
    print(line)





#--------------------------------------------
# 6330483921 (17.60) 313 (2021-03-01 23:46)

def get_unique(words):
    """Return the items of words with later repeats removed."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return repeats / (combined length - repeats) for the two word lists.

    repeats is the number of adjacent equal pairs in the sorted
    concatenation of both lists.
    """
    merged = sorted(words_1 + words_2)
    repeats = sum(1 for left, right in zip(merged, merged[1:]) if left == right)
    return repeats / (len(merged) - repeats)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Only tweets with a positive jaccard() coefficient are reported, with
    the coefficient exactly as jaccard() returned it. Ordering is by
    descending coefficient, ties by ascending tweet id. The result is empty
    when nothing matches and shorter than n when fewer tweets match.
    """
    matches = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    # Ties are broken with a compound sort key, so the coefficients
    # themselves are never adjusted.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the header and wrapped text of a tweet with a visible score.

    Nothing is printed unless the coefficient, rounded to two decimals, is
    positive. Output starts with a one-space line and the '#id (coef)'
    header; text lines start with two spaces and every word is followed by
    one space.
    """
    tokens = tweet_content.split(' ')
    if round(jc_coef, 2) > 0:
        print(' ')
        print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
        rows = []
        current = '  '
        for token in tokens:
            if len(current) + len(token) <= print_width:
                current += token + ' '
            else:
                rows.append(current)
                current = '  ' + token + ' '
        rows.append(current)
        for row in rows:
            print(row)
#--------------------------------------------
# 6330485121 (20.00) 314 (2021-02-28 22:28)

def get_unique(words):
    """Return the distinct items of words as a new list in sorted order.

    Sorting is done on a copy, so the caller's list is not reordered.
    """
    unique_words = []
    for word in sorted(words):
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the two word lists.

    The numerator counts items of words_1 that occur in words_2; the
    denominator is the number of distinct items in both lists together.
    Returns 0.0 when both lists are empty.
    """
    merged = sorted(words_1 + words_2)
    if not merged:
        return 0.0  # no words at all: nothing to compare
    # In sorted order each distinct word starts a new run, so the number of
    # distinct words is one plus the number of changes between neighbours.
    # This stays correct when every word is the same.
    distinct = 1 + sum(1 for prev, cur in zip(merged, merged[1:]) if prev != cur)
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped; ordering
    is by descending coefficient, then ascending tweet id.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append((coef, -tweet_id))
    keyed.sort(reverse=True)
    return [[-neg_id, coef] for coef, neg_id in keyed][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Text lines are indented by two spaces; a word that would make the line
    longer than print_width starts a new line.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = '  ' + word
        else:
            line += ' ' + word
    print(line)
#--------------------------------------------
# 6330486821 (20.00) 315 (2021-02-28 22:16)

def get_unique(words):
    """Return the distinct items of words in first-seen order."""
    result = []
    for item in words:
        if result.count(item) == 0:
            result.append(item)
    return result
#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient computed from get_unique() counts.

    The overlap is the number of distinct items in each list added together
    minus the number of distinct items in both lists combined.
    """
    distinct_1 = get_unique(words_1)
    distinct_2 = get_unique(words_2)
    union = get_unique(words_1 + words_2)
    overlap = len(distinct_1) + len(distinct_2) - len(union)
    return overlap / len(union)
#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped; ordering
    is by descending coefficient, then ascending tweet id.
    """
    matches = []
    for tweet_id in range(len(norm_tweets)):
        score = jaccard(norm_tweets[tweet_id], norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda match: (-match[1], match[0]))
    return matches[:n]
#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Text lines are indented by two spaces; a word that would make the line
    longer than print_width starts a new line.
    """
    words = tweet_content.split(' ')
    print(f'\n#{tweet_id} ({round(jc_coef,2)})')
    line = ' '
    for word in words:
        extended = line + ' ' + word
        overflow = len(extended) > print_width
        if overflow:
            print(line)
        line = '  ' + word if overflow else extended
    print(line)
#--------------------------------------------
# 6330487421 (20.00) 316 (2021-03-01 22:49)

def get_unique(words):
    """Return each distinct item of words once, in first-seen order."""
    kept = []
    for item in words:
        if item in kept:
            continue
        kept.append(item)
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    shared counts the items of words_1 that also appear in words_2.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    union_size = len(words_1) + len(words_2) - shared
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the n best [tweet_id, coefficient] pairs for the query.

    Tweets with a non-positive jaccard() coefficient are skipped; ordering
    is by descending coefficient, then ascending tweet id.
    """
    scored = (
        (tweet_id, jaccard(tweet_words, norm_query))
        for tweet_id, tweet_words in enumerate(norm_tweets)
    )
    ranked = sorted([-coef, tweet_id] for tweet_id, coef in scored if coef > 0)
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Text lines end up indented by two spaces (one printed here plus the
    separator kept before the first word); a word that would make the line
    longer than print_width starts a new line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        fits = len(line) + 1 + len(word) <= print_width - 1
        if not fits:
            print(' ' + line)
        line = (line if fits else '') + ' ' + word
    if line:
        print(' ' + line)




#--------------------------------------------
# 6330488021 (18.01) 317 (2021-02-26 00:09)

def get_unique(words):
    """Return the distinct items of words in order of first appearance."""
    seen = []
    for word in words:
        if word not in seen:
            seen.append(word)
    return seen
def jaccard(w1, w2):
    """Return (items of w1 found in w2) / (distinct items of w1 and w2).

    The distinct items come from get_unique() on the concatenated lists.
    """
    union = get_unique(w1 + w2)
    shared = [word for word in w1 if word in w2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Tweets whose jaccard() coefficient is not positive are excluded.
    Ordering is by descending coefficient, ties by ascending tweet id.
    """
    keyed = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            keyed.append([-coef, tweet_id])
    keyed.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a '#id (coef)' header and the wrapped tweet text.

    Text lines are indented by two spaces; a word that would make the line
    longer than print_width starts a new line.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = '  ' + word
            continue
        line = line + ' ' + word
    print(line)



#--------------------------------------------
# 6330489721 (18.50) 318 (2021-03-01 15:59)

def get_unique(words):
    """Return the items of words without repeats, first occurrences first."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of the two word lists.

    The numerator counts items of words_1 that occur in words_2; the
    denominator is the number of distinct items in both lists together.
    Returns 0.0 when both lists are empty.
    """
    union = []
    for word in sorted(words_1 + words_2):
        # Equal words are adjacent after sorting; keep the first of each run.
        # This also counts correctly when only one distinct word exists.
        if not union or union[-1] != word:
            union.append(word)
    if not union:
        return 0.0  # both lists empty: nothing to compare
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to n [tweet_id, coefficient] pairs most similar to the query.

    Only tweets with a positive jaccard() coefficient are reported, ordered
    by descending coefficient and, for equal coefficients, ascending id.
    The result is shorter than n when fewer tweets match.
    """
    ranked = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            ranked.append([-coef, tweet_id])
    ranked.sort()
    # A slice copes with fewer than n matches; indexing n times would not.
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, a '#id (coef)' header and the wrapped tweet.

    Words are written as they are processed, each followed by a space, and
    every line starts with a two-space indent. A word that does not fit in
    the remaining width starts a new line; a word longer than print_width
    is printed on a line of its own.
    """
    print(' ')
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    used = 0
    at_line_start = True
    for word in tweet_content.split(" "):
        if used + len(word) > print_width:
            # Close the current line before placing this word.
            print()
            used = 0
            at_line_start = True
            if len(word) > print_width:
                print("  " + word)
                continue
        if at_line_start:
            print("  ", end="")
            used += 2
        print(word, end=" ")
        used += len(word) + 1
        at_line_start = False
    print()


#--------------------------------------------
# 6330491921 (19.51) 319 (2021-03-01 22:26)

def get_unique(words):
    """Return the distinct items of words, first occurrence first."""
    distinct = []
    for item in words:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    The distinct items come from get_unique() on the concatenated lists.
    Returns 0 when there are no items at all.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union = get_unique(words_1 + words_2)
    if len(union) == 0:
        return 0
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs, best score first.

    norm_tweets: sequence of word lists; a tweet's id is its index.
    norm_query: word list each tweet is compared against with `jaccard`.
    n: maximum number of pairs to return.

    Tweets scoring 0 are never returned (previously they were dropped only
    when the best score was 0), and an empty `norm_tweets` now yields []
    instead of raising IndexError. Ties keep ascending tweet id because the
    sort is stable.
    """
    scored = []
    for index, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([index, score])
    # Stable descending sort replaces the hand-written bubble sort.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Each content line starts with two spaces and every word is followed by a
    single space. A new line is started when the running count of
    `len(word) + 1` would reach `print_width` (the indent is not counted).
    """
    print("")
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    pieces = ["  "]
    used = 0
    for token in tweet_content.split(" "):
        cost = len(token) + 1
        if used + cost < print_width:
            used += cost
        else:
            # Break the line and restart the count with this word.
            pieces.append("\n  ")
            used = cost
        pieces.append(token + " ")
    print("".join(pieces))






#--------------------------------------------
# 6330492521 (18.01) 320 (2021-03-01 21:04)

def get_unique(words):
  """Return a new list with the first occurrence of every item in `words`."""
  distinct = []
  for word in words:
    if word not in distinct:
      distinct.append(word)
  return distinct
def jaccard(words_1, words_2):
  """Return (distinct shared words) / (size of get_unique(words_1 + words_2)).

  Raises ZeroDivisionError when the union is empty.
  """
  common = []
  for word in words_1:
    # One pass is enough: every shared word already shows up in words_1.
    if word in words_2 and word not in common:
      common.append(word)
  union = get_unique(words_1 + words_2)
  return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
  """Return the first `n` [tweet_id, score] pairs ordered by score, highest first.

  Scores come from `jaccard(tweet, norm_query)`; equal scores keep
  ascending tweet id because the sort is stable.
  """
  scored = [[tweet_id, jaccard(tweet, norm_query)]
            for tweet_id, tweet in enumerate(norm_tweets)]
  ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
  return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
  """Print a blank line, the '#id (score)' header and the wrapped content.

  Content lines are indented by two spaces; a word is moved to a new line
  when appending it would make the line longer than `print_width`.
  """
  print(f'\n#{tweet_id} ({round(jc_coef, 2)})')
  lines = []
  current = ' '
  for word in tweet_content.split(' '):
    candidate = current + ' ' + word
    if len(candidate) > print_width:
      lines.append(current)
      current = '  ' + word
    else:
      current = candidate
  lines.append(current)
  print('\n'.join(lines))

#--------------------------------------------
# 6330494821 (18.01) 321 (2021-02-27 23:17)

def get_unique(words):
    """Return the items of `words` without repeats, in first-seen order."""
    kept = []
    for word in words:
        if word in kept:
            continue
        kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` counts the items of words_1 that occur in words_2.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs, best score first.

    Scores come from `jaccard(tweet, norm_query)`; equal scores are ordered
    by ascending tweet id.

    Fixes over the manual tie-grouping version: the last tie group is no
    longer appended as one nested list, a single tweet is no longer lost,
    and an empty `norm_tweets` returns [] instead of raising IndexError.
    """
    scored = [[tweet_id, jaccard(tweet, norm_query)]
              for tweet_id, tweet in enumerate(norm_tweets)]
    # One composite key gives "score descending, id ascending" directly.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces and are broken before a word
    that would push the line past `print_width`.
    """
    print('')
    print(f'#{tweet_id} ({round(jc_coef, 2)})')
    line = ' '
    for word in tweet_content.split(' '):
        extended = f'{line} {word}'
        if len(extended) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = extended
    print(line)
#--------------------------------------------
# 6330495421 (17.95) 322 (2021-03-01 20:27)

def get_unique(words):
    """Return a new list holding each item of `words` once, in first-seen order."""
    distinct = []
    for candidate in words:
        distinct += [] if candidate in distinct else [candidate]
    return distinct
def jaccard(words_1, words_2):
    """Return matches / (len(words_1) + len(words_2) - matches).

    `matches` counts the items of words_1 that occur in words_2.
    Raises ZeroDivisionError when both lists are empty.
    """
    matches = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2)
    return matches / (total - matches)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, score] pairs.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id. Zero scores are kept.
    """
    scored = [[tweet_id, jaccard(tweet, norm_query)]
              for tweet_id, tweet in enumerate(norm_tweets)]
    ranked = sorted(scored, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content is split on whitespace; each line starts with two spaces and
    every word is followed by one space. A line is closed when the next
    word would make it longer than `print_width`.
    """
    header = f"#{tweet_id} ({round(jc_coef, 2)})"
    rows = []
    row = "  "
    for token in tweet_content.split():
        if len(row) + len(token) > print_width:
            rows.append(row)
            row = "  "
        row += token + " "
    rows.append(row)
    print("")
    print("\n".join([header] + rows))




#--------------------------------------------
# 6330496021 (14.98) 323 (2021-03-01 23:59)

def get_unique(words):
    """Return the distinct items of `words`, keeping their first-seen order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return common / (len(words_1) + len(words_2) - common).

    `common` counts the items of words_1 that occur in words_2.
    Raises ZeroDivisionError when both lists are empty.
    """
    common = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - common
    return common / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.

    Fix: the cut-off now honours `n`; it used to be hard-coded to 4.
    """
    scored = [[tweet_id, jaccard(tweet, norm_query)]
              for tweet_id, tweet in enumerate(norm_tweets)]
    ranked = sorted(scored, key=lambda pair: (-pair[1], pair[0]))[:n]
    # Zero scores sort last, so filtering after the cut gives the same result.
    return [pair for pair in ranked if pair[1] > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the '#id (score)' header followed by the wrapped content.

    Content lines are indented by two spaces and never exceed `print_width`
    unless a single word is too long on its own.

    Fixes: wrapping now uses `print_width` instead of the constant 20, and
    the line is no longer trimmed with `a[:-len(word)]`, which erased the
    whole line for an empty word and left a trailing space otherwise.
    """
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        candidate = line + ' ' + word
        if len(candidate) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = candidate
    print(line)




#--------------------------------------------
# 6330497721 (20.00) 324 (2021-02-25 20:15)

def get_unique(words):
    """Return a new list with each item of `words` once, in first-seen order."""
    unique_words = []
    for word in words:
        # The first item needs no special case: the list is empty then.
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    Raises ZeroDivisionError when both lists are empty.
    """
    numerator = sum(1 for word in words_1 if word in words_2)
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    return numerator / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Each output line is two spaces followed by as many words as fit within
    `print_width`; at least one word is placed on every line.
    """
    print()
    print(f'#{tweet_id} ({round(jc_coef, 2)})')
    words = tweet_content.split(' ')
    total = len(words)
    position = 0
    while True:
        line = ''
        while len(line) + 2 < print_width and position < total:
            line += ' ' + words[position]
            position += 1
            # Stop early when the next word would overflow the line.
            if position < total and len(line) + 2 + len(words[position]) > print_width:
                break
        print(' ' + line)
        if position == total:
            break
#--------------------------------------------
# 6330498321 (20.00) 325 (2021-02-28 22:17)

def get_unique(words):
  """Return the distinct items of `words` in first-seen order."""
  kept = []
  for word in words:
    if word in kept:
      continue
    kept.append(word)
  return kept
def jaccard(words_1, words_2):
  """Return (items of words_1 found in words_2) / len(get_unique(words_1 + words_2)).

  Raises ZeroDivisionError when the union is empty.
  """
  shared = len([word for word in words_1 if word in words_2])
  union_size = len(get_unique(words_1 + words_2))
  return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
  """Return up to `n` [tweet_id, score] pairs with a positive score.

  Pairs are ordered by `jaccard` score (highest first), then by ascending
  tweet id.

  `jaccard` is now evaluated once per tweet instead of twice, and the
  sort/reverse/sort/reverse sequence is a single keyed sort with the same
  resulting order.
  """
  matches = []
  for tweet_id, tweet in enumerate(norm_tweets):
    score = jaccard(tweet, norm_query)
    if score > 0:
      matches.append([tweet_id, score])
  matches.sort(key=lambda pair: (-pair[1], pair[0]))
  return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
  """Print the '#id (score)' header, the wrapped content and a blank line.

  Content lines start with two spaces and each word is followed by one
  space; a line is closed when the next word would exceed `print_width`.
  """
  print(f'#{tweet_id} ({round(jc_coef, 2)})')
  row = '  '
  for token in tweet_content.split(' '):
    if len(row) + len(token) > print_width:
      print(row)
      row = '  '
    row += token + ' '
  # The extra newline produces the blank line after the tweet.
  print(row + '\n')
#--------------------------------------------
# 6330499021 (19.45) 326 (2021-03-01 11:12)

def get_unique(words):
    """Return the distinct items of `words`, first occurrence first."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union = []
    for word in words_2 + words_1:
        if word in union:
            continue
        union.append(word)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score, highest first; equal scores keep
    ascending tweet id because the sort is stable.

    Fixes: `n == len(norm_tweets)` used to hit neither branch and raise
    UnboundLocalError; the `n > len(norm_tweets)` branch kept zero scores
    while the other dropped them; and the list was re-sorted on every loop
    iteration. A single code path now handles every `n`.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            scored.append([tweet_id, score])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content is split on whitespace. Lines are indented by two spaces and a
    break is inserted before a word that would push the line (indent
    included) past `print_width`.
    """
    print(f'\n#{tweet_id} ({round(jc_coef, 2)})')

    chunks = []
    used = 0  # characters on the current line, indent excluded
    for token in tweet_content.split():
        # 2 for the indent, plus 1 for the separating space when needed.
        needed = used + len(token) + (3 if used else 2)
        if needed > print_width:
            chunks.append('\n  ')
            used = 0
        if used:
            chunks.append(' ')
            used += 1
        chunks.append(token)
        used += len(token)
    print('  ' + ''.join(chunks))








#--------------------------------------------
# 6330500921 (20.00) 327 (2021-03-01 23:10)

def get_unique(words):
    """Return a new list with each item of `words` once, in first-seen order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` counts the items of words_1 that occur in words_2.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(word in words_2 for word in words_1)
    union_size = len(words_1) + (len(words_2) - shared)
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a non-zero score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.

    The trailing clean-up loop was removed: it called `remove` on the list
    it was iterating, and its conditions could never hold because zero
    scores are already excluded before sorting.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces; a word starts a new line when
    appending it would make the line longer than `print_width`.

    Fix: the final line is flushed once after the loop. The old code
    detected "last word" by comparing values (`x[i] == x[-1]`), so an
    earlier occurrence of the last word printed a partial line and added
    that word twice.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + len(word) <= print_width - 1:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)





#--------------------------------------------
# 6330501521 (20.00) 328 (2021-02-27 10:44)

def get_unique(words):
    """Return the distinct items of `words` in first-seen order."""
    distinct = []
    for candidate in words:
        distinct += [] if candidate in distinct else [candidate]
    return distinct
def jaccard(words_1, words_2):
    """Return overlap / union size for two word lists.

    The union size is len(words_1) plus the items of words_2 missing from
    words_1; the overlap is len(words_1) + len(words_2) minus that size.
    Raises ZeroDivisionError when both lists are empty.
    """
    extras = sum(1 for word in words_2 if word not in words_1)
    union_size = len(words_1) + extras
    overlap = len(words_1) + len(words_2) - union_size
    return overlap / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a non-zero score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            ranked.append([tweet_id, score])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces. A line that is closed because
    the next word does not fit keeps one trailing space; the final line has
    none.
    """
    words = tweet_content.split(' ')
    print()
    print(f'#{tweet_id} ({round(jc_coef, 2)})')
    line = ' '
    for word in words:
        if len(line) + 1 + len(word) > print_width:
            # Keep the separator space that preceded the overflowing word.
            print(line + ' ')
            line = '  ' + word
        else:
            line += ' ' + word
    print(line)

#--------------------------------------------
# 6330502121 (16.12) 329 (2021-03-01 00:21)

def get_unique(words):
    """Return the distinct items of `words` as a new list.

    For a repeated item the last occurrence is the one kept, which is the
    order the previous implementation produced.

    Fix: the caller's list is no longer modified. The old version removed
    duplicates from `words` in place and returned that same list.
    """
    # An item survives only when it does not appear again later on.
    return [word for index, word in enumerate(words)
            if word not in words[index + 1:]]
def jaccard(words_1, words_2):
    """Return (equal pairs across the two lists) / len(get_unique(words_1 + words_2)).

    Every (item of words_1, item of words_2) pair that compares equal is
    counted. Raises ZeroDivisionError when the union is empty.
    """
    matches = sum(1 for left in words_1 for right in words_2 if left == right)
    union_size = len(get_unique(words_1 + words_2))
    return matches / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, score] pairs.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id. Zero scores are kept.
    """
    pairs = [[tweet_id, jaccard(tweet, norm_query)]
             for tweet_id, tweet in enumerate(norm_tweets)]
    # Descending on (score, -id) means highest score first, lowest id first.
    ranked = sorted(pairs, key=lambda pair: (pair[1], -pair[0]), reverse=True)
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces and hold as many words as fit
    within `print_width`.

    Fix: a word that is longer than the available width is now printed on a
    line of its own. Previously such a word was pushed back over and over,
    so the function printed ' ' forever.
    """
    print('\n#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = None
    for word in tweet_content.split(' '):
        if line is None:
            # The first word always opens the first line.
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    # split(' ') always yields at least one item, so `line` is set here.
    print(line)
#--------------------------------------------
# 6330503821 (3.94) 330 (2021-03-01 18:36)

def get_unique(words):
    """Return the distinct items of `words` as a set (order is not kept)."""
    return {word for word in words}
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word collections.

    Fix: both arguments are converted to sets first, so lists (or any other
    iterable of hashable words) work too. The previous version called
    `.intersection` on `words_1` and raised AttributeError for a list.
    Raises ZeroDivisionError when both collections are empty.
    """
    first, second = set(words_1), set(words_2)
    return len(first & second) / len(first | second)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content is split on whitespace; each line starts with two spaces and
    every word is followed by one space.

    Fix: the first word of each line is placed unconditionally. Before, a
    word longer than `print_width - 2` never fit, `index` never advanced,
    and the loop printed empty lines forever.
    """
    print(f"\n#{tweet_id} ({round(jc_coef,2)})")
    words = tweet_content.split()
    index = 0
    while index < len(words):
        # Always consume one word so the loop is guaranteed to progress.
        line = "  " + words[index] + " "
        index += 1
        while index < len(words) and len(line + words[index]) <= print_width:
            line += words[index] + " "
            index += 1
        print(line)


# --------------------------------------------
# 6330504421 (20.00) 331 (2021-03-01 01:21)

def get_unique(words):
    """Return the distinct items of `words` in ascending sorted order.

    Fix: the input is sorted into a copy. The previous `words.sort()`
    reordered the caller's list as a side effect.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting, so compare with the last kept.
        if not unique_words or word != unique_words[-1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / union size for two word lists, or 0 when both are empty.

    The longer list seeds the union (words_2 on a tie); each item of the
    other list either counts as shared or enlarges the union.
    """
    if len(words_1) > len(words_2):
        longer, shorter = words_1, words_2
    else:
        longer, shorter = words_2, words_1
    shared = 0
    union_size = len(longer)
    for word in shorter:
        if word in longer:
            shared += 1
        else:
            union_size += 1
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score, highest first; equal scores keep
    ascending tweet id because the sort is stable.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    ranked = sorted(matches, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces; a word starts a new line when
    appending it would make the line longer than `print_width`.
    """
    print('')
    print(f'#{tweet_id} ({round(jc_coef, 2)})')
    finished = []
    line = ' '
    for word in tweet_content.split(' '):
        candidate = line + ' ' + word
        if len(candidate) <= print_width:
            line = candidate
        else:
            finished.append(line)
            line = '  ' + word
    finished.append(line)
    print('\n'.join(finished))



#--------------------------------------------
# 6330505021 (15.75) 332 (2021-03-01 19:26)

def get_unique(words):
    """Return a new list with each item of `words` once, in first-seen order."""
    distinct = []
    for candidate in words:
        distinct += [] if candidate in distinct else [candidate]
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` counts the items of words_1 that occur in words_2.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = len([word for word in words_1 if word in words_2])
    combined = len(words_1) + len(words_2)
    return shared / (combined - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.

    Fix: the old guard `[0.0] not in top_n[:n][1]` indexed the second pair,
    so it raised IndexError whenever fewer than two pairs were available
    (for example n == 1), and it never matched a zero score because it
    looked for the list [0.0] inside a pair. Zero scores are now filtered
    explicitly.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are two spaces plus the words joined by single spaces. A
    line is closed when the running total of `len(word) + 1` would exceed
    `print_width - 2`.

    Fix: the last line is flushed once after the loop. The old code
    detected "last word" by value, so an earlier occurrence of that word
    flushed the line too early and left the width counter out of step.
    """
    print()
    print("#" + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    used = 0
    current = []
    for word in tweet_content.split(" "):
        used += len(word) + 1
        if used > print_width - 2:
            print("  " + ' '.join(current))
            current = []
            used = len(word) + 1
        current.append(word)
    print("  " + ' '.join(current))


#--------------------------------------------
# 6330507321 (19.95) 333 (2021-02-28 16:27)

def get_unique(words):
    """Return the distinct items of `words` in first-seen order."""
    kept = []
    for word in words:
        if word in kept:
            continue
        kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    `shared` counts the items of words_1 that occur in words_2.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by score (highest first), then by ascending tweet id.
    The score is `jaccard(norm_query, tweet)`, with the query passed first
    as before.

    `jaccard` is now evaluated once per tweet instead of twice, and the
    parallel score/id lists are replaced by one list of pairs.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(norm_query, tweet)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content.

    Content is split on whitespace. Each line is two spaces plus the words
    joined by single spaces; a line is flushed as soon as the following
    word would bring the running width up to `print_width`.
    Raises IndexError when the content holds no words.
    """
    words = tweet_content.split()
    print(' ')
    print(f'#{tweet_id} ({round(jc_coef, 2)})')

    used = 0
    pending = []
    # Pair every word with its successor; the last word is handled below.
    for word, following in zip(words, words[1:]):
        used += len(word) + 1
        pending.append(word)
        if used + len(following) + 1 >= print_width:
            print(' ', ' '.join(pending))
            used = 0
            pending = []

    pending.append(words[-1])
    print(' ', ' '.join(pending))





#--------------------------------------------
# 6330508021 (17.70) 334 (2021-03-01 09:52)

def get_unique(words):
    """Return the distinct items of `words` in ascending sorted order.

    Fix: an empty input now returns [] — the old code fell off the end of
    the function and returned None. The input list is not modified.
    """
    ordered = sorted(words)
    unique_words = ordered[:1]
    # Equal items are adjacent after sorting; keep one per run.
    for previous, current in zip(ordered, ordered[1:]):
        if current != previous:
            unique_words.append(current)
    return unique_words
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / len(get_unique(all words)).

    When `words_1` is None the shared count is 0 and only `words_2` feeds
    the union.
    """
    if words_1 is None:
        shared = 0
        combined = words_2
    else:
        shared = len([word for word in words_1 if word in words_2])
        combined = words_1 + words_2
    return shared / len(get_unique(combined))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.

    Fixes: scores are no longer rounded to 2 decimals before ranking, which
    turned close scores into artificial ties and returned truncated values;
    `jaccard` is evaluated once per tweet; and the local no longer shadows
    the builtin `list`.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces; a word starts a new line when
    appending it would make the line longer than `print_width`.

    Fixes: content made of a single word is now printed (the old loop
    printed nothing); the first two words overflowing no longer raises
    UnboundLocalError; and a stale copy of an earlier line is no longer
    printed when two consecutive words each need their own line.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)

#--------------------------------------------
# 6330509621 (18.82) 335 (2021-03-01 02:26)

def get_unique(words):
    """Return a new list with each item of `words` once, in first-seen order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return |shared| / |union| after passing both lists through get_unique.

    Raises ZeroDivisionError when the union is empty.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = 0
    for word in first:
        if word in second:
            shared += 1
    union = get_unique(first + second)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    ranked = sorted(matches, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content.

    Each content line is two spaces plus the words joined by single spaces,
    and is broken before a word that would make it longer than
    `print_width`.

    Fix: every line is measured the same way. The old counter included a
    trailing space for the first line only, so that line wrapped one
    column earlier than the others.
    """
    print(' ')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    row = []
    for word in tweet_content.split(' '):
        candidate = row + [word]
        # Measure the line exactly as it would be printed.
        if len('  ' + ' '.join(candidate)) > print_width:
            print('  ' + ' '.join(row))
            row = [word]
        else:
            row = candidate
    print('  ' + ' '.join(row))


#--------------------------------------------
# 6330510121 (20.00) 336 (2021-02-27 10:18)

def get_unique(words):
    """Return the distinct items of `words` in first-seen order."""
    distinct = []
    for candidate in words:
        distinct += [] if candidate in distinct else [candidate]
    return distinct
def jaccard(words_1, words_2):
    """Return hits / (misses + len(words_1)).

    `hits` are the items of words_2 found in words_1, `misses` the rest of
    words_2. Raises ZeroDivisionError when both lists are empty.
    """
    hits = sum(1 for word in words_2 if word in words_1)
    misses = len(words_2) - hits
    return hits / (misses + len(words_1))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score, highest first; equal scores keep
    ascending tweet id because the sort is stable.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    ranked = sorted(matches, key=lambda pair: -pair[1])
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)  ' header and the wrapped content.

    The header keeps its two trailing spaces. Content lines are indented by
    two spaces; a word starts a new line when appending it would make the
    line longer than `print_width`.

    Fix: the final line is flushed once after the loop. The old code
    detected "last word" by value (`i == x[-1]`), so an earlier occurrence
    of that word printed a partial line and added the word twice.
    """
    print('\n#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')  ')
    line = ' '
    for word in tweet_content.split(' '):
        if len(line) + len(word) + 1 <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330511821 (18.44) 337 (2021-02-27 23:46)

def get_unique(words):
    """Return a new list with each item of `words` once, in first-seen order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (distinct items across both lists).

    `shared` counts the items of the shorter list found in the other one
    (words_2 is treated as the shorter on a tie).
    Raises ZeroDivisionError when both lists are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word in union:
            continue
        union.append(word)

    if len(words_1) < len(words_2):
        smaller, larger = words_1, words_2
    else:
        smaller, larger = words_2, words_1
    shared = sum(1 for word in smaller if word in larger)

    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs with a positive score.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id.

    Fix: the result is taken with a slice. The old `for i in range(n)`
    indexing raised IndexError whenever fewer than `n` tweets had a
    positive score.
    """
    scored = sorted([-jaccard(tweet, norm_query), tweet_id]
                    for tweet_id, tweet in enumerate(norm_tweets))
    positive = [[tweet_id, -negated] for negated, tweet_id in scored
                if -negated > 0]
    return positive[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content.

    Content is split on whitespace. Lines start with two spaces and every
    word placed inline is followed by one space. A word at least
    `print_width - 2` long is printed on its own, already terminated, line.
    """
    words = tweet_content.split()
    used = 0  # characters on the current line, indent excluded
    print(' ')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    for word in words:
        if used and len(word) <= print_width - 2 - used:
            # Fits on the line that is already open.
            print(word + ' ', end='')
            used += 1 + len(word)
            continue
        if used:
            # Close the open line before starting a new one.
            print()
            used = 0
        if len(word) >= print_width - 2:
            print(' ' * 2 + word)
        else:
            print(' ' * 2 + word + ' ', end='')
            used += 1 + len(word)
    print()



#--------------------------------------------
# 6330512421 (16.00) 338 (2021-03-01 22:37)

def get_unique(words):
    """Return the distinct items of `words` in first-seen order."""
    kept = []
    for word in words:
        if word in kept:
            continue
        kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    Fix: the loop iterated over the undefined name `word_1`, so every call
    raised NameError; it now iterates over `words_1`.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to `n` [tweet_id, score] pairs, stopping at the first zero score.

    The score is computed inline as (words of the tweet found in the query)
    / (distinct words of tweet + query). Pairs are ordered by score
    (highest first), then by ascending tweet id.

    The list is now sorted once after all scores are known instead of on
    every loop iteration, and the no-op `[]*n` initialiser is gone. The
    returned value is unchanged.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        shared = sum(1 for word in tweet if word in norm_query)
        union = []
        for word in tweet + norm_query:
            if word not in union:
                union.append(word)
        scored.append([tweet_id, shared / len(union)])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:n]
    # Scores are descending, so everything from the first zero on is dropped.
    for position, pair in enumerate(top_n):
        if pair[1] == 0:
            return top_n[:position]
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the '#id (score)' header and the wrapped content.

    Content lines are indented by two spaces; a word starts a new line when
    appending it would make the line longer than `print_width`.

    Fix: the final line is flushed once after the loop. The old code
    detected "last word" by value (`m[-1] == m[i]`), so an earlier
    occurrence of that word printed the unfinished line, which was then
    printed again later.
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = ' '
    for word in tweet_content.split(' '):
        if len(word) + len(line) + 1 > print_width:
            print(line)
            line = '  ' + word
        else:
            line += ' ' + word
    print(line)

#--------------------------------------------
# 6330513021 (18.01) 339 (2021-02-26 23:00)

def get_unique(words):
    """Return a new list with each item of `words` once, in first-seen order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return (union items present in both lists) / (union size).

    The union is get_unique(words_1 + words_2).
    Raises ZeroDivisionError when it is empty.
    """
    union = get_unique(words_1 + words_2)
    common = sum(1 for word in union if word in words_1 and word in words_2)
    return common / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the first `n` [tweet_id, score] pairs.

    Pairs are ordered by `jaccard` score (highest first), then by ascending
    tweet id. Zero scores are kept.

    The list is now sorted and sliced once, after all scores are collected,
    instead of on every loop iteration. The result is unchanged.
    """
    scored = sorted([-jaccard(tweet, norm_query), tweet_id]
                    for tweet_id, tweet in enumerate(norm_tweets))
    # Scores were negated for the ascending sort; abs() restores them.
    return [[tweet_id, abs(negated)] for negated, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a one-space line, the '#id (score)' header and the wrapped content.

    The content is first cut into lines of at most `print_width - 2`
    characters (the indent takes the other two), then each line is printed
    with a two-space indent.
    """
    budget = print_width - 2
    words = tweet_content.split(' ')
    last = len(words) - 1

    lines = []
    current = ''
    for position, word in enumerate(words):
        if len(current) + len(word) > budget:
            # Drop the separator space that trails the finished line.
            lines.append(current[:-1])
            current = word
        else:
            current += word
        if position == last:
            lines.append(current)
        else:
            current += ' '

    print(' ')
    print(f'#{tweet_id} ({round(jc_coef, 2)})')
    for line in lines:
        print('  ' + line)


#--------------------------------------------
# 6330514721 (20.00) 340 (2021-03-01 00:18)

def get_unique(words):
    """Return the distinct items of `words` in first-seen order."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return overlap / union size for two word lists.

    The union size is len(get_unique(words_1 + words_2)); the overlap is
    the combined length of both lists minus that size.
    Raises ZeroDivisionError when the union is empty.
    """
    union_size = len(get_unique(words_1 + words_2))
    overlap = len(words_1) + len(words_2) - union_size
    return overlap / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            # Negating the id lets one descending sort break ties by lowest id.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    ranked = [[-neg_id, coef] for coef, neg_id in scored]
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped text.

    Text lines start with two spaces and are broken before the word that
    would push the line past ``print_width``.
    """
    print("")
    print(f"#{tweet_id} ({round(jc_coef, 2)})")
    line = " "
    for word in tweet_content.split(" "):
        candidate = line + " " + word
        if len(candidate) > print_width:
            print(line)
            line = "  " + word
        else:
            line = candidate
    print(line)
#--------------------------------------------
# 6330515321 (18.33) 341 (2021-03-01 22:51)

def get_unique( words ):
    """Return the distinct words ordered by length, then alphabetically.

    The result is the same as before, but the list is sorted once after
    de-duplication instead of on every loop iteration.
    """
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    distinct.sort(key=lambda word: (len(word), word))
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Replaces the original bookkeeping, which re-sorted a scratch list for
    every repeated word without using the result, counted duplicates inside
    one list as shared words, and divided by zero for two empty lists.
    Two empty inputs now give 0.0.
    """
    left = set(words_1)
    right = set(words_2)
    shared = sum(1 for word in left if word in right)
    distinct = len(left) + len(right) - shared
    if distinct == 0:
        return 0.0
    return shared / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_id, coefficient]`` pairs for the best-matching tweets.

    The ``n`` highest-ranked tweets (descending coefficient, ascending id)
    are taken first; entries whose coefficient is not positive are then
    dropped.
    """
    scored = [
        (-jaccard(norm_tweets[idx], norm_query), idx)
        for idx in range(len(norm_tweets))
    ]
    scored.sort()
    top_n = []
    for neg_coef, idx in scored[:n]:
        if neg_coef < 0:
            top_n.append([idx, -neg_coef])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and hold as many words as fit
    within ``print_width`` (indent included).

    Fixes over the original:
    * a word wider than the line no longer causes an endless loop; it is
      printed on a line of its own;
    * wrapped lines no longer end with a trailing space.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = []
    for word in tweet_content.split(" "):
        # Only wrap when the line already has content, so an over-long
        # word is still emitted instead of being retried forever.
        if line and len("  " + " ".join(line + [word])) > print_width:
            print("  " + " ".join(line))
            line = []
        line.append(word)
    if line:
        print("  " + " ".join(line))
# 6330516021 (16.94) 342 (2021-02-26 23:00)

def get_unique( words ):
    """Return the words of ``words`` without repeats, in original order."""
    kept = []
    for candidate in words:
        if candidate not in kept:
            kept += [candidate]
    return kept
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original left its result unassigned when both lists were empty and
    so raised UnboundLocalError; that case now returns 0.0. The duplicated
    intersection pass and the repeated de-duplication are gone as well.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    everything = set_1 | set_2
    if len(everything) > 0:
        return len(set_1 & set_2) / len(everything)
    return 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are ordered by descending coefficient, then ascending tweet id.

    The original indexed the first ``n`` ranked entries unconditionally and
    raised IndexError when fewer than ``n`` tweets matched; all matches are
    returned in that case now.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            # Negated id: a descending sort then prefers the lower id on ties.
            scored.append([coef, -tweet_id])
    scored.sort(reverse=True)
    # max(n, 0) keeps the original "nothing for n <= 0" behaviour.
    return [[-neg_id, coef] for coef, neg_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    The text is split on any whitespace and re-flowed into lines that start
    with two spaces and are at most ``print_width`` characters long.

    Fixes over the original: lines no longer end with a trailing space, and
    a first word wider than the line no longer produces an empty indented
    line before it.
    """
    words = tweet_content.split()
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    room = print_width - 2
    line = []
    for word in words:
        if line and len(' '.join(line)) + 1 + len(word) > room:
            print('  ' + ' '.join(line))
            line = []
        line.append(word)
    print('  ' + ' '.join(line))


#--------------------------------------------
# 6330517621 (19.68) 343 (2021-03-01 01:18)

def get_unique( words ):
    """Return a new list holding each element of ``words`` once, in order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words

#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return shared-word count over distinct-word count, or 0.0 if empty.

    The numerator counts every element of ``words_1`` that occurs in
    ``words_2``; the denominator is the number of distinct words across
    both lists.
    """
    overlap = sum(1 for word in words_1 if word in words_2)
    merged = []
    for source in (words_1, words_2):
        for word in source:
            if word not in merged:
                merged.append(word)
    if not merged:
        return 0.0
    return overlap / len(merged)

#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the top-``n`` ``[tweet_id, coefficient]`` pairs, zeros removed.

    Tweets are ranked by descending coefficient, then ascending id; the
    first ``n`` are taken and entries with a coefficient of 0 are dropped.

    The original removed zero entries from the list while iterating over
    it, which skips elements and left some zero-score tweets in the result.
    """
    ranked = sorted(
        [-float(jaccard(tweet, norm_query)), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    )
    # Build a filtered list instead of mutating during iteration.
    return [
        [tweet_id, -neg_coef]
        for neg_coef, tweet_id in ranked[:n]
        if neg_coef != 0.0
    ]

#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and never exceed ``print_width``
    unless a single word is itself wider than the line.

    The original silently dropped any word wider than ``print_width - 2``
    (and printed an empty line in its place); such a word is now printed on
    its own line.
    """
    print(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    room = print_width - 2
    line = []
    for word in tweet_content.split(' '):
        if line and len(' '.join(line)) + 1 + len(word) > room:
            print(indent + ' '.join(line))
            line = []
        line.append(word)
    print(indent + ' '.join(line))

#--------------------------------------------
# 6330518221 (20.00) 344 (2021-02-27 23:25)

def get_unique( words ):
    """Return the distinct words of ``words`` in sorted order.

    The caller's list is left untouched; the original sorted it in place
    as a side effect.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting, so checking the last kept
        # item is enough.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of two lists of hashable words.

    Two empty lists give 0.0; the original raised ZeroDivisionError.
    """
    set_1, set_2 = set(words_1), set(words_2)
    union_size = len(set_1 | set_2)
    if union_size == 0:
        return 0.0
    return len(set_1 & set_2) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the top-``n`` ``[tweet_id, coefficient]`` pairs, positives only.

    Tweets are ranked by descending coefficient and ascending id; after
    slicing to ``n`` entries, those with a non-positive coefficient are
    discarded.
    """
    pairs = [
        [jaccard(tweet, norm_query), -idx]
        for idx, tweet in enumerate(norm_tweets)
    ]
    pairs.sort(reverse=True)
    top_n = []
    for coef, neg_idx in pairs[:n]:
        if coef > 0:
            top_n.append([-neg_idx, coef])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Lines are indented by two spaces. A word that is wider than the usable
    width and arrives while the current line is empty is printed as-is,
    without indent.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    room = print_width - 2
    pending = ''
    for word in tweet_content.split(' '):
        if pending == '' and len(word) > room:
            print(word)
            continue
        if len(pending) + len(word) > room:
            # pending always ends with a separator space; strip it.
            print('  ' + pending[:-1])
            pending = ''
        pending += word + ' '
    if pending != '':
        print('  ' + pending[:-1])


#--------------------------------------------
# 6330519921 (16.94) 345 (2021-03-01 19:07)

def get_unique( words ):
    """Return ``words`` with repeated entries removed, first occurrence kept."""
    result = []
    for entry in words:
        try:
            result.index(entry)
        except ValueError:
            # Not seen yet.
            result.append(entry)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original computed its result inside the de-duplication loop, so two
    empty lists raised UnboundLocalError, and it derived the shared count
    from list lengths, which breaks when a list holds duplicates. Inputs
    are now treated as sets and two empty lists give 0.0.
    """
    first = set(words_1)
    second = set(words_2)
    distinct = len(first | second)
    if distinct == 0:
        return 0.0
    shared = len(first & second)
    return shared / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Ordering is descending coefficient, then ascending tweet id.

    Slicing replaces the original ``for i in range(n)`` indexing, which
    raised IndexError whenever fewer than ``n`` tweets had a positive score.
    """
    matches = []
    for position, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append((-score, position))
    matches.sort()
    # max(n, 0) keeps the original "nothing for n <= 0" behaviour.
    return [[position, -neg_score] for neg_score, position in matches[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, the ``#id (coef)`` header and the wrapped text.

    The text is split on any whitespace; each output line starts with two
    spaces and is at most ``print_width`` characters long.

    Fixes over the original: no trailing space at the end of each line, and
    no empty indented line when the first word is wider than the line.
    """
    words = tweet_content.split()
    print('        ')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = []
    for word in words:
        if line and len(' '.join(line + [word])) + 2 > print_width:
            print('  ' + ' '.join(line))
            line = []
        line.append(word)
    print('  ' + ' '.join(line))




#--------------------------------------------
# 6330520421 (9.33) 346 (2021-03-01 23:58)

def get_unique( words ):
    """Return the elements of ``words`` with later repeats removed."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two word collections.

    Returns 0.0 when both inputs are empty instead of raising
    ZeroDivisionError.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    union = set_1 | set_2
    if not union:
        return 0.0
    return len(set_1 & set_2) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    ``tweet_id`` is the index of the tweet in ``norm_tweets``. Pairs are
    ordered by descending coefficient, then ascending tweet id.

    The original could not run: it used a tweet (a list) as a list index,
    scored only one tweet, and left the result unassigned when that score
    was zero.

    NOTE(review): the result shape follows the other implementations of
    this function in the file; confirm against the caller.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([-coef, tweet_id])
    scored.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the ``#id (coef)`` header followed by the wrapped tweet text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    The original looped forever for any tweet shorter than ``print_width``
    and otherwise printed only a blank line; the wrapping is rewritten.

    NOTE(review): unlike the other implementations in this file, no blank
    line is printed before the header; that is kept as found.
    """
    words = tweet_content.split(' ')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    line = []
    for word in words:
        if line and len('  ' + ' '.join(line + [word])) > print_width:
            print('  ' + ' '.join(line))
            line = []
        line.append(word)
    print('  ' + ' '.join(line))




#--------------------------------------------
# 6330521021 (16.12) 347 (2021-02-27 14:42)

def get_unique( words ):
    """Return each distinct element of ``words`` once, in first-seen order."""
    unique_words = []
    for word in words:
        already_present = word in unique_words
        if not already_present:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The shared-word count used to be derived from the list lengths, which
    is wrong when a list contains duplicates, and two empty lists raised
    ZeroDivisionError. Inputs are now treated as sets; two empty lists
    give 0.0.
    """
    first = set(words_1)
    second = set(words_2)
    total = len(first.union(second))
    if total == 0:
        return 0.0
    return len(first.intersection(second)) / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are ordered by descending coefficient, then ascending tweet id.

    The original tested ``[0.0] in top_n[:n][1]``: that looks for a list
    inside a ``[id, coef]`` pair, so it is never true, and it raises
    IndexError when fewer than two tweets are ranked.

    NOTE(review): the check reads as an attempt to suppress zero-score
    results, so tweets with a coefficient of 0 are now excluded; confirm.
    """
    scored = []
    for tid, tweet in enumerate(norm_tweets):
        jcard = jaccard(tweet, norm_query)
        if jcard > 0:
            scored.append([-jcard, tid])
    scored.sort()
    return [[tid, -neg_jcard] for neg_jcard, tid in scored[:n]]
def show_tweet(tweet_id, wordss, jc_coef, length):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    ``wordss`` is the tweet text and ``length`` the maximum line width,
    including the two-space indent of every text line.

    The original raised IndexError for a tweet that fits on one line with
    a single word, repeated lines and hid the repeat with a last-two-lines
    comparison, and never terminated when the first word was wider than
    the line. It is replaced by a single greedy pass over the words.
    """
    words = wordss.split(" ")
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    sentence = []
    for word in words:
        # 2 for the indent, 1 for the separating space.
        if sentence and 2 + len(' '.join(sentence)) + 1 + len(word) > length:
            print('  ' + ' '.join(sentence))
            sentence = []
        sentence.append(word)
    print('  ' + ' '.join(sentence))

#--------------------------------------------
# 6330522721 (18.01) 348 (2021-03-01 18:26)

def get_unique( words ):
    """Return the elements of ``words`` de-duplicated in first-seen order."""
    collected = []
    for element in words:
        if element in collected:
            continue
        collected.append(element)
    return collected
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Inputs are treated as sets, so duplicates inside one list no longer
    distort the result, and two empty lists give 0.0 instead of raising
    ZeroDivisionError.
    """
    set_1 = set(words_1)
    set_2 = set(words_2)
    n1_i_n2 = len(set_1 & set_2)
    denominator = len(set_1) + len(set_2) - n1_i_n2
    if denominator == 0:
        return 0.0
    return n1_i_n2 / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, coefficient]`` pairs.

    Ranking is by descending coefficient, then ascending tweet id. The
    coefficient is computed as ``jaccard(norm_query, tweet)``.

    NOTE(review): zero-coefficient tweets are included; confirm whether
    that is intended.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        scored.append((-jaccard(norm_query, tweet), tweet_id))
    scored.sort()
    top_n = []
    for neg_coef, tweet_id in scored[:n]:
        top_n.append([tweet_id, -neg_coef])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are broken before the word
    that would make them longer than ``print_width``.

    The original printed finished lines only from inside a loop that starts
    at the second word, so a one-word tweet was never shown. The last line
    is now printed after the loop, which also removes the four near-
    identical branches.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) > print_width:
            print(line)
            line = '  ' + word
        else:
            line += ' ' + word
    print(line)

#--------------------------------------------
# 6330523321 (17.72) 349 (2021-02-28 17:36)

def get_unique( words ):
    """Return a list of the distinct items in ``words`` (first-seen order)."""
    unique_words = []
    for item in words:
        if unique_words.count(item) == 0:
            unique_words.append(item)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original built the union by appending to ``words_1`` itself, so
    every call grew the caller's list. Nothing is mutated now, and two
    empty lists give 0.0 instead of raising ZeroDivisionError.
    """
    first = set(words_1)
    second = set(words_2)
    union = first | second
    if not union:
        return 0.0
    return len(first & second) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are sorted by descending coefficient; the sort is stable, so
    equal coefficients stay in ascending tweet-id order.
    """
    candidates = []
    for tweet_id in range(len(norm_tweets)):
        score = jaccard(norm_tweets[tweet_id], norm_query)
        if score > 0:
            candidates.append([tweet_id, score])
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return candidates[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    The original seeded each line with the two-space indent and then added
    a separator before the first word as well, so every line started with
    three spaces and wrapped one column early. It also left an empty
    indented line in front of an over-long first word.
    """
    print(' ')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    all_lines = []
    current = []
    for word in tweet_content.split(' '):
        if current and 2 + len(' '.join(current)) + 1 + len(word) > print_width:
            all_lines.append('  ' + ' '.join(current))
            current = []
        current.append(word)
    if current:
        all_lines.append('  ' + ' '.join(current))
    for line in all_lines:
        print(line)



#--------------------------------------------
# 6330524021 (20.00) 350 (2021-02-26 16:09)

def get_unique( words ):
    """Return the distinct entries of ``words``, keeping their first order."""
    seen = []
    for entry in words:
        if entry in seen:
            continue
        seen.append(entry)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original counted adjacent equal items in the sorted concatenation,
    which also counts duplicates inside a single list as shared words, and
    it divided by zero for two empty lists. Inputs are treated as sets now;
    two empty lists give 0.0.
    """
    left = set(words_1)
    right = set(words_2)
    shared = len(left.intersection(right))
    distinct = len(left.union(right))
    return shared / distinct if distinct else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are ordered by descending coefficient, then ascending tweet id.
    """
    ranked = []
    for idx in range(len(norm_tweets)):
        coef = jaccard(norm_tweets[idx], norm_query)
        if coef > 0:
            # Store the negated id so a descending sort favours low ids.
            ranked.append([coef, -idx])
    ranked.sort(reverse=True)
    return [[-neg_idx, coef] for coef, neg_idx in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Each text line starts with two spaces; a line is flushed just before
    the word that would make it longer than ``print_width``.
    """
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line_words = []
    for word in tweet_content.split(' '):
        trial = line_words + [word]
        if len(' '.join(trial)) + 2 > print_width:
            print('  ' + ' '.join(line_words))
            line_words = [word]
        else:
            line_words = trial
    print('  ' + ' '.join(line_words))
#--------------------------------------------
# 6330525621 (18.01) 351 (2021-02-28 19:00)

def get_unique( words ):
    """Return every distinct element of ``words`` once, in order of appearance."""
    unique_words = []
    for element in words:
        if element not in unique_words:
            unique_words += [element]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original appended every unmatched word of ``words_2`` to the union
    without checking for repeats and counted repeated matches more than
    once; it also divided by zero for two empty lists. Inputs are treated
    as sets now, and two empty lists give 0.0.
    """
    first = set(words_1)
    second = set(words_2)
    jac = sum(1 for word in second if word in first)
    union_size = len(first) + len(second) - jac
    if union_size == 0:
        return 0.0
    return jac / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, coefficient]`` pairs.

    The coefficient of each tweet against ``norm_query`` is computed
    inline as shared words over distinct words. Pairs are ordered by
    descending coefficient, then ascending tweet id.

    An empty tweet compared with an empty query now scores 0.0 instead of
    raising ZeroDivisionError.

    NOTE(review): zero-coefficient tweets are kept, as before; confirm
    whether they should be excluded.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        vocabulary = []
        for word in tweet:
            if word not in vocabulary:
                vocabulary.append(word)
        shared = 0
        for word in norm_query:
            if word in vocabulary:
                shared += 1
            else:
                vocabulary.append(word)
        coef = shared / len(vocabulary) if vocabulary else 0.0
        scored.append([-coef, tweet_id])
    scored.sort()
    # max(n, 0) keeps the original behaviour of returning nothing for n <= 0.
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in scored[:max(n, 0)]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a spacer line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    The original detected the end of the text by comparing each word with
    the last word's value, so any earlier occurrence of that word printed
    a partial line that was then printed again. The final line is now
    emitted once, after the loop. An over-long first word no longer leaves
    an empty indented line in front of it.
    """
    words = tweet_content.split(' ')
    print(' ')
    print('#' + str(tweet_id) + ' (' + str(round(float(jc_coef), 2)) + ')')
    tweet = []
    for word in words:
        if tweet and 2 + len(' '.join(tweet + [word])) > print_width:
            print('  ' + ' '.join(tweet))
            tweet = []
        tweet.append(word)
    print('  ' + ' '.join(tweet))
#--------------------------------------------
# 6330526221 (17.75) 352 (2021-02-27 15:37)

def get_unique( words ):
    """Return the distinct words of ``words`` in sorted order.

    Works on a sorted copy, so the caller's list is no longer reordered as
    a side effect. The unused run-length counter of the original is gone.
    """
    ordered = sorted(words)
    # After sorting, a word is new exactly when it differs from its predecessor.
    return [
        word
        for position, word in enumerate(ordered)
        if position == 0 or word != ordered[position - 1]
    ]
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Inputs are treated as sets, so duplicate words no longer skew the
    counts, and two empty lists give 0.0 instead of raising
    ZeroDivisionError.
    """
    first = set(words_1)
    second = set(words_2)
    shared = len(first & second)
    total = len(first) + len(second) - shared
    if total == 0:
        return 0.0
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are ordered by descending coefficient, then ascending tweet id.

    The original sorted ``[coefficient, id]`` pairs in reverse, which put
    the higher id first among equal coefficients; the other ranking
    functions in this file put the lower id first.

    NOTE(review): confirm the intended tie order against the caller.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    Fixes over the original: lines no longer end with a trailing space, and
    an over-long first word is not preceded by an empty indented line.
    """
    words = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    current = []
    for word in words:
        if current and len(' '.join(current)) + 1 + len(word) + 2 > print_width:
            print('  ' + ' '.join(current))
            current = []
        current.append(word)
    print('  ' + ' '.join(current))



#--------------------------------------------
# 6330527921 (19.95) 353 (2021-03-01 20:00)

def get_unique( words ):
    """Sort ``words`` in place, drop repeated values, and return it.

    The list passed in is modified and is the object returned.
    """
    words.sort()
    pos = 1
    while pos < len(words):
        if words[pos] != words[pos - 1]:
            pos += 1
            continue
        # Equal neighbours: drop the earlier one and re-check this position.
        del words[pos - 1]
    return words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original derived the shared count from the list lengths, which is
    wrong when a list holds duplicates, and divided by zero for two empty
    lists. Inputs are treated as sets now; two empty lists give 0.0.
    """
    first = set(words_1)
    second = set(words_2)
    mix = first | second
    if not mix:
        return 0.0
    return (len(first) + len(second) - len(mix)) / len(mix)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the top-``n`` ``[tweet_id, coefficient]`` pairs, zeros cut off.

    Tweets are ranked by descending coefficient, then ascending id. From
    the first ``n`` ranked entries, everything from the first zero
    coefficient onward is discarded.
    """
    scored = [
        [jaccard(tweet, norm_query), -tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(reverse=True)
    top_n = []
    for coef, neg_id in scored[:n]:
        if float(coef) == 0:
            break
        top_n.append([-neg_id, coef])
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    The text is split on any whitespace. Each output line is the two-space
    indent plus a run of words joined by single spaces.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    tokens = tweet_content.split()
    start = 0
    used = 0
    for pos, token in enumerate(tokens):
        # Each word costs its length plus one separator.
        used += len(token) + 1
        if used > print_width - 1:
            print('  ' + ' '.join(tokens[start:pos]))
            start = pos
            used = len(token) + 1
    print('  ' + ' '.join(tokens[start:]))
#-------------------------------------------
# 6330528521 (20.00) 354 (2021-03-01 00:19)

def get_unique( words ):
    """Return the distinct elements of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Two empty lists give 0.0; the original raised ZeroDivisionError. The
    no-op self-assignments of the original are removed, and duplicates in
    ``words_1`` are no longer counted more than once.
    """
    first = set(words_1)
    second = set(words_2)
    union_size = len(first | second)
    if union_size == 0:
        return 0.0
    return len(first & second) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are ordered by descending coefficient, then ascending tweet id.

    Each tweet is scored once; the original called ``jaccard`` twice per
    matching tweet and swapped the pair elements back and forth around the
    sort.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([-coef, tweet_id])
    scored.sort()
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    Fixes over the original:
    * the wrapped word was rebuilt via ``str([word])[2:-2]``, which goes
      through ``repr`` and doubles backslashes (and escapes quotes) in the
      printed text;
    * flushed lines kept a trailing space;
    * an over-long first word was preceded by an empty indented line.
    """
    words = tweet_content.split(' ')
    print('\n' + '#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = []
    for word in words:
        if line and len('  ' + ' '.join(line + [word])) > print_width:
            print('  ' + ' '.join(line))
            line = []
        line.append(word)
    print('  ' + ' '.join(line))
#--------------------------------------------
# 6330529121 (18.01) 355 (2021-03-01 16:03)

def get_unique( words ):
    """Return the distinct words of ``words`` in sorted order.

    A sorted copy is used, so the caller's list keeps its original order;
    the original sorted the argument in place.
    """
    unique_words = []
    previous = None
    for position, word in enumerate(sorted(words)):
        # Sorted input: duplicates are adjacent, compare with the predecessor.
        if position == 0 or word != previous:
            unique_words.append(word)
        previous = word
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    The original indexed the first element of the merged list and raised
    IndexError when both inputs were empty; that case now returns 0.0.
    Duplicates in ``words_1`` are no longer counted more than once.
    """
    first = set(words_1)
    second = set(words_2)
    merged = first.union(second)
    if not merged:
        return 0.0
    return len(first.intersection(second)) / len(merged)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, coefficient]`` pairs.

    Ranking is by descending coefficient, then ascending tweet id.

    NOTE(review): zero-coefficient tweets are included; confirm whether
    that is intended.
    """
    ranked = sorted(
        ([jaccard(norm_tweets[i], norm_query), -i] for i in range(len(norm_tweets))),
        reverse=True,
    )
    return [[-neg_i, coef] for coef, neg_i in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines start with two spaces. A line is flushed as soon as it
    reaches ``print_width`` with a single word, or just before the word
    that would make it longer than ``print_width``.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')

    line = ''
    for word in tweet_content.split(' '):
        if line and len(line) + len(word) + 1 <= print_width:
            line += ' ' + word
            continue
        if line:
            print(line)
        line = '  ' + word
        if len(line) >= print_width:
            # A lone word already fills the line: emit it immediately.
            print(line)
            line = ''
    if line:
        print(line)












#--------------------------------------------
# 6330530721 (20.00) 356 (2021-02-28 23:04)

def get_unique( words ):
    """Return the items of ``words`` with repeats removed, order preserved."""
    result = []
    for item in words:
        if item in result:
            continue
        result.append(item)
    return result
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Two empty lists give 0.0 instead of raising ZeroDivisionError, and
    duplicates in ``words_1`` are no longer counted more than once.
    """
    first = set(words_1)
    second = set(words_2)
    intersec_words = first & second
    union_words = first | second
    if not union_words:
        return 0.0
    return len(intersec_words) / len(union_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are sorted by descending coefficient; the sort is stable, so
    equal coefficients stay in ascending tweet-id order.

    Each tweet is scored once; the original called ``jaccard`` twice for
    every matching tweet.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: pair[1], reverse=True)
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    Fixes over the original: the wrapped word is no longer rebuilt through
    ``str([word])[2:-2]`` (which doubles backslashes and escapes quotes),
    flushed lines no longer end with a space, and an over-long first word
    is not preceded by an empty indented line.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    pending = []
    for word in tweet_content.split(' '):
        if pending and 2 + len(' '.join(pending)) + 1 + len(word) > print_width:
            print('  ' + ' '.join(pending))
            pending = []
        pending.append(word)
    print('  ' + ' '.join(pending))





#--------------------------------------------
# 6330531321 (19.95) 357 (2021-03-01 02:02)

def get_unique( words ):
    """Return one copy of each element of ``words``, in first-seen order."""
    unique_words = []
    for element in words:
        if unique_words.count(element) == 0:
            unique_words.append(element)
    return unique_words
#------------------------------------------------------------------------#
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Two empty lists give 0.0 instead of raising ZeroDivisionError, and
    duplicates in ``words_1`` are no longer counted more than once.
    """
    first = set(words_1)
    second = set(words_2)
    shared = len(first & second)
    distinct = len(first | second)
    if distinct == 0:
        return 0.0
    return shared / distinct
#------------------------------------------------------------------------#
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, zeros excluded.

    Pairs are ordered by descending coefficient, then ascending tweet id.

    The result is the same as before, reached with a single sort. Removed:
    the ``reverse=not not ... True`` chain, a sort whose order was
    overwritten by the two that followed it, and a loop that removed
    zero-score entries while iterating even though those entries are
    skipped before they are ever added.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef == 0.0:
            continue
        scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
#------------------------------------------------------------------------#
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the ``#id (coef)`` header followed by the wrapped tweet text.

    The text is split on any whitespace; each output line starts with two
    spaces and is at most ``print_width`` characters long unless a single
    word is wider than that.

    The original used a strict ``<`` comparison that also applied to an
    empty line, so a word of exactly ``print_width - 2`` characters (or
    longer) at the start of a line produced an empty indented line first.
    The separator is now only counted when the line already has words.

    NOTE(review): no blank line is printed before the header, unlike the
    other implementations in this file; kept as found.
    """
    print("#" + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    room = print_width - 2
    line = []
    for word in tweet_content.split():
        if line and len(" ".join(line)) + 1 + len(word) > room:
            print(" ", " ".join(line))
            line = []
        line.append(word)
    if line:
        print(" ", " ".join(line))
#--------------------------------------------
# 6330532021 (18.50) 358 (2021-02-28 22:38)

def get_unique( words ):
    """Remove repeated elements from ``words`` in place and return it.

    The first occurrence of each element is kept, in its original order.

    The original compared each sorted element with its predecessor starting
    at index 0, where the "predecessor" is the last element. A one-element
    list therefore matched itself and came back empty, and a list whose
    elements are all equal lost every element.

    As before, the list passed in is modified and is the object returned.
    """
    kept = []
    for word in words:
        if word not in kept:
            kept.append(word)
    # Slice assignment keeps the in-place contract of the original.
    words[:] = kept
    return words
def jaccard(words_1, words_2):
    """Return matching-pair count over (total length - matches), or 0.

    Every equal pair ``(a, b)`` with ``a`` from ``words_1`` and ``b`` from
    ``words_2`` counts as one match. The integer 0 is returned when there
    is no match.
    """
    matches = sum(1 for left in words_1 for right in words_2 if left == right)
    if matches == 0:
        return 0
    return matches / (len(words_1) + len(words_2) - matches)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the top-``n`` ``[tweet_id, coefficient]`` pairs, zeros removed.

    Tweets are ranked by descending coefficient, then ascending id; the
    first ``n`` are taken and entries with a coefficient of 0 are dropped.

    The original located zero entries with ``top_n[n - i - 1]``, which
    raises IndexError whenever there are fewer than ``n`` tweets, and it
    removed items from the list while indexing into it.
    """
    ranked = sorted(
        [-jaccard(tweet, norm_query), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    )
    top_n = [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[0:n]]
    return [pair for pair in top_n if pair[1] != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces; a running column counter decides
    when the next word has to start a new line.
    """
    print()
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    last_pos = len(words) - 1
    used = 2          # columns consumed so far, indent included
    line = ''
    for pos, word in enumerate(words):
        if used + len(word) > print_width:
            print('  ' + line[0:-1])
            line = ''
            used = 2
        line += word + ' '
        used += len(word) + 1
        if pos == last_pos:
            print('  ' + line[0:-1])

#--------------------------------------------
# 6330533621 (20.00) 359 (2021-02-26 23:02)

def get_unique( words ):
    """Return the distinct elements of ``words`` in order of first appearance."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Inputs are treated as sets, so duplicates no longer skew the length
    arithmetic, and two empty lists give 0.0 instead of raising
    ZeroDivisionError.
    """
    first = set(words_1)
    second = set(words_2)
    words_same = first & second
    denominator = len(first) + len(second) - len(words_same)
    if denominator == 0:
        return 0.0
    return len(words_same) / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Pairs are sorted by descending coefficient; the sort is stable, so
    equal coefficients stay in ascending tweet-id order.

    Each tweet is scored once; the original called ``jaccard`` twice for
    every matching tweet.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    Fixes over the original: the wrapped word is no longer rebuilt through
    ``str([word])[2:-2]`` (which doubles backslashes and escapes quotes),
    flushed lines no longer end with a space, and an over-long first word
    is not preceded by an empty indented line.
    """
    print('\n' + '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    current = []
    for word in words:
        candidate = '  ' + ' '.join(current + [word])
        if current and len(candidate) > print_width:
            print('  ' + ' '.join(current))
            current = [word]
        else:
            current.append(word)
    print('  ' + ' '.join(current))
#--------------------------------------------
# 6330534221 (18.01) 360 (2021-03-01 16:04)

def get_unique( words ):
    """Return ``words`` without repeated elements, first occurrences kept."""
    unique_words = []
    for element in words:
        if element in unique_words:
            continue
        unique_words += [element]
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two lists of hashable words.

    Inputs are treated as sets, so duplicates no longer skew the length
    arithmetic, and two empty lists give 0.0 instead of raising
    ZeroDivisionError.
    """
    first = set(words_1)
    second = set(words_2)
    shared = sum(1 for word in first if word in second)
    total = len(first) + len(second) - shared
    return shared / total if total else 0.0
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, coefficient]`` pairs.

    Ranking is by descending coefficient, then ascending tweet id.

    The original assigned its result inside the final loop, so an empty
    ``norm_tweets`` raised UnboundLocalError, and it re-sliced the list on
    every iteration. An empty input now returns ``[]``.

    NOTE(review): zero-coefficient tweets are kept, as before; confirm
    whether they should be excluded.
    """
    ranked = sorted(
        [-jaccard(tweet, norm_query), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    )
    return [[tweet_id, -neg_coef] for neg_coef, tweet_id in ranked[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header and the wrapped text.

    Text lines are indented by two spaces and are at most ``print_width``
    characters long; a single word wider than that gets its own line.

    The original printed an over-long first word immediately but left it in
    the pending line, so the word was printed a second time by the next
    flush. A line is now flushed only when it holds more than the word
    being added.
    """
    print()
    print('#' + str(tweet_id), "(" + str(round(jc_coef, 2)) + ")")
    case = []
    for word in tweet_content.split(' '):
        case.append(word)
        if len(case) > 1 and len("  " + " ".join(case)) > print_width:
            print("  " + " ".join(case[:-1]))
            case = [word]
    print("  " + " ".join(case))

#--------------------------------------------
# 6330535921 (20.00) 361 (2021-02-27 04:56)

def get_unique( words ):
    """Return the distinct words ordered by length, then alphabetically."""
    # Sorting [length, word] pairs puts equal words next to each other.
    pairs = sorted([len(word), word] for word in words)
    if words == []:
        return []
    distinct = [pairs[0][1]]
    for _, word in pairs[1:]:
        if word != distinct[-1]:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return matches / ((len(words_1) - matches) + len(words_2)).

    ``matches`` is the number of equal (item of words_1, item of words_2)
    pairs.  Raises ZeroDivisionError when both lists are empty.
    """
    matches = sum(1 for left in words_1 for right in words_2 if left == right)
    return matches / ((len(words_1) - matches) + len(words_2))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a coefficient above zero are kept; equal coefficients
    keep ascending tweet-id order because the sort is stable.
    """
    scored = sorted(
        [idx, jaccard(tweet, norm_query)] for idx, tweet in enumerate(norm_tweets)
    )
    positive = [pair for pair in scored if pair[1] > 0]
    ranked = sorted(positive, key=lambda pair: pair[1], reverse=True)
    if n < len(ranked):
        return [ranked[k] for k in range(n)]
    return list(ranked)
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    Each content line is built in ``s``, starting with two spaces and with a
    single space appended after every word.
    """
    print('')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    tweet_list = tweet_content.split(' ')  # not used below
    lis = tweet_content.split(' ')
    s = '  '
    a = []  # not used below
    n = 0   # 0 until the first line break has happened
    for i in lis:
        if n == 0:
            s += i+' '
            if len(s)-2 >= print_width:
                # NOTE(review): len(s)-2 >= print_width implies
                # len(s) > print_width, so this equality branch looks
                # unreachable.
                if len(s)==print_width:
                    print(s)
                else:
                    # Drop the word just added, its trailing space and the
                    # space before it, then print what remains.
                    print(s[:-(len(i)+2)])
                n+=1
                # Start the next line with the word that did not fit.
                s ='  '
                s +=i+' '
        else:
            # Same steps as the branch above, without touching ``n``.
            s += i+' '
            if len(s)-2 >= print_width:
                if len(s)==print_width:
                    print(s)
                else:
                    print(s[:-(len(i)+2)])
                s='  '
                s+=i+' '
    # Whatever is left over (including its trailing space) is the last line.
    print(s)

#--------------------------------------------
# 6330536521 (20.00) 362 (2021-03-01 08:05)

def get_unique( words ):
    """Return ``words`` without duplicates, keeping each item's last occurrence.

    When nothing is removed the very same list object is returned.
    """
    pos = 0
    while pos < len(words):
        others = words[:pos] + words[pos+1:]
        if words[pos] in others:
            # A copy exists elsewhere: drop this one and re-check the slot.
            words = others
        else:
            pos += 1
    return words
def jaccard(words_1, words_2):
    """Return |common| / |union| after de-duplicating both lists with get_unique."""
    left = get_unique(words_1)
    right = get_unique(words_2)
    common = sum(1 for word in left if word in right)
    union_size = int(len(get_unique(left + right)))
    return common / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with coefficient > 0, ordered by
        descending coefficient and, for ties, ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        # Compute the coefficient once per tweet instead of twice.
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([-coef, tweet_id])
    # Plain list ordering on [-coef, id] gives best-first, lowest id first.
    scored.sort()
    return [[tweet_id, -neg] for neg, tweet_id in scored[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``" "`` line, the ``#id (coef)`` header, then the wrapped content.

    ``con`` holds the words of the current line and ``n`` their total
    character count.
    """
    n=0
    tweet_content=tweet_content.split(" ")
    print(" ")
    print("#"+str(tweet_id)+" ("+str(round(jc_coef,2))+")")
    con=[]
    for e in range(len(tweet_content)) :
        # len(con)+n+1 == 2 (indent) + n characters + len(con)-1 separators,
        # i.e. the width of the line as currently collected.
        if int(len(con))+n+1>print_width:
            # The last collected word made the line too wide: print the
            # others and carry that word over to the new line.
            print("  "+" ".join(con[:-1]))
            con=[con[-1]]
            n=len(tweet_content[e-1])
            con+=[tweet_content[e]]
            n+=len(tweet_content[e])
        else :
            con+=[tweet_content[e]]
            n+=len(tweet_content[e])
    # The width check lags one word behind, so repeat it for the final line.
    if int(len(con))+n+1>print_width:
        print("  "+" ".join(con[:-1]))
        print("  "+con[-1])
    else:
        print("  "+" ".join(con))
#--------------------------------------------
# 6330537121 (20.00) 363 (2021-02-28 20:49)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    return [word for pos, word in enumerate(words) if word not in words[:pos]]
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct items overall).

    Raises ZeroDivisionError when both lists are empty.
    """
    distinct = []
    for source in (words_1, words_2):
        for word in source:
            if word not in distinct:
                distinct.append(word)
    shared = [word for word in words_1 if word in words_2]
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with coefficient > 0, highest
        coefficient first.  Ties keep ascending tweet-id order because the
        sort is stable.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        # One jaccard call per tweet; the value is reused for the filter.
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
  """Print two blank lines, the ``#id (coef)`` header, then the wrapped content."""
  con = tweet_content.split(' ')
  print('')
  # The leading '\n' adds a second blank line before the header.
  print('\n'+'#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
  # ``p`` is the line being built; this space plus the one prepended to the
  # first word gives the two-space indent.
  p=' '
  for e in range(len(con)):
    p += ' '+con[e]
    if len(p) > print_width:
        # Print the line without the word just added (its separating space
        # stays on the printed line).
        # NOTE(review): for an empty word the slice is p[:-0], i.e. ''.
        print(p[:-len(con[e])])
        # str(list)[2:-2] strips "['" and "']" from the list repr to get the
        # word back.  NOTE(review): depends on repr() not escaping the word.
        l=con[e].split()
        l=str(l)[2:-2]
        p='  '+l
  print(p)







#--------------------------------------------
# 6330538821 (17.97) 364 (2021-03-01 12:10)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order."""
    ordered = sorted(words)
    # After sorting, duplicates are adjacent: keep an item only when it
    # differs from its predecessor.
    return [
        word
        for pos, word in enumerate(ordered)
        if pos == 0 or word != ordered[pos - 1]
    ]
def jaccard(words_1, words_2):
    """Return (number of equal cross pairs) / (distinct words in both lists)."""
    union_size = len(get_unique(words_1 + words_2))
    pair_hits = [left for left in words_1 for right in words_2 if left == right]
    return len(pair_hits) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    The ``n`` best-ranked tweets are taken first and zero coefficients are
    dropped afterwards; ties are ordered by ascending tweet id.
    """
    negated = [
        [-jaccard(tweet, norm_query), idx] for idx, tweet in enumerate(norm_tweets)
    ]
    best = sorted(negated)[:n]
    return [[idx, -neg] for neg, idx in best if -neg > 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its content wrapped at spaces.

    Output is a blank line, ``#<tweet_id> (<jc_coef rounded to 2 places>)``
    and then the content in lines that start with two spaces.  A word moves
    to a new line when the running total of ``len(word) + 1`` for the line
    would exceed ``print_width - 2``.

    The last line is flushed once after the loop.  The flush used to be
    triggered by comparing each word with the last word, which broke the
    line early whenever an earlier word had the same text.
    """
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    line_words = []
    used = 0  # sum of len(word) + 1 over the words on the current line
    for word in tweet_content.split(" "):
        used += len(word) + 1
        if used > print_width - 2:
            print('  ' + " ".join(line_words))
            line_words = []
            used = len(word) + 1
        line_words.append(word)
    print('  ' + " ".join(line_words))

#--------------------------------------------
# 6330539421 (20.00) 365 (2021-03-01 04:01)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    pos = 0
    while pos < len(words):
        candidate = words[pos]
        if candidate not in distinct:
            distinct.append(candidate)
        pos += 1
    return distinct
def jaccard(words_1, words_2):
    """Return (repeated items) / (distinct items) of the concatenated lists.

    The number of repeats is ``len(words_1 + words_2)`` minus the number of
    distinct items.  Returns 0 when there are no items at all.
    """
    merged = words_1 + words_2
    distinct = len(get_unique(merged))
    return 0 if distinct == 0 else (len(merged) - distinct) / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    negated = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            negated.append([-coef, idx])
    negated.sort()
    return [[idx, -neg] for neg, idx in negated][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    The content is walked character by character rather than split into
    words.  NOTE(review): ``tweet_content[0]`` is read unconditionally, so
    an empty string raises IndexError.
    """
    print()
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    a = 0  # look-ahead length measured from position i
    b = 1  # 1: copy characters to the output, 0: decide about a line break
    print('  '+tweet_content[0],end='')
    i = 1  # index of the next character to handle
    d = 1  # characters printed so far on the current line
    while i < len(tweet_content):
        if b == 1:
            # Copy one character to the current line.
            print(tweet_content[i],end='')
            d += 1
            if i+1 < len(tweet_content):
                # A space is coming up: switch to decision mode.
                if tweet_content[i+1] == ' ':
                    b = 0
                    a = 0
            i += 1
        else:
            if a != 0:
                # ``a`` already spans the run of spaces; extend it over the
                # following non-space characters.
                while i+a < len(tweet_content) and tweet_content[i+a] != ' ' :
                    a += 1
                if d + a <= print_width-2 :
                    # Spaces plus next word still fit: keep copying.
                    b = 1
                else:
                    # Skip to the last space of the run, start a new line
                    # and resume copying from that space.
                    # NOTE(review): tweet_content[i+1] has no bounds check.
                    while tweet_content[i+1] == ' ':
                        i += 1
                    print()
                    print(' ',end='')
                    b = 1
                    d = -1
                    a = 0
            else:
                # First pass in decision mode: measure the run of spaces.
                while i+a < len(tweet_content) and tweet_content[i+a] == ' ':
                    a += 1

    print()
#--------------------------------------------
# 6330540021 (18.33) 366 (2021-02-28 20:00)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return |common words| / |all distinct words| of the two lists.

    Both lists are de-duplicated with get_unique first; membership tests go
    through check_exist.
    """
    left, right = get_unique(words_1), get_unique(words_2)
    shared = [word for word in left if check_exist(word, right)]
    combined = []
    for word in left + right:
        if not check_exist(word, combined):
            combined.append(word)
    return len(shared) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    candidates = []
    for tweet_id, tweet_words in enumerate(norm_tweets):
        coef = jaccard(tweet_words, norm_query)
        if coef > 0:
            candidates.append([tweet_id, coef])
    ranked = sorted(candidates, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its content wrapped at spaces.

    Output is a blank line, ``#<tweet_id> (<jc_coef rounded to 2 places>)``
    and then the content in lines that start with two spaces; at most
    ``print_width - 2`` characters of content go on a line.  Nothing is
    printed after the header for empty content.

    Two failures of the earlier version are handled:
      * a remainder exactly as long as the usable width is printed as-is
        instead of being indexed one past its end (IndexError);
      * a word longer than the usable width is printed whole on its own
        line instead of looping forever on an empty slice.
    """
    print("\n#" + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    real_print_width = print_width - 2

    while len(tweet_content) > 0:
        if len(tweet_content) <= real_print_width:
            print(" ", tweet_content)
            break
        if tweet_content[real_print_width] == ' ':
            # The limit falls exactly on a space: cut there.
            cut = real_print_width
        else:
            # Cut just after the last space inside the window.
            cut = tweet_content[:real_print_width].rfind(' ') + 1
            if cut == 0:
                # No space in the window: take the whole over-long word.
                next_space = tweet_content.find(' ', real_print_width)
                cut = len(tweet_content) if next_space == -1 else next_space
        print(" ", tweet_content[:cut])
        tweet_content = tweet_content[cut:].lstrip()

#--------------------------------------------
def check_exist(word, l):
    """Return True when ``word`` is contained in ``l``, otherwise False."""
    found = word in l
    return found
# 6330541621 (18.13) 367 (2021-03-01 19:57)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    return [item for pos, item in enumerate(words) if item not in words[:pos]]
def jaccard(words_1, words_2):
    """Return (equal cross pairs) / (combined length minus overlap).

    The overlap is the number of items of ``words_2`` that occur in
    ``words_1``.  Raises ZeroDivisionError when both lists are empty.
    """
    pair_hits = [left for left in words_1 for right in words_2 if left == right]
    overlap = sum(1 for word in words_2 if word in words_1)
    union_size = len(words_1) + len(words_2) - overlap
    return len(pair_hits) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with coefficient > 0, ordered by
        descending coefficient and, for ties, ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        pair = [tweet_id, jaccard(tweet, norm_query)]
        if pair[1] > 0:
            ranked.append(pair)
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    # The former "append if not already in top" loop tested membership in
    # the list it was iterating, so it never appended; it is removed.
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    Words come from ``tweet_content.split()`` and are written one by one,
    each followed by a space; a new two-space-indented line is started when
    the running length reaches ``print_width - 2``.
    """
    print('')
    print('#'+str(tweet_id),'('+ str(round(jc_coef,2))+')')

    twtcon = tweet_content.split()
    twtlen = 0  # characters (words plus separators) counted on this line
    print_width = print_width-2

    print('  ',end ='')
    for i in twtcon:
        twtlen = twtlen + len(i)
        # Rebinding twtcon does not affect the list the loop iterates over.
        twtcon = tweet_content.split()

        if twtlen < print_width :
            print(i,end =' ')
        else:
            # Word does not fit: start a new line with it.  The counter is
            # reset to 0 and only the separator is added below, so this
            # word's own length is not counted on the new line.
            twtlen = 0
            twtcon = twtcon[twtcon.index(i):]
            print('')
            print('  ',end ='')
            print(i,end =' ')
        twtlen = twtlen+1
    print('')
#--------------------------------------------
# 6330542221 (20.00) 368 (2021-03-01 18:39)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items (words).

    Returns:
        A new sorted list with each value once.  An empty string is kept
        like any other word; the earlier ``''`` sentinel dropped it.
    """
    unique_words = []
    for word in sorted(words):
        # Sorted input puts duplicates side by side, so comparing with the
        # last kept item is enough.
        if not unique_words or word != unique_words[-1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return |common words| / |distinct words| for the two lists.

    Args:
        words_1: list of words.
        words_2: list of words.

    Returns:
        The ratio as a float.

    Raises:
        ZeroDivisionError: when get_unique returns an empty list for the
            combined words.
    """
    union = get_unique(words_1 + words_2)
    same_words = []
    previous = ''  # last word recorded; an empty-string word is never recorded
    for word in sorted(words_1):
        # Membership is tested on words_2 directly; it used to be re-sorted
        # on every iteration, which does not change the result.
        if word in words_2 and word != previous:
            same_words.append(word)
            previous = word
    return len(same_words)/len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    negated = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            negated.append([-coef, tweet_id])
    negated.sort()
    return [[tweet_id, -neg] for neg, tweet_id in negated[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    Lines start with two spaces and every word is followed by one space.
    """
    print('')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    line = '  '
    used = 2  # width of ``line`` including the indent
    for word in tweet_content.split(' '):
        if used + len(word) > print_width:
            # Word would cross the limit: emit the line and start afresh.
            print(line)
            line = '  '
            used = 2
        line += word + ' '
        used += len(word) + 1
    print(line)
#--------------------------------------------
# 6330543921 (18.82) 369 (2021-02-28 00:08)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    kept = []
    for entry in words:
        if entry in kept:
            continue
        kept.append(entry)
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the items of ``words_1`` present in ``words_2``.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2) - shared
    return shared / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with a non-zero coefficient.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    negated = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            negated.append([-coef, idx])
    ranked = [[idx, -neg] for neg, idx in sorted(negated)]
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``" "`` line, the ``#id (coef)`` header, then the wrapped content.

    The whole body is accumulated in one string (``content``) with embedded
    newlines and printed with a single call at the end.
    """
    p_width = print_width  # the caller's width; print_width is reused below
    content = ('  ')
    print(' \n#'+str(tweet_id), '('+str(round(jc_coef,2))+')')
    contents = tweet_content.split(' ')
    for i in range(len(contents)):
        # ``print_width`` acts as a limit on the length of the WHOLE
        # accumulated string, not just the current line.
        if len(content + contents[i] + ' ') > print_width:
            # Start a new line: move the limit to the current length plus
            # the original width plus 2, then append the break, the indent
            # and the word.
            print_width = len(content) + p_width + 2
            content = content + '\n' + '  ' + contents[i] + ' '
        else:
            content = content + contents[i] + ' '
    print(content)



#--------------------------------------------
# 6330544521 (18.82) 370 (2021-02-28 00:06)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    result = []
    index = 0
    while index < len(words):
        if words[index] not in result:
            result.append(words[index])
        index += 1
    return result
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the items of ``words_2`` present in ``words_1``.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with a non-zero coefficient.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef != 0:
            matches.append([tweet_id, coef])
    keyed = sorted([-coef, tweet_id] for tweet_id, coef in matches)
    return [[tweet_id, -neg] for neg, tweet_id in keyed][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``" "`` line, the ``#id (coef)`` header, then the wrapped content.

    The whole body is accumulated in one string (``content``) with embedded
    newlines and printed with a single call at the end.
    """
    col_width = print_width  # the caller's width; print_width is reused below
    content = ('  ')
    print(' \n#'+str(tweet_id), '('+str(round(jc_coef,2))+')')
    contents = tweet_content.split(' ')
    count = 0
    while count < len(contents):
        # ``print_width`` acts as a limit on the length of the WHOLE
        # accumulated string, not just the current line.
        if len(content + contents[count] + ' ') > print_width:
            # Start a new line: move the limit to the current length plus
            # the original width plus 2, then append the break, the indent
            # and the word.
            print_width = len(content) + col_width + 2
            content = content + '\n' + '  ' + contents[count] + ' '
        else:
            content = content + contents[count] + ' '
        count += 1
    print(content)



#--------------------------------------------
# 6330545121 (18.13) 371 (2021-03-01 02:47)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected
def jaccard(words_1, words_2):
    """Return |distinct common words| / |distinct words| of the two lists.

    Raises ZeroDivisionError when both lists are empty.
    """
    union = []
    for word in words_1 + words_2:
        if word in union:
            continue
        union.append(word)
    # Every common word is in the union exactly once, so filtering the union
    # yields the distinct intersection.
    common = [word for word in union if word in words_1 and word in words_2]
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with coefficient > 0, ordered by
        descending coefficient and, for ties, ascending tweet id.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            ranked.append([tweet_id, coef])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    # Each entry carries a distinct tweet id, so the former quadratic
    # "skip if already present" pass could never drop anything.
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    Words come from ``tweet_content.split()`` and are written one by one,
    each followed by a space; a new two-space-indented line is started when
    the running length reaches ``print_width - 2``.
    """

    print('')
    print('#'+str(tweet_id),'('+ str(round(jc_coef,2))+')')
    # Newt: the words; LenNewt: not used below; RLenNewt: running length of
    # the current line; print_width: reduced by the two-space indent.
    [Newt,LenNewt,RLenNewt,print_width] = [tweet_content.split(),[ ],0,print_width - 2]
    print('  ',end ='')
    for i in Newt:
        # Rebinding Newt does not affect the list the loop iterates over.
        Newt = tweet_content.split()
        RLenNewt = RLenNewt+len(i)
        if RLenNewt < print_width :
            print(i,end =' ')
        else:
            # Word does not fit: start a new line with it.  The counter is
            # reset to 0 and only the separator is added below, so this
            # word's own length is not counted on the new line.
            Newt = Newt[Newt.index(i):]
            print('')
            print('  ',end ='')
            print(i,end =' ')
            RLenNewt = 0

        RLenNewt = RLenNewt+1
    print('')
#--------------------------------------------
# 6330547421 (18.72) 372 (2021-03-01 22:41)

def get_unique( words ):
    """Return ``words`` without duplicates, keeping each item's last occurrence."""
    return [
        word
        for pos, word in enumerate(words)
        if word not in words[pos + 1:]
    ]
def jaccard(words_1, words_2):
    """Return (items of the shorter list found in the longer) / (distinct words).

    When both lists have the same length ``words_1`` is treated as the
    shorter one.  The denominator comes from get_unique on the concatenation.
    """
    if len(words_1) > len(words_2):
        shorter, longer = words_2, words_1
    else:
        shorter, longer = words_1, words_2
    hits = sum(1 for word in shorter if word in longer)
    distinct_total = len(get_unique(words_1 + words_2))
    return hits / distinct_total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with a non-zero coefficient,
        ordered by descending coefficient and, for ties, ascending tweet id.
    """
    negated = []
    for tweet_id, tweet in enumerate(norm_tweets):
        # One jaccard call per tweet; it used to be evaluated up to three
        # times for the same pair.
        coef = jaccard(tweet, norm_query)
        if coef != 0.00:
            negated.append([-coef, tweet_id])
    negated.sort()
    return [[tweet_id, -neg] for neg, tweet_id in negated][:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    ``ss`` collects the words of the current line; every printed line is
    prefixed with two spaces.
    """
    print()
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    tw = tweet_content.split(' ')
    ss = []
    for i in range(len(tw)) :
        ss += [tw[i]]
        s = ' '.join(ss)
        if len(s) == print_width-2 :
            # Exactly fills the line: print it and start an empty one.
            print('  '+s)
            ss = []
        elif len(s) < print_width-2 and i == len(tw)-1 :
            # Last word and the line is not full: print what is collected.
            print('  '+s)
        elif len(s) > print_width-2 :
            if i == len(tw)-1 :
                # Last word overflowed: print the earlier words, then the
                # last word on its own line.
                # NOTE(review): when this inner condition is false nothing
                # is printed for the collected words - confirm intended.
                if (len(tw[i])+(print_width-2)) > len(s) > print_width-2 :
                   s = ' '.join(ss[:-1])
                   print('  '+s)
                   print('  '+tw[-1])
            else :
               # Overflow in the middle: print all but the newest word and
               # carry that word to the next line.
               s = ' '.join(ss[:-1])
               print('  '+s)
               ss = [ss[-1]]

#--------------------------------------------
# 6330548021 (20.00) 373 (2021-02-28 20:27)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    return [word for pos, word in enumerate(words) if word not in words[:pos]]
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct words overall).

    Returns 0 when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    distinct = []
    for word in sorted(words_1 + words_2):
        if word not in distinct:
            distinct.append(word)
    if not distinct:
        return 0
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Query and tweet are both passed through get_unique before scoring.
    Ordered by descending coefficient, ties by ascending tweet id.
    """
    negated = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(get_unique(norm_query), get_unique(tweet))
        negated.append([-coef, idx])
    ranked = [[idx, -neg] for neg, idx in sorted(negated)]
    positive = [pair for pair in ranked if pair[1] > 0]
    return positive[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    Lines start with two spaces and every word is followed by one space.
    """
    print('')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    pieces = tweet_content.split(' ')
    current, width = '  ', 2
    for piece in pieces:
        if width + len(piece) > print_width:
            # No room left for this word: flush and restart at the indent.
            print(current)
            current, width = '  ', 2
        current += piece + ' '
        width += len(piece) + 1
    print(current)

#--------------------------------------------
# 6330549721 (18.50) 374 (2021-03-01 05:34)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items; it is not modified.

    Returns:
        A new sorted list with each value once.

    The first item used to be compared with ``words[-1]`` (index ``0 - 1``),
    so a list whose smallest and largest items were equal, e.g. ``['a']``,
    lost its only value.
    """
    unique_words = []
    for word in sorted(words):
        if not unique_words or word != unique_words[-1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct words overall).

    Args:
        words_1: list of words.
        words_2: list of words.

    Returns:
        The ratio as a float.

    Raises:
        ZeroDivisionError: when both lists are empty.

    The distinct-word scan used to compare the first sorted item with the
    last one (index ``0 - 1``), which emptied the union for inputs such as
    ``['a']`` and ``['a']`` and caused a division by zero.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union = []
    for word in sorted(words_1 + words_2):
        if not union or word != union[-1]:
            union.append(word)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs taken from the ``n`` best-ranked
        tweets, skipping zero coefficients; ordered by descending
        coefficient and, for ties, ascending tweet id.  Asking for more
        results than there are tweets returns what is available.
    """
    negated = [
        [-jaccard(tweet, norm_query), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # Sort once after collecting instead of after every insertion.
    negated.sort()
    # Slicing never runs past the end, unlike indexing with range(n).
    return [[tweet_id, -neg] for neg, tweet_id in negated[:n] if neg != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    ``s`` is the line being built (two-space indent, one space after every
    word) and ``k`` tracks its length.
    """
    print('')
    print("#"+str(tweet_id)+" "+"("+str(round(jc_coef,2))+')')
    s=''
    k=0
    b=tweet_content.split(' ')
    for m in b:
        if k==0:
            # Very first word: start the line without any width check.
            s+='  '+m+' '
            k+=len(s)
        elif 0<len(m)+k<=print_width:
            # Word fits on the current line.
            s+=m+' '
            k+=len(m)+1
        elif len(m)+k>print_width:
            # Word does not fit: emit the line and start a new one with it.
            print(s)
            s='  '+m+' '
            k=len(s)
    print(s)


#--------------------------------------------
# 6330550221 (18.50) 375 (2021-03-01 00:49)

def get_unique( words ):
    """Return ``words`` without duplicates, keeping each item's last occurrence."""
    kept = []
    for pos, word in enumerate(words):
        later = words[pos + 1:]
        if word in later:
            continue
        kept.append(word)
    return kept
def jaccard(words_1, words_2):
    """Return (repeated items) / (distinct items) of the concatenated lists.

    Args:
        words_1: list of words.
        words_2: list of words.

    Returns:
        The ratio as a float, or 0 when both lists are empty.  The counters
        used to be bound only inside the loop, so empty input failed with
        UnboundLocalError.

    An item counts as "repeated" when the same value occurs again later in
    ``words_1 + words_2``; otherwise it counts as "distinct".
    """
    merged = words_1 + words_2
    distinct = 0
    repeated = 0
    for pos, word in enumerate(merged):
        if word in merged[pos + 1:]:
            repeated += 1
        else:
            distinct += 1
    if distinct == 0:
        # Only possible when there are no words at all.
        return 0
    return repeated / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs taken from the ``n`` best-ranked
        tweets, skipping zero coefficients; ordered by descending
        coefficient and, for ties, ascending tweet id.  Asking for more
        results than there are tweets returns what is available.
    """
    # Sort once after collecting instead of after every insertion.
    negated = sorted(
        [-jaccard(tweet, norm_query), idx] for idx, tweet in enumerate(norm_tweets)
    )
    # Slicing never runs past the end, unlike indexing with range(n).
    return [[idx, -neg] for neg, idx in negated[:n] if -neg != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a ``" "`` line, the ``#id (coef)`` header, then the wrapped content.

    Lines start with two spaces; a word is appended while the line plus a
    separator plus the word stays within ``print_width``.
    """
    pieces = tweet_content.split(' ')
    print(' ')
    print('#'+str(tweet_id)+' '+'('+str(round(jc_coef,2))+')')
    # One space now, plus the separator added with the first word, forms the
    # two-space indent of the first line.
    line = ' '
    for piece in pieces:
        if len(line) + len(piece) >= print_width:
            print(line)
            line = '  ' + piece
        else:
            line += ' ' + piece
    print(line)
#--------------------------------------------
# 6330551921 (17.92) 376 (2021-02-26 17:38)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    for element in words:
        if element in distinct:
            continue
        distinct.append(element)
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared) as a float.

    Args:
        words_1: list of words.
        words_2: list of words.

    Returns:
        The unrounded ratio, where ``shared`` counts the items of
        ``words_1`` that occur in ``words_2``.  The value used to be rounded
        to two decimals here, which made different coefficients such as
        0.331 and 0.334 compare equal when ranking; rounding for display is
        done by show_tweet.

    Raises:
        ZeroDivisionError: when both lists are empty.
    """
    same = [word for word in words_1 if word in words_2]
    return float(len(same) / (len(words_1) + len(words_2) - len(same)))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return ``[tweet_id, coefficient]`` pairs from the ``n`` best-ranked tweets.

    Ranking is by descending coefficient, ties by ascending tweet id; pairs
    whose coefficient equals 0 are dropped after the cut to ``n``.
    """
    keyed = [
        [jaccard(tweet, norm_query), -idx] for idx, tweet in enumerate(norm_tweets)
    ]
    best = sorted(keyed, reverse=True)[:n]
    return [[-neg_idx, coef] for coef, neg_idx in best if coef != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its content wrapped at spaces.

    Output is a ``" "`` line, ``#<tweet_id> (<jc_coef rounded to 2 places>)``
    and then the content in lines that start with two spaces and are at
    most ``print_width`` characters wide.

    The last line is printed once after the loop.  It used to be printed
    whenever the current word equalled the last word, so a repeated word
    produced an extra partial line.  A first word wider than the limit no
    longer produces an empty line before it.
    """
    print(' ')
    print('#'+str(tweet_id), '(' +str(round(jc_coef,2))+')'  )
    # The leading ' ' element joined with ' ' yields the two-space indent.
    show = [' ']
    for word in tweet_content.split(' '):
        fits = len(' '.join(show)) + 1 + len(word) <= print_width
        if fits or len(show) == 1:
            show.append(word)
        else:
            print(' '.join(show))
            show = [' ', word]
    print(' '.join(show))

#--------------------------------------------
# 6330552521 (14.57) 377 (2021-03-01 19:00)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    return [word for pos, word in enumerate(words) if word not in words[:pos]]
def jaccard(words_1, words_2):
    """Return (items of words_1 found in words_2) / (distinct words overall).

    The distinct words come from get_unique on the concatenation.
    """
    combined = [*words_1, *words_2]
    distinct = get_unique(combined)
    shared = []
    for word in words_1:
        if word in words_2:
            shared.append(word)
    return len(shared) / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with coefficient > 0, ordered by
        descending coefficient and, for ties, ascending tweet id.

    Each tweet id is paired with its own coefficient.  Ids and coefficients
    used to be collected separately and then combined with two nested
    loops, which paired every id with every coefficient.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header, then the content in fixed-width chunks.

    Output is two blank lines, ``#<tweet_id> (<jc_coef rounded to 2 places>)``
    and then the content cut every ``print_width - 2`` characters, each
    chunk prefixed with two spaces.  Nothing follows the header for empty
    content.

    The printed output is unchanged.  The removed ``if IndexError:`` test
    was always true (a class object is truthy), which made its ``elif`` and
    ``else`` branches unreachable, and list slicing never raises IndexError,
    so the surrounding ``try``/``except`` could not trigger either.

    NOTE(review): chunks are cut by character count, so words can be split
    across lines; the other show_tweet implementations in this file wrap
    at spaces.
    """
    print('\n')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')

    chunk = print_width - 2
    remaining = tweet_content
    # At most one pass per character, as before; stops once all is printed.
    for _ in tweet_content:
        print('  ' + remaining[:chunk])
        remaining = remaining[chunk:]
        if len(remaining) == 0:
            break
#--------------------------------------------
# 6330553121 (17.00) 378 (2021-03-01 21:32)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    cursor = 0
    while cursor < len(words):
        if words[cursor] not in distinct:
            distinct.append(words[cursor])
        cursor += 1
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the items of ``words_1`` present in ``words_2``.
    Raises ZeroDivisionError when both lists are empty.
    """
    shared = [word for word in words_1 if word in words_2]
    denominator = len(words_1) + len(words_2) - len(shared)
    return len(shared) / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` tweets most similar to the query.

    Args:
        norm_tweets: list of word lists; the index is the tweet id.
        norm_query: word list of the query.
        n: maximum number of results.

    Returns:
        ``[tweet_id, coefficient]`` pairs with coefficient > 0, ordered by
        descending coefficient and, for ties, ascending tweet id.  When
        fewer than ``n`` tweets match, all matches are returned.
    """
    negated = []
    for idx, tweet in enumerate(norm_tweets):
        # One jaccard call per tweet; the value is reused for the filter.
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            negated.append([-coef, idx])
    # Slicing tolerates n larger than the number of matches, unlike
    # indexing with range(n).
    return [[idx, -neg] for neg, idx in sorted(negated)[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, the ``#id (coef)`` header, then the wrapped content.

    Lines start with two spaces; a word joins the current line while the
    running total of ``len(word) + 1`` stays within ``print_width - 1``.
    """
    pieces = tweet_content.split(' ')
    print('')
    print('#'+str(tweet_id),'('+str(round(jc_coef,2))+')')
    line_words = []
    consumed = 0
    for piece in pieces:
        cost = len(piece) + 1
        if cost + consumed > print_width - 1:
            # Does not fit: emit the line and start a new one with the word.
            print('  ' + ' '.join(line_words))
            line_words = [piece]
            consumed = cost
        else:
            line_words.append(piece)
            consumed += cost
    print('  ' + ' '.join(line_words))
#--------------------------------------------
# 6330554821 (18.33) 379 (2021-02-26 23:48)

def get_unique( words ):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return |common| / |union|, with ``words_1`` de-duplicated by get_unique."""
    left = get_unique(words_1)
    shared = sum(1 for word in left if word in words_2)
    # The union starts as the de-duplicated first list and gains every word
    # of the second list that is not in it yet.
    union = list(left)
    for word in words_2:
        if word not in union:
            union.append(word)
    return shared / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``int(n)`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    negated = []
    for idx, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            negated.append([-coef, idx])
    ranked = [[idx, -neg] for neg, idx in sorted(negated)]
    return ranked[:int(n)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its content wrapped at spaces.

    Output is a ``" "`` line, ``#<tweet_id> (<jc_coef rounded to 2 places>)``
    and then the content in lines that start with two spaces, with one
    space after every word.

    A word that does not fit even on an empty line is placed on that line
    anyway.  It used to be retried forever, printing indent-only lines
    without advancing.
    """
    print(' ')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    line = '  '
    for word in tweet_content.split(' '):
        # Break only when the line already holds something.
        if len(line) + len(word) > int(print_width) and line != '  ':
            print(line)
            line = '  '
        line += word + ' '
    if line != '  ':
        print(line)

#--------------------------------------------
# 6330555421 (9.33) 380 (2021-03-01 23:59)

def get_unique( words ):
    """Return the distinct items of ``words`` in sorted order.

    Args:
        words: list of mutually comparable items; it is not modified.

    Returns:
        A new sorted list with each value once; ``[]`` for empty input.

    Fixes three defects: the ``return`` sat inside the ``else`` branch, so
    empty input returned None; ``+= words[i+1]`` extended the result with
    the characters of the word instead of the word; and the argument was
    sorted in place.
    """
    unique_words = []
    for word in sorted(words):
        if not unique_words or word != unique_words[-1]:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |common words| / |all distinct words|.

    Args:
        words_1: list of hashable words.
        words_2: list of hashable words.

    Returns:
        A float between 0 and 1, or 0 when both lists are empty.

    Fixes three defects: the ratio was inverted (distinct count divided by
    repeat count); the division happened before the ``try`` block, so the
    ZeroDivisionError handler never ran for lists with nothing in common;
    and ``x[0]`` raised IndexError for two empty lists.
    """
    left, right = set(words_1), set(words_2)
    union = left | right
    if not union:
        return 0
    return len(left & right) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs with coefficient > 0.

    Ordered by descending coefficient, ties by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    keyed = sorted([-coef, tweet_id] for tweet_id, coef in matches)
    return [[tweet_id, -neg] for neg, tweet_id in keyed[:n]]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print the tweet header and its content wrapped at spaces.

    Output is a ``" "`` line, ``#<tweet_id> (<jc_coef rounded to 2 places>)``
    and then the content in lines that start with two spaces and carry at
    most ``print_width - 2`` characters of text.  A single word longer than
    that is printed alone on its own line.

    The earlier version rebound its line string to a list on overflow and
    then compared and concatenated it as a string, which could raise
    TypeError; it also printed an indent-only line after a line that filled
    the width exactly and before an over-long first word.
    """
    print(' ')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    line_words = []
    for word in tweet_content.split(' '):
        line_words.append(word)
        too_wide = len(' '.join(line_words)) > print_width - 2
        # Flush only when something precedes the new word.
        if too_wide and len(line_words) > 1:
            print('  ' + ' '.join(line_words[:-1]))
            line_words = [word]
    print('  ' + ' '.join(line_words))





#--------------------------------------------
# 6330556021 (20.00) 381 (2021-02-27 06:22)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return (#distinct shared words) / (#distinct words overall).

    When both lists are empty a very large integer sentinel is returned
    instead of dividing by zero.
    """
    union = []
    for word in words_1 + words_2:
        if word not in union:
            union.append(word)

    # Scan the shorter list (the second one on a length tie).
    if len(words_1) < len(words_2):
        shorter, longer = words_1, words_2
    else:
        shorter, longer = words_2, words_1

    common = []
    for word in shorter:
        if word in longer and word not in common:
            common.append(word)

    if not union:
        return 99999999999999999999999999999999999999999991112223330901051800
    return len(common) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets with a non-positive ``jaccard`` coefficient are skipped.  Pairs
    are ordered by coefficient descending; equal coefficients are ordered
    by tweet index ascending.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces).  Each printed line
    starts with two spaces and every word is followed by one space, as in
    the original layout.  The last line is always printed, including when
    it exactly fills the width (it was silently dropped before).
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = "  "
    for word in tweet_content.split(" "):
        # Wrap only when the line already holds something, so an over-long
        # first word does not produce an empty indented line.
        if len(line) > 2 and len(line) + len(word) > print_width:
            print(line)
            line = "  "
        line += word + " "
    print(line)

#--------------------------------------------
# 6330557721 (11.48) 382 (2021-03-01 21:31)

def get_unique(words):
    """Return the distinct items of ``words``, keeping first occurrences."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    Words are compared as sets, so repeated words no longer inflate the
    numerator (the pairwise count could exceed the union size).  Returns
    0.0 when both lists are empty.
    """
    first = set(words_1)
    second = set(words_2)
    union_size = len(first | second)
    if union_size == 0:
        return 0.0
    return len(first & second) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Each tweet is scored with ``jaccard`` against ``norm_query``.  Tweets
    scoring 0 are left out.  Pairs are ordered by coefficient descending
    and, for equal coefficients, by tweet index ascending.  An empty
    ``norm_tweets`` yields an empty list.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.  A word too long to fit gets a line of its own.
    """
    words = tweet_content.split(' ')
    print()
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    line = "  " + words[0]
    for word in words[1:]:
        # Measure characters (indent included), not the number of words.
        if len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)
#--------------------------------------------
# 6330558321 (18.33) 383 (2021-02-28 21:13)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order.

    Membership is decided by the module-level ``contains`` helper.
    """
    result = []
    for candidate in words:
        if contains(result, candidate):
            continue
        result.extend([candidate])
    return result
def jaccard(words_1, words_2):
    """Return |A ∩ B| / |A ∪ B| for the de-duplicated word lists.

    Both inputs are first reduced with ``get_unique``; the union size is
    obtained as |A| + |B| - |A ∩ B|.  Raises ``ZeroDivisionError`` when
    both lists are empty.
    """
    unique_1 = get_unique(words_1)
    unique_2 = get_unique(words_2)
    shared = [word for word in unique_2 if contains(unique_1, word)]
    return len(shared) / (len(unique_1) + len(unique_2) - len(shared))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets whose ``jaccard`` coefficient against ``norm_query`` is not
    positive are dropped.  Ordering is by coefficient descending, then by
    tweet index ascending.  Each tweet is scored exactly once.
    """
    top_n = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)  # computed once, reused below
        if coef > 0:
            top_n.append([tweet_id, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.  A word longer than the available width is printed on
    its own line, so the function always terminates.
    """
    print(f'\n#{tweet_id} ({round(jc_coef, 2)})')
    line = ''
    for word in tweet_content.split(' '):
        if not line:
            line = f'  {word}'
        elif len(line) + 1 + len(word) <= print_width:
            line = f'{line} {word}'
        else:
            print(line)
            line = f'  {word}'
    print(line)

#--------------------------------------------
def contains(l, w):
    """Return True when some element of ``l`` compares equal to ``w``."""
    return any(item == w for item in l)
# 6330559021 (15.72) 384 (2021-02-28 00:23)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending order.

    Works on a sorted copy, so the caller's list is neither reordered nor
    left with a sentinel element appended to it.
    """
    unique_words = []
    for word in sorted(words):
        # Duplicates are adjacent after sorting.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` is the number of equal adjacent pairs in the sorted
    concatenation of both lists.  Raises ``ZeroDivisionError`` when both
    lists are empty.
    """
    merged = sorted(words_1 + words_2)
    shared = [left for left, right in zip(merged, merged[1:]) if left == right]
    return len(shared) / (len(words_1) + len(words_2) - len(shared))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Each tweet is scored with ``jaccard`` against ``norm_query``; tweets
    that score 0 share nothing with the query and are excluded.  Ordering
    is by coefficient descending, then by tweet index ascending.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: a line of spaces, ``#<id> (<coef rounded to 2>)``, then the
    words of ``tweet_content`` (split on single spaces) packed greedily
    into lines that start with two spaces and are at most ``print_width``
    characters long.  Over-long words are printed once, on their own line.
    """
    print("                 ")
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")
    words = tweet_content.split(" ")
    line = "  " + words[0]
    for word in words[1:]:
        # The running length is taken from the line itself, so it cannot
        # drift out of sync after a wrap.
        if len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    print(line)














#--------------------------------------------
# 6330560521 (20.00) 385 (2021-03-01 01:04)

def get_unique(words):
    """Sort ``words`` in place and return its distinct items in order.

    Note that the caller's list is reordered as a side effect.
    """
    words.sort()
    # An item is kept when it starts a new run of equal values.
    return [
        word
        for position, word in enumerate(words)
        if position == 0 or words[position - 1] != word
    ]
def jaccard(words_1, words_2):
    """Return shared / union for two word lists, or 0 if both are empty.

    The shorter list (the second one on a length tie) is scanned against
    the other: its words missing from the longer list plus the whole
    longer list form the union, and the remaining words are the shared
    ones.
    """
    if len(words_1) < len(words_2):
        shorter, longer = words_1, words_2
    else:
        shorter, longer = words_2, words_1

    only_in_shorter = [word for word in shorter if word not in longer]
    shared = len(shorter) - len(only_in_shorter)
    union_size = len(only_in_shorter) + len(longer)
    if union_size == 0:
        return 0
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a positive ``jaccard`` coefficient are kept.  The
    sort is stable on the coefficient alone, so tweets with equal
    coefficients stay in ascending index order.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: -pair[1])
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) in lines prefixed with
    two spaces.  A word is moved to a new line when the current text plus
    that word would reach ``print_width - 2`` characters.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    usable = print_width - 2
    pending = []
    current = ' '  # starts as one space, before any word is placed
    for word in tweet_content.split(' '):
        if len(current) + len(word) >= usable:
            print('  ' + current)
            pending = [word]
        else:
            pending.append(word)
        current = ' '.join(pending)
    print('  ' + ' '.join(pending))

#--------------------------------------------
# 6330561121 (17.03) 386 (2021-02-25 22:21)

def get_unique(words):
    """Return the distinct items of ``words`` without modifying it.

    Each word appears at the position of its *last* occurrence, which is
    the order the previous pop-based version produced; unlike that
    version, the caller's list is not emptied.
    """
    return [
        word
        for position, word in enumerate(words)
        if word not in words[position + 1:]
    ]
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_1`` that occur in
    ``words_2``.  Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_1) + len(words_2) - shared
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    The coefficient of a tweet is shared / (len(tweet) + len(query) -
    shared), where ``shared`` counts tweet words found in ``norm_query``.
    Tweets sharing nothing with the query are left out, which also avoids
    dividing by zero for an empty tweet and an empty query.  Ordering is
    by coefficient descending, then by tweet index ascending.
    """
    ranked = []
    for tweet_id, tweet in enumerate(norm_tweets):
        shared = sum(1 for word in tweet if word in norm_query)
        if shared == 0:
            continue
        union_size = len(tweet) + len(norm_query) - shared
        ranked.append([tweet_id, shared / union_size])
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.  Words are always emitted in their original order; a
    word too long to fit gets a line of its own.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split(' ')
    line = '  ' + words[0]
    for word in words[1:]:
        if len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            # Flush pending words before starting the next line so a long
            # word can never jump ahead of them.
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330562821 (16.30) 387 (2021-03-01 00:51)

def get_unique(words):
    """Return the distinct items of ``words``, keeping first occurrences."""
    # A word is kept when it does not appear anywhere earlier in the list.
    return [
        word
        for position, word in enumerate(words)
        if word not in words[:position]
    ]
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_1`` present in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2)
    return shared / (total - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query`` and ordered
    by coefficient descending, then tweet index ascending.  Entries with a
    zero coefficient are omitted.  ``n`` may exceed the number of tweets;
    the result is then simply shorter (this used to raise ``IndexError``).
    """
    ranked = sorted(
        [-jaccard(tweet, norm_query), tweet_id]
        for tweet_id, tweet in enumerate(norm_tweets)
    )
    # Slice first, then drop zeros, matching the original selection rule.
    return [
        [tweet_id, -neg_coef]
        for neg_coef, tweet_id in ranked[:max(n, 0)]
        if neg_coef != 0
    ]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces.  A line may be exactly
    ``print_width`` characters long; the earlier comparison wrapped one
    character too early.
    """
    words = tweet_content.split(' ')
    print()
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    show = '  ' + words[0]
    for word in words[1:]:
        if len(show) + 1 + len(word) <= print_width:
            show += ' ' + word
        else:
            print(show)
            show = '  ' + word
    print(show)

#--------------------------------------------
# 6330563421 (20.00) 388 (2021-02-28 01:17)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    kept = []
    for item in words:
        already_there = item in kept
        if not already_there:
            kept += [item]
    return kept
def jaccard(words_1, words_2):
    """Return shared / (len(words_1 + words_2) - shared).

    ``shared`` counts the entries of ``words_1`` that occur in
    ``words_2``.  Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = 0
    for word in words_1:
        if word in words_2:
            shared += 1
    combined_length = len(words_1) + len(words_2)
    return shared / (combined_length - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a positive ``jaccard`` coefficient against
    ``norm_query`` are kept.  Ordering is by coefficient descending, then
    tweet index ascending.  Each tweet is scored once rather than twice.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):

    print('')
    print('#'+str(tweet_id)+' ('+str(round(jc_coef,2))+')')
    print_tweet=tweet_content.split(' ')
    print_output='  '
    letter_count=0
    space_count=0
    for i in range(len(print_tweet)):
        if letter_count+space_count+len(print_tweet[i])<=print_width-2:
            print_output+=print_tweet[i]
            letter_count+=len(print_tweet[i])
            if letter_count+space_count<=print_width-2:
                print_output+=' '
                space_count+=1
        else:
            print(print_output)
            print_output='  '+print_tweet[i]
            letter_count=len(print_tweet[i])
            space_count=0
            if letter_count+space_count<=print_width-2:
                print_output+=' '
                space_count+=1
    print(print_output)

#--------------------------------------------
# 6330565721 (18.01) 389 (2021-03-01 00:49)

def get_unique(words):
    """Return the distinct (hashable) items of ``words`` in first-seen order."""
    # Dict keys are unique and keep insertion order.
    ordered_keys = {word: None for word in words}
    return list(ordered_keys)
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    Returns 0 when both lists are empty; previously the result variable
    was only assigned inside the loop, so that case raised
    ``UnboundLocalError``.
    """
    first = set(words_1)
    second = set(words_2)
    unique_words = first | second
    if not unique_words:
        return 0
    return len(first & second) / len(unique_words)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query``.  Tweets
    scoring 0 are excluded.  Ordering is by coefficient descending, then
    tweet index ascending.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.
    """
    print()
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    first, *rest = tweet_content.split(' ')
    line = '  ' + first
    for word in rest:
        extended = line + ' ' + word
        if len(extended) > print_width:
            print(line)
            line = '  ' + word
        else:
            line = extended
    print(line)
#--------------------------------------------
# 6330566321 (20.00) 390 (2021-02-28 17:15)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending order.

    The input list is not modified.  (The earlier version removed
    duplicates from the caller's list while iterating over it.)
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return |A ∩ B| / |A ∪ B| using ``get_unique`` for de-duplication.

    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    unique_1 = get_unique(words_1)
    unique_2 = get_unique(words_2)
    union = get_unique(unique_1 + unique_2)
    shared = [word for word in unique_1 if word in unique_2]
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    All tweets are scored with ``jaccard`` and ordered by coefficient
    descending, then tweet index ascending.  The first ``n`` are taken and
    any of those with a coefficient of 0 are then discarded.
    """
    # Negated indices make a single reverse sort break ties by lowest id.
    ranked = sorted(
        ([jaccard(tweet, norm_query), -index]
         for index, tweet in enumerate(norm_tweets)),
        reverse=True,
    )
    return [
        [-neg_index, coef]
        for coef, neg_index in ranked[:n]
        if coef != 0
    ]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')

    pieces = tweet_content.split(' ')
    current = '  ' + pieces[0]  # the first word is always placed
    for piece in pieces[1:]:
        if len(current) + len(piece) + 1 > print_width:
            print(current)
            current = '  ' + piece
        else:
            current += ' ' + piece
    print(current)
#--------------------------------------------
# 6330567021 (14.68) 391 (2021-02-28 18:03)

def get_unique(words):
    """Return the distinct items of ``words`` in descending order.

    No placeholder value is used, so words that sort below ``'0'`` (or the
    word ``'0'`` itself) are no longer lost, and the caller's list is left
    unchanged.
    """
    unique_words = []
    for word in sorted(words, reverse=True):
        # Equal items are adjacent after sorting.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient of two word lists, rounded to 2 places.

    ``shared`` counts equal (word_1, word_2) pairs and the union size is
    len(words_1) + len(words_2) - shared.  Returns 0 when that size is 0
    (both lists empty); a misspelled variable previously left the result
    unassigned in that case.
    """
    shared = sum(1 for first in words_1 for second in words_2 if first == second)
    union_size = len(words_1) + len(words_2) - shared
    if union_size == 0:
        return 0
    return round(shared / union_size, 2)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a positive ``jaccard`` coefficient are kept; the
    coefficient is stored as a float.  Ordering is by coefficient
    descending, then tweet index ascending.  Each tweet is scored once.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = float(jaccard(tweet, norm_query))
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the
    whitespace-separated words of ``tweet_content`` packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.  A word is moved to the next line *before* the limit
    is exceeded; a single word too long to fit gets its own line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split():
        if not line:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    if line:
        print(line)

#--------------------------------------------
# 6330568621 (11.50) 392 (2021-02-27 22:27)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending order.

    Operates on a sorted copy: the caller's list keeps its order and no
    ``'?'`` sentinel is appended to it.
    """
    unique_words = []
    for word in sorted(words):
        # Equal items are adjacent after sorting.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return the Jaccard coefficient |A ∩ B| / |A ∪ B| of two word lists.

    The inputs are not sorted or otherwise modified.  The exact ratio is
    returned (no rounding), so near-equal scores remain distinguishable
    when ranking.  Returns 0 when both lists are empty.
    """
    first = set(words_1)
    second = set(words_2)
    union_size = len(first | second)
    if union_size == 0:
        return 0
    return len(first & second) / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query``; tweets
    scoring 0 are excluded.  Ordering is by coefficient descending and,
    for equal coefficients, by tweet index ascending.  ``n`` may exceed
    the number of matching tweets.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: a line holding one space, ``#<id> (<coef rounded to 2>)``,
    then the whitespace-separated words of ``tweet_content`` packed
    greedily into lines that start with two spaces and are at most
    ``print_width`` characters long.  Every word is printed, including
    words that exactly fill a line and the final line.
    """
    print(' ')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split():
        if not line:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    if line:
        print(line)

#--------------------------------------------
# 6330570821 (20.00) 393 (2021-02-28 20:22)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    collected = []
    for item in words:
        if item in collected:
            continue
        collected.append(item)
    return collected

#--------------------------------------------------------#
def jaccard(words_1, words_2):
    """Return shared / (#distinct words in both lists combined).

    The denominator comes from ``get_unique``; ``shared`` counts the
    entries of ``words_2`` found in ``words_1``.  Raises
    ``ZeroDivisionError`` when both lists are empty.
    """
    union_size = len(get_unique(words_1 + words_2))
    shared = sum(1 for word in words_2 if word in words_1)
    return shared / union_size

#--------------------------------------------------------#
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets with a non-positive ``jaccard`` coefficient are dropped.  The
    rest are ordered by coefficient descending, with equal coefficients in
    ascending tweet-index order.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]

#--------------------------------------------------------#
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: two newlines, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces).  Each line starts with
    two spaces and every word is followed by one space, as before.  The
    final line is printed exactly once after the loop, so a tweet whose
    last word also appears earlier no longer produces premature lines.
    """
    print('\n')
    print("#" + str(tweet_id) + " " + "(" + str(round(jc_coef, 2)) + ")")

    line = '  '
    for word in tweet_content.split(' '):
        if len(line + word) > print_width:
            print(line)
            line = '  '
        line += word + ' '
    print(line)


#--------------------------------------------
# 6330571421 (19.10) 394 (2021-02-28 11:59)

def get_unique(words):
    """Return the distinct items of ``words``, keeping first occurrences."""
    return [
        word
        for position, word in enumerate(words)
        if word not in words[:position]
    ]
def jaccard(words_1, words_2):
    """Return shared / (#distinct words across both lists).

    ``shared`` counts the entries of ``words_1`` present in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a positive ``jaccard`` coefficient are kept; they are
    ordered by coefficient descending, then tweet index ascending.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            matches.append([tweet_id, coef])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.  A word too long to fit gets its own line, and the
    words before it are emitted only once.
    """
    header = '#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')'
    lines = []
    current = ''
    for word in tweet_content.split(' '):
        if not current:
            current = '  ' + word
        elif len(current) + 1 + len(word) <= print_width:
            current += ' ' + word
        else:
            # Flush and start over, so no text is carried into later lines.
            lines.append(current)
            current = '  ' + word
    lines.append(current)
    print('')
    print(header)
    for line in lines:
        print(line)
#--------------------------------------------
# 6330572021 (17.00) 395 (2021-03-01 09:59)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    result = []
    for word in words:
        is_new = word not in result
        if is_new:
            result += [word]
    return result
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + #entries of words_2 not in words_1).

    ``shared`` counts the entries of ``words_1`` present in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = [word for word in words_1 if word in words_2]
    only_second = [word for word in words_2 if word not in words_1]
    # Every entry of words_1 is either shared or exclusive to it.
    total = len(words_1) + len(only_second)
    return len(shared) / total
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query``; tweets
    scoring 0 are excluded.  Ordering is by coefficient descending, then
    tweet index ascending.  Asking for more entries than exist returns
    what is available instead of raising ``IndexError``.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the words
    of ``tweet_content`` (split on single spaces) in lines starting with
    two spaces.  Every word except the very last one is followed by a
    space, so all lines but the final one end with a trailing space.
    """
    *leading, last = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = '  '
    for word in leading:
        if len(line) + len(word) > print_width:
            print(line)
            line = '  '
        line += word + ' '
    # The last word is handled apart so it carries no trailing space.
    if len(line) + len(last) > print_width:
        print(line)
        line = '  '
    print(line + last)

#--------------------------------------------
# 6330573721 (17.01) 396 (2021-02-26 18:28)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    seen = []
    for word in words:
        if word in seen:
            continue
        seen.append(word)
    return seen
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_1`` that equal at least one
    entry of ``words_2``.  Raises ``ZeroDivisionError`` when both lists
    are empty.
    """
    shared = sum(
        1 for word in words_1 if any(word == other for other in words_2)
    )
    return shared / (len(words_1) + len(words_2) - shared)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query``; tweets
    scoring 0 are excluded.  Ordering is by coefficient descending, then
    tweet index ascending.
    """
    top_n = []
    for index, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            top_n.append([index, coef])
    top_n.sort(key=lambda pair: (-pair[1], pair[0]))
    return top_n[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the
    whitespace-separated words of ``tweet_content`` packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long.  Each line's length is measured on its own rather
    than estimated from the total text emitted so far.
    """
    print()
    print("#{} ({})".format(tweet_id, round(jc_coef, 2)))
    line = ''
    for word in tweet_content.split():
        if not line:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    if line:
        print(line)



#--------------------------------------------
# 6330574321 (19.78) 397 (2021-03-01 19:04)

def get_unique(words):
    """Sort ``words`` in place and return its distinct items in order.

    Note that the caller's list is reordered as a side effect.
    """
    words.sort()
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return shared / (#distinct words across both lists).

    The denominator comes from ``get_unique`` applied to the concatenated
    lists; ``shared`` counts the entries of ``words_1`` present in
    ``words_2``.  Raises ``ZeroDivisionError`` when both lists are empty.
    """
    distinct = get_unique(words_1 + words_2)
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query``; tweets
    scoring 0 are excluded.  Ordering is by coefficient descending, then
    tweet index ascending.

    All candidates are ranked before truncating.  The earlier incremental
    scheme replaced ``top_n[0]`` before the list had been sorted, so it
    could evict a strong match and keep a weaker one.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    a1=tweet_id
    a2=tweet_content
    a3=jc_coef
    a4=print_width
    b1="#"+str(a1)
    b2="("+str(round(a3,2))+")"
    print(" ")
    print(b1,b2)
    c1=a2.split(" ")
    c2=" "+c1[0]
    for e in c1[1:]:
        if len(c2)+len(" "+e)<=a4:
            c2+=" "+e
        else:
            print(c2)
            c2="  "+e
    print(c2)


# --------------------------------------------
# 6330576621 (20.00) 398 (2021-02-28 18:17)

def get_unique(words):
    """Sort ``words`` in place and return its distinct items in order.

    Note that the caller's list is reordered as a side effect.
    """
    words.sort()
    # Keep an item when it differs from its predecessor in sorted order.
    return [
        word
        for position, word in enumerate(words)
        if position == 0 or word != words[position - 1]
    ]
def jaccard(words_1, words_2):
    """Return (#distinct shared words) / (size of the combined list).

    The combined list is every entry of ``words_1`` followed by the
    entries of ``words_2`` not already present.  Raises
    ``ZeroDivisionError`` when both lists are empty.
    """
    combined = list(words_1)
    shared = []
    for word in words_1:
        if word in words_2 and word not in shared:
            shared.append(word)
    for word in words_2:
        if word not in combined:
            combined.append(word)
    return len(shared) / len(combined)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Only tweets with a positive ``jaccard`` coefficient against
    ``norm_query`` are kept.  Ordering is by coefficient descending, then
    tweet index ascending.  ``n == 0`` yields an empty list (it used to
    raise ``IndexError``), and the candidates are sorted once instead of
    on every iteration.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max(n, 0)]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output (emitted with a single ``print``): an empty line,
    ``#<id> (<coef rounded to 2>)``, then the words of ``tweet_content``
    (split on single spaces) packed greedily into lines that start with
    two spaces and are at most ``print_width`` characters long.
    """
    header = '#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')'
    first, *rest = tweet_content.split(' ')
    lines = ['', header]
    current = '  ' + first
    for word in rest:
        if len(current) + len(word) + 1 > print_width:
            lines.append(current)
            current = '  ' + word
        else:
            current += ' ' + word
    lines.append(current)
    print('\n'.join(lines))




#--------------------------------------------
# 6330577221 (18.01) 399 (2021-02-28 07:09)

def get_unique(words):
    """Return the distinct items of ``words``, keeping first occurrences."""
    return [
        word
        for position, word in enumerate(words)
        if word not in words[:position]
    ]
def jaccard(words_1, words_2):
    """Return shared / (#distinct words across both lists).

    ``shared`` counts the entries of ``words_1`` present in ``words_2``;
    the denominator is obtained by de-duplicating each list and then their
    concatenation with ``get_unique``.  Raises ``ZeroDivisionError`` when
    both lists are empty.
    """
    shared = sum(1 for word in words_1 if word in words_2)
    distinct_1 = get_unique(words_1)
    distinct_2 = get_unique(words_2)
    distinct_all = get_unique(distinct_1 + distinct_2)
    return shared / len(distinct_all)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Each tweet is scored with ``jaccard(norm_query, tweet)``; tweets
    scoring 0 are excluded.  Ordering is by coefficient descending, then
    tweet index ascending.  An empty ``norm_tweets`` yields an empty list,
    and the candidates are sorted once after the scoring loop.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(norm_query, tweet)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: ``#<id> (<coef rounded to 2>)`` (no blank line before it),
    then the words of ``tweet_content`` (split on single spaces) packed
    greedily into lines that start with two spaces and are at most
    ``print_width`` characters long.
    """
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        # Flush first when the word does not fit on a non-empty line.
        if line and len(line) + len(word) + 1 > print_width:
            print(line)
            line = ''
        if line:
            line += ' ' + word
        else:
            line = '  ' + word
    if line != '':
        print(line)
#--------------------------------------------
# 6330578921 (15.18) 400 (2021-02-26 16:06)

def get_unique(words):
    """Return the distinct items of ``words`` in first-seen order."""
    unique_words = []
    for word in words:
        if word not in unique_words:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (#distinct words across both lists) as a float.

    ``shared`` counts the entries of ``words_2`` present in ``words_1``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    distinct = []
    for word in words_1 + words_2:
        if word not in distinct:
            distinct.append(word)
    return float(shared / len(distinct))
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, coefficient]`` pairs, best first.

    Tweets are scored with ``jaccard`` against ``norm_query``; tweets
    scoring 0 are excluded.  Ordering is by coefficient descending, then
    tweet index ascending.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        coef = jaccard(tweet, norm_query)
        if coef > 0:
            scored.append([tweet_id, coef])
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a tweet header and its text wrapped to ``print_width``.

    Output: an empty line, ``#<id> (<coef rounded to 2>)``, then the
    whitespace-separated words of ``tweet_content`` packed greedily into
    lines that start with two spaces and are at most ``print_width``
    characters long (the old limit allowed two extra characters).  The
    final line is printed once, after the loop, regardless of whether the
    last word also occurs earlier in the tweet.
    """
    rounded_jc_coef = round(jc_coef, 2)

    print()
    print("#" + str(tweet_id), "(" + str(rounded_jc_coef) + ")")

    line = ""
    for word in tweet_content.split():
        if not line:
            line = "  " + word
        elif len(line) + 1 + len(word) <= print_width:
            line += " " + word
        else:
            print(line)
            line = "  " + word
    if line:
        print(line)

    return

#--------------------------------------------
# 6330579521 (18.01) 401 (2021-03-01 00:53)

def get_unique(words):
    """Return the items of ``words`` with later duplicates removed.

    An item is kept only the first time it is seen, so the result
    preserves first-occurrence order.
    """
    first_seen = []
    for candidate in words:
        if candidate not in first_seen:
            first_seen.append(candidate)
    return first_seen
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + len(words_2) - shared).

    ``shared`` counts the entries of ``words_2`` found in ``words_1``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    union_size = len(words_1) + len(words_2) - shared
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, score]`` pairs, highest first.

    Each tweet is scored with ``jaccard(tweet, norm_query)``; zero scores
    are kept.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    # The sort is stable, so equal scores stay in ascending tweet-id order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Each output line starts with
    two spaces, words are joined by one space, and a word moves to a new
    line when adding it would exceed ``print_width`` characters.
    """
    print('')
    print('#' + str(tweet_id) + ' ' + '(' + str(round(jc_coef, 2)) + ')')
    pieces = tweet_content.split(' ')
    current = '  ' + pieces[0]
    for piece in pieces[1:]:
        addition = ' ' + piece
        if len(addition) <= print_width - len(current):
            current += addition
        else:
            print(current)
            current = '  ' + piece
    print(current)
#--------------------------------------------
# 6330580021 (15.45) 402 (2021-02-27 15:43)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending sorted order.

    The input list is left untouched: sorting is done on a copy, so the
    caller keeps its original word order.  An empty input yields ``[]``.
    """
    unique_words = []
    for word in sorted(words):
        # After sorting, duplicates are adjacent: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (words of ``words_1`` found in ``words_2``) / (distinct words).

    Distinct words are counted by sorting the concatenated lists and
    counting value changes.  Raises ``IndexError`` when both lists are
    empty.
    """
    merged = sorted(words_1 + words_2)
    previous = merged[0]
    distinct = 1
    for word in merged[1:]:
        if word != previous:
            distinct += 1
            previous = word
    shared = sum(1 for word in words_1 if word in words_2)
    return shared / distinct
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Each tweet is scored with ``jaccard(tweet, norm_query)``.  Results are
    ordered by descending score; tweets with equal scores are listed by
    ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    # Sort on (-score, id): reverse-sorting [score, id] pairs would also
    # reverse the ids and put the larger tweet id first among ties.
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Every output line starts with
    two spaces and holds as many space-joined words as fit in
    ``print_width`` characters (indent included).  A word longer than the
    available width is printed alone on its own line.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    indent = '  '
    line_words = []
    for word in tweet_content.split(" "):
        candidate = indent + ' '.join(line_words + [word])
        if line_words and len(candidate) > print_width:
            print(indent + ' '.join(line_words))
            # Measure the real candidate line on every step: resetting a
            # running length to 0 here would let the new line overflow.
            line_words = [word]
        else:
            line_words.append(word)
    print(indent + ' '.join(line_words))

#--------------------------------------------
# 6330583021 (18.50) 403 (2021-02-27 21:57)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending sorted order.

    Works on a sorted copy, so the caller's list is neither reordered nor
    extended.  No sentinel value is used, so any item (including ``' '``)
    can appear in the result.
    """
    unique_words = []
    for word in sorted(words):
        # Duplicates are adjacent after sorting: compare with the last kept.
        if not unique_words or unique_words[-1] != word:
            unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return repeated / distinct over the sorted, concatenated word lists.

    The merged list is sorted and terminated with an empty-string sentinel;
    each adjacent equal pair counts as a repeat and each adjacent unequal
    pair as a distinct word.  Raises ``ZeroDivisionError`` when no distinct
    word is counted (for example when both lists are empty).
    """
    merged = sorted(words_1 + words_2)
    merged.append('')
    repeated = 0
    distinct = 0
    for current, following in zip(merged, merged[1:]):
        if current == following:
            repeated += 1
        else:
            distinct += 1
    return float(repeated) / float(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs, best score first.

    Each tweet is scored with ``jaccard(tweet, norm_query)``.  Pairs are
    ordered by descending score, ties by ascending tweet id.  If the best
    score is 0 (or nothing is selected) an empty list is returned;
    otherwise zero-score tweets may still fill the tail of the result.

    ``n`` is truncated with ``int``.  A value of 0, a negative value, or a
    value larger than the number of tweets is handled without raising.
    """
    limit = max(int(n), 0)
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    top_n = scored[:limit]
    if not top_n or top_n[0][1] == 0:
        return []
    return top_n
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Each line starts with two
    spaces and every word is followed by one space.  A word starts a new
    line when the current line plus the word would exceed ``print_width``.
    """
    pieces = tweet_content.split(' ')
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = '  '
    for piece in pieces:
        if len(line) + len(piece) > print_width:
            print(line)
            line = '  '
        line += piece + ' '
    print(line)


#--------------------------------------------
# 6330585221 (20.00) 404 (2021-03-01 00:42)

def get_unique(words):
    """Return the distinct items of ``words`` in ascending sorted order.

    The input is not modified; a sorted copy is scanned and the last item
    of every run of equal values is kept.
    """
    ordered = sorted(words)
    if len(ordered) <= 1:
        return ordered
    unique_words = [
        current
        for current, following in zip(ordered, ordered[1:])
        if current != following
    ]
    unique_words.append(ordered[-1])
    return unique_words
def jaccard(words_1, words_2):
    """Return shared / (len(words_1) + words only in ``words_2``).

    ``shared`` counts the entries of ``words_2`` that occur in ``words_1``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    shared = sum(1 for word in words_2 if word in words_1)
    only_in_second = len(words_2) - shared
    return shared / (len(words_1) + only_in_second)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Each line starts with two
    spaces and every word is followed by one space.  A word starts a new
    line when indent + current line + word would exceed ``print_width``.
    """
    print('\n' + '#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ''
    for word in tweet_content.split(' '):
        # Two cases only: the word fits or it wraps.  For integer widths a
        # third "almost fits" case can never be reached.
        if 2 + len(line) + len(word) > print_width:
            print('  ' + line)
            line = ''
        line += word + ' '
    print('  ' + line)



#--------------------------------------------
# 6330586921 (18.01) 405 (2021-02-26 22:49)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    for word in words:
        if word in distinct:
            continue
        distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return |intersection| / |union| of the two de-duplicated word lists.

    Both inputs are passed through ``get_unique`` first.  Raises
    ``ZeroDivisionError`` when both lists are empty.
    """
    first = get_unique(words_1)
    second = get_unique(words_2)
    shared = 0
    union_size = len(first)
    for word in second:
        if word in first:
            shared += 1
        else:
            # A word only in the second list enlarges the union.
            union_size += 1
    return shared / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, score]`` pairs, highest first.

    Every tweet is scored with ``jaccard(tweet, norm_query)``; zero scores
    are kept.  Equal scores stay in ascending tweet-id order because the
    sort is stable.
    """
    ranked = [
        [position, jaccard(tweet, norm_query)]
        for position, tweet in enumerate(norm_tweets)
    ]
    ranked = sorted(ranked, key=lambda pair: -pair[1])
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Each line starts with two
    spaces and every word is followed by one space.  A word starts a new
    line when the current line plus the word would exceed ``print_width``.
    """
    print()
    print("#" + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    current = "  "
    for word in tweet_content.split(' '):
        if len(current) + len(word) > print_width:
            print(current)
            current = "  "
        current += word + ' '
    if len(current) > 2:
        print(current)
#--------------------------------------------
# 6330587521 (20.00) 406 (2021-02-28 22:43)

def get_unique(words):
    """Return the distinct items of ``words``, keeping each last occurrence.

    An item is kept only if it does not appear again later in ``words``,
    so the result follows the order of the final occurrences.
    """
    return [
        item
        for position, item in enumerate(words)
        if item not in words[position + 1:]
    ]
def jaccard(words_1, words_2):
    """Return overlap / (len(words_1) + len(words_2) - overlap).

    ``overlap`` counts the entries of ``words_1`` found in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    overlap = len([word for word in words_1 if word in words_2])
    return overlap / (len(words_1) + len(words_2) - overlap)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a non-zero score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    candidates = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            candidates.append([tweet_id, score])
    candidates.sort(key=lambda pair: (-pair[1], pair[0]))
    return candidates[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Lines start with two spaces,
    words are joined by one space, and a word is moved to a new line when
    the extended line would be longer than ``print_width``.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    # Starting from a single space makes the first ' ' + word join produce
    # the two-space indent.
    line = ' '
    for word in tweet_content.split(' '):
        extended = line + ' ' + word
        if len(extended) <= print_width:
            line = extended
        else:
            print(line)
            line = '  ' + word
    print(line)


#--------------------------------------------
# 6330588121 (18.44) 407 (2021-03-01 17:32)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    result = []
    for entry in words:
        already_kept = entry in result
        if not already_kept:
            result.append(entry)
    return result
def jaccard(words_1, words_2):
    """Return common / (len(words_1) + len(words_2) - common).

    ``common`` counts the entries of ``words_1`` found in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    common = sum(1 for word in words_1 if word in words_2)
    denominator = len(words_1) + len(words_2) - common
    return common / denominator
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the non-zero entries among the ``n`` best-scoring tweets.

    Each tweet is scored with ``jaccard(tweet, norm_query)`` and ranked by
    descending score, ties by ascending tweet id.  The first ``n`` ranked
    tweets are inspected and those with a non-zero score are returned as
    ``[tweet_id, score]`` pairs.

    ``n`` may exceed the number of tweets; the ranking is sliced rather
    than indexed, so no ``IndexError`` is raised.
    """
    ranked = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    ranked.sort(key=lambda pair: (-pair[1], pair[0]))
    # max() keeps a negative n from slicing from the end of the list.
    return [pair for pair in ranked[:max(n, 0)] if pair[1] != 0]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on whitespace.  Lines start with two spaces,
    words are joined by one space, and a word is moved to a new line when
    the extended line would be longer than ``print_width``.
    """
    print('')
    print('#' + str(tweet_id), '(' + str(round(jc_coef, 2)) + ')')
    line = ' '
    for word in tweet_content.split():
        starting = len(line) == 1  # nothing but the seed space so far
        if starting or len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)
#--------------------------------------------
# 6330589821 (19.95) 408 (2021-02-28 23:06)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    collected = []
    for word in words:
        if word in collected:
            continue
        collected.append(word)
    return collected

#--------------------------------------------------------
def jaccard(words_1, words_2):
    """Return (words of ``words_1`` found in ``words_2``) / (distinct words).

    The denominator is the number of distinct words across both lists.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    distinct = []
    for word in words_1 + words_2:
        if word in distinct:
            continue
        distinct.append(word)

    shared_count = sum(1 for word in words_1 if word in words_2)
    return shared_count / len(distinct)

#--------------------------------------------------------
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a non-zero score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    scored = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score != 0:
            scored.append([tweet_id, score])
    ranked = sorted(scored, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]

#--------------------------------------------------------
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on whitespace.  Lines start with two spaces,
    words are joined by one space, and a word is moved to a new line when
    the extended line would be longer than ``print_width``.

    Empty or whitespace-only content prints the header only, instead of
    raising ``IndexError``.
    """
    print()
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    words = tweet_content.split()
    if not words:
        return

    line = '  ' + words[0]
    for word in words[1:]:
        if len(word) + 1 <= print_width - len(line):
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    print(line)


#--------------------------------------------
# 6330591021 (20.00) 409 (2021-02-27 22:27)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    unique_so_far = []
    for word in words:
        if word in unique_so_far:
            continue
        unique_so_far.append(word)
    return unique_so_far
def jaccard(words_1, words_2):
    """Return hits / (len(words_1) + len(words_2) - hits) as a float.

    ``hits`` counts the entries of ``words_1`` found in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    hits = len([word for word in words_1 if word in words_2])
    total = len(words_1) + len(words_2) - hits
    return float(hits / total)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    positive = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            positive.append([tweet_id, score])
    positive.sort(key=lambda pair: (-pair[1], pair[0]))
    return positive[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces and written piece by piece while
    tracking the space left on the current line:

    * a fresh line gets a two-space indent (costing 2 characters);
    * a word shorter than the remaining space is printed with a trailing
      space;
    * a word that exactly fills the remaining space is printed without a
      trailing space and ends the line;
    * a longer word is moved to a new indented line.

    A final newline is always printed after the last word.
    """
    pieces = tweet_content.split(" ")
    remaining = print_width
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")

    for piece in pieces:
        size = len(piece)
        if remaining == print_width:
            print("  ", end="")
            remaining = remaining - 2
        if remaining > size:
            print(piece, end=" ")
            remaining = remaining - size - 1
        elif remaining == size:
            print(piece)
            remaining = print_width
        else:
            print("\n", end="  ")
            remaining = print_width - size - 3
            print(piece, end=" ")

    print("")

#--------------------------------------------
# 6330592621 (18.01) 410 (2021-02-28 14:21)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    ordered_unique = []
    for token in words:
        if token in ordered_unique:
            continue
        ordered_unique.append(token)
    return ordered_unique
def jaccard(words_1, words_2):
    """Return matched / (len(words_1) + len(words_2) - matched).

    ``matched`` counts the entries of ``words_1`` found in ``words_2``.
    Raises ``ZeroDivisionError`` when both lists are empty.
    """
    matched = sum(1 for word in words_1 if word in words_2)
    union_size = len(words_2) + len(words_1) - matched
    return matched / union_size
def top_n_similarity(norm_tweets, norm_query, n):
    """Return the ``n`` best ``[tweet_id, score]`` pairs, highest first.

    Every tweet is scored with ``jaccard(tweet, norm_query)``; zero scores
    are kept.  Ties are ordered by ascending tweet id.
    """
    scored = [
        [tweet_id, jaccard(tweet, norm_query)]
        for tweet_id, tweet in enumerate(norm_tweets)
    ]
    ranked = sorted(scored, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Every line starts with two
    spaces and every word is followed by one space.  An empty piece (from
    consecutive spaces) adds one extra space to the current line.  A word
    starts a new line when the current line plus the word would exceed
    ``print_width``.

    A word too long to fit even on a fresh line is printed alone on its own
    indented line, and the words after it are indented as well.
    """
    indent = "  "
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")

    line = indent
    for word in tweet_content.split(" "):
        if word == "":
            line += " "
        elif len(line) + len(word) > print_width:
            if line == indent:
                # Over-long word on a fresh line: emit it by itself and
                # keep the indent ready for whatever follows.
                print(indent + word)
            else:
                print(line)
                line = indent + word + " "
        else:
            line += word + " "

    # Skip the final flush when the last action already completed a line.
    if line != indent:
        print(line)

#--------------------------------------------
# 6330593221 (20.00) 411 (2021-02-28 21:05)

def get_unique(words):
    """Return the distinct items of ``words`` ordered by (length, value).

    The input is sorted into a new list by length and then by value, and
    each item equal to the one before it is dropped.
    """
    ordered = sorted(words, key=lambda word: (len(word), word))
    unique_words = []
    for word in ordered:
        if unique_words and unique_words[-1] == word:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return (equal cross pairs) / (distinct words across both lists).

    The numerator counts every pair ``(a, b)`` with ``a`` from ``words_1``
    and ``b`` from ``words_2`` where ``a == b``.  The denominator is the
    length of ``get_unique`` applied to the combined words, sorted by
    (length, value).  Returns ``0`` when there are no words at all.
    """
    combined = list(words_1) + list(words_2)
    pair_matches = sum(1 for a in words_1 for b in words_2 if a == b)
    ordered = sorted(combined, key=lambda word: (len(word), word))
    distinct = get_unique(ordered)
    if not distinct:
        return 0
    return pair_matches / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        # Score each tweet once and reuse the value for both the filter
        # and the stored pair.
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    matches.sort(key=lambda pair: (-pair[1], pair[0]))
    return matches[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  A word starts a new line when
    the pending line (two-space indent included) plus the word would
    exceed ``print_width``.  Each line is stripped of surrounding
    whitespace and printed after a two-space indent.
    """
    print('')
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    pending = "  "
    for word in tweet_content.split(" "):
        if len(pending + word) > print_width:
            print("  " + pending.strip())
            pending = "  " + word + " "
        else:
            pending += word + " "
    print("  " + pending.strip())



# --------------------------------------------
# 6330594921 (20.00) 412 (2021-03-01 14:52)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    seen = []
    for element in words:
        if element in seen:
            continue
        seen.append(element)
    return seen
def jaccard(words_1, words_2):
    """Return (words of ``words_1`` found in ``words_2``) / (distinct words).

    The denominator is ``len(get_unique(words_1 + words_2))``.  Raises
    ``ZeroDivisionError`` when that is zero.
    """
    shared_count = sum(1 for word in words_1 if word in words_2)
    distinct = get_unique(words_1 + words_2)
    return shared_count / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    hits = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            hits.append([tweet_id, score])
    ranked = sorted(hits, key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The content is split on single spaces.  Each line starts with two
    spaces and every word is followed by one space.  A word starts a new
    line when the current line plus the word would exceed ``print_width``.
    """
    print("")
    print("#" + str(tweet_id) + " (" + str(round(jc_coef, 2)) + ")")
    line = "  "
    for word in tweet_content.split(' '):
        if len(line) + len(word) > print_width:
            print(line)
            line = "  "
        line += word + " "
    print(line)


#--------------------------------------------
# 6330595521 (15.83) 413 (2021-03-01 23:57)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    distinct = []
    for word in words:
        if word not in distinct:
            distinct.append(word)
    return distinct
def jaccard(words_1, words_2):
    """Return (words of ``words_1`` found in ``words_2``) / (distinct words).

    The denominator is the number of distinct words across both inputs.
    When there are no words at all the result is ``0.0``, so the function
    always returns a float.
    """
    distinct = []
    for source in (words_1, words_2):
        for word in source:
            if word not in distinct:
                distinct.append(word)

    if not distinct:
        # 0.0 rather than False: callers get a number in every case.
        return 0.0

    shared = sum(1 for word in words_1 if word in words_2)
    return shared / len(distinct)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score; the sort is stable, so equal scores stay in
    ascending tweet-id order.
    """
    matches = []
    for tweet_id, tweet in enumerate(norm_tweets):
        # Score each tweet once and reuse the value for both the filter
        # and the stored pair.
        score = jaccard(tweet, norm_query)
        if score > 0:
            matches.append([tweet_id, score])
    ranked = sorted(matches, key=lambda pair: -pair[1])
    return ranked[0:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The score is rounded to two decimals.  The content is split on
    whitespace and wrapped at word boundaries: every line starts with two
    spaces, words are joined by one space, and a line (indent included)
    is at most ``print_width`` characters long unless a single word is
    longer than that.

    ``print_width`` sets the wrap width; there is no built-in line length.
    """
    print()
    print('#{} ({})'.format(tweet_id, round(jc_coef, 2)))
    line = ''
    for word in tweet_content.split():
        if not line:
            line = '  ' + word
        elif len(line) + 1 + len(word) <= print_width:
            line += ' ' + word
        else:
            print(line)
            line = '  ' + word
    if line:
        print(line)
#--------------------------------------------
# 6331138621 (19.90) 414 (2021-03-01 23:50)

def get_unique(words):
    """Return the distinct items of ``words`` in first-occurrence order."""
    unique_words = []
    for word in words:
        if word in unique_words:
            continue
        unique_words.append(word)
    return unique_words
def jaccard(words_1, words_2):
    """Return |distinct shared words| / |distinct words of both lists|.

    Both the union and the shared words are de-duplicated with
    ``get_unique``.  Raises ``ZeroDivisionError`` when the union is empty.
    """
    union = get_unique(words_1 + words_2)
    shared = get_unique([word for word in words_1 if word in words_2])
    return len(shared) / len(union)
def top_n_similarity(norm_tweets, norm_query, n):
    """Return up to ``n`` ``[tweet_id, score]`` pairs with a positive score.

    Scores come from ``jaccard(tweet, norm_query)``.  Pairs are ordered by
    descending score, ties by ascending tweet id.
    """
    positives = []
    for tweet_id, tweet in enumerate(norm_tweets):
        score = jaccard(tweet, norm_query)
        if score > 0:
            positives.append([tweet_id, score])
    positives.sort(key=lambda pair: (-pair[1], pair[0]))
    return positives[:n]
def show_tweet(tweet_id, tweet_content, jc_coef, print_width):
    """Print a blank line, a ``#id (score)`` header and the wrapped tweet.

    The header is ``#<tweet_id> (<jc_coef rounded to 2>)``, with a single
    space between the id and the parenthesised score.  The content is
    split on single spaces; each line starts with two spaces, every word
    is followed by one space, and a word starts a new line when the
    current line plus the word would exceed ``print_width``.
    """
    print('')
    print('#' + str(tweet_id) + ' (' + str(round(jc_coef, 2)) + ')')
    line = "  "
    for word in tweet_content.split(' '):
        if len(line + word) <= print_width:
            line += word + " "
        else:
            print(line)
            line = "  " + word + " "
    print(line)


#--------------------------------------------