Blame view
utils.py
295 Bytes
b6d0165d1 Initial commit |
1 2 3 4 5 6 7 8 9 10 |
def yield_corpus(df_list, tokenizer=None):
    """Lazily yield the tokenized text of every row of every corpus.

    Args:
        df_list: Iterable of objects exposing ``iterrows()`` that yields
            ``(index, row)`` pairs (presumably pandas DataFrames -- confirm
            against callers).  The text of each row is read from ``row[2]``.
        tokenizer: Optional object with a ``tokenize(text)`` method.  When
            omitted, the module-level name ``tok2`` is used, as before.

    Yields:
        The result of ``tokenizer.tokenize(text)`` for each row, in order.

    Raises:
        Exception: Any error raised while decoding or tokenizing a row is
            re-raised unchanged, after the offending raw value is printed.
    """
    if tokenizer is None:
        # NOTE(review): ``tok2`` is not defined in the visible source; it has
        # to be supplied at module level elsewhere, otherwise this raises
        # NameError.  Pass ``tokenizer`` explicitly to avoid the dependency.
        tokenizer = tok2

    for corpus in df_list:
        for _row_id, doc in corpus.iterrows():
            raw = doc[2]
            try:
                # Only byte strings need decoding; text that is already
                # unicode is passed through untouched.
                text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
                tokens = tokenizer.tokenize(text)
            except Exception:
                # Show the offending value to ease debugging, then propagate.
                print(raw)
                raise
            # Yield outside the try block so that closing the generator
            # (GeneratorExit) never triggers the debug print above.
            yield tokens


def select(elm):
    """Return the integer that follows the last underscore in *elm*.

    The whole string is converted when it contains no underscore.

    Raises:
        ValueError: If the trailing component is not a valid integer.
    """
    return int(elm.split("_")[-1])