Search
Duplicate

Colab 활용기

Colab에서 Git 클론하기
import os
import re

# Build "train.txt", a tab-separated manifest with one entry per transcript
# found in the KsponSpeech_0001 directory:
#
#     <absolute path of the transcript file> \t <transcript text> \n
#
# Transcripts are read as cp949 and the manifest is written as utf-8.

# --- Disabled preprocessing step (kept for reference) ----------------------
# Rewrites every transcript in place after removing parenthesised spans and
# every character that is not a Hangul syllable or a space.
#
# def rule(x):
#     # parentheses
#     a = re.compile(r'\([^)]*\)')
#     # punctuation
#     b = re.compile('[^가-힣 ]')
#     x = re.sub(pattern=a, repl='', string=x)
#     x = re.sub(pattern=b, repl='', string=x)
#     return x
#
# for filename in [_ for _ in os.listdir("KsponSpeech_0001") if _.endswith('.txt')]:
#     newText = ""
#     with open(os.path.join("KsponSpeech_0001", filename), 'r', encoding='cp949') as f:
#         text = f.read()
#     newText = rule(text)
#     with open(os.path.join("KsponSpeech_0001", filename), 'w', encoding='cp949') as f:
#         f.write(newText)
#
# print(os.path)
# ---------------------------------------------------------------------------

SOURCE_DIR = "KsponSpeech_0001"

with open("train.txt", 'w', encoding='utf-8') as f2:
    for filename in os.listdir(SOURCE_DIR):
        if not filename.endswith('.txt'):
            continue

        transcript_path = os.path.join(SOURCE_DIR, filename)
        with open(transcript_path, 'r', encoding='cp949') as f:
            text = f.read()

        # Resolve the path *inside* SOURCE_DIR. Calling realpath() on the bare
        # file name would resolve it against the current working directory and
        # record a path to a file that does not exist there.
        # NOTE(review): `text` is written verbatim, so a transcript that ends
        # with a newline produces a blank line after its entry — confirm
        # whether the consumer of train.txt expects that.
        f2.write(os.path.realpath(transcript_path) + '\t' + text + '\n')
Python
복사