📌 단어 삭제를 통한 데이터 증강(Data Augmentation)
1. OS 와 Random 모듈 Import
import os
import random
| OS | Random |
| --- | --- |
| 운영 체제와 상호작용을 위한 모듈로, 디렉토리나 파일과 관련된 다양한 작업 시 이용됨 | 난수 생성 및 시퀀스에서의 무작위 요소 선택 등의 기능을 위한 모듈 |
2. 주어진 파일 경로에서 랜덤하게 하나의 단어를 삭제하는 remove_random_word() 함수 구현
def remove_random_word(file_path):
    """Delete one randomly chosen whitespace-separated word from a text file.

    The file is read as UTF-8 and split on any whitespace. When it holds at
    least two words, one of them is chosen at random and removed, and the
    remaining words are written back joined by single spaces (the original
    line breaks and spacing are therefore not preserved). Files with zero or
    one word are left untouched, so a file is never emptied by this function.

    Args:
        file_path: Path of the text file to modify in place.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        words = file.read().split()

    # Two or more words are required so that at least one survives removal.
    if len(words) < 2:
        return

    del words[random.randrange(len(words))]

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(' '.join(words))
# 함수 호출 시 실행 경로
① 파일을 읽은 후 그 내용을 'content' 에 저장
② 공백을 기준으로 'content' 를 분할
③ 분할한 결과물을 리스트 'words' 에 저장
④ 'words' 의 길이가 1보다 큰 경우(= 삭제할 단어가 존재하는 경우) 랜덤한 인덱스 선택
⑤ 선택된 인덱스에 해당하는 단어를 'words' 에서 삭제
⑥ 변경된 'words' 를 파일에 다시 쓰며 단어가 삭제된 상태로 파일 저장
3. remove_random_word() 함수를 호출하기 위한 process_files() 함수 구현
def process_files(folder_path, file_count=2232):
    """Remove one random word from each numbered text file in a folder.

    Visits ``file (1).txt`` through ``file (<file_count>).txt`` inside
    ``folder_path``. Numbers with no matching file are skipped without any
    output; each existing file is passed to ``remove_random_word`` and a
    success message is printed afterwards.

    Args:
        folder_path: Directory that contains the numbered files.
        file_count: Highest file number to visit (inclusive). Replaces the
            "number of files to augment" placeholder of the tutorial
            snippet; the default matches the bound used by the complete
            script in this file (``range(1, 2233)``).
    """
    for i in range(1, file_count + 1):
        file_path = os.path.join(folder_path, f"file ({i}).txt")
        if not os.path.isfile(file_path):
            continue
        remove_random_word(file_path)
        # Report success only once the file has actually been handled, not
        # for every loop index regardless of whether the file exists.
        # NOTE(review): the message text looks mis-encoded in the source;
        # it is kept unchanged here.
        print("λ°μ΄ν° μ¦κ°μ μ±κ³΅νμ΅λλ€.")
# 함수 호출 시 실행 경로
① 1부터 "증강할 파일 개수 + 1" 까지의 수를 순회하며 파일 탐색
② 파일이 존재하는 경우, remove_random_word() 함수를 호출
③ 해당 파일에서 단어를 랜덤하게 삭제
4. 모듈과 스크립트의 동작 구분을 위해 if __name__ == "__main__" 구문 작성
if __name__ == "__main__":
    # Run the augmentation only when this file is executed directly as a
    # script; importing it as a module must not touch any files.
    # NOTE(review): the string below appears to be a tutorial placeholder
    # for the data folder (and looks mis-encoded in the source) — replace it
    # with the real directory before running.
    folder_path = r"νμΌμ΄ μμΉν κ²½λ‘"
    process_files(folder_path)
++ if __name__ == "__main__" 구문은 왜 필요할까?
if __name__ == "__main__" 은 스크립트가 직접 실행되는 경우(스크립트가 모듈로 임포트되지 않고 바로 실행되는 경우)에 한해서만 process_files 함수를 호출하는 역할을 수행하는 구문이다.
즉, 위 구문은 스크립트가 모듈로 임포트되었을 때 특정 코드 블록이 자동으로 실행되는 것을 방지하며, 이를 통해 모듈과 스크립트의 동작을 구분하여 코드를 실행할 수 있다.
📌 단어 삭제를 통한 데이터 증강(Data Augmentation) 스크립트 코드 전문
import os
import random
def remove_random_word(file_path):
    """Delete one randomly chosen whitespace-separated word from a text file.

    The file is read as UTF-8 and split on any whitespace. When it holds at
    least two words, one of them is chosen at random and removed, and the
    remaining words are written back joined by single spaces (the original
    line breaks and spacing are therefore not preserved). Files with zero or
    one word are left untouched, so a file is never emptied by this function.

    Args:
        file_path: Path of the text file to modify in place.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        words = file.read().split()

    # Two or more words are required so that at least one survives removal.
    if len(words) < 2:
        return

    del words[random.randrange(len(words))]

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(' '.join(words))
def process_files(folder_path, file_count=2232):
    """Remove one random word from each numbered text file in a folder.

    Visits ``file (1).txt`` through ``file (<file_count>).txt`` inside
    ``folder_path``. Numbers with no matching file are skipped without any
    output; each existing file is passed to ``remove_random_word`` and a
    completion message is printed afterwards.

    Args:
        folder_path: Directory that contains the numbered files.
        file_count: Highest file number to visit (inclusive). Defaults to
            2232, the bound that was previously hard-coded as
            ``range(1, 2233)``.
    """
    for i in range(1, file_count + 1):
        file_path = os.path.join(folder_path, f"file ({i}).txt")
        if not os.path.isfile(file_path):
            continue
        remove_random_word(file_path)
        # Report completion only once the file has actually been handled,
        # not for every loop index regardless of whether the file exists.
        # NOTE(review): the message text looks mis-encoded in the source;
        # it is kept unchanged here.
        print("μλ£")
if __name__ == "__main__":
    # Run the augmentation only when this file is executed directly as a
    # script; importing it as a module must not touch any files.
    # NOTE(review): this is an absolute, machine-specific Windows path (and
    # its non-ASCII part looks mis-encoded in the source) — adjust it to the
    # local data folder before running.
    folder_path = r"C:\Users\USER\Desktop\Data Augmentation_λ¨μ΄ μμ (μμ¬κΈ°κ΄ μ¬μΉν)"
    process_files(folder_path)