From a12a2f4385063447e458c63ca3654be6bd5decf0 Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Thu, 19 Jun 2025 06:17:39 +0300
Subject: [PATCH] Enha: set how common words are

---
 adjectives.txt => adjectives_5.txt | 0
 main.py                            | 7 ++++---
 nouns.txt => nouns_5.txt           | 0
 3 files changed, 4 insertions(+), 3 deletions(-)
 rename adjectives.txt => adjectives_5.txt (100%)
 rename nouns.txt => nouns_5.txt (100%)

diff --git a/adjectives.txt b/adjectives_5.txt
similarity index 100%
rename from adjectives.txt
rename to adjectives_5.txt
diff --git a/main.py b/main.py
index c255fcc..83470d8 100644
--- a/main.py
+++ b/main.py
@@ -5,6 +5,7 @@ from collections import defaultdict
 
 NPATH = os.environ["NLTK_DATA"]
 COMMON_WORDS = {}
+HOW_COMMON = 10
 
 def load_data():
     nltk.download('brown', download_dir=NPATH)
@@ -14,7 +15,7 @@ def load_data():
     freq_dist = nltk.FreqDist(word.lower() for word in brown.words())
     # Create a set of common words (adjust threshold as needed)
     global COMMON_WORDS
-    COMMON_WORDS = {word for word, count in freq_dist.items() if count >= 5}
+    COMMON_WORDS = {word for word, count in freq_dist.items() if count >= HOW_COMMON}
 
 def is_common(word):
     # Check if word exists in Brown Corpus with minimal frequency
@@ -62,5 +63,5 @@ def writefile(fname, data):
 if __name__ == "__main__":
     load_data()
     nouns, adjectives = get_words()
-    writefile("nouns.txt", nouns)
-    writefile("adjectives.txt", adjectives)
+    writefile(f"nouns_{HOW_COMMON}.txt", nouns)
+    writefile(f"adjectives_{HOW_COMMON}.txt", adjectives)
diff --git a/nouns.txt b/nouns_5.txt
similarity index 100%
rename from nouns.txt
rename to nouns_5.txt