Enha: add HOW_COMMON constant to set the word-frequency threshold and include it in output filenames

2025-06-19 06:17:39 +03:00
parent 91f0e93961
commit a12a2f4385
3 changed files with 4 additions and 3 deletions
--- a/adjectives_5.txt
+++ b/adjectives_5.txt
--- a/main.py
+++ b/main.py
@@ -5,6 +5,7 @@ from collections import defaultdict

 NPATH = os.environ["NLTK_DATA"]
 COMMON_WORDS = {}
+HOW_COMMON = 10

 def load_data():
    nltk.download('brown', download_dir=NPATH)
@@ -14,7 +15,7 @@ def load_data():
    freq_dist = nltk.FreqDist(word.lower() for word in brown.words())
    # Create a set of common words (adjust threshold as needed)
    global COMMON_WORDS
-    COMMON_WORDS = {word for word, count in freq_dist.items() if count >= 5}
+    COMMON_WORDS = {word for word, count in freq_dist.items() if count >= HOW_COMMON}

 def is_common(word):
    # Check if word exists in Brown Corpus with minimal frequency
@@ -62,5 +63,5 @@ def writefile(fname, data):
 if __name__ == "__main__":
    load_data()
    nouns, adjectives = get_words()
-    writefile("nouns.txt", nouns)
-    writefile("adjectives.txt", adjectives)
+    writefile(f"nouns_{HOW_COMMON}.txt", nouns)
+    writefile(f"adjectives_{HOW_COMMON}.txt", adjectives)
--- a/nouns_5.txt
+++ b/nouns_5.txt