Grail Finder
2025-05-01 14:12:35 +03:00
commit 91f0e93961
6 changed files with 9176 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
venv/

3169
adjectives.txt Normal file

File diff suppressed because it is too large

30
assign_colours.py Normal file

@@ -0,0 +1,30 @@
#!/usr/bin/env python
from random import choice

FNAME = "nouns.txt"
# Words to draw per colour:
# 9 for blue
# 8 for red
# 1 for black
# 7 for white
BLUENUM = 9
REDNUM = 8
BLACKNUM = 1
WHITENUM = 7
ALLNUMS = {"blue": BLUENUM, "red": REDNUM, "black": BLACKNUM, "white": WHITENUM}
di = {}


def assign_words(words, num):
    # Keep drawing random words until `num` distinct ones are collected.
    resp = set()
    while len(resp) < num:
        resp.add(choice(words).strip())
    return resp


if __name__ == "__main__":
    with open(FNAME) as lf:
        data = lf.readlines()
    for k, num in ALLNUMS.items():
        di[k] = assign_words(data, num)
    print(di)
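A note on assign_words above: each colour is drawn independently, so the same word can be assigned to two colours, and the while loop never terminates if a count exceeds the number of distinct words in the file. A minimal sketch of an overlap-free variant, assuming nouns.txt already exists (it is generated by main.py); draw_board is a hypothetical name, not part of this commit, and random.sample is standard library:

#!/usr/bin/env python
from random import sample

ALLNUMS = {"blue": 9, "red": 8, "black": 1, "white": 7}

def draw_board(words, counts):
    # Draw all words in a single pass so no word repeats across colours;
    # sample() raises ValueError if the pool is smaller than the request.
    pool = sample(sorted({w.strip() for w in words if w.strip()}),
                  sum(counts.values()))
    board, start = {}, 0
    for colour, num in counts.items():
        board[colour] = set(pool[start:start + num])
        start += num
    return board

if __name__ == "__main__":
    with open("nouns.txt") as lf:
        print(draw_board(lf.readlines(), ALLNUMS))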

66
main.py Normal file

@@ -0,0 +1,66 @@
import nltk
from nltk.corpus import wordnet as wn, brown
import os

# Corpora are downloaded into NLTK_DATA, which must be set in the environment.
NPATH = os.environ["NLTK_DATA"]
COMMON_WORDS = set()


def load_data():
    nltk.download('brown', download_dir=NPATH)
    nltk.download('wordnet', download_dir=NPATH)
    nltk.download('omw-1.4', download_dir=NPATH)
    # Load a frequency distribution from the Brown Corpus.
    freq_dist = nltk.FreqDist(word.lower() for word in brown.words())
    # Create a set of common words (adjust the threshold as needed).
    global COMMON_WORDS
    COMMON_WORDS = {word for word, count in freq_dist.items() if count >= 5}


def is_common(word):
    # The word must appear in the Brown Corpus with minimal frequency...
    is_frequent = word in COMMON_WORDS
    # ...and have at least one synset, which indicates established usage.
    synset_count = len(wn.synsets(word))
    return is_frequent and synset_count >= 1


def filter_common(words):
    return {word for word in words if is_common(word)}


def get_words():
    nouns = set()
    adjectives = set()
    # Iterate over all synsets in WordNet.
    for synset in wn.all_synsets():
        pos = synset.pos()
        for lemma in synset.lemmas():
            word = lemma.name().replace('_', ' ').lower()  # Normalize the word
            # Skip compound words, contractions, abbreviations, and very short words.
            if "-" in word or " " in word or "'" in word or len(word) < 3 or "." in word:
                continue
            # Skip proper nouns (title-cased lemmas).
            if lemma.name().istitle():
                continue
            if pos == 'n':
                # Use WordNet's morphy to get the base form; if it matches
                # the word itself, the noun is singular/uncountable.
                base_form = wn.morphy(word, pos='n')
                if base_form == word:
                    nouns.add(word)
            # Adjectives, including satellite adjectives.
            elif pos in ('a', 's'):
                adjectives.add(word)
    # Filter using Brown Corpus frequency and synset count.
    nouns = filter_common(nouns)
    adjectives = filter_common(adjectives)
    return nouns, adjectives


def writefile(fname, data):
    with open(fname, "w") as lf:
        lf.write("\n".join(data))


if __name__ == "__main__":
    load_data()
    nouns, adjectives = get_words()
    writefile("nouns.txt", nouns)
    writefile("adjectives.txt", adjectives)

5909
nouns.txt Normal file

File diff suppressed because it is too large

1
requirements.txt Normal file

@@ -0,0 +1 @@
nltk