from sklearn.decomposition import LatentDirichletAllocation
from sklearn.utils.extmath import randomized_svd
from tqdm import tqdm
import numpy as np
# Number of latent topics to extract, and number of full passes over the corpus.
n_components = 12
n_iter = 10

# The model is trained exclusively through partial_fit, which weights every
# mini-batch update by `total_samples`.  scikit-learn's default (1e6) is only
# appropriate for a million-document corpus, so pass the real document count
# to get the same update scaling that fit() would use.
# NOTE(review): assumes count_data is the (n_documents, n_terms) matrix built
# by count_vectorizer earlier in this file -- confirm.
lda_model = LatentDirichletAllocation(
    n_components=n_components,
    max_iter=1,  # only consulted by fit(); passes are driven via partial_fit
    total_samples=count_data.shape[0],
    random_state=42,
    learning_method='online',
)
# Make n_iter passes over the corpus with partial_fit, ticking the bar once
# per pass.  The context manager closes the bar even when partial_fit raises,
# so a failed run does not leave a dangling progress bar behind.
with tqdm(total=n_iter, desc="LDA Training Progress") as progress_bar:
    for _ in range(n_iter):
        lda_model.partial_fit(count_data)
        progress_bar.update(1)
# Number of highest-weighted terms to show for each topic.
n_top_words = 10

# One row of term weights per topic, plus the vocabulary used to turn column
# indices back into readable terms.
lda_topics = lda_model.components_
lda_feature_names = count_vectorizer.get_feature_names_out()
for topic_idx, topic in enumerate(lda_topics):
    # argsort is ascending, so walk it backwards to list the strongest term
    # first rather than last.
    top_term_ids = topic.argsort()[::-1][:n_top_words]
    print(f"Topic {topic_idx}:")
    print(" ".join(lda_feature_names[i] for i in top_term_ids))
