# Reports

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.utils.extmath import randomized_svd
from tqdm import tqdm
import numpy as np

# Topic-model hyperparameters.
n_components = 12  # number of latent topics to learn
n_iter = 10  # number of partial_fit() calls made on count_data

lda_model = LatentDirichletAllocation(
    n_components=n_components,
    # NOTE(review): training below goes through partial_fit(), which per the
    # scikit-learn docs always performs an online mini-batch update; max_iter
    # and learning_method therefore appear to document intent only -- confirm.
    max_iter=1,
    random_state=42,
    learning_method='online',
    # NOTE(review): total_samples is left at its default. The scikit-learn
    # docs describe it as "only used in the partial_fit method" -- confirm
    # whether it should be set to the number of documents in count_data.
)

# Drive the passes manually so each one ticks the progress bar. The context
# manager guarantees the bar is closed even if a training pass raises.
with tqdm(total=n_iter, desc="LDA Training Progress") as progress_bar:
    for _ in range(n_iter):
        lda_model.partial_fit(count_data)
        progress_bar.update(1)

# Pull the fitted topic-term weights and the vocabulary used to label them.
lda_feature_names = count_vectorizer.get_feature_names_out()
lda_topics = lda_model.components_

# For each topic, print a header line followed by its ten highest-weighted
# terms. argsort() is ascending, so the terms appear from the lowest to the
# highest weight within that top ten.
for topic_idx, topic in enumerate(lda_topics):
    top_term_ids = topic.argsort()[-10:]
    top_terms = [lda_feature_names[i] for i in top_term_ids]
    print(f"Topic {topic_idx}:")
    print(" ".join(top_terms))
79275959