79129960

Date: 2024-10-27 05:37:26
Score: 6 🚩
Natty:
Report link
`I have saved my random classifier with pickle and the count vectorizer in vectorizer.py as shown below, but I am getting the error "Vocabulary not fitted or provided". Kindly help. Please let me know if there are any other errors as well.


import os
import pickle

import numpy as np

from vectorizer import vect

# Load the trained classifier. The context manager closes the file handle
# even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code on load -- only unpickle
# files that were produced by a trusted source.
with open(os.path.join('pkl_objects', 'r_classifier.pkl'), 'rb') as clf_file:
    clf = pickle.load(clf_file)

example = ["HELLO is racist"]
# `vect` must already be fitted (or built with a fixed vocabulary); otherwise
# transform() fails with "Vocabulary not fitted or provided".
X = vect.transform(example)

prediction = clf.predict(X)
probability = clf.predict_proba(X)

# NOTE(review): `label` is not defined in this snippet -- presumably a mapping
# from predicted class to a display name; confirm where it is defined.
print(f"Prediction: {label[prediction[0]]}\nProbability: {np.max(probability) * 100:.2f}%")

vectorizer.py
%%writefile HateSpeechDetection/vectorizer.py
import os
import pickle
import re
import string

import unidecode
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer

# Directory containing this module, so the pickled resources below are
# resolved relative to the module file rather than the current working
# directory.
cur_dir = os.path.dirname(__file__)

# Presumably the stop-word collection used during preprocessing (inferred from
# the file name). Opened with a context manager so the file handle is closed
# as soon as unpickling finishes.
with open(os.path.join(cur_dir, 'pkl_objects', 'stopwords.pkl'), 'rb') as _stopwords_file:
    stop = pickle.load(_stopwords_file)

lemmatizer = WordNetLemmatizer()

def preprocessor(tweet):
    """Normalise a raw tweet into a cleaned, lemmatised string.

    Steps, in order: strip ``@handles``, lowercase, drop ``[...]`` spans,
    URLs and HTML-like tags, remove punctuation, newlines and every token
    containing a digit, filter out stop words, transliterate the remaining
    words to ASCII and lemmatise them.

    Relies on the module-level ``stop`` collection and ``lemmatizer``, and
    on the ``string`` and ``unidecode`` modules being imported.

    Args:
        tweet: The raw tweet. It is coerced with ``str()`` first, so
            non-string input does not make the regex calls fail.

    Returns:
        The processed tokens joined with single spaces. Tokens are produced
        by splitting on a single space, so runs of spaces in the cleaned
        text yield empty tokens and therefore repeated spaces in the result.
    """
    # Coerce up front: re.sub() raises TypeError on non-string input.
    tweet = str(tweet)

    # Removal of user handles
    tweet = re.sub(r'@[\w\-]+', '', tweet)

    # Converting the string into lower case
    tweet = tweet.lower()

    # Removal of square-bracketed spans
    tweet = re.sub(r'\[.*?\]', '', tweet)

    # Removal of URLs. The hex-escape class is written as [0-9a-fA-F]; the
    # stray whitespace previously inside it formed an invalid character range
    # that made re.sub() raise re.error.
    tweet = re.sub(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        '',
        tweet,
    )
    # Removal of HTML-like tags
    tweet = re.sub(r'<.*?>+', '', tweet)

    # Removal of punctuation, newlines and tokens containing digits
    tweet = re.sub('[%s]' % re.escape(string.punctuation), '', tweet)
    tweet = re.sub(r'\n', '', tweet)
    tweet = re.sub(r'\w*\d\w*', '', tweet)

    processed = []
    for token in tweet.split(' '):
        # Removal of stop words; the collection is loaded into the
        # module-level name `stop`.
        if token in stop:
            continue

        # Transliterate non-ASCII characters (e.g. Greek letters) to ASCII.
        ascii_token = ' '.join(unidecode.unidecode(part) for part in token.split())

        # Lemmatise every word of the transliterated token.
        processed.append(
            ' '.join(lemmatizer.lemmatize(part) for part in ascii_token.split())
        )

    return ' '.join(processed)

# Module-level vectorizer shared with importers of this module.
# NOTE(review): it is created without a vocabulary and nothing in the visible
# code fits it, so vect.transform(...) fails with "Vocabulary not fitted or
# provided". It presumably needs to be replaced by the vectorizer fitted at
# training time (e.g. persisted alongside the classifier) -- confirm where
# that fitted object is stored.
vect = CountVectorizer()

def process_tweet(tweet):
    """Clean *tweet* with ``preprocessor`` and return the cleaned text.

    The cleaned text is also handed to ``vect.transform`` as a one-element
    list; the return value of that call is not used.
    """
    cleaned = preprocessor(tweet)

    # transform() expects an iterable of documents, hence the list wrapper.
    vect.transform([cleaned])

    return cleaned

I want to import the code into Flask to create a web embedding, but I am stuck at this one point.
`
Reasons:
  • Blacklisted phrase (3): Kindly help
  • RegEx Blacklisted phrase (2.5): Please let me know
  • RegEx Blacklisted phrase (1): I want
  • Long answer (-1):
  • Has code block (-0.5):
  • Low reputation (1):
Posted by: Kondapalli Sri Krishna Priya