`I have saved my random classifier in pickle and the count vectorizer in vectorizer.py as shown below but I am getting Vocabulary not fitted or provided. Kindly help. Please let me know if any errors as well.
from vectorizer import vect
clf = pickle.load(open(os.path.join('pkl_objects', 'r_classifier.pkl'), 'rb'))
example = ["HELLO is racist"]
X = vect.transform(example)
prediction = clf.predict(X)
probability = clf.predict_proba(X)
print('Prediction: %s\nProbability: %.2f%%' % (label[prediction[0]], np.max(probability) * 100))
Vecotrizer.py
%%writefile HateSpeechDetection/vectorizer.py
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import WordNetLemmatizer
import re
import os
import pickle
cur_dir = os.path.dirname(__file__)
stop = pickle.load(open(
os.path.join(cur_dir,
'pkl_objects',
'stopwords.pkl'), 'rb'))
lemmatizer = WordNetLemmatizer()
def preprocessor(tweet):
# Removal of user handles
tweet = re.sub('@[\w\-]+','', tweet)
# Coverting the string into lower case
tweet = str(tweet).lower()
tweet = re.sub('\[.*?\]','',tweet)
# Removal of HTML linkups
tweet = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|''[!*\(\),]|(?:%[0-9a-fA-F][0- 9a-fA-F]))+','',tweet)
tweet = re.sub('<.*?>+', '', tweet)
# Removal of punctuations
tweet = re.sub('[%s]' % re.escape(string.punctuation), '', tweet)
tweet = re.sub('\n','',tweet)
tweet = re.sub('\w*\d\w*', '', tweet)
# Removal of stopwords
tweet = [word for word in tweet.split(' ') if word not in stopwords]
#removal of greek characters
tweet = [' '.join([unidecode.unidecode(word) for word in str(t).split()]) if t is not None else t for t in tweet]
#lemmetizing of tweets
tweet = [" ".join(lemmatizer.lemmatize(word) for word in t.split()) for t in tweet]
tweet = " ".join(tweet)
return tweet
vect = CountVectorizer()
def process_tweet(tweet):
# Process the tweet
processed_tweet = preprocessor(tweet)
vect.transform([processed_tweet]) # Pass a list of processed_tweet
return processed_tweet
I want to import the code to flask to create a web embedding but stuck near this one poin
`