Support
Quality
Security
License
Reuse
kandi has reviewed transformers and identified the functions below as its top functions. This is intended to give you instant insight into the functionality transformers implements, and to help you decide whether it suits your requirements.
Get all kandi verified functions for this library.
Get all kandi verified functions for this library.
📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, text generation, in over 100 languages.
🖼️ Images, for tasks like image classification, object detection, and segmentation.
🗣️ Audio, for tasks like speech recognition and audio classification.
See all related Code Snippets
QUESTION
Unpickle instance from Jupyter Notebook in Flask App
Asked 2022-Feb-28 at 18:03
I have created a class for word2vec vectorisation, which works fine. But when I pickle the trained model and load that pickle file in a Flask app, I get an error like:
AttributeError: module '__main__' has no attribute 'GensimWord2VecVectorizer'
I am creating the model on Google Colab.
Code in Jupyter Notebook:
# Word2Vec Model
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec
class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that embeds token sequences with Word2Vec.

    ``fit`` trains a gensim ``Word2Vec`` model on the input sequences and
    ``transform`` maps each sequence to the mean of the vectors of its
    in-vocabulary words (a zero vector when none of its words are known).

    Every constructor argument is stored unchanged on the instance and is
    forwarded under the same keyword to ``Word2Vec`` in ``fit``.

    NOTE(review): the keyword names ``size``/``iter`` and the ``wv.vocab``
    lookup look like the gensim 3.x API -- confirm the installed gensim
    version before upgrading.
    """

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        # Plain attribute storage only: no validation or derived state here.
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        """Train a Word2Vec model on ``X`` and store it as ``self.model_``.

        ``X`` is passed to ``Word2Vec`` as ``sentences``; ``y`` is ignored.
        Returns ``self``.
        """
        self.model_ = Word2Vec(
            sentences=X, corpus_file=None,
            size=self.size, alpha=self.alpha, window=self.window, min_count=self.min_count,
            max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed,
            workers=self.workers, min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent, cbow_mean=self.cbow_mean,
            hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word,
            trim_rule=self.trim_rule, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab)
        return self

    def transform(self, X):
        """Return one embedding row per element of ``X``.

        Each element of ``X`` is handed to ``_get_embedding``. The result is a
        float32 array with one row per element; for empty input an array of
        shape ``(0, self.size)`` is returned so the result stays 2-D.
        """
        rows = [self._get_embedding(words) for words in X]
        if not rows:
            # np.array([]) would be 1-D float64; keep the column count instead.
            return np.zeros((0, self.size), dtype=np.float32)
        return np.array(rows)

    def _get_embedding(self, words):
        """Average the vectors of the words in ``words`` known to the model.

        Words missing from ``self.model_.wv.vocab`` are skipped. When no word
        is known, a zero vector of length ``self.size`` is returned.
        """
        vocab = self.model_.wv.vocab
        vectors = [self.model_.wv[word] for word in words if word in vocab]
        if not vectors:
            # float32 like the averaged branch, so the dtype of transform()'s
            # output does not depend on whether a row had known words.
            return np.zeros(self.size, dtype=np.float32)
        return np.mean(np.asarray(vectors, dtype=np.float32), axis=0)
# column transformer
from sklearn.compose import ColumnTransformer
# Route only the 'STATUS' column through the Word2Vec vectorizer; all other
# columns are discarded (remainder='drop').
# NOTE(review): the vectorizer iterates each cell and looks up its items as
# words, so each 'STATUS' value is presumably a list of tokens rather than a
# raw string -- verify against the data preparation code.
ct = ColumnTransformer([
('step1', GensimWord2VecVectorizer(), 'STATUS')
], remainder='drop')
# Create Model
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import pickle
import numpy as np
import dill
import torch
# ##########
# SVC - support vector classifier
# ##########
# defining parameter range
# Parameter grid handed to GridSearchCV: two values of C, two of gamma,
# RBF kernel only, and probability=True for every candidate.
hyperparameters = {'C': [0.1, 1],
'gamma': [1, 0.1],
'kernel': ['rbf'],
'probability': [True]}
# Two-step pipeline: the column transformer `ct`, then a grid search over SVC.
model_sv = Pipeline([
('column_transformers', ct),
('model', GridSearchCV(SVC(), hyperparameters,
refit=True, verbose=3)),
])
# Fit against the 'cEXT' target; X_train and y_train are defined outside this snippet.
model_sv_cEXT = model_sv.fit(X_train, y_train['cEXT'])
# Save the trained cEXT - SVM Model.
# NOTE: pickle records a class by module and name, so the custom vectorizer
# class is saved as a reference to the module it was defined in. The
# AttributeError quoted in the question shows that module was `__main__`.
import joblib
joblib.dump(model_sv_cEXT, 'model_Word2Vec_sv_cEXT.pkl')
Code in Flask App:
# Word2Vec
# NOTE: unpickling looks up every class by the module path recorded at dump
# time, so GensimWord2VecVectorizer must be importable under that same path
# here before this call runs.
# NOTE(security): joblib.load unpickles arbitrary objects; only load model
# files from a trusted source.
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')
I tried to copy the same class into my Flask file, but it is also not working.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec
class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that embeds token sequences with Word2Vec.

    ``fit`` trains a gensim ``Word2Vec`` model on the input sequences and
    ``transform`` maps each sequence to the mean of the vectors of its
    in-vocabulary words (a zero vector when none of its words are known).

    Every constructor argument is stored unchanged on the instance and is
    forwarded under the same keyword to ``Word2Vec`` in ``fit``.

    NOTE(review): the keyword names ``size``/``iter`` and the ``wv.vocab``
    lookup look like the gensim 3.x API -- confirm the installed gensim
    version before upgrading.
    """

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        # Plain attribute storage only: no validation or derived state here.
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        """Train a Word2Vec model on ``X`` and store it as ``self.model_``.

        ``X`` is passed to ``Word2Vec`` as ``sentences``; ``y`` is ignored.
        Returns ``self``.
        """
        self.model_ = Word2Vec(
            sentences=X, corpus_file=None,
            size=self.size, alpha=self.alpha, window=self.window, min_count=self.min_count,
            max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed,
            workers=self.workers, min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent, cbow_mean=self.cbow_mean,
            hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word,
            trim_rule=self.trim_rule, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab)
        return self

    def transform(self, X):
        """Return one embedding row per element of ``X``.

        Each element of ``X`` is handed to ``_get_embedding``. The result is a
        float32 array with one row per element; for empty input an array of
        shape ``(0, self.size)`` is returned so the result stays 2-D.
        """
        rows = [self._get_embedding(words) for words in X]
        if not rows:
            # np.array([]) would be 1-D float64; keep the column count instead.
            return np.zeros((0, self.size), dtype=np.float32)
        return np.array(rows)

    def _get_embedding(self, words):
        """Average the vectors of the words in ``words`` known to the model.

        Words missing from ``self.model_.wv.vocab`` are skipped. When no word
        is known, a zero vector of length ``self.size`` is returned.
        """
        vocab = self.model_.wv.vocab
        vectors = [self.model_.wv[word] for word in words if word in vocab]
        if not vectors:
            # float32 like the averaged branch, so the dtype of transform()'s
            # output does not depend on whether a row had known words.
            return np.zeros(self.size, dtype=np.float32)
        return np.mean(np.asarray(vectors, dtype=np.float32), axis=0)
# Word2Vec
# NOTE: the pickle refers to the vectorizer class by the module path recorded
# at dump time (`__main__` according to the AttributeError quoted in the
# question), so the class must be resolvable under that path when loading.
# NOTE(security): joblib.load unpickles arbitrary objects; only load model
# files from a trusted source.
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')
GitHub code: https://github.com/Juned-Ansari/test
Pickle file: https://github.com/Juned-Ansari/test/blob/main/model_Word2Vec_sv_cEXT.pkl
Flask Web App: https://github.com/Juned-Ansari/test/tree/main/WebApp
ANSWER
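Answered 2022-Feb-24 at 11:48
Import GensimWord2VecVectorizer in your Flask web app's Python file. Pickle stores only a reference to the class (its module name plus class name), so the class must be importable under that same path when the model is loaded — for example, define it in a shared module that both the training code and the Flask app import, then re-create the pickle file.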
Community Discussions, Code Snippets contain sources that include Stack Exchange Network
No vulnerabilities reported
Save this library and start creating your kit
See Similar Libraries in
Save this library and start creating your kit
Open Weaver – Develop Applications Faster with Open Source