Loading Pre-trained Models Directly
Duration: 5 min
Beyond pipelines, you can load model weights and tokenizers directly for more control — custom inference, feature extraction, or fine-tuning.
Loading a model and tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Hub checkpoint id; the tokenizer and the weights are both loaded from it so
# the token ids produced below match the vocabulary the model expects.
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tokenize input: return PyTorch tensors and cut anything beyond 512 tokens.
text = 'This neighbourhood has great schools and low crime.'
inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)

# Run inference. no_grad() disables gradient tracking, which is only needed
# for training.
with torch.no_grad():
    outputs = model(**inputs)

# Turn the raw logits into probabilities along dim 1 (assumed to be the label
# axis, i.e. logits shaped (batch, num_labels)), then read off the winner.
probs = torch.softmax(outputs.logits, dim=1)
label = model.config.id2label[probs.argmax().item()]
confidence = probs.max().item()
print(f'{label} ({confidence:.2%})') # e.g. POSITIVE (99.70%)
Extracting embeddings
from transformers import AutoTokenizer, AutoModel
import torch
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
def get_embedding(text):
    """Return a mean-pooled embedding of ``text`` as a NumPy array.

    Args:
        text: A string, or a list of strings to embed as one padded batch.

    Returns:
        The average of the model's last-layer token vectors, with size-1
        dimensions squeezed away: a 1-D vector for a single string, one row
        per input for a batch of several strings.
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pool the token embeddings, weighting by the attention mask so that
    # padding positions (mask == 0) do not dilute the average when several
    # texts of different lengths are embedded together. For a single string
    # the mask is all ones and this equals a plain mean over tokens.
    token_embeddings = outputs.last_hidden_state
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # clamp guards against division by zero for an all-padding row.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / token_counts).squeeze().numpy()
# Embed two sentences about California housing costs and one about football,
# in that order, so they can be compared pairwise afterwards.
emb1, emb2, emb3 = (
    get_embedding(sentence)
    for sentence in (
        'California house prices are high',
        'Real estate in California is expensive',
        'I enjoy playing football',
    )
)
# Cosine similarity
from numpy.linalg import norm
def cosine(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    This is the dot product of the two vectors divided by the product of
    their Euclidean norms.
    """
    dot_product = a @ b
    magnitude = norm(a) * norm(b)
    return dot_product / magnitude
# Compare the two California housing sentences (emb1 vs emb2), printed to
# three decimal places.
print(f'Similar sentences: {cosine(emb1, emb2):.3f}') # ~0.92
print(f'Different topics: {cosine(emb1, emb3):.3f}') # ~0.18
💡 Tip: Embeddings are the foundation of retrieval-augmented generation (RAG) systems. Once you can turn text into vectors, you can build semantic search, recommendation systems, and document Q&A.
❓ What is a text embedding?