pytorch | Dynamic neural networks in Python with strong GPU acceleration | Machine Learning library
kandi X-RAY | pytorch Summary
Support
Quality
Security
License
Reuse
Currently covering the most popular Java, JavaScript and Python libraries. See a Sample Here
pytorch Key Features
pytorch Examples and Code Snippets
return torch.tensor(batch_x).float(), torch.tensor(batch_t)
import torch
from transformers import DebertaModel, DebertaTokenizer

# Both the tokenizer and the model are fetched from the same checkpoint.
MODEL_NAME = "microsoft/deberta-base"
tokenizer = DebertaTokenizer.from_pretrained(MODEL_NAME)
model = DebertaModel.from_pretrained(MODEL_NAME)

# Encode the sentences as one padded batch of PyTorch tensors.
sentences = ["The cat cought the mouse", "This is the second sentence"]
inputs = tokenizer(sentences, return_tensors="pt", padding=True)

# Forward pass; print the shape of the final hidden states.
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)
def backward(self, unet_loss, dis_loss):
    """Backpropagate both losses, stepping each optimizer right after its loss.

    Order used here: discriminator backward -> discriminator step ->
    U-Net backward -> U-Net step.
    """
    # retain_graph=True asks autograd to keep the graph after this call so
    # that a later backward() can still run through it.
    dis_loss.backward(retain_graph = True)
    # NOTE(review): this step updates parameters in place *before*
    # unet_loss.backward() runs; if unet_loss depends on those parameters the
    # retained graph may no longer be valid — confirm.
    self.dis_optimizer.step()
    unet_loss.backward()
    self.unet_optimizer.step()
def backward(self, unet_loss, dis_loss):
    """Backpropagate both losses first, then step both optimizers.

    Order used here: discriminator backward -> U-Net backward ->
    discriminator step -> U-Net step.
    """
    # retain_graph=True asks autograd to keep the graph after this call so
    # that the second backward() below can still run through it.
    dis_loss.backward(retain_graph = True)
    unet_loss.backward()
    # Parameters are only modified once every gradient has been computed.
    self.dis_optimizer.step()
    self.unet_optimizer.step()
import torch
import torch.nn as nn
from torchcrf import CRF
from transformers import CamembertModel, CamembertTokenizerFast
class CamemBERTCRF(nn.Module):
    """CamemBERT encoder followed by dropout, a linear tag classifier and a CRF."""

    def __init__(self, num_labels):
        super().__init__()
        encoder = CamembertModel.from_pretrained("camembert-base")
        self.encoder = encoder
        self.config = encoder.config
        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
        self.classifier = nn.Linear(self.config.hidden_size, num_labels)
        self.crf = CRF(num_tags=num_labels, batch_first=True)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        """Encode the inputs, score every token and decode tags with the CRF.

        All arguments except ``labels`` are forwarded unchanged to the encoder.

        Args:
            labels: optional tag indices, one per token. When given, the
                negated CRF log-likelihood is returned as the loss.

        Returns:
            ``(loss, tags, *extra)`` when ``labels`` is given, otherwise
            ``(tags, *extra)``, where ``extra`` is ``outputs[2:]`` of the
            encoder output.
        """
        encoder_outputs = self.encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        hidden_states = self.dropout(encoder_outputs.last_hidden_state)
        logits = self.classifier(hidden_states)

        # NOTE(review): attention_mask is not forwarded to the CRF, so every
        # position takes part in the likelihood and in decoding — confirm
        # this is intended for padded batches.
        loss = None if labels is None else 0 - self.crf(logits, labels)
        tags = torch.Tensor(self.crf.decode(logits))

        output = (tags,) + encoder_outputs[2:]
        if loss is None:
            return output
        return (loss,) + output
m = CamemBERTCRF(4)
t = CamembertTokenizerFast.from_pretrained("camembert-base")
# Tokenize once and reuse the encoding for both calls.
encoded = t("this is a test", return_tensors="pt")
# With labels: the returned tuple starts with the loss.
print(m(**encoded, labels=torch.tensor([[1,2,3,2,3,1]])))
# Without labels: only the decoded tags are returned.
print(m(**encoded))
(tensor(8.0685, grad_fn=), tensor([[2., 2., 2., 2., 2., 2.]]))
(tensor([[2., 2., 2., 2., 2., 2.]]),)
file = "model.pt"
model = your_model()
# torch.load reads the saved parameters from the file; load_state_dict then
# loads those parameters into the model.
state_dict = torch.load(file)
model.load_state_dict(state_dict)
from typing import Tuple, List, Dict, Optional
import torch
from torch import Tensor
from collections import OrderedDict
from torchvision.models.detection.roi_heads import fastrcnn_loss
from torchvision.models.detection.rpn import concat_box_prediction_layers
def eval_forward(
    model,
    images: List[Tensor],
    targets: Optional[List[Dict[str, Tensor]]],
) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
    """Compute losses *and* detections for a detection model left in eval mode.

    The RPN and ROI-head forward passes are replayed step by step so that the
    loss terms can be computed while the model's modules stay in eval mode;
    only ``model.rpn.training`` is flipped to True for the duration of the
    call and it is always reset, even if an error is raised.

    Args:
        model: object exposing ``transform``, ``backbone``, ``rpn`` and
            ``roi_heads`` attributes, as used below.
        images: images to be processed.
        targets: ground-truth ``boxes`` / ``labels`` per image. Must not be
            None, because the loss computation needs it.

    Returns:
        ``(losses, detections)``: ``losses`` maps ``loss_classifier``,
        ``loss_box_reg``, ``loss_objectness`` and ``loss_rpn_box_reg`` to
        tensors; ``detections`` holds one dict with ``boxes``, ``labels`` and
        ``scores`` per image, post-processed back to the original image sizes.

    Raises:
        ValueError: if a target box has non-positive height or width.
        AssertionError: if ``targets`` is None.
    """
    model.eval()

    # Remember the input sizes so detections can be mapped back at the end.
    original_image_sizes: List[Tuple[int, int]] = []
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2
        original_image_sizes.append((val[0], val[1]))

    images, targets = model.transform(images, targets)

    # Check for degenerate boxes
    # TODO: Move this to a function
    if targets is not None:
        for target_idx, target in enumerate(targets):
            boxes = target["boxes"]
            degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
            if degenerate_boxes.any():
                # report the first degenerate box
                bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                degen_bb: List[float] = boxes[bb_idx].tolist()
                raise ValueError(
                    "All bounding boxes should have positive height and width."
                    f" Found invalid box {degen_bb} for target at index {target_idx}."
                )

    features = model.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])

    model.rpn.training = True
    try:
        # --- stands in for: proposals, proposal_losses = model.rpn(images, features, targets)
        features_rpn = list(features.values())
        objectness, pred_bbox_deltas = model.rpn.head(features_rpn)
        anchors = model.rpn.anchor_generator(images, features_rpn)

        num_images = len(anchors)
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]
        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness, pred_bbox_deltas)
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN do not backprop through
        # the proposals
        proposals = model.rpn.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        proposals, scores = model.rpn.filter_proposals(
            proposals, objectness, images.image_sizes, num_anchors_per_level
        )

        assert targets is not None
        labels, matched_gt_boxes = model.rpn.assign_targets_to_anchors(anchors, targets)
        regression_targets = model.rpn.box_coder.encode(matched_gt_boxes, anchors)
        loss_objectness, loss_rpn_box_reg = model.rpn.compute_loss(
            objectness, pred_bbox_deltas, labels, regression_targets
        )
        proposal_losses = {
            "loss_objectness": loss_objectness,
            "loss_rpn_box_reg": loss_rpn_box_reg,
        }

        # --- stands in for: detections, detector_losses = model.roi_heads(features, proposals, images.image_sizes, targets)
        image_shapes = images.image_sizes
        proposals, matched_idxs, labels, regression_targets = model.roi_heads.select_training_samples(
            proposals, targets
        )
        box_features = model.roi_heads.box_roi_pool(features, proposals, image_shapes)
        box_features = model.roi_heads.box_head(box_features)
        class_logits, box_regression = model.roi_heads.box_predictor(box_features)

        loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
        detector_losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}

        boxes, scores, labels = model.roi_heads.postprocess_detections(
            class_logits, box_regression, proposals, image_shapes
        )
        detections: List[Dict[str, torch.Tensor]] = [
            {"boxes": image_boxes, "labels": image_labels, "scores": image_scores}
            for image_boxes, image_labels, image_scores in zip(boxes, labels, scores)
        ]
        detections = model.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]
    finally:
        # Always leave the sub-modules in eval mode, even when a step above fails.
        model.rpn.training = False
        model.roi_heads.training = False

    # Detector losses first, then RPN losses.
    losses = {**detector_losses, **proposal_losses}
    return losses, detections
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Start from a Faster R-CNN pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# User-defined number of classes: 1 class (person) + background.
num_classes = 2

# Swap the pre-trained box predictor for a fresh head with the same input
# width but only num_classes outputs.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# One random 300x300 image with a single ground-truth box.
images = torch.randn([1,3,300,300])
targets = [{'boxes':torch.tensor([[100,100,200,200]]),'labels':torch.tensor([0])}]
losses, detections = eval_forward(model, images, targets)
{'loss_classifier': tensor(0.6594, grad_fn=),
'loss_box_reg': tensor(0., grad_fn=),
'loss_objectness': tensor(0.5108, grad_fn=),
'loss_rpn_box_reg': tensor(0.0160, grad_fn=)}
# One grouped conv1d call handles every (x, y) row pair: each row of x_batch
# becomes its own channel and is paired with the matching flipped row of
# y_batch through groups=x_batch.size(0).
signal = x_batch.unsqueeze(0)
kernels = y_batch.unsqueeze(1).flip(2)
out = torch.conv1d(signal, kernels, padding=y_batch.size(1)-1, groups=x_batch.size(0))
print(torch.allclose(out, res1)) # True
from torch import nn
import torch.nn.functional as F
class network(nn.Module):
    """Three stacked linear layers (M -> 100 -> 50 -> 1) with a sigmoid output.

    Args:
        M: dimension of the input feature.
    """

    def __init__(self, M):
        super(network, self).__init__()
        self.layer1 = nn.Linear(M, 100)
        self.layer2 = nn.Linear(100, 50)
        self.out = nn.Linear(50, 1)

    def forward(self, x):
        """Return the sigmoid of the final linear layer's output for ``x``."""
        # NOTE(review): there is no activation between the linear layers —
        # confirm that is intended.
        return F.sigmoid(self.out(self.layer2(self.layer1(x))))
----------
Trending Discussions on pytorch
Trending Discussions on pytorch
QUESTION
I understand that in python user-defined objects can be made callable by defining a __call__()
method in the class definition. For example,
class MyClass:
    """Toy class whose instances can be called like a function."""

    def __init__(self):
        """Nothing to initialise."""

    def __call__(self, input1):
        # Calling the instance simply forwards to my_function.
        self.my_function(input1)

    def my_function(self, input1):
        """Print ``input1`` prefixed with the class tag."""
        message = f"MyClass - print {input1}"
        print(message)
my_obj = MyClass()
# same as calling my_obj.my_function("haha")
my_obj("haha") # prints "MyClass - print haha"
I was looking at how pytorch
makes the forward()
method of a nn.Module
object be called implicitly when the object is called and saw some syntax I didn't understand.
In the line that supposedly defines the __call__
method the syntax used is,
__call__ : Callable[..., Any] = _call_impl
This seemed like a combination of an annotation (keyword Callable[
following :
ignored by python) and a value of _call_impl
which we want to be called when __call__
is invoked, and my guess is that this is a shorthand for,
def __call__(self, *args, **kwargs):
return self._call_impl(*args, **kwargs)
but wanted to understand clearly how this method of defining functions worked.
My question is: When would we want to use such a definition of callable attributes of a class instead of the usual def myfunc(self, *args, **kwargs)
ANSWER
Answered 2022-Mar-26 at 18:08 — Functions are normal first-class objects in Python. The name to which you bind a function object, e.g. with a def
statement, is not set in stone, any more than it would be for an int
or list
. Just as you can do
a = [1, 2, 3]
b = a
to access the elements of a
through the name b
, you can do the same with functions. In your first example, you could replace
def __call__(self, input1):
self.my_function(input1)
with the much simpler
__call__ = my_function
You would need to put this line after the definition of my_function
.
The key difference between the two implementations is that def __call__(...
creates a new function. __call__ = ...
simply binds the name __call__
to the same object as my_function
. The noticeable difference is that if you do __call__.__name__
, the first version will show __call__
, while the second will show my_function
, since that's what gets assigned by a def
statement.
QUESTION
For me what I do is detect what is unpickable and make it into a string (I guess I could have deleted it too but then it will falsely tell me that field didn't exist but I'd rather have it exist but be a string). But I wanted to know if there was a less hacky more official way to do this.
Current code I use:
def make_args_pickable(args: Namespace) -> Namespace:
    """
    Build a new namespace with the same fields as ``args`` in which every
    value that ``dill.pickles`` rejects is replaced by its ``str()`` form.

    note: implementation not tested against deep copying.
    ref:
        - https://stackoverflow.com/questions/70128335/what-is-the-proper-way-to-make-an-object-with-unpickable-fields-pickable
    """
    def _picklable_or_str(value: Any) -> Any:
        # Keep the value when dill can pickle it, otherwise fall back to text.
        return value if dill.pickles(value) else str(value)

    pickable_args = argparse.Namespace()
    # vars() exposes the namespace's __dict__, i.e. its field -> value mapping.
    for name, value in vars(args).items():
        setattr(pickable_args, name, _picklable_or_str(value))
    return pickable_args
Context: I think I do it mostly to remove the annoying tensorboard object I carry around (but I don't think I will need the .tb
field anymore thanks to wandb
/weights and biases). Not that this matters a lot but context is always nice.
Related:
- What does it mean for an object to be picklable (or pickle-able)?
- Python - How can I make this un-pickleable object pickleable?
Edit:
Since I decided to move away from dill - since sometimes it cannot recover classes/objects (probably because it cannot save their code or something) - I decided to only use pickle
(which seems to be the recommended way to be done in PyTorch).
So what is the official (perhaps optimized) way to check for pickables without dill or with the official pickle?
Is this the best:
def is_picklable(obj):
    """Return True if ``pickle.dumps(obj)`` succeeds, False otherwise.

    ``pickle`` does not report every failure as ``PicklingError``: objects
    such as locks raise ``TypeError`` and some local objects raise
    ``AttributeError``, so all three are treated as "not picklable".
    """
    try:
        pickle.dumps(obj)
    except (pickle.PicklingError, AttributeError, TypeError):
        return False
    return True
thus current soln:
def make_args_pickable(args: Namespace) -> Namespace:
    """
    Returns a copy of the args namespace but with unpicklable objects as strings.

    Picklability is decided with the standard ``pickle`` module only (no
    ``dill`` dependency), via ``is_picklable``.

    note: implementation not tested against deep copying.
    ref:
        - https://stackoverflow.com/questions/70128335/what-is-the-proper-way-to-make-an-object-with-unpickable-fields-pickable
    """
    pickable_args = argparse.Namespace()
    # vars() returns the __dict__ of the namespace, i.e. its fields.
    for field in vars(args):
        field_val: Any = getattr(args, field)
        # - if the current field value is not picklable, cast it to a string
        if not is_picklable(field_val):
            field_val = str(field_val)
        # - from here on the value is picklable, so store it in the new namespace
        setattr(pickable_args, field, field_val)
    return pickable_args


def make_opts_pickable(opts):
    """ Makes a namespace pickable """
    return make_args_pickable(opts)


def is_picklable(obj: Any) -> bool:
    """
    Checks if something is picklable with the standard ``pickle`` module.

    ``pickle.dumps`` reports failures not only as ``PicklingError`` but also
    as ``TypeError`` (e.g. locks) and ``AttributeError`` (e.g. some local
    objects); all of them mean "not picklable" here.

    Ref:
        - https://stackoverflow.com/questions/70128335/what-is-the-proper-way-to-make-an-object-with-unpickable-fields-pickable
    """
    import pickle
    try:
        pickle.dumps(obj)
    except (pickle.PicklingError, AttributeError, TypeError):
        return False
    return True
Note: one of the reasons I want something "offical"/tested is because I am getting pycharm halt on the try catch: How to stop PyCharm's break/stop/halt feature on handled exceptions (i.e. only break on python unhandled exceptions)? which is not what I want...I want it to only halt on unhandled exceptions.
ANSWER
Answered 2022-Jan-19 at 22:30Yes, a try/except
is the best way to go about this.
Per the docs, pickle
is capable of recursively pickling objects, that is to say, if you have a list of objects that are pickleable, it will pickle all objects inside of that list if you attempt to pickle that list. This means that you cannot feasibly test to see if an object is pickleable without pickling it. Because of that, your structure of:
def is_picklable(obj):
    """Return True if ``pickle.dumps(obj)`` succeeds, False otherwise.

    Besides ``PicklingError``, pickling can fail with ``TypeError`` (e.g.
    locks, generators) or ``AttributeError`` (e.g. some local objects);
    each of these is reported as "not picklable" instead of propagating.
    """
    try:
        pickle.dumps(obj)
    except (pickle.PicklingError, AttributeError, TypeError):
        return False
    return True
is the simplest and easiest way to go about checking this. If you are not working with recursive structures and/or you can safely assume that all recursive structures will only contain pickleable objects, you could check the type()
value of the object against the list of pickleable objects:
- None, True, and False
- integers, floating point numbers, complex numbers
- strings, bytes, bytearrays
- tuples, lists, sets, and dictionaries containing only picklable objects
- functions defined at the top level of a module (using def, not lambda)
- built-in functions defined at the top level of a module
- classes that are defined at the top level of a module
- instances of such classes whose __dict__ or the result of calling __getstate__() is picklable (see section Pickling Class Instances for details).
This is likely faster than using a try:... except:...
like you showed in your question.
QUESTION
Goal: I am trying to import a graph FROM networkx into PyTorch geometric and set labels and node features.
(This is in Python)
Question(s):
- How do I do this [the conversion from networkx to PyTorch geometric]? (presumably by using the
from_networkx
function) - How do I transfer over node features and labels? (more important question)
I have seen some other/previous posts with this question but they weren't answered (correct me if I am wrong).
Attempt: (I have just used an unrealistic example below, as I cannot post anything real on here)
Let us imagine we are trying to do a graph learning task (e.g. node classification) on a group of cars (not very realistic as I said). That is, we have a group of cars, an adjacency matrix, and some features (e.g. price at the end of the year). We want to predict the node label (i.e. brand of the car).
I will be using the following adjacency matrix: (apologies, cannot use latex to format this)
A = [(0, 1, 0, 1, 1), (1, 0, 1, 1, 0), (0, 1, 0, 0, 1), (1, 1, 0, 0, 0), (1, 0, 1, 0, 0)]
Here is the code (for Google Colab environment):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from torch_geometric.utils.convert import to_networkx, from_networkx
import torch
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
# Make the networkx graph
G = nx.Graph()
# Add some cars (just do 4 for now)
G.add_nodes_from([
(1, {'Brand': 'Ford'}),
(2, {'Brand': 'Audi'}),
(3, {'Brand': 'BMW'}),
(4, {'Brand': 'Peugot'}),
(5, {'Brand': 'Lexus'}),
])
# Add some edges
G.add_edges_from([
(1, 2), (1, 4), (1, 5),
(2, 3), (2, 4),
(3, 2), (3, 5),
(4, 1), (4, 2),
(5, 1), (5, 3)
])
# Convert the graph into PyTorch geometric
pyg_graph = from_networkx(G)
So this correctly converts the networkx graph to PyTorch Geometric. However, I still don't know how to properly set the labels.
The brand values for each node have been converted and are stored within:
pyg_graph.Brand
Below, I have just made some random numpy arrays of length 5 for each node (just pretend that these are realistic).
# One array of five random integers in [0, 100) per brand. The generator is
# consumed left to right, so the arrays are drawn in the order the names are
# listed.
ford_prices, lexus_prices, audi_prices, bmw_prices, peugot_prices = (
    np.random.randint(100, size=5) for _ in range(5)
)
This brings me to the main question:
- How do I set the prices to be the node features of this graph?
- How do I set the labels of the nodes? (and will I need to remove the labels from
pyg_graph.Brand
when training the network?)
Thanks in advance and happy holidays.
ANSWER
Answered 2021-Dec-22 at 18:32The easiest way is to add all information to the networkx graph and directly create it in the way you need it. I guess you want to use some Graph Neural Networks. Then you want to have something like below.
- Instead of text as labels, you probably want to have a categorical representation, e.g. 1 stands for Ford.
- If you want to match the usual convention, name your input features x and your labels/ground truth y.
- The splitting of the data into train and test is done via masks. So the graph still contains all information, but only part of it is used for training. Check the PyTorch Geometric introduction for an example, which uses the Cora dataset.
import networkx as nx
import numpy as np
import torch
from torch_geometric.utils.convert import from_networkx

# Five cars: 'y' holds the node label, 'x' the node feature.
nodes = [
    (1, {'y': 1, 'x': 0.5}),
    (2, {'y': 2, 'x': 0.2}),
    (3, {'y': 3, 'x': 0.3}),
    (4, {'y': 4, 'x': 0.1}),
    (5, {'y': 5, 'x': 0.2}),
]
edges = [
    (1, 2), (1, 4), (1, 5),
    (2, 3), (2, 4),
    (3, 2), (3, 5),
    (4, 1), (4, 2),
    (5, 1), (5, 3)
]

# Build the networkx graph from the node and edge lists
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

# Convert the graph into PyTorch geometric
pyg_graph = from_networkx(G)

print(pyg_graph)
# Data(edge_index=[2, 12], x=[5], y=[5])
print(pyg_graph.x)
# tensor([0.5000, 0.2000, 0.3000, 0.1000, 0.2000])
print(pyg_graph.y)
# tensor([1, 2, 3, 4, 5])
print(pyg_graph.edge_index)
# tensor([[0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4],
#         [1, 3, 4, 0, 2, 3, 1, 4, 0, 1, 0, 2]])

# Split the nodes: shuffle the indices, the first num_train go to training
# and the rest to testing.
train_ratio = 0.2
num_nodes = pyg_graph.x.shape[0]
num_train = int(num_nodes * train_ratio)
idx = list(range(num_nodes))
np.random.shuffle(idx)
train_idx, test_idx = idx[:num_train], idx[num_train:]

train_mask = torch.zeros_like(pyg_graph.y, dtype=torch.bool)
train_mask[train_idx] = True
test_mask = torch.zeros_like(pyg_graph.y, dtype=torch.bool)
test_mask[test_idx] = True

print(train_mask)
# tensor([ True, False, False, False, False])
print(test_mask)
# tensor([False, True, True, True, True])
QUESTION
I'm working through the lessons on building a neural network and I'm confused as to why 512 is used for the linear_relu_stack in the example code:
class NeuralNetwork(nn.Module):
    """Flatten the input and feed it through a 784-512-512-10 Linear/ReLU stack."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # NOTE(review): the stack ends with a ReLU, so the returned "logits"
        # are never negative — confirm this is intended.
        layers = [
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU(),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the linear/ReLU stack for the flattened ``x``."""
        return self.linear_relu_stack(self.flatten(x))
I started googling around and saw many examples of the torch.nn.Linear
function using various values of 2**N
but it isn't clear to me why they are using powers of 2 nor how they are choosing which value to use.
ANSWER
Answered 2021-Dec-01 at 15:00While there are unsubstantiated claims that powers of 2 help to optimize performance for various parts of a neural network, it is a convenient method of selecting/testing/finding the right order of magnitude to use for various parameters/hyperparameters.
QUESTION
I tried to train a model using PyTorch on my Macbook pro. It uses the new generation apple M1 CPU. However, PyTorch couldn't recognize my GPUs.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Does anyone know any solution?
I have updated all the libraries to the latest versions.
ANSWER
Answered 2021-Nov-18 at 03:08It looks like PyTorch support for the M1 GPU is in the works, but is not yet complete.
From @soumith on GitHub:
So, here's an update. We plan to get the M1 GPU supported. @albanD, @ezyang and a few core-devs have been looking into it. I can't confirm/deny the involvement of any other folks right now.
So, what we have so far is that we had a prototype that was just about okay. We took the wrong approach (more graph-matching-ish), and the user-experience wasn't great -- some operations were really fast, some were really slow, there wasn't a smooth experience overall. One had to guess-work which of their workflows would be fast.
So, we're completely re-writing it using a new approach, which I think is a lot closer to your good ole PyTorch, but it is going to take some time. I don't think we're going to hit a public alpha in the next ~4 months.
We will open up development of this backend as soon as we can.
That post: https://github.com/pytorch/pytorch/issues/47702#issuecomment-965625139
TL;DR: a public alpha is at least 4 months out.
QUESTION
I am trying to understand an example snippet that makes use of the PyTorch transposed convolution function, with documentation here, where in the docs the author writes:
"The padding argument effectively adds dilation * (kernel_size - 1) - padding amount of zero padding to both sizes of the input."
Consider the snippet below where a [1, 1, 4, 4]
sample image of all ones is input to a ConvTranspose2D
operation with arguments stride=2
and padding=1
with a weight matrix of shape (1, 1, 4, 4)
that has entries from a range between 1
and 16
(in this case dilation=1
and added_padding = 1*(4-1)-1 = 2
)
# All-ones 4x4 input image on the GPU.
sample_im = torch.ones(1, 1, 4, 4).cuda()

# Transposed convolution: 1 -> 1 channel, 4x4 kernel, stride 2, padding 1.
sample_deconv2 = nn.ConvTranspose2d(
    in_channels=1, out_channels=1, kernel_size=4, stride=2, padding=1, bias=False
).cuda()

# Overwrite the weights with the values 1..16 laid out row by row.
kernel = torch.arange(1., 17.).reshape(1, 1, 4, 4)
sample_deconv2.weight = torch.nn.Parameter(kernel.cuda())
Which yields:
>>> sample_deconv2(sample_im)
tensor([[[[ 6., 12., 14., 12., 14., 12., 14., 7.],
[12., 24., 28., 24., 28., 24., 28., 14.],
[20., 40., 44., 40., 44., 40., 44., 22.],
[12., 24., 28., 24., 28., 24., 28., 14.],
[20., 40., 44., 40., 44., 40., 44., 22.],
[12., 24., 28., 24., 28., 24., 28., 14.],
[20., 40., 44., 40., 44., 40., 44., 22.],
[10., 20., 22., 20., 22., 20., 22., 11.]]]], device='cuda:0',
grad_fn=)
Now I have seen simple examples of transposed convolution without stride and padding. For instance, if the input is a 2x2
image [[2, 4], [0, 1]]
, and the convolutional filter with one output channel is [[3, 1], [1, 5]]
, then the resulting tensor of shape (1, 1, 3, 3)
can be seen as the sum of the rightmost four matrices in the image below:
The problem is I can't seem to find examples that use strides and/or padding in the same visualization. As per my snippet, I am having a very difficult time understanding how the padding is applied to the sample image, or how the stride works to get this output. Any insights appreciated, even just understanding how the 6
in the (0,0)
entry or the 12
in the (0,1)
entry of the resulting matrix are computed would be very helpful.
ANSWER
Answered 2021-Oct-31 at 10:39The output spatial dimensions of nn.ConvTranspose2d
are given by:
out = (x - 1)s - 2p + d(k - 1) + op + 1
where x
is the input spatial dimension and out
the corresponding output size, s
is the stride, d
the dilation, p
the padding, k
the kernel size, and op
the output padding.
If we keep the following operands:
For each value of the input, we compute a buffer (of the corresponding color) by calculating the product with each element of the kernel.
Here are the visualizations for s=1, p=0
, s=1, p=1
, s=2, p=0
, and s=2, p=1
:
s=1, p=0
: output is3x3
For the blue buffer, we have (1) 2*k_top-left = 2*3 = 6
; (2) 2*k_top-right = 2*1 = 2
; (3) 2*k_bottom-left = 2*1 = 2
; (4) 2*k_bottom-right = 2*5 = 10
.
s=1, p=1
: output is1x1
s=2, p=0
: output is4x4
s=2, p=1
: output is 2x2
QUESTION
I have been reading the official guide here (https://www.tensorflow.org/text/tutorials/transformer) to try and recreate the Vanilla Transformer in Tensorflow. I notice the dataset used is quite specific, and at the end of the guide, it says to try with a different dataset.
But that is where I have been stuck for a long time! I am trying to use the WMT14 dataset (as used in the original paper, Vaswani et al.) here: https://www.tensorflow.org/datasets/catalog/wmt14_translate#wmt14_translatede-en .
I have also tried Multi30k and IWSLT dataset from Spacy, but are there any guides on how I can fit the dataset to what the model requires? Specifically, to tokenize it. The official TF guide uses a pretrained tokenizer, which is specific to the PT-EN dataset given.
model_name = "ted_hrlr_translate_pt_en_converter"
I am wondering, how I can use the TF (bert) tokenizer to tokenize the Spacy dataset? I have the code for PyTorch, unfortunately I do not know how to adapt it for Tensorflow. Any help would be greatly appreciated!
import spacy

spacy_de = spacy.load('de')
spacy_en = spacy.load('en')


def tokenize_de(text):
    """Split German ``text`` into token strings with the spaCy tokenizer."""
    return [tok.text for tok in spacy_de.tokenizer(text)]


def tokenize_en(text):
    """Split English ``text`` into token strings with the spaCy tokenizer."""
    return [tok.text for tok in spacy_en.tokenizer(text)]


# Special tokens. The angle-bracket literals were lost when the page was
# extracted (they look like HTML tags), which left three identical empty
# strings; distinct markers are required for begin/end/padding.
BOS_WORD = '<s>'
EOS_WORD = '</s>'
BLANK_WORD = "<blank>"
SRC = data.Field(tokenize=tokenize_de, pad_token=BLANK_WORD)
TGT = data.Field(tokenize=tokenize_en, init_token = BOS_WORD,
                 eos_token = EOS_WORD, pad_token=BLANK_WORD)

# Keep only sentence pairs where both sides have at most MAX_LEN tokens.
MAX_LEN = 100
train, val, test = datasets.IWSLT.splits(
    exts=('.de', '.en'), fields=(SRC, TGT),
    filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and
        len(vars(x)['trg']) <= MAX_LEN)

# Build the vocabularies with a minimum token frequency of MIN_FREQ.
MIN_FREQ = 2
SRC.build_vocab(train.src, min_freq=MIN_FREQ)
TGT.build_vocab(train.trg, min_freq=MIN_FREQ)
ANSWER
Answered 2021-Oct-11 at 23:00You can build your own tokenizer following this tutorial https://www.tensorflow.org/text/guide/subwords_tokenizer
It is the exact same way they build the ted_hrlr_translate_pt_en_converter tokenizer in the transformers example, you just need to adjust it to your language.
I rewrote it for your case but didn't test it:
import collections
import logging
import os
import pathlib
import re
import string
import sys
import time
import numpy as np
#import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import tensorflow_text as text
import tensorflow as tf
from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab
# Load the WMT14 German-English pairs as (de, en) tuples.
examples, metadata = tfds.load('wmt14_translate/de-en', with_info=True,
                               as_supervised=True)
train_examples = examples['train']
val_examples = examples['validation']

# Print one batch of three sentence pairs: German first, then English.
for de_examples, en_examples in train_examples.batch(3).take(1):
    for de_sentence in de_examples.numpy():
        print(de_sentence.decode('utf-8'))
    print()
    for en_sentence in en_examples.numpy():
        print(en_sentence.decode('utf-8'))

# Single-language views of the training set, used to learn each vocabulary.
train_en = train_examples.map(lambda de, en: en)
train_de = train_examples.map(lambda de, en: de)
# Shared settings for both vocabularies; the tokenizer parameters are reused
# further down when the BertTokenizer objects are built.
bert_tokenizer_params=dict(lower_case=True)
reserved_tokens=["[PAD]", "[UNK]", "[START]", "[END]"]
bert_vocab_args = dict(
    # The target vocabulary size
    vocab_size = 8000,
    # Reserved tokens that must be included in the vocabulary
    reserved_tokens=reserved_tokens,
    # Arguments for `text.BertTokenizer`
    bert_tokenizer_params=bert_tokenizer_params,
    # Arguments for `wordpiece_vocab.wordpiece_tokenizer_learner_lib.learn`
    learn_params={},
)
# Learn the German vocabulary from the German side of the training set.
de_vocab = bert_vocab.bert_vocab_from_dataset(
    train_de.batch(1000).prefetch(2),
    **bert_vocab_args
)
# Print a few slices of the learned vocabulary.
print(de_vocab[:10])
print(de_vocab[100:110])
print(de_vocab[1000:1010])
print(de_vocab[-10:])
def write_vocab_file(filepath, vocab):
    """Write the vocabulary to ``filepath``, one token per line.

    Args:
        filepath: destination path; an existing file is overwritten.
        vocab: iterable of tokens, written in iteration order.
    """
    # Explicit UTF-8: the tokens may contain non-ASCII characters and the
    # platform default encoding is not guaranteed to handle them.
    with open(filepath, 'w', encoding='utf-8') as f:
        for token in vocab:
            print(token, file=f)
write_vocab_file('de_vocab.txt', de_vocab)
# Learn the English vocabulary the same way and save it as well.
en_vocab = bert_vocab.bert_vocab_from_dataset(
    train_en.batch(1000).prefetch(2),
    **bert_vocab_args
)
print(en_vocab[:10])
print(en_vocab[100:110])
print(en_vocab[1000:1010])
print(en_vocab[-10:])
write_vocab_file('en_vocab.txt', en_vocab)
# Build one tokenizer per language from the vocabulary files written above.
de_tokenizer = text.BertTokenizer('de_vocab.txt', **bert_tokenizer_params)
en_tokenizer = text.BertTokenizer('en_vocab.txt', **bert_tokenizer_params)
# Tokenize the examples -> (batch, word, word-piece)
token_batch = en_tokenizer.tokenize(en_examples)
# Merge the word and word-piece axes -> (batch, tokens)
token_batch = token_batch.merge_dims(-2,-1)
for ex in token_batch.to_list():
    print(ex)
# Lookup each token id in the vocabulary.
txt_tokens = tf.gather(en_vocab, token_batch)
# Join with spaces.
tf.strings.reduce_join(txt_tokens, separator=' ', axis=-1)
words = en_tokenizer.detokenize(token_batch)
tf.strings.reduce_join(words, separator=' ', axis=-1)
# Positions of "[START]" and "[END]" within reserved_tokens.
START = tf.argmax(tf.constant(reserved_tokens) == "[START]")
END = tf.argmax(tf.constant(reserved_tokens) == "[END]")
def add_start_end(ragged):
    """Prepend START and append END to every row of ``ragged``."""
    batch_size = ragged.bounding_shape()[0]
    # One column per marker, with as many rows as the batch has.
    column_shape = [batch_size, 1]
    start_column = tf.fill(column_shape, START)
    end_column = tf.fill(column_shape, END)
    return tf.concat([start_column, ragged, end_column], axis=1)
words = en_tokenizer.detokenize(add_start_end(token_batch))
tf.strings.reduce_join(words, separator=' ', axis=-1)
def cleanup_text(reserved_tokens, token_txt):
    """Remove reserved tokens (except "[UNK]") and join the rest with spaces."""
    # Regex alternation matching any reserved token other than "[UNK]".
    droppable = (tok for tok in reserved_tokens if tok != "[UNK]")
    bad_token_re = "|".join(map(re.escape, droppable))

    # Keep only the cells that are not exactly one of those tokens.
    keep_cells = ~tf.strings.regex_full_match(token_txt, bad_token_re)
    kept_tokens = tf.ragged.boolean_mask(token_txt, keep_cells)

    # Collapse the last axis into a single space-separated string.
    return tf.strings.reduce_join(kept_tokens, separator=' ', axis=-1)
token_batch = en_tokenizer.tokenize(en_examples).merge_dims(-2,-1)
words = en_tokenizer.detokenize(token_batch)
cleanup_text(reserved_tokens, words).numpy()
class CustomTokenizer(tf.Module):
    """Exportable wrapper around a ``text.BertTokenizer`` built from a vocab file.

    Exposes ``tokenize``, ``detokenize``, ``lookup`` and three ``get_*``
    accessors as ``tf.function`` methods and traces concrete functions for
    them in ``__init__``.
    """

    # NOTE(review): there is no super().__init__() call here — confirm that
    # tf.Module does not need it for this use.
    def __init__(self, reserved_tokens, vocab_path):
        self.tokenizer = text.BertTokenizer(vocab_path, lower_case=True)
        self._reserved_tokens = reserved_tokens
        self._vocab_path = tf.saved_model.Asset(vocab_path)
        # Load the vocabulary file, one token per line, into a variable.
        vocab = pathlib.Path(vocab_path).read_text().splitlines()
        self.vocab = tf.Variable(vocab)
        ## Create the signatures for export:
        # Include a tokenize signature for a batch of strings.
        self.tokenize.get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string))
        # Include `detokenize` and `lookup` signatures for:
        # * `Tensors` with shapes [tokens] and [batch, tokens]
        # * `RaggedTensors` with shape [batch, tokens]
        self.detokenize.get_concrete_function(
            tf.TensorSpec(shape=[None, None], dtype=tf.int64))
        self.detokenize.get_concrete_function(
            tf.RaggedTensorSpec(shape=[None, None], dtype=tf.int64))
        self.lookup.get_concrete_function(
            tf.TensorSpec(shape=[None, None], dtype=tf.int64))
        self.lookup.get_concrete_function(
            tf.RaggedTensorSpec(shape=[None, None], dtype=tf.int64))
        # These `get_*` methods take no arguments
        self.get_vocab_size.get_concrete_function()
        self.get_vocab_path.get_concrete_function()
        self.get_reserved_tokens.get_concrete_function()

    @tf.function
    def tokenize(self, strings):
        """Tokenize ``strings`` and wrap each row via ``add_start_end``."""
        enc = self.tokenizer.tokenize(strings)
        # Merge the `word` and `word-piece` axes.
        enc = enc.merge_dims(-2,-1)
        enc = add_start_end(enc)
        return enc

    @tf.function
    def detokenize(self, tokenized):
        """Convert token ids back to words and pass them through ``cleanup_text``."""
        words = self.tokenizer.detokenize(tokenized)
        return cleanup_text(self._reserved_tokens, words)

    @tf.function
    def lookup(self, token_ids):
        """Gather the vocabulary entries at ``token_ids``."""
        return tf.gather(self.vocab, token_ids)

    @tf.function
    def get_vocab_size(self):
        """Return the first dimension of the vocabulary variable's shape."""
        return tf.shape(self.vocab)[0]

    @tf.function
    def get_vocab_path(self):
        """Return the vocabulary file asset."""
        return self._vocab_path

    @tf.function
    def get_reserved_tokens(self):
        """Return the reserved tokens as a constant tensor."""
        return tf.constant(self._reserved_tokens)
# Collect both tokenizers on a bare module and export it as a SavedModel.
tokenizers = tf.Module()
# NOTE(review): this attribute is named `pt` but is built from 'de_vocab.txt'
# — confirm the name is intended before anything depends on it.
tokenizers.pt = CustomTokenizer(reserved_tokens, 'de_vocab.txt')
tokenizers.en = CustomTokenizer(reserved_tokens, 'en_vocab.txt')
# The model name doubles as the export directory passed to `save`.
model_name = 'ted_hrlr_translate_de_en_converter'
tf.saved_model.save(tokenizers, model_name)
QUESTION
in the pytorch NLLLoss doc the default of ignore_index is -100 instead of the usual None
, are there any particular reasons? seems like any negative value is equivalent.
BTW, what may be the reason that I would want to ignore an index? Thanks!
ANSWER
Answered 2021-Sep-27 at 18:31
The value for ignore_index
must be an int, that's why the default value is an int and not None
. The default value is arbitrary, it could have been any negative number, i.e. anything that is not a "valid" class label. The function will ignore all elements for which the target instance has that class label. In practice, this option can be used to identify unlabeled pixels for example in dense prediction tasks.
Edit: Tracing back the implementation of nn.NLLLoss
, we can find this comment in the nll_loss
implementation of torch/onnx/symbolic_opset12.py
:
# in onnx NegativeLogLikelihoodLoss specification, ignore_index is optional without default value.
# therefore we need to set ignore_index attribute even if it is not specified (e.g. ignore_index=-100).
ignore_index = sym_help._maybe_get_const(ignore_index, "i")
QUESTION
I have a CSV file that looks like this
I want to choose the last column and make character level one-hot-encode matrices of every sequence, I use this code and it doesn't work
data = pd.read_csv('database.csv', usecols=[4])
alphabet = ['A', 'C', 'D', 'E', 'F', 'G','H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
charto = dict((c,i) for i,c in enumerate(alphabet))
iint = [charto[char] for char in data]
onehot2 = []
for s in iint:
lett = [0 for _ in range(len(alphabet))]
lett[s] = 1
onehot2.append(lett)
What do you suggest doing for this task? (by the way, I want to use this dataset for a PyTorch model)
ANSWER
Answered 2021-Sep-22 at 15:21
I think it would be best to keep pd.DataFrame
as is and do the transformation "on the fly" within PyTorch Dataset.
First, dummy data similar to yours:
df = pd.DataFrame(
{
"ID": [1, 2, 3],
"Source": ["Serbia", "Poland", "Germany"],
"Sequence": ["ABCDE", "EBCDA", "AAD"],
}
)
After that, we can create torch.utils.data.Dataset
class (example alphabet is shown, you might change it to anything you want):
class Dataset(torch.utils.data.Dataset):
    """Serve DataFrame rows with the "Sequence" column one-hot encoded.

    Each item is a pair ``(metadata, target)``: ``metadata`` is the row
    without its "Sequence" entry, and ``target`` is a
    ``(len(sequence), len(alphabet))`` integer tensor holding one one-hot
    row per character of the sequence.

    Raises:
        KeyError: if a sequence contains a character outside the alphabet.
    """

    def __init__(self, df: pd.DataFrame):
        self.df = df
        # Change alphabet to anything you need
        alphabet = ["A", "B", "C", "D", "E", "F"]
        self.mapping = {letter: i for i, letter in enumerate(alphabet)}

    def __getitem__(self, index):
        # Read from the frame given to the constructor; relying on a
        # module-level `df` would ignore the argument (or fail outright).
        sample = self.df.iloc[index]
        sequence = sample["Sequence"]
        # An explicit integer dtype keeps an empty sequence valid: a bare
        # `torch.tensor([])` is float, which `one_hot` rejects.
        letter_ids = torch.tensor(
            [self.mapping[letter] for letter in sequence],
            dtype=torch.long,
        )
        target = torch.nn.functional.one_hot(
            letter_ids,
            num_classes=len(self.mapping),
        )
        return sample.drop("Sequence"), target

    def __len__(self):
        return len(self.df)
This code simply transforms indices of letters to their one-hot encoding via torch.nn.functional.one_hot
function.
Usage is pretty simple:
ds = Dataset(df)
ds[0]
which returns (you might want to change how your sample is created though as I'm not sure about the format and only focused on hot-encoded targets) the following targets (ID
and Source
omitted):
tensor([ [1., 0., 0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0.],
[0., 0., 0., 1., 0., 0.],
[0., 0., 0., 0., 1., 0.]]))
QUESTION
I have a 2D pytorch tensor of shape n by m. I want to index the second dimension using a list of indices (which could be done with torch.gather) and then also set new values to the result of the indexing.
Example:
data = torch.tensor([[0,1,2], [3,4,5], [6,7,8]]) # shape (3,3)
indices = torch.tensor([1,2,1], dtype=torch.long).unsqueeze(-1) # shape (3,1)
# data tensor:
# tensor([[0, 1, 2],
# [3, 4, 5],
# [6, 7, 8]])
I want to select the specified indices per row (which would be [1,5,7])
but then also set these values to another number - e.g. 42.
I can select the desired columns row wise by doing:
data.gather(1, indices)
tensor([[1],
[5],
[7]])
data.gather(1, indices)[:] = 42 # **This does NOT work**, since the result of gather
# does not use the same storage as the original tensor
which is fine, but I would like to change these values now, and have the change also affect the data
tensor.
I can do what I want to achieve using this, but it seems to be very un-pythonic:
max_index = torch.max(indices)
for i in range(0, max_index + 1):
mask = (indices == i).nonzero(as_tuple=True)[0]
data[mask, i] = 42
print(data)
# tensor([[ 0, 42, 2],
# [ 3, 4, 42],
# [ 6, 42, 8]])
Any hints on how to do that more elegantly?
ANSWER
Answered 2021-Sep-08 at 12:16
What you are looking for is torch.Tensor.scatter_ with the value option.
Tensor.scatter_(dim, index, src, reduce=None) → Tensor
Writes all values from the tensor src into self at the indices specified in the index tensor. For each value in src, its output index is specified by its index in src for dimension != dim and by the corresponding value in index for dimension = dim.
With 2D tensors as input and dim=1, the operation is: self[i][index[i][j]] = src[i][j]
No mention of the value parameter though...
With value=42
, and dim=1
, this will have the following effect on data:
data[i][index[i][j]] = 42
Here applied in-place:
>>> data.scatter_(index=indices, dim=1, value=42)
>>> data
tensor([[ 0, 42, 2],
[ 3, 4, 42],
[ 6, 42, 8]])
Community Discussions, Code Snippets contain sources that include Stack Exchange Network
Vulnerabilities
No vulnerabilities reported
Install pytorch
Stable binaries: Python 3.6: https://nvidia.box.com/v/torch-stable-cp36-jetson-jp42
Rolling weekly binaries: Python 3.6: https://nvidia.box.com/v/torch-weekly-cp36-jetson-jp42
Three pointers to get you started:
Tutorials: get you started with understanding and using PyTorch
Examples: easy to understand PyTorch code across all domains
The API Reference
Glossary
Support
Find, review, and download reusable Libraries, Code Snippets, Cloud APIs from over 650 million Knowledge Items
Find more libraries
Explore Kits - Develop, implement, customize Projects, Custom Functions and Applications with kandi kits
Save this library and start creating your kit
Share this Page