From 196bb5cba93ba89842e82f6c26ee73c7ba69ee9d Mon Sep 17 00:00:00 2001 From: Akhilesh Gotmare Date: Tue, 21 Apr 2020 15:12:04 +0800 Subject: [PATCH] masked_lm_labels -100 and not -1 Huggingface's BertForMaskedLM requires -100 for masked tokens (where no loss is to be computed/backproped) - https://github.com/huggingface/transformers/blob/master/src/transformers/modeling_bert.py#L881 if set to -1, loss.backward() returns the error described here - https://github.com/pytorch/pytorch/issues/1204 and log below - ``` Traceback (most recent call last): File "cbert_finetune.py", line 166, in main() File "cbert_finetune.py", line 149, in main loss.backward() File "/opt/conda/lib/python3.6/site-packages/torch/tensor.py", line 118, in backward torch.autograd.backward(self, gradient, retain_graph, create_graph) File "/opt/conda/lib/python3.6/site-packages/torch/autograd/__init__.py", line 93, in backward allow_unreachable=True) # allow_unreachable flag RuntimeError: CUDA error: device-side assert triggered /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [1,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [2,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [3,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [5,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [6,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [7,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [8,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [9,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [10,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [11,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [12,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [14,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [17,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [18,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [19,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [20,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [21,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [22,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [23,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [24,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [25,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [26,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [27,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [28,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [29,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [30,0,0] Assertion `t >= 0 && t < n_classes` failed. /opt/conda/conda-bld/pytorch_1565272279342/work/aten/src/THCUNN/ClassNLLCriterion.cu:105: void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *, Dtype *, Dtype *, long *, Dtype *, int, int, int, int, long) [with Dtype = float, Acctype = float]: block: [0,0,0], thread: [31,0,0] Assertion `t >= 0 && t < n_classes` failed. ``` --- cbert_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cbert_utils.py b/cbert_utils.py index b441d4b..a85d313 100755 --- a/cbert_utils.py +++ b/cbert_utils.py @@ -197,7 +197,7 @@ def extract_features(tokens_a, tokens_label, max_seq_length, tokenizer): masked_lm_probs = 0.15 max_predictions_per_seq = 20 rng = random.Random(12345) - original_masked_lm_labels = [-1] * max_seq_length + original_masked_lm_labels = [-100] * max_seq_length (output_tokens, masked_lm_positions, masked_lm_labels) = create_masked_lm_predictions( tokens, masked_lm_probs, original_masked_lm_labels, max_predictions_per_seq, rng, tokenizer) @@ -271,4 +271,4 @@ def create_masked_lm_predictions(tokens, masked_lm_probs, masked_lm_labels, masked_lm_labels[index] = convert_tokens_to_ids([tokens[index]], tokenizer)[0] output_tokens[index] = masked_token masked_lm_positions.append(index) - return output_tokens, masked_lm_positions, masked_lm_labels \ No newline at end of file + return output_tokens, masked_lm_positions, masked_lm_labels