Commit 26dfcb2f by xuchen

fix bugs in imputer.cu

parent ca4271f2
@@ -329,7 +329,7 @@ imputer_loss_gpu_template(const Tensor &log_probs, const Tensor &targets,
       log_alpha.stride(0), log_alpha.stride(1), log_alpha.stride(2),
       tg_batch_offsets.data_ptr<int64_t>(), tg_target_stride, batch_size,
       BLANK);
-  THCudaCheck(cudaGetLastError()); // catch launch errors
+  C10_CUDA_CHECK(cudaGetLastError()); // catch launch errors
   return std::make_tuple(neg_log_likelihood, log_alpha);
 }
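The change repeated at each hunk is the same one-line swap: THCudaCheck was the error-checking macro of the legacy THC library, which recent PyTorch releases have removed, while C10_CUDA_CHECK (declared in c10/cuda/CUDAException.h) is its replacement and likewise turns any CUDA error code other than cudaSuccess into a C++ exception carrying file and line information. A minimal sketch of the pattern, assuming a PyTorch build with the c10 CUDA headers; the kernel and launcher are hypothetical stand-ins for the imputer kernels:

#include <c10/cuda/CUDAException.h>
#include <cuda_runtime.h>

// Hypothetical stand-in for an imputer kernel.
__global__ void imputer_stub_kernel(float* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = 0.0f;
}

void launch_and_check(float* out, int n, cudaStream_t stream) {
  int threads = 256;
  int blocks = (n + threads - 1) / threads;
  imputer_stub_kernel<<<blocks, threads, 0, stream>>>(out, n);
  // A kernel launch is asynchronous and returns no status itself; an
  // invalid launch configuration only surfaces through cudaGetLastError().
  // C10_CUDA_CHECK raises a c10::Error on failure, as THCudaCheck did.
  C10_CUDA_CHECK(cudaGetLastError());
}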
@@ -750,7 +750,7 @@ Tensor imputer_loss_backward_gpu_template(
         force_emits.stride(0), log_beta.stride(0), log_beta.stride(1),
         log_beta.stride(2), tg_batch_offsets.data_ptr<int64_t>(),
         tg_target_stride, batch_size, BLANK);
-    THCudaCheck(cudaGetLastError()); // catch launch errors
+    C10_CUDA_CHECK(cudaGetLastError()); // catch launch errors
   }
   // Very crude heuristic for what is a small problem., based on linearly
@@ -814,7 +814,7 @@ Tensor imputer_loss_backward_gpu_template(
         log_beta.stride(1), log_beta.stride(2),
         tg_batch_offsets.data_ptr<int64_t>(), tg_target_stride, batch_size,
         num_labels, BLANK, zero_infinity);
-    THCudaCheck(cudaGetLastError()); // catch launch errors
+    C10_CUDA_CHECK(cudaGetLastError()); // catch launch errors
   } else { // small problem, use naive algorithm
     // Still no block/grid configuration guru...
     int threads_input = max_threads;
@@ -839,7 +839,7 @@ Tensor imputer_loss_backward_gpu_template(
         log_beta.stride(1), log_beta.stride(2),
         tg_batch_offsets.data_ptr<int64_t>(), tg_target_stride, batch_size,
         num_labels, BLANK, zero_infinity);
-    THCudaCheck(cudaGetLastError()); // catch launch errors
+    C10_CUDA_CHECK(cudaGetLastError()); // catch launch errors
   }
   // zero those invalid graident elements due to padding
@@ -856,7 +856,7 @@ Tensor imputer_loss_backward_gpu_template(
         grad.data_ptr<scalar_t>(), input_lengths_t.data_ptr<int64_t>(),
         grad.stride(0), grad.stride(1), grad.stride(2), grad.size(0),
         grad.size(1), grad.size(2));
-    THCudaCheck(cudaGetLastError());
+    C10_CUDA_CHECK(cudaGetLastError());
   }
   return grad;
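A note on the pattern itself: each check sits immediately after its kernel launch rather than once at the end of the function. Launches are asynchronous, and cudaGetLastError() both returns and clears the sticky error state, so checking right away attributes a launch failure to the specific kernel that caused it.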