@@ -92,7 +92,8 @@ struct UnaryTwoOutputsContigFunctor
9292 /* NOTE: work-group size must be divisible by sub-group size */
9393
9494 if constexpr (enable_sg_loadstore &&
95- UnaryTwoOutputsOpT::is_constant::value) {
95+ UnaryTwoOutputsOpT::is_constant::value)
96+ {
9697 // the operator's output values are compile-time constants
9798 constexpr resT1 const_val1 = UnaryTwoOutputsOpT::constant_value1;
9899 constexpr resT2 const_val2 = UnaryTwoOutputsOpT::constant_value2;
@@ -528,21 +529,18 @@ struct BinaryTwoOutputsStridedFunctor
528529 * dpctl::tensor::kernels::elementwise_common namespace.
529530 */
530531template <typename argTy,
531- template <typename T>
532- class UnaryTwoOutputsType ,
532+ template <typename T> class UnaryTwoOutputsType ,
533533 template <typename A,
534534 typename R1,
535535 typename R2,
536536 std::uint8_t vs,
537537 std::uint8_t nv,
538- bool enable>
539- class UnaryTwoOutputsContigFunctorT ,
538+ bool enable> class UnaryTwoOutputsContigFunctorT ,
540539 template <typename A,
541540 typename R1,
542541 typename R2,
543542 std::uint8_t vs,
544- std::uint8_t nv>
545- class kernel_name ,
543+ std::uint8_t nv> class kernel_name ,
546544 std::uint8_t vec_sz = 4u ,
547545 std::uint8_t n_vecs = 2u >
548546sycl::event
@@ -613,12 +611,15 @@ sycl::event
613611 * dpctl::tensor::kernels::elementwise_common namespace.
614612 */
615613template <typename argTy,
616- template <typename T>
617- class UnaryTwoOutputsType ,
618- template <typename A, typename R1, typename R2, typename I>
619- class UnaryTwoOutputsStridedFunctorT ,
620- template <typename A, typename R1, typename R2, typename I>
621- class kernel_name >
614+ template <typename T> class UnaryTwoOutputsType ,
615+ template <typename A,
616+ typename R1,
617+ typename R2,
618+ typename I> class UnaryTwoOutputsStridedFunctorT ,
619+ template <typename A,
620+ typename R1,
621+ typename R2,
622+ typename I> class kernel_name >
622623sycl::event unary_two_outputs_strided_impl (
623624 sycl::queue &exec_q,
624625 std::size_t nelems,
@@ -665,27 +666,25 @@ sycl::event unary_two_outputs_strided_impl(
665666 * @note It extends binary_contig_impl from
666667 * dpctl::tensor::kernels::elementwise_common namespace.
667668 */
668- template <typename argTy1,
669- typename argTy2,
670- template <typename T1, typename T2>
671- class BinaryTwoOutputsType ,
672- template <typename T1,
673- typename T2,
674- typename T3,
675- typename T4,
676- std::uint8_t vs,
677- std::uint8_t nv,
678- bool enable_sg_loadstore>
679- class BinaryTwoOutputsContigFunctorT ,
680- template <typename T1,
681- typename T2,
682- typename T3,
683- typename T4,
684- std::uint8_t vs,
685- std::uint8_t nv>
686- class kernel_name ,
687- std::uint8_t vec_sz = 4u ,
688- std::uint8_t n_vecs = 2u >
669+ template <
670+ typename argTy1,
671+ typename argTy2,
672+ template <typename T1, typename T2> class BinaryTwoOutputsType ,
673+ template <typename T1,
674+ typename T2,
675+ typename T3,
676+ typename T4,
677+ std::uint8_t vs,
678+ std::uint8_t nv,
679+ bool enable_sg_loadstore> class BinaryTwoOutputsContigFunctorT ,
680+ template <typename T1,
681+ typename T2,
682+ typename T3,
683+ typename T4,
684+ std::uint8_t vs,
685+ std::uint8_t nv> class kernel_name ,
686+ std::uint8_t vec_sz = 4u ,
687+ std::uint8_t n_vecs = 2u >
689688sycl::event
690689 binary_two_outputs_contig_impl (sycl::queue &exec_q,
691690 std::size_t nelems,
@@ -761,15 +760,19 @@ sycl::event
761760 * @note It extends binary_strided_impl from
762761 * dpctl::tensor::kernels::elementwise_common namespace.
763762 */
764- template <
765- typename argTy1,
766- typename argTy2,
767- template <typename T1, typename T2>
768- class BinaryTwoOutputsType ,
769- template <typename T1, typename T2, typename T3, typename T4, typename IndT>
770- class BinaryTwoOutputsStridedFunctorT ,
771- template <typename T1, typename T2, typename T3, typename T4, typename IndT>
772- class kernel_name >
763+ template <typename argTy1,
764+ typename argTy2,
765+ template <typename T1, typename T2> class BinaryTwoOutputsType ,
766+ template <typename T1,
767+ typename T2,
768+ typename T3,
769+ typename T4,
770+ typename IndT> class BinaryTwoOutputsStridedFunctorT ,
771+ template <typename T1,
772+ typename T2,
773+ typename T3,
774+ typename T4,
775+ typename IndT> class kernel_name >
773776sycl::event binary_two_outputs_strided_impl (
774777 sycl::queue &exec_q,
775778 std::size_t nelems,
0 commit comments