62 const line_buf* aug,
ui32 repeat,
bool synthesis) = NULL;
81 const line_buf* aug,
ui32 repeat,
bool synthesis) = NULL;
100 static std::once_flag wavelet_transform_functions_init_flag;
101 std::call_once(wavelet_transform_functions_init_flag, [](){
102#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
113 #ifndef OJPH_DISABLE_SIMD
115 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
117 #ifndef OJPH_DISABLE_SSE
127 #ifndef OJPH_DISABLE_SSE2
136 #ifndef OJPH_DISABLE_AVX
146 #ifndef OJPH_DISABLE_AVX2
155 #if (defined(OJPH_ARCH_X86_64) && !defined(OJPH_DISABLE_AVX512))
169 #elif defined(OJPH_ARCH_ARM)
171 #elif defined(OJPH_ARCH_PPC64LE)
205#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
211 ui32 repeat,
bool synthesis)
218 const si32* src1 = sig->
i32, * src2 = other->
i32;
225 for (
ui32 i = repeat; i > 0; --i)
226 *dst++ -= (b + *src1++ + *src2++) >> e;
228 for (
ui32 i = repeat; i > 0; --i)
229 *dst++ += (b + *src1++ + *src2++) >> e;
231 else if (a == -1 && b == 1 && e == 1)
234 for (
ui32 i = repeat; i > 0; --i)
235 *dst++ += (*src1++ + *src2++) >> e;
237 for (
ui32 i = repeat; i > 0; --i)
238 *dst++ -= (*src1++ + *src2++) >> e;
243 for (
ui32 i = repeat; i > 0; --i)
244 *dst++ -= (b - (*src1++ + *src2++)) >> e;
246 for (
ui32 i = repeat; i > 0; --i)
247 *dst++ += (b - (*src1++ + *src2++)) >> e;
251 for (
ui32 i = repeat; i > 0; --i)
252 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
254 for (
ui32 i = repeat; i > 0; --i)
255 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
263 ui32 repeat,
bool synthesis)
270 const si64* src1 = sig->
i64, * src2 = other->
i64;
277 for (
ui32 i = repeat; i > 0; --i)
278 *dst++ -= (b + *src1++ + *src2++) >> e;
280 for (
ui32 i = repeat; i > 0; --i)
281 *dst++ += (b + *src1++ + *src2++) >> e;
283 else if (a == -1 && b == 1 && e == 1)
286 for (
ui32 i = repeat; i > 0; --i)
287 *dst++ += (*src1++ + *src2++) >> e;
289 for (
ui32 i = repeat; i > 0; --i)
290 *dst++ -= (*src1++ + *src2++) >> e;
295 for (
ui32 i = repeat; i > 0; --i)
296 *dst++ -= (b - (*src1++ + *src2++)) >> e;
298 for (
ui32 i = repeat; i > 0; --i)
299 *dst++ += (b - (*src1++ + *src2++)) >> e;
303 for (
ui32 i = repeat; i > 0; --i)
304 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
306 for (
ui32 i = repeat; i > 0; --i)
307 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
314 ui32 repeat,
bool synthesis)
338 ui32 width,
bool even)
351 for (; w > 1; w -= 2)
353 *dpl++ = *sp++; *dph++ = *sp++;
361 ui32 l_width = (width + (even ? 1 : 0)) >> 1;
362 ui32 h_width = (width + (even ? 0 : 1)) >> 1;
364 for (
ui32 j = num_steps; j > 0; --j)
374 lp[l_width] = lp[l_width - 1];
376 const si32* sp = lp + (even ? 1 : 0);
380 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
381 *dp += (b + (sp[-1] + sp[0])) >> e;
383 else if (a == -1 && b == 1 && e == 1)
385 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
386 *dp -= (sp[-1] + sp[0]) >> e;
390 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
391 *dp += (b - (sp[-1] + sp[0])) >> e;
395 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
396 *dp += (b + a * (sp[-1] + sp[0])) >> e;
400 si32* t = lp; lp = hp; hp = t;
402 ui32 w = l_width; l_width = h_width; h_width = w;
407 ldst->
i32[0] = src->
i32[0];
409 hdst->
i32[0] = src->
i32[0] << 1;
417 ui32 width,
bool even)
430 for (; w > 1; w -= 2)
432 *dpl++ = *sp++; *dph++ = *sp++;
440 ui32 l_width = (width + (even ? 1 : 0)) >> 1;
441 ui32 h_width = (width + (even ? 0 : 1)) >> 1;
443 for (
ui32 j = num_steps; j > 0; --j)
453 lp[l_width] = lp[l_width - 1];
455 const si64* sp = lp + (even ? 1 : 0);
459 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
460 *dp += (b + (sp[-1] + sp[0])) >> e;
462 else if (a == -1 && b == 1 && e == 1)
464 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
465 *dp -= (sp[-1] + sp[0]) >> e;
469 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
470 *dp += (b - (sp[-1] + sp[0])) >> e;
474 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
475 *dp += (b + a * (sp[-1] + sp[0])) >> e;
479 si64* t = lp; lp = hp; hp = t;
481 ui32 w = l_width; l_width = h_width; h_width = w;
486 ldst->
i64[0] = src->
i64[0];
488 hdst->
i64[0] = src->
i64[0] << 1;
495 ui32 width,
bool even)
516 ui32 width,
bool even)
522 ui32 aug_width = (width + (even ? 1 : 0)) >> 1;
523 ui32 oth_width = (width + (even ? 0 : 1)) >> 1;
525 for (
ui32 j = 0; j < num_steps; ++j)
534 oth[oth_width] = oth[oth_width - 1];
536 const si32* sp = oth + (ev ? 0 : 1);
540 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
541 *dp -= (b + (sp[-1] + sp[0])) >> e;
543 else if (a == -1 && b == 1 && e == 1)
545 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
546 *dp += (sp[-1] + sp[0]) >> e;
550 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
551 *dp -= (b - (sp[-1] + sp[0])) >> e;
555 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
556 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
560 si32* t = aug; aug = oth; oth = t;
562 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
574 for (; w > 1; w -= 2)
576 *dp++ = *spl++; *dp++ = *sph++;
585 dst->
i32[0] = lsrc->
i32[0];
587 dst->
i32[0] = hsrc->
i32[0] >> 1;
595 ui32 width,
bool even)
601 ui32 aug_width = (width + (even ? 1 : 0)) >> 1;
602 ui32 oth_width = (width + (even ? 0 : 1)) >> 1;
604 for (
ui32 j = 0; j < num_steps; ++j)
613 oth[oth_width] = oth[oth_width - 1];
615 const si64* sp = oth + (ev ? 0 : 1);
619 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
620 *dp -= (b + (sp[-1] + sp[0])) >> e;
622 else if (a == -1 && b == 1 && e == 1)
624 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
625 *dp += (sp[-1] + sp[0]) >> e;
629 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
630 *dp -= (b - (sp[-1] + sp[0])) >> e;
634 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
635 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
639 si64* t = aug; aug = oth; oth = t;
641 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
653 for (; w > 1; w -= 2)
655 *dp++ = *spl++; *dp++ = *sph++;
664 dst->
i64[0] = lsrc->
i64[0];
666 dst->
i64[0] = hsrc->
i64[0] >> 1;
673 ui32 width,
bool even)
693 ui32 repeat,
bool synthesis)
700 float* dst = aug->
f32;
701 const float* src1 = sig->
f32, * src2 = other->
f32;
702 for (
ui32 i = repeat; i > 0; --i)
703 *dst++ += a * (*src1++ + *src2++);
709 float* dst = aug->
f32;
710 for (
ui32 i = repeat; i > 0; --i)
717 ui32 width,
bool even)
722 float* dph = hdst->
f32;
723 float* dpl = ldst->
f32;
724 float* sp = src->
f32;
730 for (; w > 1; w -= 2)
732 *dpl++ = *sp++; *dph++ = *sp++;
739 float* hp = hdst->
f32, * lp = ldst->
f32;
740 ui32 l_width = (width + (even ? 1 : 0)) >> 1;
741 ui32 h_width = (width + (even ? 0 : 1)) >> 1;
743 for (
ui32 j = num_steps; j > 0; --j)
750 lp[l_width] = lp[l_width - 1];
752 const float* sp = lp + (even ? 1 : 0);
754 for (
ui32 i = h_width; i > 0; --i, sp++, dp++)
755 *dp += a * (sp[-1] + sp[0]);
758 float* t = lp; lp = hp; hp = t;
760 ui32 w = l_width; l_width = h_width; h_width = w;
764 float K = atk->
get_K();
765 float K_inv = 1.0f / K;
769 for (
ui32 i = l_width; i > 0; --i)
773 for (
ui32 i = h_width; i > 0; --i)
779 ldst->
f32[0] = src->
f32[0];
781 hdst->
f32[0] = src->
f32[0] * 2.0f;
788 ui32 width,
bool even)
793 float* oth = hsrc->
f32, * aug = lsrc->
f32;
794 ui32 aug_width = (width + (even ? 1 : 0)) >> 1;
795 ui32 oth_width = (width + (even ? 0 : 1)) >> 1;
798 float K = atk->
get_K();
799 float K_inv = 1.0f / K;
803 for (
ui32 i = aug_width; i > 0; --i)
807 for (
ui32 i = oth_width; i > 0; --i)
812 for (
ui32 j = 0; j < num_steps; ++j)
819 oth[oth_width] = oth[oth_width - 1];
821 const float* sp = oth + (ev ? 0 : 1);
823 for (
ui32 i = aug_width; i > 0; --i, sp++, dp++)
824 *dp -= a * (sp[-1] + sp[0]);
827 float* t = aug; aug = oth; oth = t;
829 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
833 float* sph = hsrc->
f32;
834 float* spl = lsrc->
f32;
835 float* dp = dst->
f32;
838 { *dp++ = *sph++; --w; }
839 for (; w > 1; w -= 2)
840 { *dp++ = *spl++; *dp++ = *sph++; }
842 { *dp++ = *spl++; --w; }
846 dst->
f32[0] = lsrc->
f32[0];
848 dst->
f32[0] = hsrc->
f32[0] * 0.5f;
void(* rev_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void gen_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_syn32(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
static void gen_rev_vert_step64(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx512_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void vsx_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_vert_step32(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
static void gen_rev_horz_ana64(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* irv_vert_times_K)(float K, const line_buf *aug, ui32 repeat)
void gen_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void(* irv_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void init_wavelet_transform_functions()
void wasm_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
static void gen_rev_horz_syn64(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void vsx_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void wasm_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void sse_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void wasm_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx512_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void vsx_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void vsx_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void sse2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void(* irv_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void vsx_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void(* irv_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void wasm_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void vsx_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_ana32(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void vsx_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
@ PPC_CPU_EXT_LEVEL_ARCH_3_00
OJPH_EXPORT int get_cpu_ext_level()
@ X86_CPU_EXT_LEVEL_AVX512
ui32 get_num_steps() const
const lifting_step * get_step(ui32 s) const