1use crate::{PremultipliedColorU8, SpreadMode, PixmapRef};
19
20use crate::geom::ScreenIntRect;
21use crate::pixmap::SubPixmapMut;
22use crate::wide::{f32x8, i32x8, u32x8};
23
/// Number of pixels handled by one full run of the stage list.
///
/// `start` walks every row in chunks of this many pixels.
pub const STAGE_WIDTH: usize = 8;

/// Signature shared by all pipeline stages: a plain function mutating the pipeline state.
pub type StageFn = fn(p: &mut Pipeline);
27
/// Mutable state threaded through every stage of a pipeline run.
pub struct Pipeline<'a, 'b: 'a> {
    /// Position inside `functions` of the next stage to run.
    index: usize,
    /// Stage list currently being executed.
    functions: &'a [StageFn],
    /// Source pixmap read by the gather/sampling stages.
    pixmap_src: PixmapRef<'a>,
    /// Destination pixels read and written by the load/store stages.
    pixmap_dst: &'a mut SubPixmapMut<'b>,
    /// Per-stage parameters (uniform color, transform, gradient data, ...).
    ctx: &'a mut super::Context,
    /// Coverage mask read by `mask_u8`.
    mask_ctx: super::MaskCtx<'a>,
    /// Coverage data read by `scale_u8` and `lerp_u8`.
    aa_mask_ctx: super::AAMaskCtx,
    // Source lanes. Several stages also use `r`/`g` to carry x/y coordinates.
    r: f32x8,
    g: f32x8,
    b: f32x8,
    a: f32x8,
    // Destination lanes.
    dr: f32x8,
    dg: f32x8,
    db: f32x8,
    da: f32x8,
    /// Number of valid pixels in the current chunk (`STAGE_WIDTH` for full chunks).
    tail: usize,
    /// X coordinate of the first pixel of the current chunk.
    dx: usize,
    /// Y coordinate of the current row.
    dy: usize,
}
48
49impl Pipeline<'_, '_> {
50 #[inline(always)]
51 fn next_stage(&mut self) {
52 let next: fn(&mut Self) = self.functions[self.index];
53 self.index += 1;
54 next(self);
55 }
56}
57
// Table of every stage function in this module.
//
// NOTE(review): the table has exactly `super::STAGES_COUNT` entries and is presumably indexed
// by a stage identifier defined in the parent module, so the order of the entries must not
// change — confirm against the parent module before editing.
pub const STAGES: &[StageFn; super::STAGES_COUNT] = &[
    move_source_to_destination,
    move_destination_to_source,
    clamp_0,
    clamp_a,
    premultiply,
    uniform_color,
    seed_shader,
    load_dst,
    store,
    load_dst_u8,
    store_u8,
    gather,
    load_mask_u8,
    mask_u8,
    scale_u8,
    lerp_u8,
    scale_1_float,
    lerp_1_float,
    destination_atop,
    destination_in,
    destination_out,
    destination_over,
    source_atop,
    source_in,
    source_out,
    source_over,
    clear,
    modulate,
    multiply,
    plus,
    screen,
    xor,
    color_burn,
    color_dodge,
    darken,
    difference,
    exclusion,
    hard_light,
    lighten,
    overlay,
    soft_light,
    hue,
    saturation,
    color,
    luminosity,
    source_over_rgba,
    transform,
    reflect,
    repeat,
    bilinear,
    bicubic,
    pad_x1,
    reflect_x1,
    repeat_x1,
    gradient,
    evenly_spaced_2_stop_gradient,
    xy_to_radius,
    xy_to_2pt_conical_focal_on_circle,
    xy_to_2pt_conical_well_behaved,
    xy_to_2pt_conical_greater,
    mask_2pt_conical_degenerates,
    apply_vector_mask,
];
123
/// Returns the address of a stage function as an untyped pointer.
pub fn fn_ptr(f: StageFn) -> *const () {
    f as *const ()
}
127
128#[inline(never)]
129pub fn start(
130 functions: &[StageFn],
131 functions_tail: &[StageFn],
132 rect: &ScreenIntRect,
133 aa_mask_ctx: super::AAMaskCtx,
134 mask_ctx: super::MaskCtx,
135 ctx: &mut super::Context,
136 pixmap_src: PixmapRef,
137 pixmap_dst: &mut SubPixmapMut,
138) {
139 let mut p = Pipeline {
140 index: 0,
141 functions: &[],
142 pixmap_src,
143 pixmap_dst,
144 mask_ctx,
145 aa_mask_ctx,
146 ctx,
147 r: f32x8::default(),
148 g: f32x8::default(),
149 b: f32x8::default(),
150 a: f32x8::default(),
151 dr: f32x8::default(),
152 dg: f32x8::default(),
153 db: f32x8::default(),
154 da: f32x8::default(),
155 tail: 0,
156 dx: 0,
157 dy: 0,
158 };
159
160 for y in rect.y()..rect.bottom() {
161 let mut x = rect.x() as usize;
162 let end = rect.right() as usize;
163
164 p.functions = functions;
165 while x + STAGE_WIDTH <= end {
166 p.index = 0;
167 p.dx = x;
168 p.dy = y as usize;
169 p.tail = STAGE_WIDTH;
170 p.next_stage();
171 x += STAGE_WIDTH;
172 }
173
174 if x != end {
175 p.index = 0;
176 p.functions = functions_tail;
177 p.dx = x;
178 p.dy = y as usize;
179 p.tail = end - x;
180 p.next_stage();
181 }
182 }
183}
184
185fn move_source_to_destination(p: &mut Pipeline) {
186 p.dr = p.r;
187 p.dg = p.g;
188 p.db = p.b;
189 p.da = p.a;
190
191 p.next_stage();
192}
193
194fn premultiply(p: &mut Pipeline) {
195 p.r *= p.a;
196 p.g *= p.a;
197 p.b *= p.a;
198
199 p.next_stage();
200}
201
202fn move_destination_to_source(p: &mut Pipeline) {
203 p.r = p.dr;
204 p.g = p.dg;
205 p.b = p.db;
206 p.a = p.da;
207
208 p.next_stage();
209}
210
211fn clamp_0(p: &mut Pipeline) {
212 p.r = p.r.max(f32x8::default());
213 p.g = p.g.max(f32x8::default());
214 p.b = p.b.max(f32x8::default());
215 p.a = p.a.max(f32x8::default());
216
217 p.next_stage();
218}
219
220fn clamp_a(p: &mut Pipeline) {
221 p.r = p.r.min(f32x8::splat(1.0));
222 p.g = p.g.min(f32x8::splat(1.0));
223 p.b = p.b.min(f32x8::splat(1.0));
224 p.a = p.a.min(f32x8::splat(1.0));
225
226 p.next_stage();
227}
228
229fn uniform_color(p: &mut Pipeline) {
230 let ctx = &p.ctx.uniform_color;
231 p.r = f32x8::splat(ctx.r);
232 p.g = f32x8::splat(ctx.g);
233 p.b = f32x8::splat(ctx.b);
234 p.a = f32x8::splat(ctx.a);
235
236 p.next_stage();
237}
238
239fn seed_shader(p: &mut Pipeline) {
240 let iota = f32x8::from([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]);
241
242 p.r = f32x8::splat(p.dx as f32) + iota;
243 p.g = f32x8::splat(p.dy as f32 + 0.5);
244 p.b = f32x8::splat(1.0);
245 p.a = f32x8::default();
246
247 p.dr = f32x8::default();
248 p.dg = f32x8::default();
249 p.db = f32x8::default();
250 p.da = f32x8::default();
251
252 p.next_stage();
253}
254
255pub fn load_dst(p: &mut Pipeline) {
256 load_8888(p.pixmap_dst.slice4_at_xy(p.dx, p.dy), &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
257 p.next_stage();
258}
259
260pub fn load_dst_tail(p: &mut Pipeline) {
261 load_8888_tail(p.tail, p.pixmap_dst.slice_at_xy(p.dx, p.dy), &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
262 p.next_stage();
263}
264
265pub fn store(p: &mut Pipeline) {
266 store_8888(&p.r, &p.g, &p.b, &p.a, p.pixmap_dst.slice4_at_xy(p.dx, p.dy));
267 p.next_stage();
268}
269
270pub fn store_tail(p: &mut Pipeline) {
271 store_8888_tail(&p.r, &p.g, &p.b, &p.a, p.tail, p.pixmap_dst.slice_at_xy(p.dx, p.dy));
272 p.next_stage();
273}
274
/// Empty stage: performs no work and does not call `next_stage`, so reaching it ends the
/// current chain of stage calls.
///
/// NOTE(review): presumably this stage is never scheduled in this pipeline and exists only
/// to fill its slot in `STAGES` — confirm.
pub fn load_dst_u8(_: &mut Pipeline) {
}
279
/// Empty stage: performs no work and does not call `next_stage`, so reaching it ends the
/// current chain of stage calls.
///
/// NOTE(review): presumably this stage is never scheduled in this pipeline — confirm.
pub fn load_dst_u8_tail(_: &mut Pipeline) {
}
283
/// Empty stage: performs no work and does not call `next_stage`, so reaching it ends the
/// current chain of stage calls.
///
/// NOTE(review): presumably this stage is never scheduled in this pipeline and exists only
/// to fill its slot in `STAGES` — confirm.
pub fn store_u8(_: &mut Pipeline) {
}
287
/// Empty stage: performs no work and does not call `next_stage`, so reaching it ends the
/// current chain of stage calls.
///
/// NOTE(review): presumably this stage is never scheduled in this pipeline — confirm.
pub fn store_u8_tail(_: &mut Pipeline) {
}
291
292pub fn gather(p: &mut Pipeline) {
293 let ix = gather_ix(p.pixmap_src, p.r, p.g);
294 load_8888(&p.pixmap_src.gather(ix), &mut p.r, &mut p.g, &mut p.b, &mut p.a);
295
296 p.next_stage();
297}
298
299#[inline(always)]
300fn gather_ix(pixmap: PixmapRef, mut x: f32x8, mut y: f32x8) -> u32x8 {
301 let w = ulp_sub(pixmap.width() as f32);
303 let h = ulp_sub(pixmap.height() as f32);
304 x = x.max(f32x8::default()).min(f32x8::splat(w));
305 y = y.max(f32x8::default()).min(f32x8::splat(h));
306
307 (y.trunc_int() * i32x8::splat(pixmap.width() as i32) + x.trunc_int()).to_u32x8_bitcast()
308}
309
/// Returns the largest `f32` strictly below `v`, for finite `v > 0`.
///
/// Used to turn an exclusive upper bound (such as a width) into an inclusive one.
/// The bit pattern is decremented by one, which only steps "down" for positive values;
/// callers must not pass zero or negative numbers.
#[inline(always)]
fn ulp_sub(v: f32) -> f32 {
    // The standard library already offers the bit reinterpretation; no external cast needed.
    f32::from_bits(v.to_bits() - 1)
}
315
/// Empty stage: performs no work and does not call `next_stage`, so reaching it ends the
/// current chain of stage calls.
///
/// NOTE(review): presumably this stage is never scheduled in this pipeline and exists only
/// to fill its slot in `STAGES` — confirm.
fn load_mask_u8(_: &mut Pipeline) {
}
319
320fn mask_u8(p: &mut Pipeline) {
321 let offset = p.mask_ctx.offset(p.dx, p.dy);
322 let mut c = [0.0; 8];
323 for i in 0..p.tail {
324 c[i] = p.mask_ctx.data[offset + i] as f32;
325 }
326 let c = f32x8::from(c) / f32x8::splat(255.0);
327
328 if c == f32x8::default() {
329 return;
330 }
331
332 p.r *= c;
333 p.g *= c;
334 p.b *= c;
335 p.a *= c;
336
337 p.next_stage();
338}
339
340fn scale_u8(p: &mut Pipeline) {
341 let data = p.aa_mask_ctx.copy_at_xy(p.dx, p.dy, p.tail);
343 let c = f32x8::from([data[0] as f32, data[1] as f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
344 let c = c / f32x8::splat(255.0);
345
346 p.r *= c;
347 p.g *= c;
348 p.b *= c;
349 p.a *= c;
350
351 p.next_stage();
352}
353
354fn lerp_u8(p: &mut Pipeline) {
355 let data = p.aa_mask_ctx.copy_at_xy(p.dx, p.dy, p.tail);
357 let c = f32x8::from([data[0] as f32, data[1] as f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
358 let c = c / f32x8::splat(255.0);
359
360 p.r = lerp(p.dr, p.r, c);
361 p.g = lerp(p.dg, p.g, c);
362 p.b = lerp(p.db, p.b, c);
363 p.a = lerp(p.da, p.a, c);
364
365 p.next_stage();
366}
367
368fn scale_1_float(p: &mut Pipeline) {
369 let c = f32x8::splat(p.ctx.current_coverage);
370 p.r *= c;
371 p.g *= c;
372 p.b *= c;
373 p.a *= c;
374
375 p.next_stage();
376}
377
378fn lerp_1_float(p: &mut Pipeline) {
379 let c = f32x8::splat(p.ctx.current_coverage);
380 p.r = lerp(p.dr, p.r, c);
381 p.g = lerp(p.dg, p.g, c);
382 p.b = lerp(p.db, p.b, c);
383 p.a = lerp(p.da, p.a, c);
384
385 p.next_stage();
386}
387
// Defines a blend stage `$name` that applies the closure `$f` to every channel, alpha
// included.
//
// `$f` is invoked as `$f(source, destination, source_alpha, destination_alpha)`.
macro_rules! blend_fn {
    ($name:ident, $f:expr) => {
        fn $name(p: &mut Pipeline) {
            p.r = $f(p.r, p.dr, p.a, p.da);
            p.g = $f(p.g, p.dg, p.a, p.da);
            p.b = $f(p.b, p.db, p.a, p.da);
            // Alpha is updated last so the color channels above still see the original `p.a`.
            p.a = $f(p.a, p.da, p.a, p.da);

            p.next_stage();
        }
    };
}
400
// Blend stages whose formula is the same for color and alpha.
// Closure arguments: `s` = source channel, `d` = destination channel,
// `sa` = source alpha, `da` = destination alpha; `inv(x)` is `1 - x`.
blend_fn!(clear, |_, _, _, _| f32x8::default());
blend_fn!(source_atop, |s, d, sa, da| s * da + d * inv(sa));
blend_fn!(destination_atop, |s, d, sa, da| d * sa + s * inv(da));
blend_fn!(source_in, |s, _, _, da| s * da);
blend_fn!(destination_in, |_, d, sa, _| d * sa);
blend_fn!(source_out, |s, _, _, da| s * inv(da));
blend_fn!(destination_out, |_, d, sa, _| d * inv(sa));
blend_fn!(source_over, |s, d, sa, _| mad(d, inv(sa), s));
blend_fn!(destination_over, |s, d, _, da| mad(s, inv(da), d));
blend_fn!(modulate, |s, d, _, _| s * d);
blend_fn!(multiply, |s, d, sa, da| s * inv(da) + d * inv(sa) + s * d);
blend_fn!(screen, |s, d, _, _| s + d - s * d);
blend_fn!(xor, |s, d, sa, da| s * inv(da) + d * inv(sa));

// The sum is capped at 1.0. The argument types are spelled out here because the closure
// calls a method (`min`) on a value derived from them.
blend_fn!(plus, |s: f32x8, d: f32x8, _, _| (s + d).min(f32x8::splat(1.0)));
417
// Defines a blend stage `$name` that applies the closure `$f` to the three color channels
// only; alpha is computed as `da * (1 - a) + a`, the same formula the `source_over` stage uses.
//
// `$f` is invoked as `$f(source, destination, source_alpha, destination_alpha)`.
macro_rules! blend_fn2 {
    ($name:ident, $f:expr) => {
        fn $name(p: &mut Pipeline) {
            p.r = $f(p.r, p.dr, p.a, p.da);
            p.g = $f(p.g, p.dg, p.a, p.da);
            p.b = $f(p.b, p.db, p.a, p.da);
            // Alpha is updated last so the color channels above still see the original `p.a`.
            p.a = mad(p.da, inv(p.a), p.a);

            p.next_stage();
        }
    };
}
431
// Color-only blend stages (alpha handled by `blend_fn2!`).
// `s`/`d` are source/destination channels, `sa`/`da` the matching alphas.
blend_fn2!(darken, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).max(d * sa));
blend_fn2!(lighten, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).min(d * sa));
blend_fn2!(difference, |s: f32x8, d, sa, da: f32x8| s + d - two((s * da).min(d * sa)));
blend_fn2!(exclusion, |s: f32x8, d, _, _| s + d - two(s * d));
436
// Three-way selection per lane:
//   1. `d == da`: result is `d + s * (1 - da)`;
//   2. otherwise, `s == 0`: result is `d * (1 - sa)` (this also avoids using `1 / s`);
//   3. otherwise the general formula, which uses a fast reciprocal of `s`.
blend_fn2!(color_burn, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8|
    d.cmp_eq(da).blend(
        d + s * inv(da),
        s.cmp_eq(f32x8::default()).blend(
            d * inv(sa),
            sa * (da - da.min((da - d) * sa * s.recip_fast())) + s * inv(da) + d * inv(sa)
        )
    )
);
446
// Three-way selection per lane:
//   1. `d == 0`: result is `s * (1 - da)`;
//   2. otherwise, `s == sa`: result is `s + d * (1 - sa)` (this also avoids using `1 / (sa - s)`);
//   3. otherwise the general formula, which uses a fast reciprocal of `sa - s`.
blend_fn2!(color_dodge, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8|
    d.cmp_eq(f32x8::default()).blend(
        s * inv(da),
        s.cmp_eq(sa).blend(
            s + d * inv(sa),
            sa * da.min((d * sa) * (sa - s).recip_fast()) + s * inv(da) + d * inv(sa)
        )
    )
);
456
// The branch is chosen by the source: `2 * s <= sa` selects `2 * s * d`, otherwise
// `sa * da - 2 * (da - d) * (sa - s)`.
blend_fn2!(hard_light, |s: f32x8, d: f32x8, sa, da|
    s * inv(da) + d * inv(sa) + two(s).cmp_le(sa).blend(
        two(s * d),
        sa * da - two((da - d) * (sa - s))
    )
);
463
// Same two formulas as `hard_light`, but the branch is chosen by the destination:
// `2 * d <= da` selects `2 * s * d`.
blend_fn2!(overlay, |s: f32x8, d: f32x8, sa, da|
    s * inv(da) + d * inv(sa) + two(d).cmp_le(da).blend(
        two(s * d),
        sa * da - two((da - d) * (sa - s))
    )
);
470
blend_fn2!(soft_light, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8| {
    // `m` is `d / da`, or zero where `da` is not positive (avoids dividing by zero).
    let m = da.cmp_gt(f32x8::default()).blend(d / da, f32x8::default());
    let s2 = two(s);
    let m4 = two(two(m));

    // The formula forks three ways:
    //   1. `2 * s <= sa`                 -> `dark_src`
    //   2. otherwise, and `4 * d <= da`  -> `lite_src` built from `dark_dst`
    //   3. otherwise                     -> `lite_src` built from `lite_dst`
    let dark_src = d * (sa + (s2 - sa) * (f32x8::splat(1.0) - m));
    let dark_dst = (m4 * m4 + m4) * (m - f32x8::splat(1.0)) + f32x8::splat(7.0) * m;
    let lite_dst = m.sqrt() - m;
    let lite_src = d * sa + da * (s2 - sa)
        * two(two(d)).cmp_le(da).blend(dark_dst, lite_dst); // case 2 or case 3

    s * inv(da) + d * inv(sa) + s2.cmp_le(sa).blend(dark_src, lite_src) // case 1 or (2 or 3)
});
488
// Defines a blend stage `$name` backed by a kernel `$f` that sees all eight lanes at once.
//
// `$f` is invoked as `$f(r, g, b, a, dr, dg, db, da)` and returns the new `(r, g, b, a)`.
macro_rules! blend_fn3 {
    ($name:ident, $f:expr) => {
        fn $name(p: &mut Pipeline) {
            let (tr, tg, tb, ta) = $f(p.r, p.g, p.b, p.a, p.dr, p.dg, p.db, p.da);
            p.r = tr;
            p.g = tg;
            p.b = tb;
            p.a = ta;

            p.next_stage();
        }
    };
}
510
511blend_fn3!(hue, hue_k);
512
513#[inline(always)]
514fn hue_k(
515 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
516 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
517) -> (f32x8, f32x8, f32x8, f32x8) {
518 let rr = &mut (r * a);
519 let gg = &mut (g * a);
520 let bb = &mut (b * a);
521
522 set_sat(rr, gg, bb, sat(dr, dg, db) * a);
523 set_lum(rr, gg, bb, lum(dr, dg, db) * a);
524 clip_color(rr, gg, bb, a * da);
525
526 let r = r * inv(da) + dr * inv(a) + *rr;
527 let g = g * inv(da) + dg * inv(a) + *gg;
528 let b = b * inv(da) + db * inv(a) + *bb;
529 let a = a + da - a * da;
530
531 (r, g, b, a)
532}
533
534blend_fn3!(saturation, saturation_k);
535
536#[inline(always)]
537fn saturation_k(
538 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
539 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
540) -> (f32x8, f32x8, f32x8, f32x8) {
541 let rr = &mut (dr * a);
542 let gg = &mut (dg * a);
543 let bb = &mut (db * a);
544
545 set_sat(rr, gg, bb, sat(r, g, b) * da);
546 set_lum(rr, gg, bb, lum(dr, dg, db) * a); clip_color(rr, gg, bb, a * da);
548
549 let r = r * inv(da) + dr * inv(a) + *rr;
550 let g = g * inv(da) + dg * inv(a) + *gg;
551 let b = b * inv(da) + db * inv(a) + *bb;
552 let a = a + da - a * da;
553
554 (r, g, b, a)
555}
556
557blend_fn3!(color, color_k);
558
559#[inline(always)]
560fn color_k(
561 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
562 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
563) -> (f32x8, f32x8, f32x8, f32x8) {
564 let rr = &mut (r * da);
565 let gg = &mut (g * da);
566 let bb = &mut (b * da);
567
568 set_lum(rr, gg, bb, lum(dr, dg, db) * a);
569 clip_color(rr, gg, bb, a * da);
570
571 let r = r * inv(da) + dr * inv(a) + *rr;
572 let g = g * inv(da) + dg * inv(a) + *gg;
573 let b = b * inv(da) + db * inv(a) + *bb;
574 let a = a + da - a * da;
575
576 (r, g, b, a)
577}
578
579blend_fn3!(luminosity, luminosity_k);
580
581#[inline(always)]
582fn luminosity_k(
583 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
584 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
585) -> (f32x8, f32x8, f32x8, f32x8) {
586 let rr = &mut (dr * a);
587 let gg = &mut (dg * a);
588 let bb = &mut (db * a);
589
590 set_lum(rr, gg, bb, lum(r, g, b) * da);
591 clip_color(rr, gg, bb, a * da);
592
593 let r = r * inv(da) + dr * inv(a) + *rr;
594 let g = g * inv(da) + dg * inv(a) + *gg;
595 let b = b * inv(da) + db * inv(a) + *bb;
596 let a = a + da - a * da;
597
598 (r, g, b, a)
599}
600
601#[inline(always)]
602fn sat(r: f32x8, g: f32x8, b: f32x8) -> f32x8 {
603 r.max(g.max(b)) - r.min(g.min(b))
604}
605
606#[inline(always)]
607fn lum(r: f32x8, g: f32x8, b: f32x8) -> f32x8 {
608 r * f32x8::splat(0.30) + g * f32x8::splat(0.59) + b * f32x8::splat(0.11)
609}
610
611#[inline(always)]
612fn set_sat(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, s: f32x8) {
613 let mn = r.min(g.min(*b));
614 let mx = r.max(g.max(*b));
615 let sat = mx - mn;
616
617 let scale = |c| sat.cmp_eq(f32x8::default())
619 .blend(f32x8::default(), (c - mn) * s / sat);
620
621 *r = scale(*r);
622 *g = scale(*g);
623 *b = scale(*b);
624}
625
626#[inline(always)]
627fn set_lum(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, l: f32x8) {
628 let diff = l - lum(*r, *g, *b);
629 *r += diff;
630 *g += diff;
631 *b += diff;
632}
633
634#[inline(always)]
635fn clip_color(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: f32x8) {
636 let mn = r.min(g.min(*b));
637 let mx = r.max(g.max(*b));
638 let l = lum(*r, *g, *b);
639
640 let clip = |mut c| {
641 c = mx.cmp_ge(f32x8::default()).blend(c, l + (c - l) * l / (l - mn));
642 c = mx.cmp_gt(a).blend(l + (c - l) * (a - l) / (mx - l), c);
643 c = c.max(f32x8::default()); c
645 };
646
647 *r = clip(*r);
648 *g = clip(*g);
649 *b = clip(*b);
650}
651
652pub fn source_over_rgba(p: &mut Pipeline) {
653 let pixels = p.pixmap_dst.slice4_at_xy(p.dx, p.dy);
654 load_8888(pixels, &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
655 p.r = mad(p.dr, inv(p.a), p.r);
656 p.g = mad(p.dg, inv(p.a), p.g);
657 p.b = mad(p.db, inv(p.a), p.b);
658 p.a = mad(p.da, inv(p.a), p.a);
659 store_8888(&p.r, &p.g, &p.b, &p.a, pixels);
660
661 p.next_stage();
662}
663
664pub fn source_over_rgba_tail(p: &mut Pipeline) {
665 let pixels = p.pixmap_dst.slice_at_xy(p.dx, p.dy);
666 load_8888_tail(p.tail, pixels, &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
667 p.r = mad(p.dr, inv(p.a), p.r);
668 p.g = mad(p.dg, inv(p.a), p.g);
669 p.b = mad(p.db, inv(p.a), p.b);
670 p.a = mad(p.da, inv(p.a), p.a);
671 store_8888_tail(&p.r, &p.g, &p.b, &p.a, p.tail, pixels);
672
673 p.next_stage();
674}
675
676fn transform(p: &mut Pipeline) {
677 let ts = &p.ctx.transform;
678
679 let tr = mad(p.r, f32x8::splat(ts.sx), mad(p.g, f32x8::splat(ts.kx), f32x8::splat(ts.tx)));
680 let tg = mad(p.r, f32x8::splat(ts.ky), mad(p.g, f32x8::splat(ts.sy), f32x8::splat(ts.ty)));
681 p.r = tr;
682 p.g = tg;
683
684 p.next_stage();
685}
686
687fn reflect(p: &mut Pipeline) {
692 let ctx = &p.ctx.limit_x;
693 p.r = exclusive_reflect(p.r, ctx.scale, ctx.inv_scale);
694
695 let ctx = &p.ctx.limit_y;
696 p.g = exclusive_reflect(p.g, ctx.scale, ctx.inv_scale);
697
698 p.next_stage();
699}
700
701#[inline(always)]
702fn exclusive_reflect(v: f32x8, limit: f32, inv_limit: f32) -> f32x8 {
703 let limit = f32x8::splat(limit);
704 let inv_limit = f32x8::splat(inv_limit);
705 ((v - limit) - (limit + limit)
706 * ((v - limit) * (inv_limit * f32x8::splat(0.5))).floor() - limit).abs()
707}
708
709fn repeat(p: &mut Pipeline) {
710 let ctx = &p.ctx.limit_x;
711 p.r = exclusive_repeat(p.r, ctx.scale, ctx.inv_scale);
712
713 let ctx = &p.ctx.limit_y;
714 p.g = exclusive_repeat(p.g, ctx.scale, ctx.inv_scale);
715
716 p.next_stage();
717}
718
719#[inline(always)]
720fn exclusive_repeat(v: f32x8, limit: f32, inv_limit: f32) -> f32x8 {
721 v - (v * f32x8::splat(inv_limit)).floor() * f32x8::splat(limit)
722}
723
724fn bilinear(p: &mut Pipeline) {
725 let x = p.r;
726 let fx = (x + f32x8::splat(0.5)).fract();
727 let y = p.g;
728 let fy = (y + f32x8::splat(0.5)).fract();
729 let one = f32x8::splat(1.0);
730 let wx = [one - fx, fx];
731 let wy = [one - fy, fy];
732
733 sampler_2x2(p.pixmap_src, &p.ctx.sampler, x, y, &wx, &wy, &mut p.r, &mut p.g, &mut p.b, &mut p.a);
734
735 p.next_stage();
736}
737
738fn bicubic(p: &mut Pipeline) {
739 let x = p.r;
740 let fx = (x + f32x8::splat(0.5)).fract();
741 let y = p.g;
742 let fy = (y + f32x8::splat(0.5)).fract();
743 let one = f32x8::splat(1.0);
744 let wx = [bicubic_far(one - fx), bicubic_near(one - fx), bicubic_near(fx), bicubic_far(fx)];
745 let wy = [bicubic_far(one - fy), bicubic_near(one - fy), bicubic_near(fy), bicubic_far(fy)];
746
747 sampler_4x4(p.pixmap_src, &p.ctx.sampler, x, y, &wx, &wy, &mut p.r, &mut p.g, &mut p.b, &mut p.a);
748
749 p.next_stage();
750}
751
752#[inline(always)]
758fn bicubic_near(t: f32x8) -> f32x8 {
759 mad(
761 t,
762 mad(t,
763 mad(
764 f32x8::splat(-21.0/18.0),
765 t,
766 f32x8::splat(27.0/18.0),
767 ),
768 f32x8::splat(9.0/18.0),
769 ),
770 f32x8::splat(1.0/18.0),
771 )
772}
773
774#[inline(always)]
775fn bicubic_far(t: f32x8) -> f32x8 {
776 (t * t) * mad(f32x8::splat(7.0/18.0), t, f32x8::splat(-6.0/18.0))
778}
779
780#[inline(always)]
781fn sampler_2x2(
782 pixmap: PixmapRef,
783 ctx: &super::SamplerCtx,
784 cx: f32x8, cy: f32x8,
785 wx: &[f32x8; 2], wy: &[f32x8; 2],
786 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
787) {
788 *r = f32x8::default();
789 *g = f32x8::default();
790 *b = f32x8::default();
791 *a = f32x8::default();
792
793 let one = f32x8::splat(1.0);
794 let start = -0.5;
795 let mut y = cy + f32x8::splat(start);
796 for j in 0..2 {
797 let mut x = cx + f32x8::splat(start);
798 for i in 0..2 {
799 let mut rr = f32x8::default();
800 let mut gg = f32x8::default();
801 let mut bb = f32x8::default();
802 let mut aa = f32x8::default();
803 sample(pixmap, ctx, x,y, &mut rr, &mut gg, &mut bb, &mut aa);
804
805 let w = wx[i] * wy[j];
806 *r = mad(w, rr, *r);
807 *g = mad(w, gg, *g);
808 *b = mad(w, bb, *b);
809 *a = mad(w, aa, *a);
810
811 x += one;
812 }
813
814 y += one;
815 }
816}
817
818#[inline(always)]
819fn sampler_4x4(
820 pixmap: PixmapRef,
821 ctx: &super::SamplerCtx,
822 cx: f32x8, cy: f32x8,
823 wx: &[f32x8; 4], wy: &[f32x8; 4],
824 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
825) {
826 *r = f32x8::default();
827 *g = f32x8::default();
828 *b = f32x8::default();
829 *a = f32x8::default();
830
831 let one = f32x8::splat(1.0);
832 let start = -1.5;
833 let mut y = cy + f32x8::splat(start);
834 for j in 0..4 {
835 let mut x = cx + f32x8::splat(start);
836 for i in 0..4 {
837 let mut rr = f32x8::default();
838 let mut gg = f32x8::default();
839 let mut bb = f32x8::default();
840 let mut aa = f32x8::default();
841 sample(pixmap, ctx, x,y, &mut rr, &mut gg, &mut bb, &mut aa);
842
843 let w = wx[i] * wy[j];
844 *r = mad(w, rr, *r);
845 *g = mad(w, gg, *g);
846 *b = mad(w, bb, *b);
847 *a = mad(w, aa, *a);
848
849 x += one;
850 }
851
852 y += one;
853 }
854}
855
856#[inline(always)]
857fn sample(
858 pixmap: PixmapRef, ctx: &super::SamplerCtx, mut x: f32x8, mut y: f32x8,
859 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
860) {
861 x = tile(x, ctx.spread_mode, pixmap.width() as f32, ctx.inv_width);
862 y = tile(y, ctx.spread_mode, pixmap.height() as f32, ctx.inv_height);
863
864 let ix = gather_ix(pixmap, x, y);
865 load_8888(&pixmap.gather(ix), r, g, b, a);
866}
867
868#[inline(always)]
869fn tile(v: f32x8, mode: SpreadMode, limit: f32, inv_limit: f32) -> f32x8 {
870 match mode {
871 SpreadMode::Pad => v,
872 SpreadMode::Repeat => exclusive_repeat(v, limit, inv_limit),
873 SpreadMode::Reflect => exclusive_reflect(v, limit, inv_limit),
874 }
875}
876
877fn pad_x1(p: &mut Pipeline) {
878 p.r = p.r.normalize();
879
880 p.next_stage();
881}
882
883fn reflect_x1(p: &mut Pipeline) {
884 p.r = (
885 (p.r - f32x8::splat(1.0))
886 - two(((p.r - f32x8::splat(1.0)) * f32x8::splat(0.5)).floor())
887 - f32x8::splat(1.0)
888 ).abs().normalize();
889
890 p.next_stage();
891}
892
893fn repeat_x1(p: &mut Pipeline) {
894 p.r = (p.r - p.r.floor()).normalize();
895
896 p.next_stage();
897}
898
899fn gradient(p: &mut Pipeline) {
900 let ctx = &p.ctx.gradient;
901
902 let t: [f32; 8] = p.r.into();
904 let mut idx = u32x8::default();
905 for i in 1..ctx.len {
906 let tt = ctx.t_values[i].get();
907 let n: u32x8 = bytemuck::cast([
908 (t[0] >= tt) as u32,
909 (t[1] >= tt) as u32,
910 (t[2] >= tt) as u32,
911 (t[3] >= tt) as u32,
912 (t[4] >= tt) as u32,
913 (t[5] >= tt) as u32,
914 (t[6] >= tt) as u32,
915 (t[7] >= tt) as u32,
916 ]);
917 idx = idx + n;
918 }
919 gradient_lookup(ctx, &idx, p.r, &mut p.r, &mut p.g, &mut p.b, &mut p.a);
920
921 p.next_stage();
922}
923
924fn gradient_lookup(
925 ctx: &super::GradientCtx, idx: &u32x8, t: f32x8,
926 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
927) {
928 let idx: [u32; 8] = bytemuck::cast(*idx);
929
930 macro_rules! gather {
931 ($d:expr, $c:ident) => {
932 f32x8::from([
935 $d[idx[0] as usize].$c,
936 $d[idx[1] as usize].$c,
937 $d[idx[2] as usize].$c,
938 $d[idx[3] as usize].$c,
939 $d[idx[4] as usize].$c,
940 $d[idx[5] as usize].$c,
941 $d[idx[6] as usize].$c,
942 $d[idx[7] as usize].$c,
943 ])
944 };
945 }
946
947 let fr = gather!(&ctx.factors, r);
948 let fg = gather!(&ctx.factors, g);
949 let fb = gather!(&ctx.factors, b);
950 let fa = gather!(&ctx.factors, a);
951
952 let br = gather!(&ctx.biases, r);
953 let bg = gather!(&ctx.biases, g);
954 let bb = gather!(&ctx.biases, b);
955 let ba = gather!(&ctx.biases, a);
956
957 *r = mad(t, fr, br);
958 *g = mad(t, fg, bg);
959 *b = mad(t, fb, bb);
960 *a = mad(t, fa, ba);
961}
962
963fn evenly_spaced_2_stop_gradient(p: &mut Pipeline) {
964 let ctx = &p.ctx.evenly_spaced_2_stop_gradient;
965
966 let t = p.r;
967 p.r = mad(t, f32x8::splat(ctx.factor.r), f32x8::splat(ctx.bias.r));
968 p.g = mad(t, f32x8::splat(ctx.factor.g), f32x8::splat(ctx.bias.g));
969 p.b = mad(t, f32x8::splat(ctx.factor.b), f32x8::splat(ctx.bias.b));
970 p.a = mad(t, f32x8::splat(ctx.factor.a), f32x8::splat(ctx.bias.a));
971
972 p.next_stage();
973}
974
975fn xy_to_radius(p: &mut Pipeline) {
976 let x2 = p.r * p.r;
977 let y2 = p.g * p.g;
978 p.r = (x2 + y2).sqrt();
979
980 p.next_stage();
981}
982
983fn xy_to_2pt_conical_focal_on_circle(p: &mut Pipeline) {
984 let x = p.r;
985 let y = p.g;
986 p.r = x + y * y / x;
987
988 p.next_stage();
989}
990
991fn xy_to_2pt_conical_well_behaved(p: &mut Pipeline) {
992 let ctx = &p.ctx.two_point_conical_gradient;
993
994 let x = p.r;
995 let y = p.g;
996 p.r = (x * x + y * y).sqrt() - x * f32x8::splat(ctx.p0);
997
998 p.next_stage();
999}
1000
1001fn xy_to_2pt_conical_greater(p: &mut Pipeline) {
1002 let ctx = &p.ctx.two_point_conical_gradient;
1003
1004 let x = p.r;
1005 let y = p.g;
1006 p.r = (x * x - y * y).sqrt() - x * f32x8::splat(ctx.p0);
1007
1008 p.next_stage();
1009}
1010
1011fn mask_2pt_conical_degenerates(p: &mut Pipeline) {
1012 let ctx = &mut p.ctx.two_point_conical_gradient;
1013
1014 let t = p.r;
1015 let is_degenerate = t.cmp_le(f32x8::default()) | t.cmp_ne(t);
1016 p.r = is_degenerate.blend(f32x8::default(), t);
1017
1018 let is_not_degenerate = !is_degenerate.to_u32x8_bitcast();
1019 let is_not_degenerate: [u32; 8] = bytemuck::cast(is_not_degenerate);
1020 ctx.mask = bytemuck::cast([
1021 if is_not_degenerate[0] != 0 { !0 } else { 0 },
1022 if is_not_degenerate[1] != 0 { !0 } else { 0 },
1023 if is_not_degenerate[2] != 0 { !0 } else { 0 },
1024 if is_not_degenerate[3] != 0 { !0 } else { 0 },
1025 if is_not_degenerate[4] != 0 { !0 } else { 0 },
1026 if is_not_degenerate[5] != 0 { !0 } else { 0 },
1027 if is_not_degenerate[6] != 0 { !0 } else { 0 },
1028 if is_not_degenerate[7] != 0 { !0 } else { 0 },
1029 ]);
1030
1031 p.next_stage();
1032}
1033
1034fn apply_vector_mask(p: &mut Pipeline) {
1035 let ctx = &p.ctx.two_point_conical_gradient;
1036
1037 p.r = (p.r.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1038 p.g = (p.g.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1039 p.b = (p.b.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1040 p.a = (p.a.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1041
1042 p.next_stage();
1043}
1044
/// Stage that does nothing. Because it does not call `next_stage`, the chain of nested
/// stage calls unwinds once this stage is reached.
pub fn just_return(_: &mut Pipeline) {
}
1048
1049#[inline(always)]
1050fn load_8888(
1051 data: &[PremultipliedColorU8; STAGE_WIDTH],
1052 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
1053) {
1054 const FACTOR: f32 = 1.0 / 255.0;
1057
1058 *r = f32x8::from([
1059 data[0].red() as f32 * FACTOR, data[1].red() as f32 * FACTOR,
1060 data[2].red() as f32 * FACTOR, data[3].red() as f32 * FACTOR,
1061 data[4].red() as f32 * FACTOR, data[5].red() as f32 * FACTOR,
1062 data[6].red() as f32 * FACTOR, data[7].red() as f32 * FACTOR,
1063 ]);
1064
1065 *g = f32x8::from([
1066 data[0].green() as f32 * FACTOR, data[1].green() as f32 * FACTOR,
1067 data[2].green() as f32 * FACTOR, data[3].green() as f32 * FACTOR,
1068 data[4].green() as f32 * FACTOR, data[5].green() as f32 * FACTOR,
1069 data[6].green() as f32 * FACTOR, data[7].green() as f32 * FACTOR,
1070 ]);
1071
1072 *b = f32x8::from([
1073 data[0].blue() as f32 * FACTOR, data[1].blue() as f32 * FACTOR,
1074 data[2].blue() as f32 * FACTOR, data[3].blue() as f32 * FACTOR,
1075 data[4].blue() as f32 * FACTOR, data[5].blue() as f32 * FACTOR,
1076 data[6].blue() as f32 * FACTOR, data[7].blue() as f32 * FACTOR,
1077 ]);
1078
1079 *a = f32x8::from([
1080 data[0].alpha() as f32 * FACTOR, data[1].alpha() as f32 * FACTOR,
1081 data[2].alpha() as f32 * FACTOR, data[3].alpha() as f32 * FACTOR,
1082 data[4].alpha() as f32 * FACTOR, data[5].alpha() as f32 * FACTOR,
1083 data[6].alpha() as f32 * FACTOR, data[7].alpha() as f32 * FACTOR,
1084 ]);
1085}
1086
1087#[inline(always)]
1088fn load_8888_tail(
1089 tail: usize, data: &[PremultipliedColorU8],
1090 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
1091) {
1092 let mut tmp = [PremultipliedColorU8::TRANSPARENT; STAGE_WIDTH];
1095 tmp[0..tail].copy_from_slice(&data[0..tail]);
1096 load_8888(&tmp, r, g, b, a);
1097}
1098
1099#[inline(always)]
1100fn store_8888(
1101 r: &f32x8, g: &f32x8, b: &f32x8, a: &f32x8,
1102 data: &mut [PremultipliedColorU8; STAGE_WIDTH],
1103) {
1104 let r: [i32; 8] = unnorm(r).into();
1105 let g: [i32; 8] = unnorm(g).into();
1106 let b: [i32; 8] = unnorm(b).into();
1107 let a: [i32; 8] = unnorm(a).into();
1108
1109 let conv = |rr, gg, bb, aa|
1110 PremultipliedColorU8::from_rgba_unchecked(rr as u8, gg as u8, bb as u8, aa as u8);
1111
1112 data[0] = conv(r[0], g[0], b[0], a[0]);
1113 data[1] = conv(r[1], g[1], b[1], a[1]);
1114 data[2] = conv(r[2], g[2], b[2], a[2]);
1115 data[3] = conv(r[3], g[3], b[3], a[3]);
1116 data[4] = conv(r[4], g[4], b[4], a[4]);
1117 data[5] = conv(r[5], g[5], b[5], a[5]);
1118 data[6] = conv(r[6], g[6], b[6], a[6]);
1119 data[7] = conv(r[7], g[7], b[7], a[7]);
1120}
1121
/// Packs the first `tail` lanes of the four float vectors into `data[0..tail]`.
///
/// Each channel is converted with `unnorm` (clamp to `[0, 1]`, scale by 255, round).
#[inline(always)]
fn store_8888_tail(
    r: &f32x8, g: &f32x8, b: &f32x8, a: &f32x8,
    tail: usize, data: &mut [PremultipliedColorU8],
) {
    let r: [i32; 8] = unnorm(r).into();
    let g: [i32; 8] = unnorm(g).into();
    let b: [i32; 8] = unnorm(b).into();
    let a: [i32; 8] = unnorm(a).into();

    // The loop is bounded by the constant `STAGE_WIDTH` and exits early once `tail` pixels
    // were written, so the number of iterations never exceeds the lane count.
    // Note that `tail == 0` never matches the exit condition and would write all lanes.
    for i in 0..STAGE_WIDTH {
        data[i] = PremultipliedColorU8::from_rgba_unchecked(
            r[i] as u8, g[i] as u8, b[i] as u8, a[i] as u8,
        );

        if i + 1 == tail {
            break;
        }
    }
}
1145
1146#[inline(always)]
1147fn unnorm(v: &f32x8) -> i32x8 {
1148 (v.max(f32x8::default()).min(f32x8::splat(1.0)) * f32x8::splat(255.0)).round_int()
1149}
1150
1151#[inline(always)]
1152fn inv(v: f32x8) -> f32x8 {
1153 f32x8::splat(1.0) - v
1154}
1155
/// Returns `2 * v` per lane, computed as `v + v`.
#[inline(always)]
fn two(v: f32x8) -> f32x8 {
    v + v
}
1160
1161#[inline(always)]
1162fn mad(f: f32x8, m: f32x8, a: f32x8) -> f32x8 {
1163 f * m + a
1164}
1165
1166#[inline(always)]
1167fn lerp(from: f32x8, to: f32x8, t: f32x8) -> f32x8 {
1168 mad(to - from, t, from)
1169}