1use num_traits::Bounded;
2
3use crate::imageops::filter_1d::{SafeAdd, SafeMul};
4use crate::{ImageBuffer, Pixel, Primitive};
5
6#[must_use]
21pub fn fast_blur<P: Pixel>(
22 input_buffer: &ImageBuffer<P, Vec<P::Subpixel>>,
23 sigma: f32,
24) -> ImageBuffer<P, Vec<P::Subpixel>> {
25 let (width, height) = input_buffer.dimensions();
26
27 if width == 0 || height == 0 {
28 return input_buffer.clone();
29 }
30
31 let num_passes = 3;
32
33 let boxes = boxes_for_gauss(sigma, num_passes);
34 if boxes.is_empty() {
35 return input_buffer.clone();
36 }
37
38 let samples = input_buffer.as_flat_samples().samples;
39
40 let destination_size = match (width as usize)
41 .safe_mul(height as usize)
42 .and_then(|x| x.safe_mul(P::CHANNEL_COUNT as usize))
43 {
44 Ok(s) => s,
45 Err(_) => panic!("Width and height and channels count exceeded pointer size"),
46 };
47
48 let first_box = boxes[0];
49
50 let mut transient = vec![P::Subpixel::min_value(); destination_size];
51 let mut dst = vec![P::Subpixel::min_value(); destination_size];
52
53 let stride = width as usize * P::CHANNEL_COUNT as usize;
55
56 test_radius_size(width as usize, first_box);
58 test_radius_size(height as usize, first_box);
59
60 box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
61 samples,
62 stride,
63 &mut transient,
64 stride,
65 width,
66 first_box,
67 );
68
69 box_blur_vertical_pass_strategy::<P, P::Subpixel>(
70 &transient, stride, &mut dst, stride, width, height, first_box,
71 );
72
73 for &box_container in boxes.iter().skip(1) {
74 test_radius_size(width as usize, box_container);
76 test_radius_size(height as usize, box_container);
77
78 box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
79 &dst,
80 stride,
81 &mut transient,
82 stride,
83 width,
84 box_container,
85 );
86
87 box_blur_vertical_pass_strategy::<P, P::Subpixel>(
88 &transient,
89 stride,
90 &mut dst,
91 stride,
92 width,
93 height,
94 box_container,
95 );
96 }
97
98 let mut buffer = ImageBuffer::from_raw(width, height, dst).unwrap();
99 buffer.copy_color_space_from(input_buffer);
100 buffer
101}
102
103#[inline]
104fn test_radius_size(bound: usize, radius: usize) {
105 match bound.safe_add(radius) {
106 Ok(_) => {}
107 Err(_) => panic!("Radius overflowed maximum possible size"),
108 }
109}
110
111fn boxes_for_gauss(sigma: f32, n: usize) -> Vec<usize> {
112 let w_ideal = f32::sqrt((12.0 * sigma.powi(2) / (n as f32)) + 1.0);
113 let mut w_l = w_ideal.floor();
114 if w_l % 2.0 == 0.0 {
115 w_l -= 1.0;
116 }
117 let w_u = w_l + 2.0;
118
119 let m_ideal = 0.25 * (n as f32) * (w_l + 3.0) - 3.0 * sigma.powi(2) * (w_l + 1.0).recip();
120
121 let m = f32::round(m_ideal) as usize;
122
123 (0..n)
124 .map(|i| if i < m { w_l as usize } else { w_u as usize })
125 .map(|i| ceil_to_odd(i.saturating_sub(1) / 2))
126 .collect::<Vec<_>>()
127}
128
/// Returns `x` if it is odd, otherwise the next larger (odd) integer.
#[inline]
fn ceil_to_odd(x: usize) -> usize {
    // Setting the lowest bit maps every even x to x + 1 and leaves odd x
    // unchanged.
    x | 1
}
137
138#[inline]
139#[allow(clippy::manual_clamp)]
140fn rounding_saturating_mul<T: Primitive>(v: f32, w: f32) -> T {
141 if T::DEFAULT_MAX_VALUE.to_f32().unwrap() != 1.0 {
144 T::from(
145 (v * w)
146 .round()
147 .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
148 .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
149 )
150 .unwrap()
151 } else {
152 T::from(
153 (v * w)
154 .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
155 .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
156 )
157 .unwrap()
158 }
159}
160
161fn box_blur_horizontal_pass_strategy<T, P: Primitive>(
162 src: &[P],
163 src_stride: usize,
164 dst: &mut [P],
165 dst_stride: usize,
166 width: u32,
167 radius: usize,
168) where
169 T: Pixel,
170{
171 if T::CHANNEL_COUNT == 1 {
172 box_blur_horizontal_pass_impl::<P, 1>(src, src_stride, dst, dst_stride, width, radius);
173 } else if T::CHANNEL_COUNT == 2 {
174 box_blur_horizontal_pass_impl::<P, 2>(src, src_stride, dst, dst_stride, width, radius);
175 } else if T::CHANNEL_COUNT == 3 {
176 box_blur_horizontal_pass_impl::<P, 3>(src, src_stride, dst, dst_stride, width, radius);
177 } else if T::CHANNEL_COUNT == 4 {
178 box_blur_horizontal_pass_impl::<P, 4>(src, src_stride, dst, dst_stride, width, radius);
179 } else {
180 unimplemented!("More than 4 channels is not yet implemented");
181 }
182}
183
184fn box_blur_vertical_pass_strategy<T: Pixel, P: Primitive>(
185 src: &[P],
186 src_stride: usize,
187 dst: &mut [P],
188 dst_stride: usize,
189 width: u32,
190 height: u32,
191 radius: usize,
192) {
193 if T::CHANNEL_COUNT == 1 {
194 box_blur_vertical_pass_impl::<P, 1>(
195 src, src_stride, dst, dst_stride, width, height, radius,
196 );
197 } else if T::CHANNEL_COUNT == 2 {
198 box_blur_vertical_pass_impl::<P, 2>(
199 src, src_stride, dst, dst_stride, width, height, radius,
200 );
201 } else if T::CHANNEL_COUNT == 3 {
202 box_blur_vertical_pass_impl::<P, 3>(
203 src, src_stride, dst, dst_stride, width, height, radius,
204 );
205 } else if T::CHANNEL_COUNT == 4 {
206 box_blur_vertical_pass_impl::<P, 4>(
207 src, src_stride, dst, dst_stride, width, height, radius,
208 );
209 } else {
210 unimplemented!("More than 4 channels is not yet implemented");
211 }
212}
213
/// Performs one horizontal box-blur pass over interleaved `CN`-channel rows.
///
/// Each output pixel is the mean of the `radius * 2 + 1` horizontally
/// neighbouring input pixels; indices past either row end are clamped, so
/// edge pixels repeat. A running per-channel sum (sliding window) keeps the
/// cost per pixel O(1), independent of `radius`.
///
/// `src_stride` / `dst_stride` are measured in samples (pixels * CN).
/// `width` must be non-zero; CN must be in 1..=4.
fn box_blur_horizontal_pass_impl<T, const CN: usize>(
    src: &[T],
    src_stride: usize,
    dst: &mut [T],
    dst_stride: usize,
    width: u32,
    radius: usize,
) where
    T: Primitive,
{
    assert!(width > 0, "Width must be sanitized before this method");
    test_radius_size(width as usize, radius);

    let kernel_size = radius * 2 + 1;
    // How many times the first pixel is counted when priming the window:
    // once for itself plus once per clamped tap hanging off the left edge.
    let edge_count = ((kernel_size / 2) + 1) as f32;
    let half_kernel = kernel_size / 2;

    // Normalization factor: 1 / kernel width.
    let weight = 1f32 / (radius * 2 + 1) as f32;

    // Largest valid pixel index in a row, used for edge clamping.
    let width_bound = width as usize - 1;

    // Rows are independent; process each (dst, src) row pair in turn.
    for (dst, src) in dst
        .chunks_exact_mut(dst_stride)
        .zip(src.chunks_exact(src_stride))
    {
        // weight0..weight3 are the running window sums for up to 4 channels.
        let mut weight1: f32 = 0.;
        let mut weight2: f32 = 0.;
        let mut weight3: f32 = 0.;

        let chunk0 = &src[..CN];

        // Prime the sums: the first pixel stands in for all clamped
        // left-of-edge taps...
        let mut weight0 = chunk0[0].to_f32().unwrap() * edge_count;
        if CN > 1 {
            weight1 = chunk0[1].to_f32().unwrap() * edge_count;
        }
        if CN > 2 {
            weight2 = chunk0[2].to_f32().unwrap() * edge_count;
        }
        if CN == 4 {
            weight3 = chunk0[3].to_f32().unwrap() * edge_count;
        }

        // ...then add the right half of the window, clamped to the row end.
        for x in 1..=half_kernel {
            let px = x.min(width_bound) * CN;
            let chunk0 = &src[px..px + CN];
            weight0 += chunk0[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += chunk0[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += chunk0[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += chunk0[3].to_f32().unwrap();
            }
        }

        // Left-edge region: the leaving index must be clamped to 0 and the
        // entering index may need clamping to the row end.
        for x in 0..half_kernel.min(width as usize) {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;

            // Emit the current averages for pixel x...
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            // ...then slide the window: add the entering pixel, drop the
            // leaving one.
            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }

        // Last x for which neither window end needs clamping.
        let max_x_before_clamping = width_bound.saturating_sub(half_kernel + 1);
        let row_length = src.len();

        let mut last_processed_item = half_kernel;

        // Interior fast path: when the whole window fits inside the row, no
        // per-pixel clamping is needed, so iterate with exact chunk zips
        // (bounds-check friendly) over dst, the leaving pixels and the
        // entering pixels simultaneously.
        if ((half_kernel * 2 + 1) * CN < row_length) && ((max_x_before_clamping * CN) < row_length)
        {
            let data_section = src;
            // Entering pixels start one full kernel ahead of the leaving ones.
            let advanced_kernel_part = &data_section[(half_kernel * 2 + 1) * CN..];
            let section_length = max_x_before_clamping - half_kernel;
            let dst = &mut dst[half_kernel * CN..(half_kernel * CN + section_length * CN)];

            for ((dst, src_previous), src_next) in dst
                .chunks_exact_mut(CN)
                .zip(data_section.chunks_exact(CN))
                .zip(advanced_kernel_part.chunks_exact(CN))
            {
                // Emit averages, then slide the window as above.
                let dst_chunk = &mut dst[..CN];
                dst_chunk[0] = rounding_saturating_mul(weight0, weight);
                if CN > 1 {
                    dst_chunk[1] = rounding_saturating_mul(weight1, weight);
                }
                if CN > 2 {
                    dst_chunk[2] = rounding_saturating_mul(weight2, weight);
                }
                if CN == 4 {
                    dst_chunk[3] = rounding_saturating_mul(weight3, weight);
                }

                weight0 += src_next[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 += src_next[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 += src_next[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 += src_next[3].to_f32().unwrap();
                }

                weight0 -= src_previous[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 -= src_previous[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 -= src_previous[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 -= src_previous[3].to_f32().unwrap();
                }
            }

            last_processed_item = max_x_before_clamping;
        }

        // Right-edge region (and fallback when the fast path did not run):
        // the entering index must be clamped to the row end.
        for x in last_processed_item..width as usize {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }
    }
}
417
418fn box_blur_vertical_pass_impl<T: Primitive, const CN: usize>(
419 src: &[T],
420 src_stride: usize,
421 dst: &mut [T],
422 dst_stride: usize,
423 width: u32,
424 height: u32,
425 radius: usize,
426) {
427 assert!(width > 0, "Width must be sanitized before this method");
428 assert!(height > 0, "Height must be sanitized before this method");
429 test_radius_size(width as usize, radius);
430
431 let kernel_size = radius * 2 + 1;
432
433 let edge_count = ((kernel_size / 2) + 1) as f32;
434 let half_kernel = kernel_size / 2;
435
436 let weight = 1f32 / (radius * 2 + 1) as f32;
437
438 let buf_size = width as usize * CN;
439
440 let buf_cap = buf_size;
441
442 let height_bound = height as usize - 1;
443
444 let mut buffer = vec![0f32; buf_cap];
451
452 for (x, (v, bf)) in src.iter().zip(buffer.iter_mut()).enumerate() {
453 let mut w = v.to_f32().unwrap() * edge_count;
454 for y in 1..=half_kernel {
455 let y_src_shift = y.min(height_bound) * src_stride;
456 w += src[y_src_shift + x].to_f32().unwrap();
457 }
458 *bf = w;
459 }
460
461 for (dst, y) in dst.chunks_exact_mut(dst_stride).zip(0..height as usize) {
462 let next = (y + half_kernel + 1).min(height_bound) * src_stride;
463 let previous = (y as i64 - half_kernel as i64).max(0) as usize * src_stride;
464
465 let next_row = &src[next..next + width as usize * CN];
466 let previous_row = &src[previous..previous + width as usize * CN];
467
468 for (((src_next, src_previous), buffer), dst) in next_row
469 .iter()
470 .zip(previous_row.iter())
471 .zip(buffer.iter_mut())
472 .zip(dst.iter_mut())
473 {
474 let mut weight0 = *buffer;
475
476 *dst = rounding_saturating_mul(weight0, weight);
477
478 weight0 += src_next.to_f32().unwrap();
479 weight0 -= src_previous.to_f32().unwrap();
480
481 *buffer = weight0;
482 }
483 }
484}
485
#[cfg(test)]
mod tests {
    use crate::{DynamicImage, GrayAlphaImage, GrayImage, RgbImage, RgbaImage};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Minimal linear-congruential RNG so the test needs no external crates;
    // the quality only has to be good enough to fuzz sizes and sigmas.
    struct Rng {
        state: u64,
    }

    impl Rng {
        fn new(seed: u64) -> Self {
            Self { state: seed }
        }
        // Advances the LCG and returns the upper 32 bits (the better-mixed
        // half of the state).
        fn next_u32(&mut self) -> u32 {
            self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
            (self.state >> 32) as u32
        }

        fn next_u8(&mut self) -> u8 {
            (self.next_u32() % 256) as u8
        }

        // Uniform-ish float in the half-open range [a, b).
        fn next_f32_in_range(&mut self, a: f32, b: f32) -> f32 {
            let u = self.next_u32();
            let unit = (u as f32) / (u32::MAX as f32 + 1.0);
            a + (b - a) * unit
        }
    }

    // Property test: blurring a constant-colored image must return the same
    // constant for every sample, for any size, sigma and channel count.
    // The time-based seed varies inputs across runs, but the property holds
    // for all of them, so the test stays deterministic in outcome.
    #[test]
    fn test_box_blur() {
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let mut rng = Rng::new((now.as_millis() & 0xffff_ffff_ffff_ffff) as u64);
        for _ in 0..35 {
            let width = rng.next_u8();
            let height = rng.next_u8();
            let sigma = rng.next_f32_in_range(0., 100.);
            let px = rng.next_u8();
            let cn = rng.next_u8();
            // Degenerate inputs are not interesting for this property.
            if width == 0 || height == 0 || sigma <= 0. {
                continue;
            }
            // Exercise 1-, 2-, 3- and 4-channel images.
            match cn % 4 {
                0 => {
                    let vc = vec![px; width as usize * height as usize];
                    let image = DynamicImage::from(
                        GrayImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                1 => {
                    let vc = vec![px; width as usize * height as usize * 2];
                    let image = DynamicImage::from(
                        GrayAlphaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                2 => {
                    let vc = vec![px; width as usize * height as usize * 3];
                    let image = DynamicImage::from(
                        RgbImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                3 => {
                    let vc = vec![px; width as usize * height as usize * 4];
                    let image = DynamicImage::from(
                        RgbaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                // cn % 4 is exhaustive above; this arm is unreachable.
                _ => {}
            }
        }
    }
}