image/imageops/fast_blur.rs

1use num_traits::Bounded;
2
3use crate::imageops::filter_1d::{SafeAdd, SafeMul};
4use crate::{ImageBuffer, Pixel, Primitive};
5
/// Approximation of Gaussian blur.
///
/// # Arguments
///
/// * `image_buffer` - source image.
/// * `sigma` - value controls image flattening level.
///
/// This method assumes alpha pre-multiplication for images that contain non-constant alpha.
///
/// This method typically assumes that the input is scene-linear light.
/// If it is not, color distortion may occur.
///
/// Source: Kovesi, P.:  Fast Almost-Gaussian Filtering The Australian Pattern
/// Recognition Society Conference: DICTA 2010. December 2010. Sydney.
#[must_use]
pub fn fast_blur<P: Pixel>(
    input_buffer: &ImageBuffer<P, Vec<P::Subpixel>>,
    sigma: f32,
) -> ImageBuffer<P, Vec<P::Subpixel>> {
    let (width, height) = input_buffer.dimensions();

    // Empty images have nothing to blur.
    if width == 0 || height == 0 {
        return input_buffer.clone();
    }

    // Three successive box blurs approximate a Gaussian (see the paper above).
    let num_passes = 3;

    let boxes = boxes_for_gauss(sigma, num_passes);
    if boxes.is_empty() {
        return input_buffer.clone();
    }

    let samples = input_buffer.as_flat_samples().samples;

    // Total subpixel count, overflow-checked so that absurd dimensions panic
    // with a clear message instead of wrapping.
    let destination_size = match (width as usize)
        .safe_mul(height as usize)
        .and_then(|x| x.safe_mul(P::CHANNEL_COUNT as usize))
    {
        Ok(s) => s,
        Err(_) => panic!("Width and height and channels count exceeded pointer size"),
    };

    let first_box = boxes[0];

    // Ping-pong buffers: each pass blurs horizontally into `transient`, then
    // vertically back into `dst`.
    let mut transient = vec![P::Subpixel::min_value(); destination_size];
    let mut dst = vec![P::Subpixel::min_value(); destination_size];

    // Since `destination_size` did not overflow, this narrower product cannot either.
    let stride = width as usize * P::CHANNEL_COUNT as usize;

    // bound + radius + 1 must fit in a pointer size
    test_radius_size(width as usize, first_box);
    test_radius_size(height as usize, first_box);

    box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
        samples,
        stride,
        &mut transient,
        stride,
        width,
        first_box,
    );

    box_blur_vertical_pass_strategy::<P, P::Subpixel>(
        &transient, stride, &mut dst, stride, width, height, first_box,
    );

    // Remaining passes read from `dst`, the output of the previous pass.
    for &box_container in boxes.iter().skip(1) {
        // bound + radius + 1 must fit in a pointer size
        test_radius_size(width as usize, box_container);
        test_radius_size(height as usize, box_container);

        box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
            &dst,
            stride,
            &mut transient,
            stride,
            width,
            box_container,
        );

        box_blur_vertical_pass_strategy::<P, P::Subpixel>(
            &transient,
            stride,
            &mut dst,
            stride,
            width,
            height,
            box_container,
        );
    }

    // `dst` holds exactly `width * height * CHANNEL_COUNT` samples, so
    // `from_raw` cannot fail here.
    let mut buffer = ImageBuffer::from_raw(width, height, dst).unwrap();
    buffer.copy_color_space_from(input_buffer);
    buffer
}
102
103#[inline]
104fn test_radius_size(bound: usize, radius: usize) {
105    match bound.safe_add(radius) {
106        Ok(_) => {}
107        Err(_) => panic!("Radius overflowed maximum possible size"),
108    }
109}
110
111fn boxes_for_gauss(sigma: f32, n: usize) -> Vec<usize> {
112    let w_ideal = f32::sqrt((12.0 * sigma.powi(2) / (n as f32)) + 1.0);
113    let mut w_l = w_ideal.floor();
114    if w_l % 2.0 == 0.0 {
115        w_l -= 1.0;
116    }
117    let w_u = w_l + 2.0;
118
119    let m_ideal = 0.25 * (n as f32) * (w_l + 3.0) - 3.0 * sigma.powi(2) * (w_l + 1.0).recip();
120
121    let m = f32::round(m_ideal) as usize;
122
123    (0..n)
124        .map(|i| if i < m { w_l as usize } else { w_u as usize })
125        .map(|i| ceil_to_odd(i.saturating_sub(1) / 2))
126        .collect::<Vec<_>>()
127}
128
/// Returns `x` unchanged when it is odd, otherwise the next odd number (`x + 1`).
#[inline]
fn ceil_to_odd(x: usize) -> usize {
    match x % 2 {
        // Even values round up to the next odd one.
        0 => x + 1,
        // Odd values pass through.
        _ => x,
    }
}
137
138#[inline]
139#[allow(clippy::manual_clamp)]
140fn rounding_saturating_mul<T: Primitive>(v: f32, w: f32) -> T {
141    // T::DEFAULT_MAX_VALUE is equal to 1.0 only in cases where storage type if `f32/f64`,
142    // that means it should be safe to round here.
143    if T::DEFAULT_MAX_VALUE.to_f32().unwrap() != 1.0 {
144        T::from(
145            (v * w)
146                .round()
147                .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
148                .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
149        )
150        .unwrap()
151    } else {
152        T::from(
153            (v * w)
154                .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
155                .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
156        )
157        .unwrap()
158    }
159}
160
161fn box_blur_horizontal_pass_strategy<T, P: Primitive>(
162    src: &[P],
163    src_stride: usize,
164    dst: &mut [P],
165    dst_stride: usize,
166    width: u32,
167    radius: usize,
168) where
169    T: Pixel,
170{
171    if T::CHANNEL_COUNT == 1 {
172        box_blur_horizontal_pass_impl::<P, 1>(src, src_stride, dst, dst_stride, width, radius);
173    } else if T::CHANNEL_COUNT == 2 {
174        box_blur_horizontal_pass_impl::<P, 2>(src, src_stride, dst, dst_stride, width, radius);
175    } else if T::CHANNEL_COUNT == 3 {
176        box_blur_horizontal_pass_impl::<P, 3>(src, src_stride, dst, dst_stride, width, radius);
177    } else if T::CHANNEL_COUNT == 4 {
178        box_blur_horizontal_pass_impl::<P, 4>(src, src_stride, dst, dst_stride, width, radius);
179    } else {
180        unimplemented!("More than 4 channels is not yet implemented");
181    }
182}
183
184fn box_blur_vertical_pass_strategy<T: Pixel, P: Primitive>(
185    src: &[P],
186    src_stride: usize,
187    dst: &mut [P],
188    dst_stride: usize,
189    width: u32,
190    height: u32,
191    radius: usize,
192) {
193    if T::CHANNEL_COUNT == 1 {
194        box_blur_vertical_pass_impl::<P, 1>(
195            src, src_stride, dst, dst_stride, width, height, radius,
196        );
197    } else if T::CHANNEL_COUNT == 2 {
198        box_blur_vertical_pass_impl::<P, 2>(
199            src, src_stride, dst, dst_stride, width, height, radius,
200        );
201    } else if T::CHANNEL_COUNT == 3 {
202        box_blur_vertical_pass_impl::<P, 3>(
203            src, src_stride, dst, dst_stride, width, height, radius,
204        );
205    } else if T::CHANNEL_COUNT == 4 {
206        box_blur_vertical_pass_impl::<P, 4>(
207            src, src_stride, dst, dst_stride, width, height, radius,
208        );
209    } else {
210        unimplemented!("More than 4 channels is not yet implemented");
211    }
212}
213
/// Horizontal box-blur pass over an interleaved row-major image with `CN`
/// channels per pixel.
///
/// Maintains a sliding-window sum per channel: each output pixel is the
/// current sums scaled by `1 / kernel_size`, after which the pixel entering
/// the window is added and the one leaving it is subtracted. Pixels that fall
/// outside the row are clamped to the row edges (edge replication).
///
/// # Panics
///
/// Panics when `width` is zero or when `width + radius` does not fit in `usize`.
fn box_blur_horizontal_pass_impl<T, const CN: usize>(
    src: &[T],
    src_stride: usize,
    dst: &mut [T],
    dst_stride: usize,
    width: u32,
    radius: usize,
) where
    T: Primitive,
{
    assert!(width > 0, "Width must be sanitized before this method");
    test_radius_size(width as usize, radius);

    let kernel_size = radius * 2 + 1;
    // Weight applied to the replicated left-edge pixel when seeding the window.
    let edge_count = ((kernel_size / 2) + 1) as f32;
    let half_kernel = kernel_size / 2;

    // Normalization factor: the box filter averages `kernel_size` samples.
    let weight = 1f32 / (radius * 2 + 1) as f32;

    // Index of the last pixel in a row, used for edge clamping.
    let width_bound = width as usize - 1;

    // Horizontal blurring consists from 4 phases
    // 1 - Fill initial sliding window
    // 2 - Blur dangerous leading zone where clamping is required
    // 3 - Blur *normal* zone where clamping is not required
    // 4 - Blur dangerous trailing zone where clamping is required

    for (dst, src) in dst
        .chunks_exact_mut(dst_stride)
        .zip(src.chunks_exact(src_stride))
    {
        // Running window sums, one per channel (weight1..3 stay 0 for CN < 4).
        let mut weight1: f32 = 0.;
        let mut weight2: f32 = 0.;
        let mut weight3: f32 = 0.;

        let chunk0 = &src[..CN];

        // replicate edge
        let mut weight0 = chunk0[0].to_f32().unwrap() * edge_count;
        if CN > 1 {
            weight1 = chunk0[1].to_f32().unwrap() * edge_count;
        }
        if CN > 2 {
            weight2 = chunk0[2].to_f32().unwrap() * edge_count;
        }
        if CN == 4 {
            weight3 = chunk0[3].to_f32().unwrap() * edge_count;
        }

        // Phase 1: add the first `half_kernel` pixels (clamped to the row end)
        // to complete the initial window.
        for x in 1..=half_kernel {
            let px = x.min(width_bound) * CN;
            let chunk0 = &src[px..px + CN];
            weight0 += chunk0[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += chunk0[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += chunk0[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += chunk0[3].to_f32().unwrap();
            }
        }

        // Phase 2: leading zone — `previous` must be clamped to 0 and `next`
        // to the last pixel.
        for x in 0..half_kernel.min(width as usize) {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;

            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }

        let max_x_before_clamping = width_bound.saturating_sub(half_kernel + 1);
        let row_length = src.len();

        let mut last_processed_item = half_kernel;

        // Phase 3: interior zone — no clamping needed, so iterate three
        // parallel slices (dst, window tail, window head) without index math;
        // `chunks_exact` lets the compiler drop per-pixel bounds checks.
        if ((half_kernel * 2 + 1) * CN < row_length) && ((max_x_before_clamping * CN) < row_length)
        {
            let data_section = src;
            let advanced_kernel_part = &data_section[(half_kernel * 2 + 1) * CN..];
            let section_length = max_x_before_clamping - half_kernel;
            let dst = &mut dst[half_kernel * CN..(half_kernel * CN + section_length * CN)];

            for ((dst, src_previous), src_next) in dst
                .chunks_exact_mut(CN)
                .zip(data_section.chunks_exact(CN))
                .zip(advanced_kernel_part.chunks_exact(CN))
            {
                let dst_chunk = &mut dst[..CN];
                dst_chunk[0] = rounding_saturating_mul(weight0, weight);
                if CN > 1 {
                    dst_chunk[1] = rounding_saturating_mul(weight1, weight);
                }
                if CN > 2 {
                    dst_chunk[2] = rounding_saturating_mul(weight2, weight);
                }
                if CN == 4 {
                    dst_chunk[3] = rounding_saturating_mul(weight3, weight);
                }

                weight0 += src_next[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 += src_next[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 += src_next[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 += src_next[3].to_f32().unwrap();
                }

                weight0 -= src_previous[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 -= src_previous[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 -= src_previous[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 -= src_previous[3].to_f32().unwrap();
                }
            }

            last_processed_item = max_x_before_clamping;
        }

        // Phase 4: trailing zone — `next` must be clamped to the last pixel.
        for x in last_processed_item..width as usize {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }
    }
}
417
418fn box_blur_vertical_pass_impl<T: Primitive, const CN: usize>(
419    src: &[T],
420    src_stride: usize,
421    dst: &mut [T],
422    dst_stride: usize,
423    width: u32,
424    height: u32,
425    radius: usize,
426) {
427    assert!(width > 0, "Width must be sanitized before this method");
428    assert!(height > 0, "Height must be sanitized before this method");
429    test_radius_size(width as usize, radius);
430
431    let kernel_size = radius * 2 + 1;
432
433    let edge_count = ((kernel_size / 2) + 1) as f32;
434    let half_kernel = kernel_size / 2;
435
436    let weight = 1f32 / (radius * 2 + 1) as f32;
437
438    let buf_size = width as usize * CN;
439
440    let buf_cap = buf_size;
441
442    let height_bound = height as usize - 1;
443
444    // Instead of summing each column separately we use here transient buffer that
445    // averages columns in row manner.
446    // So, we make the initial buffer at the top edge
447    // and then doing blur by averaging the whole row ( which is in buffer )
448    // and subtracting and adding next and previous rows in horizontal manner.
449
450    let mut buffer = vec![0f32; buf_cap];
451
452    for (x, (v, bf)) in src.iter().zip(buffer.iter_mut()).enumerate() {
453        let mut w = v.to_f32().unwrap() * edge_count;
454        for y in 1..=half_kernel {
455            let y_src_shift = y.min(height_bound) * src_stride;
456            w += src[y_src_shift + x].to_f32().unwrap();
457        }
458        *bf = w;
459    }
460
461    for (dst, y) in dst.chunks_exact_mut(dst_stride).zip(0..height as usize) {
462        let next = (y + half_kernel + 1).min(height_bound) * src_stride;
463        let previous = (y as i64 - half_kernel as i64).max(0) as usize * src_stride;
464
465        let next_row = &src[next..next + width as usize * CN];
466        let previous_row = &src[previous..previous + width as usize * CN];
467
468        for (((src_next, src_previous), buffer), dst) in next_row
469            .iter()
470            .zip(previous_row.iter())
471            .zip(buffer.iter_mut())
472            .zip(dst.iter_mut())
473        {
474            let mut weight0 = *buffer;
475
476            *dst = rounding_saturating_mul(weight0, weight);
477
478            weight0 += src_next.to_f32().unwrap();
479            weight0 -= src_previous.to_f32().unwrap();
480
481            *buffer = weight0;
482        }
483    }
484}
485
#[cfg(test)]
mod tests {
    use crate::{DynamicImage, GrayAlphaImage, GrayImage, RgbImage, RgbaImage};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Minimal deterministic 64-bit LCG so the test needs no external `rand`
    // dependency.
    struct Rng {
        state: u64,
    }

    impl Rng {
        fn new(seed: u64) -> Self {
            Self { state: seed }
        }
        // Advances the LCG and returns the high 32 bits of the new state.
        fn next_u32(&mut self) -> u32 {
            self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
            (self.state >> 32) as u32
        }

        fn next_u8(&mut self) -> u8 {
            (self.next_u32() % 256) as u8
        }

        // Uniform float in [a, b).
        fn next_f32_in_range(&mut self, a: f32, b: f32) -> f32 {
            let u = self.next_u32();
            let unit = (u as f32) / (u32::MAX as f32 + 1.0);
            a + (b - a) * unit
        }
    }

    // Property test: blurring a constant-color image of any size, sigma and
    // channel count must leave every sample unchanged, because a box blur is
    // an average and the average of a constant is that constant.
    #[test]
    fn test_box_blur() {
        // Seeded from the wall clock so each run explores different cases.
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let mut rng = Rng::new((now.as_millis() & 0xffff_ffff_ffff_ffff) as u64);
        for _ in 0..35 {
            let width = rng.next_u8();
            let height = rng.next_u8();
            let sigma = rng.next_f32_in_range(0., 100.);
            let px = rng.next_u8();
            let cn = rng.next_u8();
            if width == 0 || height == 0 || sigma <= 0. {
                continue;
            }
            // Pick one of the four supported channel counts at random.
            match cn % 4 {
                0 => {
                    let vc = vec![px; width as usize * height as usize];
                    let image = DynamicImage::from(
                        GrayImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                1 => {
                    let vc = vec![px; width as usize * height as usize * 2];
                    let image = DynamicImage::from(
                        GrayAlphaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                2 => {
                    let vc = vec![px; width as usize * height as usize * 3];
                    let image = DynamicImage::from(
                        RgbImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                3 => {
                    let vc = vec![px; width as usize * height as usize * 4];
                    let image = DynamicImage::from(
                        RgbaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                _ => {}
            }
        }
    }
}