zune_jpeg/worker.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
/*
* Copyright (c) 2023.
*
* This software is free software;
*
* You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
*/
use alloc::format;
use core::convert::TryInto;
use zune_core::colorspace::ColorSpace;
use crate::color_convert::ycbcr_to_grayscale;
use crate::components::{Components, SampleRatios};
use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
use crate::errors::DecodeErrors;
/// fast 0..255 * 0..255 => 0..255 rounded multiplication
///
/// Borrowed from stb
///
/// Computes `round(in_val * y / 255)` without a division: Jim Blinn's
/// trick adds 128 for rounding, then folds `t >> 8` back in so the final
/// `>> 8` approximates dividing by 255 exactly for all 8-bit inputs.
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    let t = i32::from(in_val) * i32::from(y) + 128;
    // expression-position return; result always fits in u8
    ((t + (t >> 8)) >> 8) as u8
}
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
pub(crate) fn color_convert(
    unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
    input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
    padded_width: usize
) -> Result<(), DecodeErrors> // so many parameters..
{
    // maximum sampling factors are in Y-channel, no need to pass them.

    // Identity conversions (e.g. RGB -> RGB, CMYK -> CMYK) only need the
    // row padding stripped, no per-pixel arithmetic.
    if input_colorspace == output_colorspace {
        match input_colorspace.num_components() {
            3 => {
                copy_removing_padding(unprocessed, width, padded_width, output);
                return Ok(());
            }
            4 => {
                copy_removing_padding_4x(unprocessed, width, padded_width, output);
                return Ok(());
            }
            // one/two component identity falls through to the dispatch below
            _ => {}
        }
    }

    // Dispatch on the (input, output) colorspace pair.
    match (input_colorspace, output_colorspace) {
        (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => {
            ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
        }
        (
            ColorSpace::YCbCr,
            ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA
        ) => {
            color_convert_ycbcr(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::YCCK, ColorSpace::RGB) => {
            color_convert_ycck_to_rgb::<3>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::YCCK, ColorSpace::RGBA) => {
            color_convert_ycck_to_rgb::<4>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::CMYK, ColorSpace::RGB) => {
            color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::CMYK, ColorSpace::RGBA) => {
            color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
        }
        // For the other components we do nothing(currently)
        _ => {
            let msg = format!(
                "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}");

            return Err(DecodeErrors::Format(msg));
        }
    }
    Ok(())
}
/// Copy a block to output removing padding bytes from input
/// if necessary
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn copy_removing_padding(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    // Each input plane row holds `padded_width` samples but only the first
    // `width` are real pixels; walk rows of all three planes in lock-step
    // with the interleaved output rows.
    let plane0 = mcu_block[0].chunks_exact(padded_width);
    let plane1 = mcu_block[1].chunks_exact(padded_width);
    let plane2 = mcu_block[2].chunks_exact(padded_width);

    for (((out_row, row0), row1), row2) in output
        .chunks_exact_mut(width * 3)
        .zip(plane0)
        .zip(plane1)
        .zip(plane2)
    {
        // interleave the planes per pixel, truncating i16 -> u8
        for (((pix, s0), s1), s2) in out_row.chunks_exact_mut(3).zip(row0).zip(row1).zip(row2) {
            pix[0] = *s0 as u8;
            pix[1] = *s1 as u8;
            pix[2] = *s2 as u8;
        }
    }
}
/// Copy a four-component block to output removing padding bytes from
/// input if necessary
///
/// Same as [`copy_removing_padding`] but interleaves four planes into
/// 4-byte pixels.
// lint allow added for consistency with the other copy/convert helpers,
// which carry the same casts
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn copy_removing_padding_4x(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    for ((((out_row, row0), row1), row2), row3) in output
        .chunks_exact_mut(width * 4)
        .zip(mcu_block[0].chunks_exact(padded_width))
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        // renamed locals so each sample matches the plane it came from;
        // the previous names (c/y/m/k) did not follow the zip order
        for ((((pix, s0), s1), s2), s3) in out_row
            .chunks_exact_mut(4)
            .zip(row0)
            .zip(row1)
            .zip(row2)
            .zip(row3)
        {
            pix[0] = *s0 as u8;
            pix[1] = *s1 as u8;
            pix[2] = *s2 as u8;
            pix[3] = *s3 as u8;
        }
    }
}
/// Convert YCCK image to rgb
///
/// Performs the regular YCbCr -> RGB(A) conversion first, then multiplies
/// the (inverted) colour channels by the K plane stored in `mcu_block[3]`.
///
/// `NUM_COMPONENTS` is the output pixel width in bytes: 3 for RGB, 4 for RGBA.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    color_convert_ycbcr(
        mcu_block,
        width,
        padded_width,
        output_colorspace,
        color_convert_16,
        output
    );
    // BUGFIX: each output row is `width * NUM_COMPONENTS` bytes wide
    // (3 for RGB, 4 for RGBA). The previous hard-coded `width * 3`
    // misaligned every row after the first for RGBA output, applying
    // the K multiply to the wrong pixels.
    for (pix_w, m_w) in output
        .chunks_exact_mut(width * NUM_COMPONENTS)
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        for (pix, m) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) {
            let m = (*m) as u8;
            // multiply inverted colour channels by K, rounded (Blinn trick)
            pix[0] = blinn_8x8(255 - pix[0], m);
            pix[1] = blinn_8x8(255 - pix[1], m);
            pix[2] = blinn_8x8(255 - pix[2], m);
        }
    }
}
/// Convert a CMYK image to RGB(A)
///
/// Each colour channel is multiplied by the K plane using the rounded
/// Blinn multiply; `NUM_COMPONENTS` is the output pixel width in bytes
/// (3 for RGB, 4 for RGBA).
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    for ((((pix_w, c_w), m_w), y_w), k_w) in output
        .chunks_exact_mut(width * NUM_COMPONENTS)
        .zip(mcu_block[0].chunks_exact(padded_width))
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        // BUGFIX: pixels are NUM_COMPONENTS bytes wide; the previous
        // hard-coded `chunks_exact_mut(3)` drifted off the 4-byte pixel
        // grid for RGBA output, scrambling colours.
        // NOTE(review): for RGBA the alpha byte (pix[3]) is left as-is,
        // matching the original behavior — presumably initialized by the
        // caller; confirm against the decoder's output setup.
        for ((((pix, c), m), y), k) in pix_w
            .chunks_exact_mut(NUM_COMPONENTS)
            .zip(c_w)
            .zip(m_w)
            .zip(y_w)
            .zip(k_w)
        {
            let c = *c as u8;
            let m = *m as u8;
            let y = *y as u8;
            let k = *k as u8;

            pix[0] = blinn_8x8(c, k);
            pix[1] = blinn_8x8(m, k);
            pix[2] = blinn_8x8(y, k);
        }
    }
}
/// Do color-conversion for interleaved MCU
///
/// Converts the Y, Cb and Cr planes in `mcu_block[0..3]` into interleaved
/// pixels in `output`, dropping per-row padding samples.
///
/// * `width` - true image width in pixels; each input row is `padded_width`
///   samples long, of which only the first `width` are real pixels.
/// * `color_convert_16` - function pointer converting 16 pixels at a time
///   (chosen elsewhere, e.g. per-CPU-feature).
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    // bytes per output row
    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than 16.
    // 64 = 16 pixels * 4 components max, enough for one converter call.
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer (rest stays zero)
            y_out[0..y_width.len()].copy_from_slice(y_width);
            cb_out[0..cb_width.len()].copy_from_slice(cb_width);
            cr_out[0..cr_width.len()].copy_from_slice(cr_width);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride — only the real `width` pixels, discarding the
            // zero-padded tail of the 16-pixel conversion
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }
        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            // try_into: chunks_exact(16) guarantees length 16, so these
            // conversions to &[i16; 16] cannot fail
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            // convert the overlapping final 16 pixels into `temp`
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        // splice the re-converted tail over the end of the output row
        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Up-sample one chroma component to the Y plane's resolution.
///
/// Depending on `component.sample_ratio` this doubles the component
/// vertically (`V`), horizontally (`H`) or both (`HV`); `None` is a no-op.
/// Results go to `component.upsample_dest`, with boundary carry-over rows
/// written to `component.first_row_upsample_dest`.
///
/// * `mcu_height` - total number of MCU rows in the image.
/// * `i` - index of the MCU row currently being processed.
/// * `upsampler_scratch_space` - scratch buffer passed to the up-sampler.
/// * `has_vertical_sample` - true when another component is vertically
///   sampled, forcing H-sampled components to also save a boundary row.
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)
            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last row of the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */
            let mut dest_start = 0;
            // output bytes produced per input row (2x or more, per sample ratio)
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now
                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row (saved last row of the previous MCU) and its
                // neighbours: row_up was saved too, row_down is the first row
                // of the MCU we just decoded
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            let stride = component.width_stride * component.vertical_sample;
            // number of input rows in this MCU's coefficients
            let stop_offset = component.raw_coeff.len() / component.width_stride;
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters — later arms assume the earlier,
                // more specific boundary cases did not match.
                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    // carve out this row's destination and advance the cursor
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            // horizontal-only: output is exactly twice as wide as input
            assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y    HS:2 VS:2 QT:0
                Component ID:Cb   HS:1 VS:1 QT:1
                Component ID:Cr   HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                // save the last up-sampled row (from the previous call's output)
                let length = component.first_row_upsample_dest.len();

                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }

            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        SampleRatios::None => {}
    };
}