From 0a120506564b65a419b99b52da7c7e8a0c9c4429 Mon Sep 17 00:00:00 2001 From: Maria Pospelova Date: Tue, 7 Apr 2026 17:33:09 +0000 Subject: [PATCH 1/2] Add IPTC parsing logic for tiff --- src/codecs/tiff.rs | 105 ++++++++++++++++++ .../testsuite/iptc_both_tags_malformed.tiff | Bin 0 -> 101 bytes .../images/tiff/testsuite/iptc_photoshop.tiff | Bin 0 -> 227 bytes .../images/tiff/testsuite/iptc_standard.tiff | Bin 0 -> 211 bytes .../tiff/testsuite/iptc_standard_normal.tiff | Bin 0 -> 101 bytes tests/metadata.rs | 82 ++++++++++++++ 6 files changed, 187 insertions(+) create mode 100644 tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff create mode 100644 tests/images/tiff/testsuite/iptc_photoshop.tiff create mode 100644 tests/images/tiff/testsuite/iptc_standard.tiff create mode 100644 tests/images/tiff/testsuite/iptc_standard_normal.tiff diff --git a/src/codecs/tiff.rs b/src/codecs/tiff.rs index d945619cae..5cca074007 100644 --- a/src/codecs/tiff.rs +++ b/src/codecs/tiff.rs @@ -22,6 +22,8 @@ use crate::{utils, ImageDecoder, ImageEncoder, ImageFormat}; const TAG_XML_PACKET: Tag = Tag::Unknown(700); const TAG_YCBCR_COEFFICIENTS: Tag = Tag::Unknown(529); const TAG_YCBCR_SUBSAMPLING: Tag = Tag::Unknown(530); +const TAG_RICHTIFFIPTC: Tag = Tag::Unknown(33723); +const TAG_PHOTOSHOP: Tag = Tag::Unknown(34377); /// Decoder for TIFF images. pub struct TiffDecoder @@ -431,6 +433,43 @@ impl ImageDecoder for TiffDecoder { Ok(()) } + fn iptc_metadata(&mut self) -> ImageResult>> { + let Some(decoder) = &mut self.inner else { + return Ok(None); + }; + + // Try Photoshop tag + if let Ok(data) = decoder.get_tag_u8_vec(TAG_PHOTOSHOP) { + if extract_iptc_from_photoshop_irb(&data).is_some() { + return Ok(Some(data)); + } + } + + // Try RichTIFFIPTC tag + if let Ok(value) = decoder.get_tag(TAG_RICHTIFFIPTC) { + // Standard representation: defined as UNDEFINED or BYTE. + if let Some(vec) = value.clone().into_u8_vec().ok().filter(|v| !v.is_empty()) { + return Ok(Some(vec)); + } + // Fallback: Adobe software sometimes incorrectly writes this as LONG (u32). + // We convert the u32 integers back to raw little-endian bytes to recover the payload. + if let Some(vec) = value + .into_u32_vec() + .ok() + .map(|vec| { + vec.into_iter() + .flat_map(|v| v.to_le_bytes()) + .collect::>() + }) + .filter(|v| !v.is_empty()) + { + return Ok(Some(vec)); + } + } + + Ok(None) + } + fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> { assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes())); @@ -692,3 +731,69 @@ impl ImageEncoder for TiffEncoder { Ok(()) } } + +struct IrbReader<'a> { + data: &'a [u8], +} + +impl<'a> IrbReader<'a> { + fn new(data: &'a [u8]) -> Self { + Self { data } + } + + fn read_slice(&mut self, len: usize) -> Option<&'a [u8]> { + if self.data.len() < len { + return None; + } + let (head, tail) = self.data.split_at(len); + self.data = tail; + Some(head) + } + + fn read_u16(&mut self) -> Option { + let bytes = self.read_slice(2)?; + Some(u16::from_be_bytes([bytes[0], bytes[1]])) + } + + fn read_u32(&mut self) -> Option { + let bytes = self.read_slice(4)?; + Some(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])) + } + + fn skip_padding(&mut self, size: usize) { + if size % 2 != 0 && !self.data.is_empty() { + self.data = &self.data[1..]; + } + } +} + +fn extract_iptc_from_photoshop_irb(data: &[u8]) -> Option<&[u8]> { + const SIGNATURE: &[u8] = b"8BIM"; + const IPTC_ID: u16 = 0x0404; + const MIN_IRB_BLOCK_SIZE: usize = 12; + + let mut reader = IrbReader::new(data); + + while reader.data.len() >= MIN_IRB_BLOCK_SIZE { + let sig = reader.read_slice(SIGNATURE.len())?; + if sig != SIGNATURE { + break; + } + + let id = reader.read_u16()?; + + let name_len = reader.read_slice(1)?[0] as usize; + reader.read_slice(name_len)?; + reader.skip_padding(1 + name_len); + + let size = reader.read_u32()? as usize; + let block_data = reader.read_slice(size)?; + + if id == IPTC_ID { + return Some(block_data); + } + + reader.skip_padding(size); + } + None +} diff --git a/tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff b/tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff new file mode 100644 index 0000000000000000000000000000000000000000..c7a59710a751c253f138cf6ef2ef6c30359f7d70 GIT binary patch literal 101 zcmebD)MDUZU|?WpU|?isU<9(j7>Uh>#1>>^0n4WV)rmvdyPJV}p}@0^oq+|&ws7+F Ig^2-a0Ke1(tN;K2 literal 0 HcmV?d00001 diff --git a/tests/images/tiff/testsuite/iptc_photoshop.tiff b/tests/images/tiff/testsuite/iptc_photoshop.tiff new file mode 100644 index 0000000000000000000000000000000000000000..611decc89071c392e67cb970ed12d20b5aba378f GIT binary patch literal 227 zcmebD)MC(NU|?WG!xm1SzAP+2K?VjERyKAH27aI#7KAb+HWMQ=Seyf>mKn+h8OMgi z7KEzT0E&wuiHkwS9h~?X#F5ODVq}GzDGg-{0QG7h>9s{-2P3gP+ZY)@Y9)Xe1OT5D B1*8A~ literal 0 HcmV?d00001 diff --git a/tests/images/tiff/testsuite/iptc_standard.tiff b/tests/images/tiff/testsuite/iptc_standard.tiff new file mode 100644 index 0000000000000000000000000000000000000000..fea1703a241a06e6fd9f14a8fd221ab2e6ed3815 GIT binary patch literal 211 zcmebD)MAiiU|?WG!wmdDF&2a{5}S#U87$5L)WQs9gS4|Du?3;(C4l0hNaA8paR(=U n25}@ar5IV^W=ccZ0zkbQNP2CN*uhBb-OW&UGBL9NwJ-nxkGBM) literal 0 HcmV?d00001 diff --git a/tests/images/tiff/testsuite/iptc_standard_normal.tiff b/tests/images/tiff/testsuite/iptc_standard_normal.tiff new file mode 100644 index 0000000000000000000000000000000000000000..a3475c5380a68a1f7bd0580c18e0f485d631b667 GIT binary patch literal 101 zcmebD)MDUZU|?WlU|?isU<9(j7>Uh>#1>>^0n4WV)rmvdAoaVO*%??sdYPC(6gmI^ Dnw Result<(), image::ImageError> { Ok(()) } + +#[test] +#[cfg(feature = "tiff")] +fn test_read_iptc_tiff_no_iptc_metadata() -> Result<(), image::ImageError> { + const PATH: &str = "tests/images/tiff/testsuite/ycbcr_lzw_bt709.tif"; + let img_path = PathBuf::from_str(PATH).unwrap(); + + let data = fs::read(img_path)?; + let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?; + let metadata = decoder.iptc_metadata()?; + assert!(metadata.is_none()); + + Ok(()) +} + +#[test] +#[cfg(feature = "tiff")] +fn test_read_iptc_tiff_both_tags_malformed() -> Result<(), image::ImageError> { + // The testfile contains the photoshop tag with invalid IRB data and RichTIFFIPTC tag with type UNDEFINED but count 0 + const PATH: &str = "tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff"; + let img_path = PathBuf::from_str(PATH).unwrap(); + + let data = fs::read(img_path)?; + let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?; + let metadata = decoder.iptc_metadata()?; + assert!(metadata.is_none()); + + Ok(()) +} + +#[test] +#[cfg(feature = "tiff")] +fn test_read_iptc_tiff_standard_normal() -> Result<(), image::ImageError> { + const PATH: &str = "tests/images/tiff/testsuite/iptc_standard_normal.tiff"; + let img_path = PathBuf::from_str(PATH).unwrap(); + + let data = fs::read(img_path)?; + let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?; + let metadata = decoder.iptc_metadata()?; + assert!(metadata.is_some()); + + let expected_iptc_metadata = &[1, 2, 3, 4]; + assert_eq!(expected_iptc_metadata, metadata.unwrap().as_slice()); + + Ok(()) +} + +#[test] +#[cfg(feature = "tiff")] +fn test_read_iptc_tiff_standard() -> Result<(), image::ImageError> { + // The testfile was generated with the RichTIFFIPTC tag stored as LONG instead of UNDEFINED/BYTE to test the fallback recovery logic + const PATH: &str = "tests/images/tiff/testsuite/iptc_standard.tiff"; + let img_path = PathBuf::from_str(PATH).unwrap(); + + let data = fs::read(img_path)?; + let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?; + let metadata = decoder.iptc_metadata()?; + assert!(metadata.is_some()); + + let expected_iptc_metadata = &[1, 2, 3, 4]; + assert_eq!(expected_iptc_metadata, metadata.unwrap().as_slice()); + + Ok(()) +} + +#[test] +#[cfg(feature = "tiff")] +fn test_read_iptc_tiff_photoshop() -> Result<(), image::ImageError> { + const PATH: &str = "tests/images/tiff/testsuite/iptc_photoshop.tiff"; + let img_path = PathBuf::from_str(PATH).unwrap(); + + let data = fs::read(img_path)?; + let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?; + let metadata = decoder.iptc_metadata()?; + assert!(metadata.is_some()); + + let expected_iptc_metadata = vec![b'8', b'B', b'I', b'M', 4, 4, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8]; + assert_eq!(expected_iptc_metadata, metadata.unwrap()); + + Ok(()) +} From a2b9cc43f07d0ce8187dc484f46aa0a02fabe7b7 Mon Sep 17 00:00:00 2001 From: Maria Pospelova Date: Wed, 8 Apr 2026 09:20:49 +0000 Subject: [PATCH 2/2] Replace manual check for even size with is_multiple_of --- src/codecs/tiff.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codecs/tiff.rs b/src/codecs/tiff.rs index 5cca074007..6f5ffbb3cd 100644 --- a/src/codecs/tiff.rs +++ b/src/codecs/tiff.rs @@ -761,7 +761,7 @@ impl<'a> IrbReader<'a> { } fn skip_padding(&mut self, size: usize) { - if size % 2 != 0 && !self.data.is_empty() { + if !size.is_multiple_of(2) && !self.data.is_empty() { self.data = &self.data[1..]; } }