diff options
Diffstat (limited to 'src/zip.rs')
| -rw-r--r-- | src/zip.rs | 203 |
1 files changed, 203 insertions, 0 deletions
#![allow(dead_code)] // NOTE(review): presumably not every accessor is used by the crate yet — confirm.

//! Reads the table of contents (central directory) of a ZIP archive.
//!
//! Only the subset needed to locate entries is parsed: single-disk archives
//! whose entries are either stored or DEFLATE-compressed. The 32-bit size and
//! offset fields are taken at face value; ZIP64 extra fields are not
//! interpreted.

use std::cmp::min;
use std::collections::HashMap;
// Imported explicitly so the file also builds on editions older than 2021,
// where `TryFrom` is not part of the prelude.
use std::convert::TryFrom;
use std::fs::File;
use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom};

/// Compression method recorded for an archive entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
    /// Method 0 in the central directory: data is stored as-is.
    None,
    /// Method 8 in the central directory: data is DEFLATE-compressed.
    Deflate,
}

/// One file record taken from the central directory.
#[derive(Debug)]
pub struct Entry {
    name: String,
    offset: u64,
    compressed_size: u64,
    uncompressed_size: u64,
    compression: Compression,
}

impl Entry {
    /// File name exactly as stored in the central directory.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The "relative offset of local header" field of the record.
    pub fn offset(&self) -> u64 {
        self.offset
    }

    /// Size of the entry's data as stored in the archive.
    pub fn compressed_size(&self) -> u64 {
        self.compressed_size
    }

    /// Size of the entry's data after decompression.
    pub fn uncompressed_size(&self) -> u64 {
        self.uncompressed_size
    }

    /// Compression method of the entry.
    pub fn compression(&self) -> &Compression {
        &self.compression
    }
}

/// Parsed central directory: entries in archive order plus a name index.
#[derive(Debug)]
pub struct Layout {
    entries: Vec<Entry>,
    names: HashMap<String, usize>,
}

/// Builds the `ErrorKind::Other` error used for every format violation.
fn format_error(message: &'static str) -> Error {
    Error::new(ErrorKind::Other, message)
}

/// Reads a little-endian `u16` at `offset`.
///
/// # Panics
/// Panics if `buf` does not hold two bytes at `offset`.
fn get_u16(buf: &[u8], offset: usize) -> u16 {
    u16::from_le_bytes([buf[offset], buf[offset + 1]])
}

/// Reads a little-endian `u32` at `offset`.
///
/// # Panics
/// Panics if `buf` does not hold four bytes at `offset`.
fn get_u32(buf: &[u8], offset: usize) -> u32 {
    u32::from_le_bytes([
        buf[offset],
        buf[offset + 1],
        buf[offset + 2],
        buf[offset + 3],
    ])
}

/// Reads a little-endian `u16` at `*offset` and advances `*offset` past it.
///
/// # Panics
/// Panics if `buf` does not hold two bytes at `*offset`.
fn read_u16(buf: &[u8], offset: &mut usize) -> u16 {
    let value = get_u16(buf, *offset);
    *offset += 2;
    value
}

/// Reads a little-endian `u32` at `*offset` and advances `*offset` past it.
///
/// # Panics
/// Panics if `buf` does not hold four bytes at `*offset`.
fn read_u32(buf: &[u8], offset: &mut usize) -> u32 {
    let value = get_u32(buf, *offset);
    *offset += 4;
    value
}

/// Size of the fixed part of a central file header, signature included.
const CFH_SIZE: usize = 24 + 22;
const CFH_SIGNATURE: [u8; 4] = [0x50, 0x4B, 0x01, 0x02];

/// Parses a buffer holding exactly the central directory into a [`Layout`].
///
/// An empty buffer yields an empty layout. If the same file name occurs more
/// than once, every record is kept in `entries` and the name index points at
/// the last one.
///
/// # Errors
/// Returns an `ErrorKind::Other` error when a record is truncated, has a wrong
/// signature, carries a file name that is not valid UTF-8, or uses a
/// compression method other than 0 (stored) or 8 (DEFLATE).
fn parse_central_directory(buf: &[u8]) -> Result<Layout> {
    let mut layout = Layout {
        entries: Vec::new(),
        names: HashMap::new(),
    };

    let mut i = 0;
    while i < buf.len() {
        if buf.len() - i < CFH_SIZE || buf[i..i + 4] != CFH_SIGNATURE {
            return Err(format_error("Invalid central directory"));
        }
        i += 4;
        // The size check above guarantees the fixed-size fields are present,
        // so the cursor reads below cannot go out of bounds.
        let _version_made_by = read_u16(buf, &mut i);
        let _version_needed_to_extract = read_u16(buf, &mut i);
        let _general_purpose_bit_flag = read_u16(buf, &mut i);
        let compression_method = read_u16(buf, &mut i);
        let _last_mod_file_time = read_u16(buf, &mut i);
        let _last_mod_file_date = read_u16(buf, &mut i);
        let _crc32 = read_u32(buf, &mut i);
        let compressed_size = read_u32(buf, &mut i);
        let uncompressed_size = read_u32(buf, &mut i);
        let filename_length = usize::from(read_u16(buf, &mut i));
        let extra_field_length = usize::from(read_u16(buf, &mut i));
        let file_comment_length = usize::from(read_u16(buf, &mut i));
        let _disk_number_start = read_u16(buf, &mut i);
        let _internal_file_attributes = read_u16(buf, &mut i);
        let _external_file_attributes = read_u32(buf, &mut i);
        let relative_offset_of_local_header = read_u32(buf, &mut i);

        // The variable-length tail (name, extra field, comment) must fit in
        // what is left of the buffer; `i <= buf.len()` holds at this point.
        let need = filename_length + extra_field_length + file_comment_length;
        if need > buf.len() - i {
            return Err(format_error("Invalid central file entry"));
        }
        let filename = String::from_utf8(buf[i..i + filename_length].to_vec())
            .map_err(|_| format_error("Bad UTF-8 in central file entry"))?;
        i += need;

        let compression = match compression_method {
            0 => Compression::None,
            8 => Compression::Deflate,
            _ => {
                return Err(format_error(
                    "Unsupported compression in central file entry",
                ));
            }
        };

        layout.names.insert(filename.clone(), layout.entries.len());
        layout.entries.push(Entry {
            name: filename,
            offset: u64::from(relative_offset_of_local_header),
            compressed_size: u64::from(compressed_size),
            uncompressed_size: u64::from(uncompressed_size),
            compression,
        });
    }
    Ok(layout)
}

/// Size of the end-of-central-directory record without its trailing comment.
const EOCD_SIZE: usize = 22;
const EOCD_SIGNATURE: [u8; 4] = [0x50, 0x4B, 0x05, 0x06];
/// Largest tail that can contain the record: the record itself plus the
/// longest comment its 16-bit length field can describe.
const EOCD_MAX_TAIL: usize = EOCD_SIZE + 65535;

impl Layout {
    /// Reads the central directory of the ZIP archive in `file`.
    ///
    /// The file position is moved as a side effect.
    ///
    /// # Errors
    /// See [`Layout::from_reader`].
    pub fn new(file: &mut File) -> Result<Layout> {
        Layout::from_reader(file)
    }

    /// Reads the central directory of a ZIP archive from any seekable reader.
    ///
    /// The end-of-central-directory record is searched for in the last
    /// `EOCD_MAX_TAIL` bytes, scanning backwards and accepting the first
    /// signature whose comment-length field reaches exactly to the end of the
    /// data. The reader position is moved as a side effect.
    ///
    /// # Errors
    /// * `ErrorKind::Other` if no end-of-central-directory record is found
    ///   (including inputs shorter than such a record), if the archive spans
    ///   multiple disks, or if the central directory itself is malformed.
    /// * `ErrorKind::UnexpectedEof` if the record places the central directory
    ///   beyond the end of the data.
    /// * Any error reported by the reader while seeking or reading.
    pub fn from_reader<R: Read + Seek>(reader: &mut R) -> Result<Layout> {
        let size = reader.seek(SeekFrom::End(0))?;
        // Only the tail can hold the record. Clamp instead of unwrapping so a
        // file larger than `usize::MAX` (32-bit targets) is still handled.
        let max = usize::try_from(size).map_or(EOCD_MAX_TAIL, |s| min(s, EOCD_MAX_TAIL));
        if max < EOCD_SIZE {
            // Too short to contain the record at all; without this guard the
            // search range below would underflow.
            return Err(format_error(
                "Unable to find end of central directory record, is it a ZIP file?",
            ));
        }

        let tail_len = i64::try_from(max).expect("tail length is at most EOCD_MAX_TAIL");
        let buf_start = reader.seek(SeekFrom::End(-tail_len))?;
        let mut buf = vec![0u8; max];
        reader.read_exact(buf.as_mut_slice())?;

        let eocd = (0..=max - EOCD_SIZE)
            .rev()
            .find(|&i| {
                buf[i..i + 4] == EOCD_SIGNATURE
                    && usize::from(get_u16(&buf, i + EOCD_SIZE - 2)) == max - (i + EOCD_SIZE)
            })
            .ok_or_else(|| {
                format_error("Unable to find end of central directory record, is it a ZIP file?")
            })?;

        let mut i = eocd + 4;
        let number_of_this_disk = read_u16(&buf, &mut i);
        let number_of_the_disk_with_the_start_of_the_central_directory = read_u16(&buf, &mut i);
        if number_of_this_disk != 0
            || number_of_the_disk_with_the_start_of_the_central_directory != 0
        {
            return Err(format_error("Multi disk ZIPs are not supported."));
        }
        let _total_number_of_entries_in_the_central_dir_on_this_disk = read_u16(&buf, &mut i);
        let _total_number_of_entries_in_the_central_directory = read_u16(&buf, &mut i);
        let size_of_central_directory = read_u32(&buf, &mut i);
        let offset_of_start_of_central_directory = read_u32(&buf, &mut i);

        let directory_start = u64::from(offset_of_start_of_central_directory);
        // Two widened `u32` values cannot overflow a `u64`.
        let directory_end = directory_start + u64::from(size_of_central_directory);
        if directory_end > size {
            // Reject before slicing or allocating: the fields are untrusted
            // and could otherwise index past the buffer or request gigabytes.
            return Err(Error::new(
                ErrorKind::UnexpectedEof,
                "Central directory extends past end of file",
            ));
        }
        let directory_len = usize::try_from(size_of_central_directory)
            .map_err(|_| format_error("Central directory too large for this platform"))?;

        if directory_start >= buf_start {
            // The whole directory is already in the tail buffer:
            // `directory_end <= size` implies it ends within `buf`.
            let offset = usize::try_from(directory_start - buf_start)
                .expect("offset inside the tail buffer fits in usize");
            return parse_central_directory(&buf[offset..offset + directory_len]);
        }

        if directory_len > buf.len() {
            buf.resize(directory_len, 0);
        }
        reader.seek(SeekFrom::Start(directory_start))?;
        reader.read_exact(&mut buf[..directory_len])?;
        parse_central_directory(&buf[..directory_len])
    }

    /// Entries in the order they appear in the central directory.
    pub fn entries(&self) -> &Vec<Entry> {
        &self.entries
    }

    /// Maps each file name to the index of its entry in [`Layout::entries`].
    pub fn names(&self) -> &HashMap<String, usize> {
        &self.names
    }
}
