summaryrefslogtreecommitdiff
path: root/src/zip.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/zip.rs')
-rw-r--r--src/zip.rs203
1 files changed, 203 insertions, 0 deletions
diff --git a/src/zip.rs b/src/zip.rs
new file mode 100644
index 0000000..f72359a
--- /dev/null
+++ b/src/zip.rs
@@ -0,0 +1,203 @@
+#![allow(dead_code)]
+
+use std::cmp::min;
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom};
+
+/// Compression method of an archive entry, decoded from the central directory.
+///
+/// Only the two methods accepted by the central directory parser in this file are
+/// represented: method `0` and method `8`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Compression {
+    /// Method `0`: the entry's data is stored without compression.
+    None,
+    /// Method `8`: the entry's data is compressed with DEFLATE.
+    Deflate,
+}
+
+/// One file record taken from the archive's central directory.
+pub struct Entry {
+ /// File name as stored in the central directory; it is required to be valid UTF-8.
+ name: String,
+ /// Relative offset of the entry's local header, as recorded in the central directory.
+ offset: u64,
+ /// Size of the entry's data as stored in the archive, as recorded in the central directory.
+ compressed_size: u64,
+ /// Size of the entry's data once decompressed, as recorded in the central directory.
+ uncompressed_size: u64,
+ /// Compression method used for the entry's data.
+ compression: Compression,
+}
+
+impl Entry {
+    /// File name of this entry.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Relative offset of this entry's local header.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+
+    /// Number of bytes the entry's data occupies in the archive.
+    pub fn compressed_size(&self) -> u64 {
+        self.compressed_size
+    }
+
+    /// Number of bytes of the entry's data after decompression.
+    pub fn uncompressed_size(&self) -> u64 {
+        self.uncompressed_size
+    }
+
+    /// Compression method used for the entry's data.
+    pub fn compression(&self) -> &Compression {
+        &self.compression
+    }
+}
+
+/// Table of contents of a ZIP archive, built from its central directory.
+pub struct Layout {
+ /// Entries in the order they appear in the central directory.
+ entries: Vec<Entry>,
+ /// Maps an entry name to its index in `entries`; if a name occurs more than once
+ /// the index of the last occurrence is kept.
+ names: HashMap<String, usize>,
+}
+
+/// Decodes the little-endian `u16` stored at `buf[offset..offset + 2]`.
+///
+/// Panics if that range does not lie inside `buf`.
+fn get_u16(buf: &[u8], offset: usize) -> u16 {
+    let mut raw = [0u8; 2];
+    raw.copy_from_slice(&buf[offset..offset + 2]);
+    u16::from_le_bytes(raw)
+}
+
+/// Decodes the little-endian `u32` stored at `buf[offset..offset + 4]`.
+///
+/// Panics if that range does not lie inside `buf`.
+fn get_u32(buf: &[u8], offset: usize) -> u32 {
+    let mut raw = [0u8; 4];
+    raw.copy_from_slice(&buf[offset..offset + 4]);
+    u32::from_le_bytes(raw)
+}
+
+/// Decodes the little-endian `u16` at `*offset` and moves `*offset` past it.
+///
+/// Panics, leaving `*offset` untouched, if fewer than two bytes remain at that position.
+fn read_u16(buf: &[u8], offset: &mut usize) -> u16 {
+    let start = *offset;
+    let mut raw = [0u8; 2];
+    raw.copy_from_slice(&buf[start..start + 2]);
+    *offset = start + 2;
+    u16::from_le_bytes(raw)
+}
+
+/// Decodes the little-endian `u32` at `*offset` and moves `*offset` past it.
+///
+/// Panics, leaving `*offset` untouched, if fewer than four bytes remain at that position.
+fn read_u32(buf: &[u8], offset: &mut usize) -> u32 {
+    let start = *offset;
+    let mut raw = [0u8; 4];
+    raw.copy_from_slice(&buf[start..start + 4]);
+    *offset = start + 4;
+    u32::from_le_bytes(raw)
+}
+
+/// Size in bytes of the fixed-length part of a central file header, i.e. the signature
+/// and all fields that precede the variable-length file name, extra field and comment.
+const CFH_SIZE: usize = 24 + 22;
+/// Signature bytes (`PK\x01\x02`) that must start every central file header.
+const CFH_SIGNATURE: [u8; 4] = [0x50, 0x4B, 0x01, 0x02];
+
+/// Header ID of the ZIP64 extended information record inside an extra field.
+const ZIP64_EXTRA_ID: u16 = 0x0001;
+
+/// Returns the payload of the first extra-field record whose header ID is `wanted_id`.
+///
+/// `extra` is read as a sequence of records, each made of a little-endian `u16` ID, a
+/// `u16` payload length and the payload itself. `None` is returned when no such record
+/// exists or when the sequence is truncated before one is found.
+fn find_extra_record(extra: &[u8], wanted_id: u16) -> Option<&[u8]> {
+    let mut pos = 0;
+    // `pos` never exceeds `extra.len()`, so the subtractions below cannot underflow.
+    while extra.len() - pos >= 4 {
+        let id = read_u16(extra, &mut pos);
+        let len = usize::from(read_u16(extra, &mut pos));
+        if len > extra.len() - pos {
+            return None;
+        }
+        if id == wanted_id {
+            return Some(&extra[pos..pos + len]);
+        }
+        pos += len;
+    }
+    None
+}
+
+/// Reads a little-endian `u64` at `*offset` and advances `*offset` past it, or returns
+/// `None` (leaving `*offset` untouched) if fewer than eight bytes remain.
+fn read_u64_checked(buf: &[u8], offset: &mut usize) -> Option<u64> {
+    let end = offset.checked_add(8)?;
+    let bytes: [u8; 8] = buf.get(*offset..end)?.try_into().ok()?;
+    *offset = end;
+    Some(u64::from_le_bytes(bytes))
+}
+
+/// Parses a complete central directory into a [`Layout`].
+///
+/// `buf` must hold exactly the central directory: a run of central file headers with
+/// nothing before, between or after them. Every header contributes one [`Entry`], in
+/// order. Sizes and the local header offset that are saturated at `0xFFFFFFFF` are
+/// replaced by the 64-bit values from the entry's ZIP64 extended information record
+/// when the extra field carries one; without such a record the 32-bit values are kept.
+///
+/// # Errors
+///
+/// Returns an error of kind [`ErrorKind::Other`] when a header is truncated or has a
+/// wrong signature, when the variable-length fields run past the end of `buf`, when a
+/// file name is not valid UTF-8, when the compression method is neither `0` nor `8`,
+/// or when a ZIP64 record is too short for the values it has to supply.
+fn parse_central_directory(buf: &[u8]) -> Result<Layout> {
+    fn invalid(msg: &'static str) -> Error {
+        Error::new(ErrorKind::Other, msg)
+    }
+
+    let mut i = 0;
+    let mut layout = Layout {
+        entries: Vec::new(),
+        names: HashMap::new(),
+    };
+
+    while i < buf.len() {
+        if buf.len() - i < CFH_SIZE || buf[i..i + 4] != CFH_SIGNATURE {
+            return Err(invalid("Invalid central directory"));
+        }
+        i += 4;
+        // The fixed-size fields are read in on-disk order; unused ones are still read
+        // so that the cursor ends up at the start of the file name.
+        let _version_made_by = read_u16(buf, &mut i);
+        let _version_needed_to_extract = read_u16(buf, &mut i);
+        let _general_purpose_bit_flag = read_u16(buf, &mut i);
+        let compression_method = read_u16(buf, &mut i);
+        let _last_mod_file_time = read_u16(buf, &mut i);
+        let _last_mod_file_date = read_u16(buf, &mut i);
+        let _crc32 = read_u32(buf, &mut i);
+        let compressed_size = read_u32(buf, &mut i);
+        let uncompressed_size = read_u32(buf, &mut i);
+        let filename_length = usize::from(read_u16(buf, &mut i));
+        let extra_field_length = usize::from(read_u16(buf, &mut i));
+        let file_comment_length = usize::from(read_u16(buf, &mut i));
+        let _disk_number_start = read_u16(buf, &mut i);
+        let _internal_file_attributes = read_u16(buf, &mut i);
+        let _external_file_attributes = read_u32(buf, &mut i);
+        let relative_offset_of_local_header = read_u32(buf, &mut i);
+
+        // Three u16 lengths cannot overflow usize; `i <= buf.len()` holds after the
+        // CFH_SIZE check above.
+        let need = filename_length + extra_field_length + file_comment_length;
+        if need > buf.len() - i {
+            return Err(invalid("Invalid central file entry"));
+        }
+        let filename = std::str::from_utf8(&buf[i..i + filename_length])
+            .map_err(|_| invalid("Bad UTF-8 in central file entry"))?
+            .to_owned();
+        i += filename_length;
+        let extra = &buf[i..i + extra_field_length];
+        i += extra_field_length;
+        i += file_comment_length;
+
+        let compression = match compression_method {
+            0 => Compression::None,
+            8 => Compression::Deflate,
+            _ => {
+                return Err(invalid("Unsupported compression in central file entry"));
+            }
+        };
+
+        // A ZIP64 record stores, in this fixed order, only those values whose 32-bit
+        // header field is saturated: uncompressed size, compressed size, header offset.
+        let mut wide = [
+            u64::from(uncompressed_size),
+            u64::from(compressed_size),
+            u64::from(relative_offset_of_local_header),
+        ];
+        if let Some(record) = find_extra_record(extra, ZIP64_EXTRA_ID) {
+            let mut pos = 0;
+            for value in wide.iter_mut().filter(|v| **v == u64::from(u32::MAX)) {
+                *value = read_u64_checked(record, &mut pos)
+                    .ok_or_else(|| invalid("Invalid central file entry"))?;
+            }
+        }
+        let [uncompressed_size, compressed_size, offset] = wide;
+
+        layout.names.insert(filename.clone(), layout.entries.len());
+        layout.entries.push(Entry {
+            name: filename,
+            offset,
+            compressed_size,
+            uncompressed_size,
+            compression,
+        });
+    }
+    Ok(layout)
+}
+
+/// Size in bytes of the fixed-length part of the end of central directory record; its
+/// last two bytes hold the length of the comment that follows the record.
+const EOCD_SIZE: usize = 22;
+/// Signature bytes (`PK\x05\x06`) that start the end of central directory record.
+const EOCD_SIGNATURE: [u8; 4] = [0x50, 0x4B, 0x05, 0x06];
+
+impl Layout {
+    /// Reads the table of contents of the ZIP archive in `file`.
+    ///
+    /// The end of central directory record is searched backwards through the tail of
+    /// the file. A candidate is accepted when its signature matches and its comment
+    /// length field equals the number of bytes between the record and the end of the
+    /// file. The central directory it points to is then parsed, straight from the tail
+    /// buffer when it lies inside it and after a separate read otherwise.
+    ///
+    /// The position of `file` is changed by this call.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error raised while seeking or reading, and an error of kind
+    /// [`ErrorKind::Other`] when no end of central directory record is found (which
+    /// includes files shorter than the record itself), when the archive spans several
+    /// disks, or when the central directory is malformed. A central directory that is
+    /// declared to extend past the end of the file is reported as
+    /// [`ErrorKind::UnexpectedEof`].
+    pub fn new(file: &mut File) -> Result<Layout> {
+        // sizeof(End of central dir record) + max size of comment
+        const MAX_TAIL: usize = EOCD_SIZE + 65535;
+        const NOT_FOUND: &str =
+            "Unable to find end of central directory record, is it a ZIP file?";
+
+        let size = file.seek(SeekFrom::End(0))?;
+        // A length that does not fit in usize is certainly larger than MAX_TAIL.
+        let max = usize::try_from(size).map_or(MAX_TAIL, |len| min(len, MAX_TAIL));
+        if max < EOCD_SIZE {
+            // Too short to hold the record at all; also keeps `max - EOCD_SIZE` below
+            // from underflowing.
+            return Err(Error::new(ErrorKind::Other, NOT_FOUND));
+        }
+        let tail_len = i64::try_from(max).expect("tail length never exceeds 65557 bytes");
+        let buf_start = file.seek(SeekFrom::End(-tail_len))?;
+        let mut buf = vec![0u8; max];
+        file.read_exact(&mut buf)?;
+
+        for i in (0..=max - EOCD_SIZE).rev() {
+            // Bytes between the end of this candidate record and the end of the file.
+            let comment_len = max - (i + EOCD_SIZE);
+            if buf[i..i + 4] != EOCD_SIGNATURE
+                || usize::from(get_u16(&buf, i + EOCD_SIZE - 2)) != comment_len
+            {
+                continue;
+            }
+
+            let mut pos = i + 4;
+            let number_of_this_disk = read_u16(&buf, &mut pos);
+            let number_of_the_disk_with_the_start_of_the_central_directory =
+                read_u16(&buf, &mut pos);
+            if number_of_this_disk != 0
+                || number_of_the_disk_with_the_start_of_the_central_directory != 0
+            {
+                return Err(Error::new(
+                    ErrorKind::Other,
+                    "Multi disk ZIPs are not supported.",
+                ));
+            }
+            let _total_number_of_entries_in_the_central_dir_on_this_disk =
+                read_u16(&buf, &mut pos);
+            let _total_number_of_entries_in_the_central_directory = read_u16(&buf, &mut pos);
+            let size_of_central_directory = read_u32(&buf, &mut pos);
+            let offset_of_start_of_central_directory = read_u32(&buf, &mut pos);
+
+            // Both values come from the file and are untrusted: validate them against
+            // the real file size before slicing or allocating. Two u32 values cannot
+            // overflow a u64 sum.
+            let cd_start = u64::from(offset_of_start_of_central_directory);
+            let cd_end = cd_start + u64::from(size_of_central_directory);
+            if cd_end > size {
+                return Err(Error::new(
+                    ErrorKind::UnexpectedEof,
+                    "Central directory extends past the end of the file",
+                ));
+            }
+            let cd_len = usize::try_from(size_of_central_directory).map_err(|_| {
+                Error::new(
+                    ErrorKind::Other,
+                    "Central directory is too large for this platform",
+                )
+            })?;
+
+            if cd_start >= buf_start {
+                // The tail buffer covers `buf_start..size` and `cd_end <= size`, so the
+                // whole directory is already in memory.
+                let offset = usize::try_from(cd_start - buf_start)
+                    .expect("offset lies inside the tail buffer");
+                return parse_central_directory(&buf[offset..offset + cd_len]);
+            }
+
+            // The directory starts before the tail buffer: reuse the buffer for a
+            // dedicated read of exactly the directory.
+            buf.resize(cd_len, 0);
+            file.seek(SeekFrom::Start(cd_start))?;
+            file.read_exact(&mut buf)?;
+            return parse_central_directory(&buf);
+        }
+        Err(Error::new(ErrorKind::Other, NOT_FOUND))
+    }
+
+    /// Entries of the archive, in central directory order.
+    pub fn entries(&self) -> &Vec<Entry> {
+        &self.entries
+    }
+
+    /// Map from entry name to the entry's index in [`Layout::entries`].
+    pub fn names(&self) -> &HashMap<String, usize> {
+        &self.names
+    }
+}