summaryrefslogtreecommitdiff
path: root/src/zip.rs
blob: f72359a7c6f8d0af88710b97a7a167d6d7230ee5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#![allow(dead_code)]

use std::cmp::min;
use std::collections::HashMap;
use std::fs::File;
use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom};

/// Compression method applied to the data of an archive entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
    /// The data is stored without compression (ZIP compression method 0).
    None,
    /// The data is compressed with DEFLATE (ZIP compression method 8).
    Deflate,
}

/// One file recorded in the central directory of a ZIP archive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Entry {
    /// File name as stored in the central directory, decoded as UTF-8.
    name: String,
    /// Relative offset of the entry's local header, as recorded in the central directory.
    offset: u64,
    /// Size in bytes of the entry's data as stored in the archive.
    compressed_size: u64,
    /// Size in bytes of the entry's data once decompressed.
    uncompressed_size: u64,
    /// How the entry's data is compressed.
    compression: Compression,
}

impl Entry {
    /// Returns the file name of the entry.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Returns the relative offset of the entry's local header.
    pub fn offset(&self) -> u64 {
        self.offset
    }

    /// Returns the size in bytes of the entry's data as stored in the archive.
    pub fn compressed_size(&self) -> u64 {
        self.compressed_size
    }

    /// Returns the size in bytes of the entry's data once decompressed.
    pub fn uncompressed_size(&self) -> u64 {
        self.uncompressed_size
    }

    /// Returns the compression method used for the entry's data.
    pub fn compression(&self) -> &Compression {
        &self.compression
    }
}

/// Contents of a ZIP archive as listed in its central directory.
pub struct Layout {
    /// Entries in the order their headers appear in the central directory.
    entries: Vec<Entry>,
    /// Maps an entry's file name to its index in `entries`; when a name occurs more than
    /// once, the index of the last occurrence is kept.
    names: HashMap<String, usize>,
}

/// Decodes the little-endian `u16` stored in `buf[offset..offset + 2]`.
///
/// # Panics
///
/// Panics if those two bytes do not lie inside `buf`.
fn get_u16(buf: &[u8], offset: usize) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&buf[offset..offset + 2]);
    u16::from_le_bytes(raw)
}

/// Decodes the little-endian `u32` stored in `buf[offset..offset + 4]`.
///
/// # Panics
///
/// Panics if those four bytes do not lie inside `buf`.
fn get_u32(buf: &[u8], offset: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[offset..offset + 4]);
    u32::from_le_bytes(raw)
}

/// Decodes the little-endian `u16` at `*offset` in `buf` and advances `*offset` past it.
///
/// # Panics
///
/// Panics if the two bytes do not lie inside `buf`; `*offset` is left untouched in that case.
fn read_u16(buf: &[u8], offset: &mut usize) -> u16 {
    let start = *offset;
    let next = start + 2;
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&buf[start..next]);
    *offset = next;
    u16::from_le_bytes(raw)
}

/// Decodes the little-endian `u32` at `*offset` in `buf` and advances `*offset` past it.
///
/// # Panics
///
/// Panics if the four bytes do not lie inside `buf`; `*offset` is left untouched in that case.
fn read_u32(buf: &[u8], offset: &mut usize) -> u32 {
    let start = *offset;
    let next = start + 4;
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[start..next]);
    *offset = next;
    u32::from_le_bytes(raw)
}

/// Size in bytes of the fixed-length part of a central file header: the signature, six
/// `u16` fields, three `u32` fields, five `u16` fields and two `u32` fields, in that order.
const CFH_SIZE: usize = 4 + 6 * 2 + 3 * 4 + 5 * 2 + 2 * 4;
/// Signature that starts every central file header (`PK\x01\x02`).
const CFH_SIGNATURE: [u8; 4] = *b"PK\x01\x02";

/// Parses the raw bytes of a ZIP central directory into a [`Layout`].
///
/// `buf` must contain exactly the central directory: a back-to-back sequence of central
/// file headers, each followed by its file name, extra field and file comment. Entries
/// are stored in the order they appear; only the name, sizes, local header offset and
/// compression method of each header are kept.
///
/// # Errors
///
/// Returns an [`ErrorKind::Other`] error when
/// * a header is truncated or does not start with the central file header signature,
/// * the variable-length fields of a header run past the end of `buf`,
/// * a file name is not valid UTF-8, or
/// * an entry uses a compression method other than 0 (stored) or 8 (deflate).
fn parse_central_directory(buf: &[u8]) -> Result<Layout> {
    let invalid = |message: &'static str| Error::new(ErrorKind::Other, message);

    let mut layout = Layout {
        entries: Vec::new(),
        names: HashMap::new(),
    };
    let mut i = 0;

    while i < buf.len() {
        // The fixed-size part of the header must fit before any field is decoded.
        if buf.len() - i < CFH_SIZE || buf[i..i + 4] != CFH_SIGNATURE {
            return Err(invalid("Invalid central directory"));
        }
        i += 4;
        let _version_made_by = read_u16(buf, &mut i);
        let _version_needed_to_extract = read_u16(buf, &mut i);
        let _general_purpose_bit_flag = read_u16(buf, &mut i);
        let compression_method = read_u16(buf, &mut i);
        let _last_mod_file_time = read_u16(buf, &mut i);
        let _last_mod_file_date = read_u16(buf, &mut i);
        let _crc32 = read_u32(buf, &mut i);
        let compressed_size = read_u32(buf, &mut i);
        let uncompressed_size = read_u32(buf, &mut i);
        let filename_length = usize::from(read_u16(buf, &mut i));
        let extra_field_length = usize::from(read_u16(buf, &mut i));
        let file_comment_length = usize::from(read_u16(buf, &mut i));
        let _disk_number_start = read_u16(buf, &mut i);
        let _internal_file_attributes = read_u16(buf, &mut i);
        let _external_file_attributes = read_u32(buf, &mut i);
        let relative_offset_of_local_header = read_u32(buf, &mut i);

        // `i` is within `buf` and each length is at most 65535, so these sums cannot
        // overflow.
        let filename_end = i + filename_length;
        let header_end = filename_end + extra_field_length + file_comment_length;
        if header_end > buf.len() {
            return Err(invalid("Invalid central file entry"));
        }

        let filename = String::from_utf8(buf[i..filename_end].to_vec())
            .map_err(|_| invalid("Bad UTF-8 in central file entry"))?;
        // The extra field and the comment are skipped without being interpreted.
        i = header_end;

        let compression = match compression_method {
            0 => Compression::None,
            8 => Compression::Deflate,
            _ => return Err(invalid("Unsupported compression in central file entry")),
        };

        // The index stored in `names` is the position the entry is about to take.
        layout.names.insert(filename.clone(), layout.entries.len());
        layout.entries.push(Entry {
            name: filename,
            offset: u64::from(relative_offset_of_local_header),
            compressed_size: u64::from(compressed_size),
            uncompressed_size: u64::from(uncompressed_size),
            compression,
        });
    }

    Ok(layout)
}

/// Size in bytes of the end of central directory record without its trailing comment:
/// the signature, four `u16` fields, two `u32` fields and the `u16` comment length.
const EOCD_SIZE: usize = 4 + 4 * 2 + 2 * 4 + 2;
/// Signature that starts the end of central directory record (`PK\x05\x06`).
const EOCD_SIGNATURE: [u8; 4] = *b"PK\x05\x06";

impl Layout {
    /// Reads the central directory of the ZIP archive in `file` and returns its layout.
    ///
    /// The end of central directory (EOCD) record is searched for backwards through the
    /// last `EOCD_SIZE + 65535` bytes of the file. A candidate is accepted when it starts
    /// with the EOCD signature and its comment-length field accounts for exactly the bytes
    /// between the record and the end of the file. The central directory the record points
    /// at is then parsed, re-using the bytes already read when it lies inside that tail.
    ///
    /// The position of `file` is changed by this call.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while seeking or reading, and an [`ErrorKind::Other`]
    /// error when
    /// * no EOCD record is found (including files too short to contain one),
    /// * the archive spans multiple disks,
    /// * the central directory is declared to extend past the end of the file, or
    /// * the central directory itself cannot be parsed.
    pub fn new(file: &mut File) -> Result<Layout> {
        let not_found = || {
            Error::new(
                ErrorKind::Other,
                "Unable to find end of central directory record, is it a ZIP file?",
            )
        };

        let size = file.seek(SeekFrom::End(0))?;
        // sizeof(End of central dir record) + max size of comment. A file whose size does
        // not fit in `usize` is certainly longer than that, so the full window is used
        // instead of panicking on the conversion.
        let window = EOCD_SIZE + 65535;
        let max = usize::try_from(size).map_or(window, |len| min(window, len));
        if max < EOCD_SIZE {
            // Too short to hold an EOCD record; bail out before `max - EOCD_SIZE` underflows.
            return Err(not_found());
        }
        let rewind = i64::try_from(max).expect("search window is at most EOCD_SIZE + 65535 bytes");
        let buf_start = file.seek(SeekFrom::End(-rewind))?;
        let mut buf = vec![0u8; max];
        file.read_exact(&mut buf)?;

        // Scan from the end so that the record closest to the end of the file wins.
        let eocd = (0..=max - EOCD_SIZE)
            .rev()
            .find(|&pos| {
                buf[pos..pos + 4] == EOCD_SIGNATURE
                    && usize::from(get_u16(&buf, pos + EOCD_SIZE - 2)) == max - (pos + EOCD_SIZE)
            })
            .ok_or_else(not_found)?;

        let mut i = eocd + 4;
        let number_of_this_disk = read_u16(&buf, &mut i);
        let central_directory_disk = read_u16(&buf, &mut i);
        if number_of_this_disk != 0 || central_directory_disk != 0 {
            return Err(Error::new(
                ErrorKind::Other,
                "Multi disk ZIPs are not supported.",
            ));
        }
        let _entries_on_this_disk = read_u16(&buf, &mut i);
        let _entries_in_central_directory = read_u16(&buf, &mut i);
        let size_of_central_directory = read_u32(&buf, &mut i);
        let offset_of_start_of_central_directory = read_u32(&buf, &mut i);

        // Both fields come straight from the file: validate them against the real file
        // size before slicing or allocating. The sum of two `u32` values cannot overflow
        // a `u64`.
        let cd_start = u64::from(offset_of_start_of_central_directory);
        let cd_end = cd_start + u64::from(size_of_central_directory);
        if cd_end > size {
            return Err(Error::new(
                ErrorKind::Other,
                "Central directory extends past the end of the file",
            ));
        }
        let cd_len = usize::try_from(size_of_central_directory)
            .map_err(|_| Error::new(ErrorKind::Other, "Central directory is too large"))?;

        if cd_start >= buf_start {
            // The whole directory is already in `buf`: `cd_end <= size` implies
            // `offset + cd_len <= max`.
            let offset = usize::try_from(cd_start - buf_start)
                .expect("central directory starts inside the tail buffer");
            return parse_central_directory(&buf[offset..offset + cd_len]);
        }

        // The directory starts before the tail that was read: fetch it separately,
        // growing the buffer only when it is too small.
        if buf.len() < cd_len {
            buf.resize(cd_len, 0);
        }
        file.seek(SeekFrom::Start(cd_start))?;
        file.read_exact(&mut buf[..cd_len])?;
        parse_central_directory(&buf[..cd_len])
    }

    /// Returns the entries in the order they appear in the central directory.
    pub fn entries(&self) -> &Vec<Entry> {
        &self.entries
    }

    /// Returns the map from entry file name to its index in [`Layout::entries`].
    pub fn names(&self) -> &HashMap<String, usize> {
        &self.names
    }
}