blob: b97babc9513fc2db95a709e964ed778989148627 [file] [log] [blame]
Stuart McCullochf3173222012-06-07 21:57:32 +00001package aQute.libg.cafs;
2
3import static aQute.lib.io.IO.*;
4
5import java.io.*;
6import java.nio.channels.*;
7import java.security.*;
8import java.util.*;
9import java.util.concurrent.atomic.*;
10import java.util.zip.*;
11
12import aQute.lib.index.*;
13import aQute.libg.cryptography.*;
14
15/**
16 * CAFS implements a SHA-1 based file store. The basic idea is that every file
17 * in the universe has a unique SHA-1. Hard to believe but people smarter than
18 * me have come to that conclusion. This class maintains a compressed store of
19 * SHA-1 identified files. So if you have the SHA-1, you can get the contents.
Stuart McCullochf3173222012-06-07 21:57:32 +000020 * This makes it easy to store a SHA-1 instead of the whole file or maintain a
21 * naming scheme. An added advantage is that it is always easy to verify you get
22 * the right stuff. The SHA-1 Content Addressable File Store is the core
23 * underlying idea in Git.
24 */
25public class CAFS implements Closeable, Iterable<SHA1> {
Stuart McCulloch669423b2012-06-26 16:34:24 +000026 final static byte[] CAFS;
27 final static byte[] CAFE;
Stuart McCullochf3173222012-06-07 21:57:32 +000028 final static String INDEXFILE = "index.idx";
29 final static String STOREFILE = "store.cafs";
30 final static String ALGORITHM = "SHA-1";
31 final static int KEYLENGTH = 20;
32 final static int HEADERLENGTH = 4 // CAFS
33 + 4 // flags
34 + 4 // compressed length
35 + 4 // uncompressed length
Stuart McCulloch4482c702012-06-15 13:27:53 +000036 + KEYLENGTH // key
Stuart McCullochf3173222012-06-07 21:57:32 +000037 + 2 // header checksum
Stuart McCulloch4482c702012-06-15 13:27:53 +000038 ;
Stuart McCullochf3173222012-06-07 21:57:32 +000039
40 final File home;
41 Index index;
42 RandomAccessFile store;
43 FileChannel channel;
44
Stuart McCulloch669423b2012-06-26 16:34:24 +000045 static {
46 try {
47 CAFS = "CAFS".getBytes("UTF-8");
48 CAFE = "CAFE".getBytes("UTF-8");
49 } catch (Throwable e) {
50 throw new ExceptionInInitializerError(e);
51 }
52 }
53
Stuart McCullochf3173222012-06-07 21:57:32 +000054 /**
55 * Constructor for a Content Addressable File Store
56 *
57 * @param home
58 * @param create
59 * @throws Exception
60 */
61 public CAFS(File home, boolean create) throws Exception {
62 this.home = home;
63 if (!home.isDirectory()) {
64 if (create) {
65 home.mkdirs();
66 } else
67 throw new IllegalArgumentException("CAFS requires a directory with create=false");
68 }
69
70 index = new Index(new File(home, INDEXFILE), KEYLENGTH);
71 store = new RandomAccessFile(new File(home, STOREFILE), "rw");
72 channel = store.getChannel();
73 if (store.length() < 0x100) {
74 if (create) {
75 store.write(CAFS);
76 for (int i = 1; i < 64; i++)
77 store.writeInt(0);
78 channel.force(true);
79 } else
Stuart McCulloch4482c702012-06-15 13:27:53 +000080 throw new IllegalArgumentException("Invalid store file, length is too short " + store);
Stuart McCullochf3173222012-06-07 21:57:32 +000081 System.err.println(store.length());
82 }
83 store.seek(0);
84 if (!verifySignature(store, CAFS))
85 throw new IllegalArgumentException("Not a valid signature: CAFS at start of file");
86
87 }
88
89 /**
90 * Store an input stream in the CAFS while calculating and returning the
91 * SHA-1 code.
92 *
93 * @param in
94 * The input stream to store.
95 * @return The SHA-1 code.
96 * @throws Exception
97 * if anything goes wrong
98 */
99 public SHA1 write(InputStream in) throws Exception {
100
101 Deflater deflater = new Deflater();
102 MessageDigest md = MessageDigest.getInstance(ALGORITHM);
103 DigestInputStream din = new DigestInputStream(in, md);
104 ByteArrayOutputStream bout = new ByteArrayOutputStream();
105 DeflaterOutputStream dout = new DeflaterOutputStream(bout, deflater);
106 copy(din, dout);
107
108 synchronized (store) {
109 // First check if it already exists
110 SHA1 sha1 = new SHA1(md.digest());
Stuart McCulloch4482c702012-06-15 13:27:53 +0000111
Stuart McCullochf3173222012-06-07 21:57:32 +0000112 long search = index.search(sha1.digest());
113 if (search > 0)
114 return sha1;
115
116 byte[] compressed = bout.toByteArray();
117
118 // we need to append this file to our store,
119 // which requires a lock. However, we are in a race
120 // so others can get the lock between us getting
121 // the length and someone else getting the lock.
122 // So we must verify after we get the lock that the
123 // length was unchanged.
124 FileLock lock = null;
125 try {
126 long insertPoint;
127 int recordLength = compressed.length + HEADERLENGTH;
128
129 while (true) {
130 insertPoint = store.length();
131 lock = channel.lock(insertPoint, recordLength, false);
132
133 if (store.length() == insertPoint)
134 break;
135
136 // We got the wrong lock, someone else
137 // got in between reading the length
138 // and locking
139 lock.release();
140 }
141 int totalLength = deflater.getTotalIn();
142 store.seek(insertPoint);
143 update(sha1.digest(), compressed, totalLength);
144 index.insert(sha1.digest(), insertPoint);
145 return sha1;
Stuart McCulloch4482c702012-06-15 13:27:53 +0000146 }
147 finally {
Stuart McCullochf3173222012-06-07 21:57:32 +0000148 if (lock != null)
149 lock.release();
150 }
151 }
152 }
153
154 /**
155 * Read the contents of a sha 1 key.
156 *
157 * @param sha1
158 * The key
159 * @return An Input Stream on the content or null of key not found
160 * @throws Exception
161 */
162 public InputStream read(final SHA1 sha1) throws Exception {
163 synchronized (store) {
164 long offset = index.search(sha1.digest());
165 if (offset < 0)
166 return null;
167
168 byte[] readSha1;
169 byte[] buffer;
170 store.seek(offset);
171 if (!verifySignature(store, CAFE))
172 throw new IllegalArgumentException("No signature");
173
Stuart McCulloch4482c702012-06-15 13:27:53 +0000174 int flags = store.readInt();
Stuart McCullochf3173222012-06-07 21:57:32 +0000175 int compressedLength = store.readInt();
176 int uncompressedLength = store.readInt();
177 readSha1 = new byte[KEYLENGTH];
178 store.read(readSha1);
179 SHA1 rsha1 = new SHA1(readSha1);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000180
Stuart McCullochf3173222012-06-07 21:57:32 +0000181 if (!sha1.equals(rsha1))
182 throw new IOException("SHA1 read and asked mismatch: " + sha1 + " " + rsha1);
183
184 short crc = store.readShort(); // Read CRC
Stuart McCulloch4482c702012-06-15 13:27:53 +0000185 if (crc != checksum(flags, compressedLength, uncompressedLength, readSha1))
Stuart McCullochf3173222012-06-07 21:57:32 +0000186 throw new IllegalArgumentException("Invalid header checksum: " + sha1);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000187
Stuart McCullochf3173222012-06-07 21:57:32 +0000188 buffer = new byte[compressedLength];
189 store.readFully(buffer);
190 return getSha1Stream(sha1, buffer, uncompressedLength);
191 }
192 }
193
194 public boolean exists(byte[] sha1) throws Exception {
195 return index.search(sha1) >= 0;
196 }
197
198 public void reindex() throws Exception {
199 long length;
200 synchronized (store) {
201 length = store.length();
202 if (length < 0x100)
Stuart McCulloch4482c702012-06-15 13:27:53 +0000203 throw new IllegalArgumentException("Store file is too small, need to be at least 256 bytes: " + store);
Stuart McCullochf3173222012-06-07 21:57:32 +0000204 }
205
206 RandomAccessFile in = new RandomAccessFile(new File(home, STOREFILE), "r");
207 try {
208 byte[] signature = new byte[4];
209 in.readFully(signature);
210 if (!Arrays.equals(CAFS, signature))
211 throw new IllegalArgumentException("Store file does not start with CAFS: " + in);
212
213 in.seek(0x100);
214 File ixf = new File(home, "index.new");
215 Index index = new Index(ixf, KEYLENGTH);
216
217 while (in.getFilePointer() < length) {
218 long entry = in.getFilePointer();
219 SHA1 sha1 = verifyEntry(in);
220 index.insert(sha1.digest(), entry);
221 }
222
223 synchronized (store) {
224 index.close();
225 File indexFile = new File(home, INDEXFILE);
226 ixf.renameTo(indexFile);
227 this.index = new Index(indexFile, KEYLENGTH);
228 }
Stuart McCulloch4482c702012-06-15 13:27:53 +0000229 }
230 finally {
Stuart McCullochf3173222012-06-07 21:57:32 +0000231 in.close();
232 }
233 }
234
235 public void close() throws IOException {
236 synchronized (store) {
237 try {
238 store.close();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000239 }
240 finally {
Stuart McCullochf3173222012-06-07 21:57:32 +0000241 index.close();
242 }
243 }
244 }
245
246 private SHA1 verifyEntry(RandomAccessFile in) throws IOException, NoSuchAlgorithmException {
247 byte[] signature = new byte[4];
248 in.readFully(signature);
249 if (!Arrays.equals(CAFE, signature))
250 throw new IllegalArgumentException("File is corrupted: " + in);
251
252 /* int flags = */in.readInt();
253 int compressedSize = in.readInt();
254 int uncompressedSize = in.readInt();
255 byte[] key = new byte[KEYLENGTH];
256 in.readFully(key);
257 SHA1 sha1 = new SHA1(key);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000258
Stuart McCullochf3173222012-06-07 21:57:32 +0000259 byte[] buffer = new byte[compressedSize];
260 in.readFully(buffer);
261
262 InputStream xin = getSha1Stream(sha1, buffer, uncompressedSize);
263 xin.skip(uncompressedSize);
264 xin.close();
265 return sha1;
266 }
267
268 private boolean verifySignature(DataInput din, byte[] org) throws IOException {
269 byte[] read = new byte[org.length];
270 din.readFully(read);
271 return Arrays.equals(read, org);
272 }
273
Stuart McCulloch4482c702012-06-15 13:27:53 +0000274 private InputStream getSha1Stream(final SHA1 sha1, byte[] buffer, final int total) throws NoSuchAlgorithmException {
Stuart McCullochf3173222012-06-07 21:57:32 +0000275 ByteArrayInputStream in = new ByteArrayInputStream(buffer);
276 InflaterInputStream iin = new InflaterInputStream(in) {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000277 int count = 0;
278 final MessageDigest digestx = MessageDigest.getInstance(ALGORITHM);
279 final AtomicBoolean calculated = new AtomicBoolean();
280
281 @Override
282 public int read(byte[] data, int offset, int length) throws IOException {
Stuart McCullochf3173222012-06-07 21:57:32 +0000283 int size = super.read(data, offset, length);
284 if (size <= 0)
285 eof();
286 else {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000287 count += size;
Stuart McCullochf3173222012-06-07 21:57:32 +0000288 this.digestx.update(data, offset, size);
289 }
290 return size;
291 }
292
293 public int read() throws IOException {
294 int c = super.read();
295 if (c < 0)
296 eof();
297 else {
298 count++;
299 this.digestx.update((byte) c);
300 }
301 return c;
302 }
303
304 void eof() throws IOException {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000305 if (calculated.getAndSet(true))
Stuart McCullochf3173222012-06-07 21:57:32 +0000306 return;
Stuart McCulloch4482c702012-06-15 13:27:53 +0000307
308 if (count != total)
309 throw new IOException("Counts do not match. Expected to read: " + total + " Actually read: "
310 + count);
311
Stuart McCullochf3173222012-06-07 21:57:32 +0000312 SHA1 calculatedSha1 = new SHA1(digestx.digest());
313 if (!sha1.equals(calculatedSha1))
Stuart McCulloch4482c702012-06-15 13:27:53 +0000314 throw (new IOException("SHA1 caclulated and asked mismatch, asked: " + sha1 + ", \nfound: "
315 + calculatedSha1));
Stuart McCullochf3173222012-06-07 21:57:32 +0000316 }
317
318 public void close() throws IOException {
319 eof();
320 super.close();
321 }
322 };
323 return iin;
324 }
325
326 /**
327 * Update a record in the store, assuming the store is at the right
328 * position.
329 *
330 * @param sha1
331 * The checksum
332 * @param compressed
333 * The compressed length
334 * @param totalLength
335 * The uncompressed length
336 * @throws IOException
337 * The exception
338 */
339 private void update(byte[] sha1, byte[] compressed, int totalLength) throws IOException {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000340 // System.err.println("pos: " + store.getFilePointer());
Stuart McCullochf3173222012-06-07 21:57:32 +0000341 store.write(CAFE); // 00-03 Signature
342 store.writeInt(0); // 04-07 Flags for the future
343 store.writeInt(compressed.length); // 08-11 Length deflated data
344 store.writeInt(totalLength); // 12-15 Length
345 store.write(sha1); // 16-35
Stuart McCulloch4482c702012-06-15 13:27:53 +0000346 store.writeShort(checksum(0, compressed.length, totalLength, sha1));
Stuart McCullochf3173222012-06-07 21:57:32 +0000347 store.write(compressed);
348 channel.force(false);
349 }
350
Stuart McCulloch2b3253e2012-06-17 20:38:35 +0000351 short checksum(int flags, int compressedLength, int totalLength, byte[] sha1) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000352 CRC32 crc = new CRC32();
353 crc.update(flags);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000354 crc.update(flags >> 8);
355 crc.update(flags >> 16);
356 crc.update(flags >> 24);
Stuart McCullochf3173222012-06-07 21:57:32 +0000357 crc.update(compressedLength);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000358 crc.update(compressedLength >> 8);
359 crc.update(compressedLength >> 16);
360 crc.update(compressedLength >> 24);
Stuart McCullochf3173222012-06-07 21:57:32 +0000361 crc.update(totalLength);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000362 crc.update(totalLength >> 8);
363 crc.update(totalLength >> 16);
364 crc.update(totalLength >> 24);
Stuart McCullochf3173222012-06-07 21:57:32 +0000365 crc.update(sha1);
366 return (short) crc.getValue();
367 }
368
369 public Iterator<SHA1> iterator() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000370
Stuart McCullochf3173222012-06-07 21:57:32 +0000371 return new Iterator<SHA1>() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000372 long position = 0x100;
373
Stuart McCullochf3173222012-06-07 21:57:32 +0000374 public boolean hasNext() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000375 synchronized (store) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000376 try {
377 return position < store.length();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000378 }
379 catch (IOException e) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000380 throw new RuntimeException(e);
381 }
382 }
383 }
384
385 public SHA1 next() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000386 synchronized (store) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000387 try {
388 store.seek(position);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000389 byte[] signature = new byte[4];
Stuart McCullochf3173222012-06-07 21:57:32 +0000390 store.readFully(signature);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000391 if (!Arrays.equals(CAFE, signature))
Stuart McCullochf3173222012-06-07 21:57:32 +0000392 throw new IllegalArgumentException("No signature");
393
394 int flags = store.readInt();
395 int compressedLength = store.readInt();
396 int totalLength = store.readInt();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000397 byte[] sha1 = new byte[KEYLENGTH];
Stuart McCullochf3173222012-06-07 21:57:32 +0000398 store.readFully(sha1);
399 short crc = store.readShort();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000400 if (crc != checksum(flags, compressedLength, totalLength, sha1))
Stuart McCullochf3173222012-06-07 21:57:32 +0000401 throw new IllegalArgumentException("Header checksum fails");
Stuart McCulloch4482c702012-06-15 13:27:53 +0000402
Stuart McCullochf3173222012-06-07 21:57:32 +0000403 position += HEADERLENGTH + compressedLength;
404 return new SHA1(sha1);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000405 }
406 catch (IOException e) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000407 throw new RuntimeException(e);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000408 }
Stuart McCullochf3173222012-06-07 21:57:32 +0000409 }
410 }
411
412 public void remove() {
413 throw new UnsupportedOperationException("Remvoe not supported, CAFS is write once");
Stuart McCulloch4482c702012-06-15 13:27:53 +0000414 }
Stuart McCullochf3173222012-06-07 21:57:32 +0000415 };
416 }
417
Stuart McCullochf3173222012-06-07 21:57:32 +0000418 public boolean isEmpty() throws IOException {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000419 synchronized (store) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000420 return store.getFilePointer() <= 256;
421 }
422 }
423}