blob: c89c5f2fed0a23b0511b150d012a54640e47feda [file] [log] [blame]
Stuart McCullochbb014372012-06-07 21:57:32 +00001package aQute.libg.cafs;
2
3import static aQute.lib.io.IO.*;
4
5import java.io.*;
6import java.nio.channels.*;
7import java.security.*;
8import java.util.*;
9import java.util.concurrent.atomic.*;
10import java.util.zip.*;
11
12import aQute.lib.index.*;
13import aQute.libg.cryptography.*;
14
15/**
16 * CAFS implements a SHA-1 based file store. The basic idea is that every file
17 * in the universe has a unique SHA-1. Hard to believe but people smarter than
18 * me have come to that conclusion. This class maintains a compressed store of
19 * SHA-1 identified files. So if you have the SHA-1, you can get the contents.
Stuart McCullochbb014372012-06-07 21:57:32 +000020 * This makes it easy to store a SHA-1 instead of the whole file or maintain a
21 * naming scheme. An added advantage is that it is always easy to verify you get
22 * the right stuff. The SHA-1 Content Addressable File Store is the core
23 * underlying idea in Git.
24 */
25public class CAFS implements Closeable, Iterable<SHA1> {
Stuart McCullochd4826102012-06-26 16:34:24 +000026 final static byte[] CAFS;
27 final static byte[] CAFE;
Stuart McCullochbb014372012-06-07 21:57:32 +000028 final static String INDEXFILE = "index.idx";
29 final static String STOREFILE = "store.cafs";
30 final static String ALGORITHM = "SHA-1";
31 final static int KEYLENGTH = 20;
32 final static int HEADERLENGTH = 4 // CAFS
33 + 4 // flags
34 + 4 // compressed length
35 + 4 // uncompressed length
Stuart McCulloch2286f232012-06-15 13:27:53 +000036 + KEYLENGTH // key
Stuart McCullochbb014372012-06-07 21:57:32 +000037 + 2 // header checksum
Stuart McCulloch2286f232012-06-15 13:27:53 +000038 ;
Stuart McCullochbb014372012-06-07 21:57:32 +000039
40 final File home;
41 Index index;
42 RandomAccessFile store;
43 FileChannel channel;
44
Stuart McCullochd4826102012-06-26 16:34:24 +000045 static {
46 try {
47 CAFS = "CAFS".getBytes("UTF-8");
48 CAFE = "CAFE".getBytes("UTF-8");
49 } catch (Throwable e) {
50 throw new ExceptionInInitializerError(e);
51 }
52 }
53
Stuart McCullochbb014372012-06-07 21:57:32 +000054 /**
55 * Constructor for a Content Addressable File Store
56 *
57 * @param home
58 * @param create
59 * @throws Exception
60 */
61 public CAFS(File home, boolean create) throws Exception {
62 this.home = home;
63 if (!home.isDirectory()) {
64 if (create) {
Stuart McCulloch55d4dfe2012-08-07 10:57:21 +000065 if (home.exists()) {
66 throw new IOException(home + " is not a directory");
67 }
68 if (!home.mkdirs()) {
69 throw new IOException("Could not create directory " + home);
70 }
Stuart McCullochbb014372012-06-07 21:57:32 +000071 } else
72 throw new IllegalArgumentException("CAFS requires a directory with create=false");
73 }
74
75 index = new Index(new File(home, INDEXFILE), KEYLENGTH);
76 store = new RandomAccessFile(new File(home, STOREFILE), "rw");
77 channel = store.getChannel();
78 if (store.length() < 0x100) {
79 if (create) {
80 store.write(CAFS);
81 for (int i = 1; i < 64; i++)
82 store.writeInt(0);
83 channel.force(true);
84 } else
Stuart McCulloch2286f232012-06-15 13:27:53 +000085 throw new IllegalArgumentException("Invalid store file, length is too short " + store);
Stuart McCullochbb014372012-06-07 21:57:32 +000086 System.err.println(store.length());
87 }
88 store.seek(0);
89 if (!verifySignature(store, CAFS))
90 throw new IllegalArgumentException("Not a valid signature: CAFS at start of file");
91
92 }
93
94 /**
95 * Store an input stream in the CAFS while calculating and returning the
96 * SHA-1 code.
97 *
98 * @param in
99 * The input stream to store.
100 * @return The SHA-1 code.
101 * @throws Exception
102 * if anything goes wrong
103 */
104 public SHA1 write(InputStream in) throws Exception {
105
106 Deflater deflater = new Deflater();
107 MessageDigest md = MessageDigest.getInstance(ALGORITHM);
108 DigestInputStream din = new DigestInputStream(in, md);
109 ByteArrayOutputStream bout = new ByteArrayOutputStream();
110 DeflaterOutputStream dout = new DeflaterOutputStream(bout, deflater);
111 copy(din, dout);
112
113 synchronized (store) {
114 // First check if it already exists
115 SHA1 sha1 = new SHA1(md.digest());
Stuart McCulloch2286f232012-06-15 13:27:53 +0000116
Stuart McCullochbb014372012-06-07 21:57:32 +0000117 long search = index.search(sha1.digest());
118 if (search > 0)
119 return sha1;
120
121 byte[] compressed = bout.toByteArray();
122
123 // we need to append this file to our store,
124 // which requires a lock. However, we are in a race
125 // so others can get the lock between us getting
126 // the length and someone else getting the lock.
127 // So we must verify after we get the lock that the
128 // length was unchanged.
129 FileLock lock = null;
130 try {
131 long insertPoint;
132 int recordLength = compressed.length + HEADERLENGTH;
133
134 while (true) {
135 insertPoint = store.length();
136 lock = channel.lock(insertPoint, recordLength, false);
137
138 if (store.length() == insertPoint)
139 break;
140
141 // We got the wrong lock, someone else
142 // got in between reading the length
143 // and locking
144 lock.release();
145 }
146 int totalLength = deflater.getTotalIn();
147 store.seek(insertPoint);
148 update(sha1.digest(), compressed, totalLength);
149 index.insert(sha1.digest(), insertPoint);
150 return sha1;
Stuart McCulloch2286f232012-06-15 13:27:53 +0000151 }
152 finally {
Stuart McCullochbb014372012-06-07 21:57:32 +0000153 if (lock != null)
154 lock.release();
155 }
156 }
157 }
158
159 /**
160 * Read the contents of a sha 1 key.
161 *
162 * @param sha1
163 * The key
164 * @return An Input Stream on the content or null of key not found
165 * @throws Exception
166 */
167 public InputStream read(final SHA1 sha1) throws Exception {
168 synchronized (store) {
169 long offset = index.search(sha1.digest());
170 if (offset < 0)
171 return null;
172
173 byte[] readSha1;
174 byte[] buffer;
175 store.seek(offset);
176 if (!verifySignature(store, CAFE))
177 throw new IllegalArgumentException("No signature");
178
Stuart McCulloch2286f232012-06-15 13:27:53 +0000179 int flags = store.readInt();
Stuart McCullochbb014372012-06-07 21:57:32 +0000180 int compressedLength = store.readInt();
181 int uncompressedLength = store.readInt();
182 readSha1 = new byte[KEYLENGTH];
183 store.read(readSha1);
184 SHA1 rsha1 = new SHA1(readSha1);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000185
Stuart McCullochbb014372012-06-07 21:57:32 +0000186 if (!sha1.equals(rsha1))
187 throw new IOException("SHA1 read and asked mismatch: " + sha1 + " " + rsha1);
188
189 short crc = store.readShort(); // Read CRC
Stuart McCulloch2286f232012-06-15 13:27:53 +0000190 if (crc != checksum(flags, compressedLength, uncompressedLength, readSha1))
Stuart McCullochbb014372012-06-07 21:57:32 +0000191 throw new IllegalArgumentException("Invalid header checksum: " + sha1);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000192
Stuart McCullochbb014372012-06-07 21:57:32 +0000193 buffer = new byte[compressedLength];
194 store.readFully(buffer);
195 return getSha1Stream(sha1, buffer, uncompressedLength);
196 }
197 }
198
199 public boolean exists(byte[] sha1) throws Exception {
200 return index.search(sha1) >= 0;
201 }
202
203 public void reindex() throws Exception {
204 long length;
205 synchronized (store) {
206 length = store.length();
207 if (length < 0x100)
Stuart McCulloch2286f232012-06-15 13:27:53 +0000208 throw new IllegalArgumentException("Store file is too small, need to be at least 256 bytes: " + store);
Stuart McCullochbb014372012-06-07 21:57:32 +0000209 }
210
211 RandomAccessFile in = new RandomAccessFile(new File(home, STOREFILE), "r");
212 try {
213 byte[] signature = new byte[4];
214 in.readFully(signature);
215 if (!Arrays.equals(CAFS, signature))
216 throw new IllegalArgumentException("Store file does not start with CAFS: " + in);
217
218 in.seek(0x100);
219 File ixf = new File(home, "index.new");
220 Index index = new Index(ixf, KEYLENGTH);
221
222 while (in.getFilePointer() < length) {
223 long entry = in.getFilePointer();
224 SHA1 sha1 = verifyEntry(in);
225 index.insert(sha1.digest(), entry);
226 }
227
228 synchronized (store) {
229 index.close();
230 File indexFile = new File(home, INDEXFILE);
231 ixf.renameTo(indexFile);
232 this.index = new Index(indexFile, KEYLENGTH);
233 }
Stuart McCulloch2286f232012-06-15 13:27:53 +0000234 }
235 finally {
Stuart McCullochbb014372012-06-07 21:57:32 +0000236 in.close();
237 }
238 }
239
240 public void close() throws IOException {
241 synchronized (store) {
242 try {
243 store.close();
Stuart McCulloch2286f232012-06-15 13:27:53 +0000244 }
245 finally {
Stuart McCullochbb014372012-06-07 21:57:32 +0000246 index.close();
247 }
248 }
249 }
250
251 private SHA1 verifyEntry(RandomAccessFile in) throws IOException, NoSuchAlgorithmException {
252 byte[] signature = new byte[4];
253 in.readFully(signature);
254 if (!Arrays.equals(CAFE, signature))
255 throw new IllegalArgumentException("File is corrupted: " + in);
256
257 /* int flags = */in.readInt();
258 int compressedSize = in.readInt();
259 int uncompressedSize = in.readInt();
260 byte[] key = new byte[KEYLENGTH];
261 in.readFully(key);
262 SHA1 sha1 = new SHA1(key);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000263
Stuart McCullochbb014372012-06-07 21:57:32 +0000264 byte[] buffer = new byte[compressedSize];
265 in.readFully(buffer);
266
267 InputStream xin = getSha1Stream(sha1, buffer, uncompressedSize);
268 xin.skip(uncompressedSize);
269 xin.close();
270 return sha1;
271 }
272
273 private boolean verifySignature(DataInput din, byte[] org) throws IOException {
274 byte[] read = new byte[org.length];
275 din.readFully(read);
276 return Arrays.equals(read, org);
277 }
278
Stuart McCulloch2286f232012-06-15 13:27:53 +0000279 private InputStream getSha1Stream(final SHA1 sha1, byte[] buffer, final int total) throws NoSuchAlgorithmException {
Stuart McCullochbb014372012-06-07 21:57:32 +0000280 ByteArrayInputStream in = new ByteArrayInputStream(buffer);
281 InflaterInputStream iin = new InflaterInputStream(in) {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000282 int count = 0;
283 final MessageDigest digestx = MessageDigest.getInstance(ALGORITHM);
284 final AtomicBoolean calculated = new AtomicBoolean();
285
286 @Override
287 public int read(byte[] data, int offset, int length) throws IOException {
Stuart McCullochbb014372012-06-07 21:57:32 +0000288 int size = super.read(data, offset, length);
289 if (size <= 0)
290 eof();
291 else {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000292 count += size;
Stuart McCullochbb014372012-06-07 21:57:32 +0000293 this.digestx.update(data, offset, size);
294 }
295 return size;
296 }
297
Stuart McCulloch55d4dfe2012-08-07 10:57:21 +0000298 @Override
Stuart McCullochbb014372012-06-07 21:57:32 +0000299 public int read() throws IOException {
300 int c = super.read();
301 if (c < 0)
302 eof();
303 else {
304 count++;
305 this.digestx.update((byte) c);
306 }
307 return c;
308 }
309
310 void eof() throws IOException {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000311 if (calculated.getAndSet(true))
Stuart McCullochbb014372012-06-07 21:57:32 +0000312 return;
Stuart McCulloch2286f232012-06-15 13:27:53 +0000313
314 if (count != total)
315 throw new IOException("Counts do not match. Expected to read: " + total + " Actually read: "
316 + count);
317
Stuart McCullochbb014372012-06-07 21:57:32 +0000318 SHA1 calculatedSha1 = new SHA1(digestx.digest());
319 if (!sha1.equals(calculatedSha1))
Stuart McCulloch2286f232012-06-15 13:27:53 +0000320 throw (new IOException("SHA1 caclulated and asked mismatch, asked: " + sha1 + ", \nfound: "
321 + calculatedSha1));
Stuart McCullochbb014372012-06-07 21:57:32 +0000322 }
323
Stuart McCulloch55d4dfe2012-08-07 10:57:21 +0000324 @Override
Stuart McCullochbb014372012-06-07 21:57:32 +0000325 public void close() throws IOException {
326 eof();
327 super.close();
328 }
329 };
330 return iin;
331 }
332
333 /**
334 * Update a record in the store, assuming the store is at the right
335 * position.
336 *
337 * @param sha1
338 * The checksum
339 * @param compressed
340 * The compressed length
341 * @param totalLength
342 * The uncompressed length
343 * @throws IOException
344 * The exception
345 */
346 private void update(byte[] sha1, byte[] compressed, int totalLength) throws IOException {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000347 // System.err.println("pos: " + store.getFilePointer());
Stuart McCullochbb014372012-06-07 21:57:32 +0000348 store.write(CAFE); // 00-03 Signature
349 store.writeInt(0); // 04-07 Flags for the future
350 store.writeInt(compressed.length); // 08-11 Length deflated data
351 store.writeInt(totalLength); // 12-15 Length
352 store.write(sha1); // 16-35
Stuart McCulloch2286f232012-06-15 13:27:53 +0000353 store.writeShort(checksum(0, compressed.length, totalLength, sha1));
Stuart McCullochbb014372012-06-07 21:57:32 +0000354 store.write(compressed);
355 channel.force(false);
356 }
357
Stuart McCullochffa8aaf2012-06-17 20:38:35 +0000358 short checksum(int flags, int compressedLength, int totalLength, byte[] sha1) {
Stuart McCullochbb014372012-06-07 21:57:32 +0000359 CRC32 crc = new CRC32();
360 crc.update(flags);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000361 crc.update(flags >> 8);
362 crc.update(flags >> 16);
363 crc.update(flags >> 24);
Stuart McCullochbb014372012-06-07 21:57:32 +0000364 crc.update(compressedLength);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000365 crc.update(compressedLength >> 8);
366 crc.update(compressedLength >> 16);
367 crc.update(compressedLength >> 24);
Stuart McCullochbb014372012-06-07 21:57:32 +0000368 crc.update(totalLength);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000369 crc.update(totalLength >> 8);
370 crc.update(totalLength >> 16);
371 crc.update(totalLength >> 24);
Stuart McCullochbb014372012-06-07 21:57:32 +0000372 crc.update(sha1);
373 return (short) crc.getValue();
374 }
375
376 public Iterator<SHA1> iterator() {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000377
Stuart McCullochbb014372012-06-07 21:57:32 +0000378 return new Iterator<SHA1>() {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000379 long position = 0x100;
380
Stuart McCullochbb014372012-06-07 21:57:32 +0000381 public boolean hasNext() {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000382 synchronized (store) {
Stuart McCullochbb014372012-06-07 21:57:32 +0000383 try {
384 return position < store.length();
Stuart McCulloch2286f232012-06-15 13:27:53 +0000385 }
386 catch (IOException e) {
Stuart McCullochbb014372012-06-07 21:57:32 +0000387 throw new RuntimeException(e);
388 }
389 }
390 }
391
392 public SHA1 next() {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000393 synchronized (store) {
Stuart McCullochbb014372012-06-07 21:57:32 +0000394 try {
395 store.seek(position);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000396 byte[] signature = new byte[4];
Stuart McCullochbb014372012-06-07 21:57:32 +0000397 store.readFully(signature);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000398 if (!Arrays.equals(CAFE, signature))
Stuart McCullochbb014372012-06-07 21:57:32 +0000399 throw new IllegalArgumentException("No signature");
400
401 int flags = store.readInt();
402 int compressedLength = store.readInt();
403 int totalLength = store.readInt();
Stuart McCulloch2286f232012-06-15 13:27:53 +0000404 byte[] sha1 = new byte[KEYLENGTH];
Stuart McCullochbb014372012-06-07 21:57:32 +0000405 store.readFully(sha1);
406 short crc = store.readShort();
Stuart McCulloch2286f232012-06-15 13:27:53 +0000407 if (crc != checksum(flags, compressedLength, totalLength, sha1))
Stuart McCullochbb014372012-06-07 21:57:32 +0000408 throw new IllegalArgumentException("Header checksum fails");
Stuart McCulloch2286f232012-06-15 13:27:53 +0000409
Stuart McCullochbb014372012-06-07 21:57:32 +0000410 position += HEADERLENGTH + compressedLength;
411 return new SHA1(sha1);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000412 }
413 catch (IOException e) {
Stuart McCullochbb014372012-06-07 21:57:32 +0000414 throw new RuntimeException(e);
Stuart McCulloch2286f232012-06-15 13:27:53 +0000415 }
Stuart McCullochbb014372012-06-07 21:57:32 +0000416 }
417 }
418
419 public void remove() {
420 throw new UnsupportedOperationException("Remvoe not supported, CAFS is write once");
Stuart McCulloch2286f232012-06-15 13:27:53 +0000421 }
Stuart McCullochbb014372012-06-07 21:57:32 +0000422 };
423 }
424
Stuart McCullochbb014372012-06-07 21:57:32 +0000425 public boolean isEmpty() throws IOException {
Stuart McCulloch2286f232012-06-15 13:27:53 +0000426 synchronized (store) {
Stuart McCullochbb014372012-06-07 21:57:32 +0000427 return store.getFilePointer() <= 256;
428 }
429 }
430}