blob: 38e67beca258d88c0f645345831b149eb5ec30ef [file] [log] [blame]
Stuart McCullochf3173222012-06-07 21:57:32 +00001package aQute.libg.cafs;
2
3import static aQute.lib.io.IO.*;
4
5import java.io.*;
6import java.nio.channels.*;
7import java.security.*;
8import java.util.*;
9import java.util.concurrent.atomic.*;
10import java.util.zip.*;
11
12import aQute.lib.index.*;
13import aQute.libg.cryptography.*;
14
15/**
16 * CAFS implements a SHA-1 based file store. The basic idea is that every file
17 * in the universe has a unique SHA-1. Hard to believe but people smarter than
18 * me have come to that conclusion. This class maintains a compressed store of
19 * SHA-1 identified files. So if you have the SHA-1, you can get the contents.
Stuart McCullochf3173222012-06-07 21:57:32 +000020 * This makes it easy to store a SHA-1 instead of the whole file or maintain a
21 * naming scheme. An added advantage is that it is always easy to verify you get
22 * the right stuff. The SHA-1 Content Addressable File Store is the core
23 * underlying idea in Git.
24 */
25public class CAFS implements Closeable, Iterable<SHA1> {
26 final static byte[] CAFS = "CAFS".getBytes();
27 final static byte[] CAFE = "CAFE".getBytes();
28 final static String INDEXFILE = "index.idx";
29 final static String STOREFILE = "store.cafs";
30 final static String ALGORITHM = "SHA-1";
31 final static int KEYLENGTH = 20;
32 final static int HEADERLENGTH = 4 // CAFS
33 + 4 // flags
34 + 4 // compressed length
35 + 4 // uncompressed length
Stuart McCulloch4482c702012-06-15 13:27:53 +000036 + KEYLENGTH // key
Stuart McCullochf3173222012-06-07 21:57:32 +000037 + 2 // header checksum
Stuart McCulloch4482c702012-06-15 13:27:53 +000038 ;
Stuart McCullochf3173222012-06-07 21:57:32 +000039
40 final File home;
41 Index index;
42 RandomAccessFile store;
43 FileChannel channel;
44
45 /**
46 * Constructor for a Content Addressable File Store
47 *
48 * @param home
49 * @param create
50 * @throws Exception
51 */
52 public CAFS(File home, boolean create) throws Exception {
53 this.home = home;
54 if (!home.isDirectory()) {
55 if (create) {
56 home.mkdirs();
57 } else
58 throw new IllegalArgumentException("CAFS requires a directory with create=false");
59 }
60
61 index = new Index(new File(home, INDEXFILE), KEYLENGTH);
62 store = new RandomAccessFile(new File(home, STOREFILE), "rw");
63 channel = store.getChannel();
64 if (store.length() < 0x100) {
65 if (create) {
66 store.write(CAFS);
67 for (int i = 1; i < 64; i++)
68 store.writeInt(0);
69 channel.force(true);
70 } else
Stuart McCulloch4482c702012-06-15 13:27:53 +000071 throw new IllegalArgumentException("Invalid store file, length is too short " + store);
Stuart McCullochf3173222012-06-07 21:57:32 +000072 System.err.println(store.length());
73 }
74 store.seek(0);
75 if (!verifySignature(store, CAFS))
76 throw new IllegalArgumentException("Not a valid signature: CAFS at start of file");
77
78 }
79
80 /**
81 * Store an input stream in the CAFS while calculating and returning the
82 * SHA-1 code.
83 *
84 * @param in
85 * The input stream to store.
86 * @return The SHA-1 code.
87 * @throws Exception
88 * if anything goes wrong
89 */
90 public SHA1 write(InputStream in) throws Exception {
91
92 Deflater deflater = new Deflater();
93 MessageDigest md = MessageDigest.getInstance(ALGORITHM);
94 DigestInputStream din = new DigestInputStream(in, md);
95 ByteArrayOutputStream bout = new ByteArrayOutputStream();
96 DeflaterOutputStream dout = new DeflaterOutputStream(bout, deflater);
97 copy(din, dout);
98
99 synchronized (store) {
100 // First check if it already exists
101 SHA1 sha1 = new SHA1(md.digest());
Stuart McCulloch4482c702012-06-15 13:27:53 +0000102
Stuart McCullochf3173222012-06-07 21:57:32 +0000103 long search = index.search(sha1.digest());
104 if (search > 0)
105 return sha1;
106
107 byte[] compressed = bout.toByteArray();
108
109 // we need to append this file to our store,
110 // which requires a lock. However, we are in a race
111 // so others can get the lock between us getting
112 // the length and someone else getting the lock.
113 // So we must verify after we get the lock that the
114 // length was unchanged.
115 FileLock lock = null;
116 try {
117 long insertPoint;
118 int recordLength = compressed.length + HEADERLENGTH;
119
120 while (true) {
121 insertPoint = store.length();
122 lock = channel.lock(insertPoint, recordLength, false);
123
124 if (store.length() == insertPoint)
125 break;
126
127 // We got the wrong lock, someone else
128 // got in between reading the length
129 // and locking
130 lock.release();
131 }
132 int totalLength = deflater.getTotalIn();
133 store.seek(insertPoint);
134 update(sha1.digest(), compressed, totalLength);
135 index.insert(sha1.digest(), insertPoint);
136 return sha1;
Stuart McCulloch4482c702012-06-15 13:27:53 +0000137 }
138 finally {
Stuart McCullochf3173222012-06-07 21:57:32 +0000139 if (lock != null)
140 lock.release();
141 }
142 }
143 }
144
145 /**
146 * Read the contents of a sha 1 key.
147 *
148 * @param sha1
149 * The key
150 * @return An Input Stream on the content or null of key not found
151 * @throws Exception
152 */
153 public InputStream read(final SHA1 sha1) throws Exception {
154 synchronized (store) {
155 long offset = index.search(sha1.digest());
156 if (offset < 0)
157 return null;
158
159 byte[] readSha1;
160 byte[] buffer;
161 store.seek(offset);
162 if (!verifySignature(store, CAFE))
163 throw new IllegalArgumentException("No signature");
164
Stuart McCulloch4482c702012-06-15 13:27:53 +0000165 int flags = store.readInt();
Stuart McCullochf3173222012-06-07 21:57:32 +0000166 int compressedLength = store.readInt();
167 int uncompressedLength = store.readInt();
168 readSha1 = new byte[KEYLENGTH];
169 store.read(readSha1);
170 SHA1 rsha1 = new SHA1(readSha1);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000171
Stuart McCullochf3173222012-06-07 21:57:32 +0000172 if (!sha1.equals(rsha1))
173 throw new IOException("SHA1 read and asked mismatch: " + sha1 + " " + rsha1);
174
175 short crc = store.readShort(); // Read CRC
Stuart McCulloch4482c702012-06-15 13:27:53 +0000176 if (crc != checksum(flags, compressedLength, uncompressedLength, readSha1))
Stuart McCullochf3173222012-06-07 21:57:32 +0000177 throw new IllegalArgumentException("Invalid header checksum: " + sha1);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000178
Stuart McCullochf3173222012-06-07 21:57:32 +0000179 buffer = new byte[compressedLength];
180 store.readFully(buffer);
181 return getSha1Stream(sha1, buffer, uncompressedLength);
182 }
183 }
184
185 public boolean exists(byte[] sha1) throws Exception {
186 return index.search(sha1) >= 0;
187 }
188
189 public void reindex() throws Exception {
190 long length;
191 synchronized (store) {
192 length = store.length();
193 if (length < 0x100)
Stuart McCulloch4482c702012-06-15 13:27:53 +0000194 throw new IllegalArgumentException("Store file is too small, need to be at least 256 bytes: " + store);
Stuart McCullochf3173222012-06-07 21:57:32 +0000195 }
196
197 RandomAccessFile in = new RandomAccessFile(new File(home, STOREFILE), "r");
198 try {
199 byte[] signature = new byte[4];
200 in.readFully(signature);
201 if (!Arrays.equals(CAFS, signature))
202 throw new IllegalArgumentException("Store file does not start with CAFS: " + in);
203
204 in.seek(0x100);
205 File ixf = new File(home, "index.new");
206 Index index = new Index(ixf, KEYLENGTH);
207
208 while (in.getFilePointer() < length) {
209 long entry = in.getFilePointer();
210 SHA1 sha1 = verifyEntry(in);
211 index.insert(sha1.digest(), entry);
212 }
213
214 synchronized (store) {
215 index.close();
216 File indexFile = new File(home, INDEXFILE);
217 ixf.renameTo(indexFile);
218 this.index = new Index(indexFile, KEYLENGTH);
219 }
Stuart McCulloch4482c702012-06-15 13:27:53 +0000220 }
221 finally {
Stuart McCullochf3173222012-06-07 21:57:32 +0000222 in.close();
223 }
224 }
225
226 public void close() throws IOException {
227 synchronized (store) {
228 try {
229 store.close();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000230 }
231 finally {
Stuart McCullochf3173222012-06-07 21:57:32 +0000232 index.close();
233 }
234 }
235 }
236
237 private SHA1 verifyEntry(RandomAccessFile in) throws IOException, NoSuchAlgorithmException {
238 byte[] signature = new byte[4];
239 in.readFully(signature);
240 if (!Arrays.equals(CAFE, signature))
241 throw new IllegalArgumentException("File is corrupted: " + in);
242
243 /* int flags = */in.readInt();
244 int compressedSize = in.readInt();
245 int uncompressedSize = in.readInt();
246 byte[] key = new byte[KEYLENGTH];
247 in.readFully(key);
248 SHA1 sha1 = new SHA1(key);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000249
Stuart McCullochf3173222012-06-07 21:57:32 +0000250 byte[] buffer = new byte[compressedSize];
251 in.readFully(buffer);
252
253 InputStream xin = getSha1Stream(sha1, buffer, uncompressedSize);
254 xin.skip(uncompressedSize);
255 xin.close();
256 return sha1;
257 }
258
259 private boolean verifySignature(DataInput din, byte[] org) throws IOException {
260 byte[] read = new byte[org.length];
261 din.readFully(read);
262 return Arrays.equals(read, org);
263 }
264
Stuart McCulloch4482c702012-06-15 13:27:53 +0000265 private InputStream getSha1Stream(final SHA1 sha1, byte[] buffer, final int total) throws NoSuchAlgorithmException {
Stuart McCullochf3173222012-06-07 21:57:32 +0000266 ByteArrayInputStream in = new ByteArrayInputStream(buffer);
267 InflaterInputStream iin = new InflaterInputStream(in) {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000268 int count = 0;
269 final MessageDigest digestx = MessageDigest.getInstance(ALGORITHM);
270 final AtomicBoolean calculated = new AtomicBoolean();
271
272 @Override
273 public int read(byte[] data, int offset, int length) throws IOException {
Stuart McCullochf3173222012-06-07 21:57:32 +0000274 int size = super.read(data, offset, length);
275 if (size <= 0)
276 eof();
277 else {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000278 count += size;
Stuart McCullochf3173222012-06-07 21:57:32 +0000279 this.digestx.update(data, offset, size);
280 }
281 return size;
282 }
283
284 public int read() throws IOException {
285 int c = super.read();
286 if (c < 0)
287 eof();
288 else {
289 count++;
290 this.digestx.update((byte) c);
291 }
292 return c;
293 }
294
295 void eof() throws IOException {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000296 if (calculated.getAndSet(true))
Stuart McCullochf3173222012-06-07 21:57:32 +0000297 return;
Stuart McCulloch4482c702012-06-15 13:27:53 +0000298
299 if (count != total)
300 throw new IOException("Counts do not match. Expected to read: " + total + " Actually read: "
301 + count);
302
Stuart McCullochf3173222012-06-07 21:57:32 +0000303 SHA1 calculatedSha1 = new SHA1(digestx.digest());
304 if (!sha1.equals(calculatedSha1))
Stuart McCulloch4482c702012-06-15 13:27:53 +0000305 throw (new IOException("SHA1 caclulated and asked mismatch, asked: " + sha1 + ", \nfound: "
306 + calculatedSha1));
Stuart McCullochf3173222012-06-07 21:57:32 +0000307 }
308
309 public void close() throws IOException {
310 eof();
311 super.close();
312 }
313 };
314 return iin;
315 }
316
317 /**
318 * Update a record in the store, assuming the store is at the right
319 * position.
320 *
321 * @param sha1
322 * The checksum
323 * @param compressed
324 * The compressed length
325 * @param totalLength
326 * The uncompressed length
327 * @throws IOException
328 * The exception
329 */
330 private void update(byte[] sha1, byte[] compressed, int totalLength) throws IOException {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000331 // System.err.println("pos: " + store.getFilePointer());
Stuart McCullochf3173222012-06-07 21:57:32 +0000332 store.write(CAFE); // 00-03 Signature
333 store.writeInt(0); // 04-07 Flags for the future
334 store.writeInt(compressed.length); // 08-11 Length deflated data
335 store.writeInt(totalLength); // 12-15 Length
336 store.write(sha1); // 16-35
Stuart McCulloch4482c702012-06-15 13:27:53 +0000337 store.writeShort(checksum(0, compressed.length, totalLength, sha1));
Stuart McCullochf3173222012-06-07 21:57:32 +0000338 store.write(compressed);
339 channel.force(false);
340 }
341
Stuart McCullochf3173222012-06-07 21:57:32 +0000342 private short checksum(int flags, int compressedLength, int totalLength, byte[] sha1) {
343 CRC32 crc = new CRC32();
344 crc.update(flags);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000345 crc.update(flags >> 8);
346 crc.update(flags >> 16);
347 crc.update(flags >> 24);
Stuart McCullochf3173222012-06-07 21:57:32 +0000348 crc.update(compressedLength);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000349 crc.update(compressedLength >> 8);
350 crc.update(compressedLength >> 16);
351 crc.update(compressedLength >> 24);
Stuart McCullochf3173222012-06-07 21:57:32 +0000352 crc.update(totalLength);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000353 crc.update(totalLength >> 8);
354 crc.update(totalLength >> 16);
355 crc.update(totalLength >> 24);
Stuart McCullochf3173222012-06-07 21:57:32 +0000356 crc.update(sha1);
357 return (short) crc.getValue();
358 }
359
360 public Iterator<SHA1> iterator() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000361
Stuart McCullochf3173222012-06-07 21:57:32 +0000362 return new Iterator<SHA1>() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000363 long position = 0x100;
364
Stuart McCullochf3173222012-06-07 21:57:32 +0000365 public boolean hasNext() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000366 synchronized (store) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000367 try {
368 return position < store.length();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000369 }
370 catch (IOException e) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000371 throw new RuntimeException(e);
372 }
373 }
374 }
375
376 public SHA1 next() {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000377 synchronized (store) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000378 try {
379 store.seek(position);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000380 byte[] signature = new byte[4];
Stuart McCullochf3173222012-06-07 21:57:32 +0000381 store.readFully(signature);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000382 if (!Arrays.equals(CAFE, signature))
Stuart McCullochf3173222012-06-07 21:57:32 +0000383 throw new IllegalArgumentException("No signature");
384
385 int flags = store.readInt();
386 int compressedLength = store.readInt();
387 int totalLength = store.readInt();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000388 byte[] sha1 = new byte[KEYLENGTH];
Stuart McCullochf3173222012-06-07 21:57:32 +0000389 store.readFully(sha1);
390 short crc = store.readShort();
Stuart McCulloch4482c702012-06-15 13:27:53 +0000391 if (crc != checksum(flags, compressedLength, totalLength, sha1))
Stuart McCullochf3173222012-06-07 21:57:32 +0000392 throw new IllegalArgumentException("Header checksum fails");
Stuart McCulloch4482c702012-06-15 13:27:53 +0000393
Stuart McCullochf3173222012-06-07 21:57:32 +0000394 position += HEADERLENGTH + compressedLength;
395 return new SHA1(sha1);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000396 }
397 catch (IOException e) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000398 throw new RuntimeException(e);
Stuart McCulloch4482c702012-06-15 13:27:53 +0000399 }
Stuart McCullochf3173222012-06-07 21:57:32 +0000400 }
401 }
402
403 public void remove() {
404 throw new UnsupportedOperationException("Remvoe not supported, CAFS is write once");
Stuart McCulloch4482c702012-06-15 13:27:53 +0000405 }
Stuart McCullochf3173222012-06-07 21:57:32 +0000406 };
407 }
408
Stuart McCullochf3173222012-06-07 21:57:32 +0000409 public boolean isEmpty() throws IOException {
Stuart McCulloch4482c702012-06-15 13:27:53 +0000410 synchronized (store) {
Stuart McCullochf3173222012-06-07 21:57:32 +0000411 return store.getFilePointer() <= 256;
412 }
413 }
414}