root/feedmelinks/lib/Cache/File.pm

Revision 1448, 16.7 kB (checked in by jm3, 2 years ago)

new spammer blacklist, perl libs, and better splinker hunting tools

Line 
1 =head1 NAME
2
3 Cache::File - Filesystem based implementation of the Cache interface
4
5 =head1 SYNOPSIS
6
7   use Cache::File;
8
9   my $cache = Cache::File->new( cache_root => '/tmp/mycache',
10                                 default_expires => '600 sec' );
11
12 See Cache for the usage synopsis.
13
14 =head1 DESCRIPTION
15
16 The Cache::File class implements the Cache interface.  This cache stores
17 data in the filesystem so that it can be shared between processes and persists
18 between process invocations.
19
20 =cut
21 package Cache::File;
22
23 require 5.006;
24 use strict;
25 use warnings;
26 use Cache::File::Heap;
27 use Cache::File::Entry;
28 use Digest::SHA1 qw(sha1_hex);
29 use Fcntl qw(LOCK_EX LOCK_NB);
30 use Symbol ();
31 use File::Spec;
32 use File::Path;
33 use File::NFSLock;
34 use DB_File;
35 use Storable;
36 use Carp;
37
38 use base qw(Cache);
39 use fields qw(
40     root depth umask locklevel
41     expheap ageheap useheap index lockfile
42     lock lockcount openexp openage openuse openidx);
43
44 our $VERSION = '2.04';
45
46 sub LOCK_NONE ()  { 0 }
47 sub LOCK_LOCAL () { 1 }
48 sub LOCK_NFS ()   { 2 }
49
50
51 my $DEFAULT_DEPTH = 2;
52 my $DEFAULT_UMASK = 077;
53 my $DEFAULT_LOCKLEVEL = LOCK_NFS;
54
55 my $INDEX       = 'index.db';
56 my $EXPIRY_HEAP = 'expheap.db';
57 my $AGE_HEAP    = 'ageheap.db';
58 my $USE_HEAP    = 'useheap.db';
59 my $LOCKFILE    = 'lock';
60
61 our $STALE_LOCK_TIMEOUT = 30;  # 30 second timeout on lockfiles
62 our $LOCK_EXT   = '.lock';
63
64 # keys to store count and size in the index
65 my $SIZE_KEY  = '__cache_size';
66 my $COUNT_KEY = '__cache_count';
67
68
69 =head1 CONSTRUCTOR
70
71   my $cache = Cache::File->new( %options )
72
73 The constructor takes cache properties as named arguments, for example:
74
75   my $cache = Cache::File->new( cache_root => '/tmp/mycache',
76                                 lock_level => Cache::File::LOCK_LOCAL(),
77                                 default_expires => '600 sec' );
78
79 Note that you MUST provide a cache_root property.
80
81 See 'PROPERTIES' below and in the Cache documentation for a list of all
82 available properties that can be set.
83
84 =cut
85
86 sub new {
87     my Cache::File $self = shift;
88     my $args = $#_? { @_ } : shift;
89
90     $self = fields::new($self) unless ref $self;
91     $self->SUPER::new($args);
92
93     $self->_set_cache_lock_level($args->{lock_level});
94     $self->_set_cache_umask($args->{cache_umask});
95     $self->_set_cache_depth($args->{cache_depth});
96     $self->_set_cache_root($args->{cache_root});
97
98     return $self;
99 }
100
101 =head1 METHODS
102
103 See 'Cache' for the API documentation.
104
105 =cut
106
107 sub entry {
108     my Cache::File $self = shift;
109     my ($key) = @_;
110     return Cache::File::Entry->new($self, $key);
111 }
112
113 sub purge {
114     my Cache::File $self = shift;
115     my $time = time();
116
117     # if it's locked, someone else will probably be doing a purge already
118     $self->trylock() or return;
119
120     # open expiry index
121     my $expheap = $self->get_exp_heap();
122
123     # check for expiry
124     my $minimum = $expheap->minimum();
125     if ($minimum and $minimum <= $time) {
126         # open other indexes
127         my $ageheap = $self->get_age_heap();
128         my $useheap = $self->get_use_heap();
129         my $index = $self->get_index();
130
131         # loop removing minimums
132         do {
133             my $keys;
134             ($minimum, $keys) = $expheap->extract_minimum_dup();
135
136             foreach (@$keys) {
137                 # update all the indexes (remove references to this key)
138                 my $path = $self->cache_file_path($_);
139
140                 my $index_entries = $self->get_index_entries($_)
141                     or warnings::warnif('Cache', "missing index entry for $_");
142                 delete $$index{$_};
143
144                 $ageheap->delete($$index_entries{age}, $_)
145                     if $$index_entries{age};
146                 $useheap->delete($$index_entries{lastuse}, $_)
147                     if $$index_entries{lastuse};
148
149                 # reduce the cache size and count
150                 $$index{$COUNT_KEY}--;
151                 $$index{$SIZE_KEY} -= (-s $path);
152
153                 # remove data file
154                 unlink($path);
155             }
156
157             $minimum = $expheap->minimum();
158
159         } while ($minimum and $minimum <= $time);
160     }
161
162     $self->unlock();
163 }
164
165 sub clear {
166     my Cache::File $self = shift;
167     my $fh = Symbol::gensym();
168
169     $self->lock();
170
171     # Find each directory entries are stored in and remove them
172     opendir($fh, $self->{root})
173         or die "Can't opendir ".$self->{root}.": $!";
174     my @stores =
175         grep { -d $_ }
176         map { File::Spec->catdir($self->{root}, $_) }
177     File::Spec->no_upwards(readdir($fh));
178     closedir($fh);
179
180     rmtree(\@stores,0,1);
181
182     # remove the index files
183     unlink($self->{expheap});
184     unlink($self->{ageheap});
185     unlink($self->{useheap});
186     unlink($self->{index});
187
188     $self->unlock();
189 }
190
191 sub count {
192     my Cache::File $self = shift;
193
194     my $count;
195     $self->lock();
196     my $index = $self->get_index();
197     $count = $$index{$COUNT_KEY};
198     $self->unlock();
199    
200     return $count || 0;
201 }
202
203 sub size {
204     my Cache::File $self = shift;
205
206     my $size;
207     $self->lock();
208     my $index = $self->get_index();
209     $size = $$index{$SIZE_KEY};
210     $self->unlock();
211    
212     return $size || 0;
213 }
214
215 sub sync {
216     my Cache::File $self = shift;
217     # TODO: check entries in cache root and rebuild heaps
218 }
219
220
221 =head1 PROPERTIES
222
223 Cache::File adds the following properties in addition to those discussed in
224 the 'Cache' documentation.
225
226 =over
227
228 =item cache_root
229
230 Used to specify the location of the cache store directory.  All methods will
231 work ONLY data stored within this directory.  This parameter is REQUIRED when
232 creating a Cache::File instance.
233
234  my $ns = $c->cache_root();
235
236 =cut
237
238 sub cache_root {
239     my Cache::File $self = shift;
240     return $self->{root};
241 }
242
243 sub _set_cache_root {
244     my Cache::File $self = shift;
245     my ($cache_root) = @_;
246     $cache_root or croak 'A cache root directory MUST be provided';
247     $self->{root} = File::Spec->canonpath(
248         File::Spec->rel2abs($cache_root, File::Spec->tmpdir()));
249
250     # create root
251     unless (-d $self->{root}) {
252         my $oldmask = umask $self->cache_umask();
253         eval { mkpath($self->{root}) }
254             or die 'Failed to create cache root '.$self->{root}.": $@";
255         umask $oldmask;
256     }
257
258     # set required file paths
259     $self->{expheap} = File::Spec->catfile($self->{root}, $EXPIRY_HEAP);
260     $self->{ageheap} = File::Spec->catfile($self->{root}, $AGE_HEAP);
261     $self->{useheap} = File::Spec->catfile($self->{root}, $USE_HEAP);
262     $self->{index} = File::Spec->catfile($self->{root}, $INDEX);
263     $self->{lockfile} = File::Spec->catfile($self->{root}, $LOCKFILE);
264 }
265
266 =item cache_depth
267
268 The number of subdirectories deep to store cache entires.  This should be
269 large enough that no cache directory has more than a few hundred object.
270 Defaults to 2 unless explicitly set.
271
272  my $depth = $c->cache_depth();
273
274 =cut
275
276 sub cache_depth {
277     my Cache::File $self = shift;
278     return $self->{depth};
279 }
280
281 sub _set_cache_depth {
282     my Cache::File $self = shift;
283     my ($cache_depth) = @_;
284     $self->{depth} = (defined $cache_depth)? $cache_depth : $DEFAULT_DEPTH;
285 }
286
287 =item cache_umask
288
289 Specifies the umask to use when creating entries in the cache directory.  By
290 default the umask is '077', indicating that only the same user may access
291 the cache files.
292
293  my $umask = $c->cache_umask();
294
295 =cut
296
297 sub cache_umask {
298     my Cache::File $self = shift;
299     return $self->{umask};
300 }
301
302 sub _set_cache_umask {
303     my Cache::File $self = shift;
304     my ($cache_umask) = @_;
305     $self->{umask} = (defined $cache_umask)? $cache_umask : $DEFAULT_UMASK;
306 }
307
308 =item lock_level
309
310 Specify the level of locking to be used.  There are three different levels
311 available:
312
313 =over
314
315 =item Cache::File::LOCK_NONE()
316
317 No locking is performed.  Useful when you can guarantee only one process will
318 be accessing the cache at a time.
319
320 =item Cache::File::LOCK_LOCAL()
321
322 Locking is performed, but it is not suitable for use over NFS filesystems.
323 However it is more efficient.
324
325 =item Cache::File::LOCK_NFS()
326
327 Locking is performed in a way that is suitable for use on NFS filesystems.
328
329 =back
330
331  my $level = $c->cache_lock_level();
332
333 =cut
334
335 sub cache_lock_level {
336     my Cache::File $self = shift;
337     return $self->{locklevel};
338 }
339
340 sub _set_cache_lock_level {
341     my Cache::File $self = shift;
342     my ($locklevel) = @_;
343
344     if (defined $locklevel) {
345         croak "Unknown lock level requested"
346             unless ($locklevel =~ /^[0-9]+$/ &&
347                     ($locklevel == LOCK_NONE ||
348                      $locklevel == LOCK_LOCAL ||
349                      $locklevel == LOCK_NFS));
350     } else {
351         $locklevel = $DEFAULT_LOCKLEVEL;
352     }
353
354     $self->{locklevel} = $locklevel;
355 }
356
357
358 # REMOVAL STRATEGY METHODS
359
360 sub remove_oldest {
361     my Cache::File $self = shift;
362
363     # Only called from check_size (via change_size) when the lock is set
364     #$self->lock();
365     my $ageheap = $self->get_age_heap();
366
367     my ($minimum, $key) = $ageheap->extract_minimum();
368     $key or return undef;
369     my $size = $self->remove($key);
370     #$self->unlock();
371     return $size;
372 }
373
374 sub remove_stalest {
375     my Cache::File $self = shift;
376
377     # Only called from check_size (via change_size) when the lock is set
378     #$self->lock();
379     my $useheap = $self->get_use_heap();
380
381     my ($minimum, $key) = $useheap->extract_minimum();
382     $key or return undef;
383     my $size = $self->remove($key);
384     #$self->unlock();
385     return $size;
386 }
387
388
389 # UTILITY METHODS
390
391 sub cache_file_path {
392     my Cache::File $self = shift;
393     my ($key) = @_;
394
395     my $shakey = sha1_hex($key);
396     my (@path) = unpack('A2'x$self->{depth}.'A*', $shakey);
397
398     if (wantarray) {
399         my $file = pop(@path);
400         return (File::Spec->catdir($self->{root}, @path), $file);
401     } else {
402         return File::Spec->catfile($self->{root}, @path);
403     }
404 }
405
406 sub lock {
407     my Cache::File $self = shift;
408     my ($tryonly) = @_;
409
410     # already have the lock?
411     if ($self->{lock}) {
412         $self->{lockcount}++;
413         return 1;
414     }
415
416     if ($self->{locklevel} == LOCK_NONE) {
417         $self->{lock} = 1;
418     }
419     else {
420         # TODO: implement LOCK_LOCAL
421
422         my $oldmask = umask $self->cache_umask();
423         my $lock = File::NFSLock->new({
424                 file                => $self->{lockfile},
425                 lock_type           => LOCK_EX | ($tryonly? LOCK_NB : 0),
426                 stale_lock_timeout  => $STALE_LOCK_TIMEOUT,
427             });
428         umask $oldmask;
429
430         unless ($lock) {
431             $tryonly and return 0;
432             die "Failed to obtain lock on lockfile '".$self->{lockfile}."': ".
433                 $File::NFSLock::errstr."\n";
434         }
435         $self->{lock} = $lock;
436     }
437
438     $self->{lockcount} = 1;
439     return 1;
440 }
441
442 sub trylock {
443     my Cache::File $self = shift;
444     return $self->lock(1);
445 }
446
447 sub unlock {
448     my Cache::File $self = shift;
449     $self->{lock} or croak "not locked";
450     return unless --$self->{lockcount} == 0;
451
452     # close heaps and save counts
453     $self->{openexp} = undef;
454     $self->{openage} = undef;
455     $self->{openuse} = undef;
456     $self->{openidx} = undef;
457
458     # unlock
459     $self->{lock}->unlock unless $self->{locklevel} == LOCK_NONE;
460     $self->{lock} = undef;
461 }
462
463 sub create_entry {
464     my Cache::File $self = shift;
465     my ($key, $time) = @_;
466
467     my $ageheap = $self->get_age_heap();
468     $ageheap->add($time, $key);
469     my $useheap = $self->get_use_heap();
470     $useheap->add($time, $key);
471
472     $self->set_index_entries($key, { age => $time, lastuse => $time });
473 }
474
475 sub update_last_use {
476     my Cache::File $self = shift;
477     my ($key, $time) = @_;
478
479     my $index_entries = $self->get_index_entries($key)
480         or warnings::warnif('Cache', "missing index entry for $key");
481
482     my $useheap = $self->get_use_heap();
483     $useheap->delete($$index_entries{lastuse}, $key);
484     $useheap->add($time, $key);
485
486     $$index_entries{lastuse} = $time;
487     $self->set_index_entries($key, $index_entries);
488 }
489
490 sub change_count {
491     my Cache::File $self = shift;
492     my ($count) = @_;
493     my $index = $self->get_index();
494     my $oldcount = $$index{$COUNT_KEY};
495     $$index{$COUNT_KEY} = $oldcount? $oldcount + $count : $count;
496 }
497
498 sub change_size {
499     my Cache::File $self = shift;
500     my ($size) = @_;
501     my $index = $self->get_index();
502     my $oldsize = $$index{$SIZE_KEY};
503     $$index{$SIZE_KEY} = $oldsize? $oldsize + $size : $size;
504     $self->check_size($$index{$SIZE_KEY}) if $size > 0;
505 }
506
507 sub get_index_entries {
508     my Cache::File $self = shift;
509     my ($key) = @_;
510
511     my $index = $self->get_index();
512     my $index_entry = $$index{$key}
513         or return undef;
514
515     my $index_entries = Storable::thaw($index_entry);
516     $$index_entries{age} and $$index_entries{lastuse}
517         or warnings::warnif('Cache', "invalid index entry for $_");
518
519     return $index_entries;
520 }
521
522 sub set_index_entries {
523     my Cache::File $self = shift;
524     my $key = shift;
525     my $index_entries = $#_? { @_ } : shift;
526
527     $$index_entries{age} and $$index_entries{lastuse}
528         or croak "failed to supply age and lastuse for index update on $key";
529
530     my $index = $self->get_index();
531     $$index{$key} = Storable::nfreeze($index_entries);
532 }
533
534 sub get_index {
535     my Cache::File $self = shift;
536     unless ($self->{openidx}) {
537         $self->{lock} or croak "not locked";
538
539         my $indexfile = $self->{index};
540         File::NFSLock::uncache($indexfile) if $self->{locklevel} == LOCK_NFS;
541
542         my $oldmask = umask $self->cache_umask();
543         my %indexhash;
544         my $index =
545             tie %indexhash, 'DB_File', $indexfile,O_CREAT|O_RDWR,0666,$DB_HASH;
546         umask $oldmask;
547
548         $index or die "Failed to open index $indexfile: $!";
549
550         $self->{openidx} = \%indexhash;
551     }
552     return $self->{openidx};
553 }
554
555 sub get_exp_heap {
556     my Cache::File $self = shift;
557     return $self->{openexp} ||= $self->_open_heap($self->{expheap});
558 }
559
560 sub get_age_heap {
561     my Cache::File $self = shift;
562     return $self->{openage} ||= $self->_open_heap($self->{ageheap});
563 }
564
565 sub get_use_heap {
566     my Cache::File $self = shift;
567     return $self->{openuse} ||= $self->_open_heap($self->{useheap});
568 }
569
570 sub _open_heap {
571     my Cache::File $self = shift;
572     my ($heapfile) = @_;
573     $self->{lock} or croak "not locked";
574
575     File::NFSLock::uncache($heapfile) if $self->{locklevel} == LOCK_NFS;
576
577     my $oldmask = umask $self->cache_umask();
578     my $heap = Cache::File::Heap->new($heapfile);
579     umask $oldmask;
580     $heap or die "Failed to open heap $heapfile: $!";
581