#+##############################################################################
#                                                                              #
# File: Directory/Queue/Simple.pm                                              #
#                                                                              #
# Description: object oriented interface to a simple directory based queue     #
#                                                                              #
#-##############################################################################

#
# module definition
#

package Directory::Queue::Simple;
use strict;
use warnings;
our $VERSION  = "1.4";
our $REVISION = sprintf("%d.%02d", q$Revision: 1.5 $ =~ /(\d+)\.(\d+)/);

#
# used modules
#

use Directory::Queue::Base qw(:DIR :FILE :RE :ST _fatal _name SYSBUFSIZE);
use POSIX qw(:errno_h);

#
# inheritance
#

our(@ISA) = qw(Directory::Queue::Base);

#
# constants
#

# suffix indicating a temporary element
use constant TEMPORARY_SUFFIX => ".tmp";

# suffix indicating a locked element
use constant LOCKED_SUFFIX => ".lck";

#
# object constructor
#

sub new : method {
    my($class, %option) = @_;
    my($self, $name);

    # default object
    $self = __PACKAGE__->SUPER::new(%option);
    foreach $name (qw(path umask)) {
	delete($option{$name});
    }
    # check granularity
    if (defined($option{granularity})) {
	_fatal("invalid granularity: %s", $option{granularity})
	    unless $option{granularity} =~ /^\d+$/;
	$self->{granularity} = delete($option{granularity});
    } else {
	$self->{granularity} = 60; # default
    }
    # check unexpected options
    foreach $name (keys(%option)) {
	_fatal("unexpected option: %s", $name);
    }
    # so far so good...
    return($self);
}

#
# helpers for the add methods
#

sub _add_dir ($) {
    my($self) = @_;
    my($time);

    $time = time();
    $time -= $time % $self->{granularity} if $self->{granularity};
    return(sprintf("%08x", $time));
}

sub _add_data ($$) {
    my($self, $dataref) = @_;
    my($dir, $name, $tmp, $fh, $length, $offset, $done);

    $dir = _add_dir($self);
    while (1) {
	$name = _name();
	$tmp = $self->{path} . "/" . $dir . "/" . $name . TEMPORARY_SUFFIX;
	$fh = _file_create($tmp, $self->{umask});
	last if $fh;
	_special_mkdir($self->{path} . "/" . $dir, $self->{umask}) if $! == ENOENT;
    }
    $length = length($$dataref);
    $offset = 0;
    while ($length) {
	$done = syswrite($fh, $$dataref, SYSBUFSIZE, $offset);
	_fatal("cannot syswrite(%s): %s", $tmp, $!) unless defined($done);
	$length -= $done;
	$offset += $done;
    }
    close($fh) or _fatal("cannot close(%s): %s", $tmp, $!);
    return($dir, $tmp);
}

sub _add_path ($$$) {
    my($self, $tmp, $dir) = @_;
    my($name, $new);

    while (1) {
	$name = _name();
	$new = $self->{path} . "/" . $dir . "/" . $name;
	# N.B. we use link() + unlink() to make sure $new is never overwritten
	if (link($tmp, $new)) {
	    unlink($tmp) or _fatal("cannot unlink(%s): %s", $tmp, $!);
	    return($dir . "/" . $name);
	}
	_fatal("cannot link(%s, %s): %s", $tmp, $new, $!) unless $! == EEXIST;
    }
}

#
# add a new element to the queue and return its name
#

sub add : method {
    my($self, $data) = @_;
    my($dir, $path);

    ($dir, $path) = _add_data($self, \$data);
    return(_add_path($self, $path, $dir));
}

sub add_ref : method {
    my($self, $dataref) = @_;
    my($dir, $path);

    ($dir, $path) = _add_data($self, $dataref);
    return(_add_path($self, $path, $dir));
}

sub add_path : method {
    my($self, $path) = @_;
    my($dir);

    $dir = _add_dir($self);
    _special_mkdir($self->{path} . "/" . $dir, $self->{umask});
    return(_add_path($self, $path, $dir));
}

#
# get a locked element
#

sub get : method {
    my($self, $name) = @_;

    return(${ _file_read($self->{path} . "/" . $name . LOCKED_SUFFIX, 0) });
}

sub get_ref : method {
    my($self, $name) = @_;

    return(_file_read($self->{path} . "/" . $name . LOCKED_SUFFIX, 0));
}

sub get_path : method {
    my($self, $name) = @_;

    return($self->{path} . "/" . $name . LOCKED_SUFFIX);
}

#
# lock an element:
#  - return true on success
#  - return false in case the element could not be locked (in permissive mode)
#

sub lock : method {
    my($self, $name, $permissive) = @_;
    my($path, $lock, $time);

    $permissive = 1 unless defined($permissive);
    $path = $self->{path} . "/" . $name;
    $lock = $path . LOCKED_SUFFIX;
    if (link($path, $lock)) {
	# we also touch the element to indicate the lock time
	$time = time();
	utime($time, $time, $path)
	    or _fatal("cannot utime(%d, %d, %s): %s", $time, $time, $path, $!);
	return(1);
    }
    return(0) if $permissive and ($! == EEXIST or $! == ENOENT);
    _fatal("cannot link(%s, %s): %s", $path, $lock, $!);
}

#
# unlock an element:
#  - return true on success
#  - return false in case the element could not be unlocked (in permissive mode)
#

sub unlock : method {
    my($self, $name, $permissive) = @_;
    my($path, $lock);

    $permissive = 0 unless defined($permissive);
    $path = $self->{path} . "/" . $name;
    $lock = $path . LOCKED_SUFFIX;
    return(1) if unlink($lock);
    return(0) if $permissive and $! == ENOENT;
    _fatal("cannot unlink(%s): %s", $lock, $!);
}

#
# remove a locked element from the queue
#

sub remove : method {
    my($self, $name) = @_;
    my($path, $lock);

    $path = $self->{path} . "/" . $name;
    $lock = $path . LOCKED_SUFFIX;
    unlink($path) or _fatal("cannot unlink(%s): %s", $path, $!);
    unlink($lock) or _fatal("cannot unlink(%s): %s", $lock, $!);
}

#
# return the number of elements in the queue, locked or not (but not temporary)
#

sub count : method {
    my($self) = @_;
    my($count, $name, @list);

    $count = 0;
    # get the list of directories
    foreach $name (_special_getdir($self->{path}, "strict")) {
	push(@list, $1) if $name =~ /^($_DirectoryRegexp)$/o; # untaint
    }
    # count the elements inside
    foreach $name (@list) {
	$count += grep(/^(?:$_ElementRegexp)$/o, _special_getdir($self->{path} . "/" . $name));
    }
    # that's all
    return($count);
}

#
# purge the queue
#

sub purge : method {
    my($self, %option) = @_;
    my(@list, $name, $path, $oldtemp, $oldlock, $old, @stat);

    # check options
    $option{maxtemp} = 300 unless defined($option{maxtemp});
    $option{maxlock} = 600 unless defined($option{maxlock});
    foreach $name (keys(%option)) {
	_fatal("unexpected option: %s", $name)
	    unless $name =~ /^(maxtemp|maxlock)$/;
	_fatal("invalid %s: %s", $name, $option{$name})
	    unless $option{$name} =~ /^\d+$/;
    }
    # get the list of intermediate directories
    @list = ();
    foreach $name (_special_getdir($self->{path}, "strict")) {
	push(@list, $1) if $name =~ /^($_DirectoryRegexp)$/o; # untaint
    }
    # remove the old temporary or locked elements
    $oldtemp = time() - $option{maxtemp} if $option{maxtemp};
    $oldlock = time() - $option{maxlock} if $option{maxlock};
    if ($oldtemp or $oldlock) {
	foreach $name (@list) {
	    $path = $self->{path} . "/" . $name;
	    foreach $old (grep(/\./, _special_getdir($path))) {
		@stat = stat($path . "/" . $old);
		unless (@stat) {
		    _fatal("cannot stat(%s/%s): %s", $path, $old, $!) unless $! == ENOENT;
		    next;
		}
		next if substr($old, -4) eq TEMPORARY_SUFFIX and $stat[ST_MTIME] >= $oldtemp;
		next if substr($old, -4) eq LOCKED_SUFFIX    and $stat[ST_MTIME] >= $oldlock;
		warn("* removing too old volatile file: $path/$old\n");
		next if unlink($path . "/" . $old);
		_fatal("cannot unlink(%s/%s): %s", $path, $old, $!) unless $! == ENOENT;
	    }
	}
    }
    # try to purge all but the last intermediate directory
    @list = sort(@list);
    if (@list > 1) {
	pop(@list);
	foreach $name (@list) {
	    $path = $self->{path} . "/" . $name;
	    _special_rmdir($path) unless _special_getdir($path);
	}
    }
}

1;

__END__

=head1 NAME

Directory::Queue::Simple - object oriented interface to a simple directory based queue

=head1 SYNOPSIS

  use Directory::Queue::Simple;

  #
  # sample producer
  #

  $dirq = Directory::Queue::Simple->new(path => "/tmp/test");
  foreach $count (1 .. 100) {
      $name = $dirq->add("element $count\n");
      printf("# added element %d as %s\n", $count, $name);
  }

  #
  # sample consumer (one pass only)
  #

  $dirq = Directory::Queue::Simple->new(path => "/tmp/test");
  for ($name = $dirq->first(); $name; $name = $dirq->next()) {
      next unless $dirq->lock($name);
      printf("# reading element %s\n", $name);
      $data = $dirq->get($name);
      # one could use $dirq->unlock($name) to only browse the queue...
      $dirq->remove($name);
  }

=head1 DESCRIPTION

This module is very similar to L<Directory::Queue> but uses a
different way to store data in the filesystem, using less
directories. Its API is almost identical.

Compared to L<Directory::Queue>, this module:

=over

=item *

is simpler

=item *

is faster

=item *

uses less space on disk

=item *

can be given existing files to store

=item *

does not support schemas

=item *

can only store and retrieve binary strings

=item *

is not compatible (at filesystem level) with Directory::Queue

=back

Please refer to L<Directory::Queue> for general information about
directory queues.

=head1 CONSTRUCTOR

The new() method can be used to create a Directory::Queue::Simple
object that will later be used to interact with the queue. The
following attributes are supported:

=over

=item path

the queue toplevel directory (mandatory)

=item umask

the umask to use when creating files and directories
(default: use the running process' umask)

=item granularity

the time granularity for intermediate directories, see L</DIRECTORY STRUCTURE>
(default: 60)

=back

=head1 METHODS

The following methods are available:

=over

=item new()

return a new Directory::Queue::Simple object (class method)

=item copy()

return a copy of the object; this can be useful to have independent
iterators on the same queue

=item path()

return the queue toplevel path

=item id()

return a unique identifier for the queue

=item count()

return the number of elements in the queue

=item first()

return the first element in the queue, resetting the iterator;
return an empty string if the queue is empty

=item next()

return the next element in the queue, incrementing the iterator;
return an empty string if there is no next element

=item add(DATA)

add the given data (a binary string) to the queue and return the
corresponding element name

=item add_ref(REF)

add the given data (a reference to a binary string) to the queue and
return the corresponding element name, this can avoid string copies
with large strings

=item add_path(PATH)

add the given file (identified by its path) to the queue and return
the corresponding element name, the file must be on the same
filesystem and will be moved to the queue

=item lock(ELEMENT[, PERMISSIVE])

attempt to lock the given element and return true on success; if the
PERMISSIVE option is true (which is the default), it is not a fatal
error if the element cannot be locked and false is returned

=item unlock(ELEMENT[, PERMISSIVE])

attempt to unlock the given element and return true on success; if the
PERMISSIVE option is true (which is I<not> the default), it is not a
fatal error if the element cannot be unlocked and false is returned

=item touch(ELEMENT)

update the access and modification times on the element's file to
indicate that it is still being used; this is useful for elements that
are locked for long periods of time (see the purge() method)

=item remove(ELEMENT)

remove the given element (which must be locked) from the queue

=item get(ELEMENT)

get the data from the given element (which must be locked) and return
a binary string

=item get_ref(ELEMENT)

get the data from the given element (which must be locked) and return
a reference to a binary string, this can avoid string copies with
large strings

=item get_path(ELEMENT)

get the file path of the given element (which must be locked), this
file can be read but not removed, you must use the remove() method for
this

=item purge([OPTIONS])

purge the queue by removing unused intermediate directories, removing
too old temporary elements and unlocking too old locked elements (aka
staled locks); note: this can take a long time on queues with many
elements; OPTIONS can be:

=over

=item maxtemp

maximum time for a temporary element (in seconds, default 300);
if set to 0, temporary elements will not be removed

=item maxlock

maximum time for a locked element (in seconds, default 600);
if set to 0, locked elements will not be unlocked

=back

=back

=head1 DIRECTORY STRUCTURE

The toplevel directory contains intermediate directories that contain
the stored elements, each of them in a file.

The names of the intermediate directories are time based: the element
insertion time is used to create a 8-digits long hexadecimal number.
The granularity (see the new() method) is used to limit the number of
new directories. For instance, with a granularity of 60 (the default),
new directories will be created at most once per minute.

Since there is usually a filesystem limit in the number of directories
a directory can hold, there is a trade-off to be made. If you want to
support many added elements per second, you should use a low
granularity to keep small directories. However, in this case, you will
create many directories and this will limit the total number of
elements you can store.

The elements themselves are stored in files (one per element) with a
14-digits long hexadecimal name I<SSSSSSSSMMMMMR> where:

=over

=item I<SSSSSSSS>

represents the number of seconds since the Epoch

=item I<MMMMM>

represents the microsecond part of the time since the Epoch

=item I<R>

is a random digit used to reduce name collisions

=back

A temporary element (being added to the queue) will have a C<.tmp>
suffix.

A locked element will have a hard link with the same name and the
C<.lck> suffix.

=head1 AUTHOR

Lionel Cons L<http://cern.ch/lionel.cons>

Copyright CERN 2011
