#! /usr/bin/perl

# This script creates initial PhotoML descriptions of film rolls using
# data recorded in a simple text format

# Copyright © 2005-2010 Brendt Wohlberg <photoml@wohlberg.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License at
# http://www.gnu.org/licenses/gpl-2.0.txt.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

# Most recent modification: 22 October 2010

use strict;
use File::Basename;
use Getopt::Std;
use Date::Manip;

# Set up path variables: the XSL stylesheets are expected one directory
# level above the directory containing this script
my $pmlpath = dirname($0) . "/..";
my $xsl = "$pmlpath/xsl/misc/create.xsl";

# Parse command line switches
my $ustr = <<EOF;
usage: pmlcreate [-h] [-e] [-x] infile
                 -h Display usage information
                 -e Display an example input file
                 -x Output intermediate XML representation of image data
EOF
my $options = {};
# getopts returns false (after printing a diagnostic) on an unknown
# switch; show the usage message rather than silently carrying on
getopts('hex', $options) or die "$ustr";
if (defined $options->{'e'}) {
  print input_example();
  exit 0;
}
die "$ustr" if (defined $options->{'h'} or @ARGV == 0);
my $xout = defined ($options->{'x'});

# Ensure xsltproc is available. The check is made only once it is known
# that xsltproc will actually be run: the -h and -e switches never reach
# this point, and with -x the intermediate XML is written directly
# without invoking xsltproc.
die "pmlcreate: error executing xsltproc\n"
    if (!$xout and `which xsltproc 2>/dev/null` eq '');

# Read input file lines into $data, a hash keyed by section name. Each
# section is a hash mapping the first field of a line to the remainder
# of that line; within a subsection the entries are stored one level
# deeper, under the subsection name.
my $input = $ARGV[0];
# Three-argument open so that characters in the file name can never be
# interpreted as an open mode or a pipe
open(my $infh, '<', $input)
  or die "pmlcreate: failed to open $input: $!\n";
my $data = {};
my ($line, $section, $subsection);
while ($line = <$infh>) {
  # Skip comment lines and blank lines
  next if ($line =~ /(^#)|(^\s*$)/);
  # Section header +<section-name>+ (trailing whitespace, including a
  # carriage return from DOS line endings, is tolerated)
  if ($line =~ /^\+([^\+]+)\+\s*$/) {
    $section = $1;
    $data->{$section} = {};
    $subsection = undef;
    next;
  }
  # Subsection header ++<subsection-name>++
  if ($line =~ /^\+{2}([^\+]+)\+{2}\s*$/) {
    $subsection = $1;
    $data->{$section}->{$subsection} = {};
    next;
  }
  # Data line: lookup key followed by the rest of the line. The value is
  # matched non-greedily so that trailing whitespace is not stored as
  # part of it.
  if ($line =~ /^\s*(\S+)\s+(.*?)\s*$/) {
    if (defined $subsection) {
      $data->{$section}->{$subsection}->{$1} = $2;
    } else {
      $data->{$section}->{$1} = $2;
    }
  }
}
close($infh);

# Get the initial roll number (used when constructing the first group
# id) and the collection name from the Config section; either may be
# undefined
my $initnum = $data->{'Config'}->{'initnum'};
my $collect = $data->{'Config'}->{'collect'};

# Parse camera descriptions: each Camera entry maps a camera label to a
# make and a model, either of which may be delimited by double quotes.
# Entries that do not provide both fields are ignored.
my $cameras = {};
my ($c, $l);
foreach $c ( keys %{$data->{'Camera'}} ) {
  $l = $data->{'Camera'}->{$c};
  if ($l =~ /([^\s\"]+|\"[^\"]+\")\s+([^\s\"]+|\"[^\"]+\")/) {
    my ($make, $model) = ($1, $2);
    # Remove the delimiting quotes from both fields (a quoted make would
    # otherwise end up with its quotes inside the XML attribute value)
    s/^\"([^\"]+)\"$/$1/ for ($make, $model);
    $cameras->{$c}->{'make'} = $make;
    $cameras->{$c}->{'model'} = $model;
  }
}

# Parse film descriptions: each Film entry maps a film label to a make,
# a name and a speed. The make and name may be delimited by double
# quotes. Entries that do not provide all three fields are ignored.
my $films = {};
my $f;
foreach $f ( keys %{$data->{'Film'}} ) {
  $l = $data->{'Film'}->{$f};
  if ($l =~ /([^\s\"]+|\"[^\"]+\")\s+(\"[^\"]+\"|\S+)\s+(\S+)/) {
    $films->{$f}->{'make'} = $1;
    $films->{$f}->{'name'} = $2;
    $films->{$f}->{'speed'} = $3;
    # Remove the delimiting quotes from the make as well as the name (a
    # quoted make would otherwise end up with its quotes inside the XML
    # attribute value)
    if ($films->{$f}->{'make'} =~ /\"([^\"]+)\"/) {
      $films->{$f}->{'make'} = $1;
    }
    if ($films->{$f}->{'name'} =~ /\"([^\"]+)\"/) {
      $films->{$f}->{'name'} = $1;
    }
  }
}

# Parse roll descriptions: each Load entry maps a roll number to a
# camera label, film label, load date, unload date, number of frames
# and an optional expiry date (year and month). Entries that do not
# match this layout are ignored. Every roll also gets an initially empty
# 'defc' hash.
my $rolls = {};
my $r;
foreach my $rollnum ( keys %{$data->{'Load'}} ) {
  # In list context the match returns all six captures, the last being
  # undefined when no expiry date is given
  my @fields = ($data->{'Load'}->{$rollnum} =~ /(\S+)\s+(\S+)\s+(\d{4}-\d{2}-\d{2})\s+(\d{4}-\d{2}-\d{2})\s+(\d+)(?:\s+(\d{4}-\d{2}))?/);
  next unless (@fields);
  my %roll;
  @roll{'camera', 'film', 'load', 'unload', 'nframes', 'expire'} = @fields;
  $roll{'defc'} = {};
  $rolls->{$rollnum} = \%roll;
}

# Parse day-end-frame-count section: each DEFC entry maps a date to a
# list of "<roll-number>,<frame-number>" specifications, each optionally
# prefixed by "<camera-label>:". The frame count (an empty string when
# no number follows the comma) is recorded per date in the 'defc' hash
# of the corresponding roll.
my $d;
foreach $d ( keys %{$data->{'DEFC'}} ) {
  $l = $data->{'DEFC'}->{$d};
  # Step through the specifications with a /g match rather than by
  # repeatedly assigning $' back to the string
  while ($l =~ /(?:\S+:)?(\d+),(\d*)\s*/g) {
    my ($rollnum, $count) = ($1, $2);
    # Do not let a specification for a roll that is absent from the Load
    # section bring a roll into existence: such a roll would have no
    # load date, camera or film
    if (!exists $rolls->{$rollnum}) {
      warn "pmlcreate: DEFC entry for $d refers to unknown roll $rollnum\n";
      next;
    }
    $rolls->{$rollnum}->{'defc'}->{$d} = $count;
  }
}

# Parse laboratory section: each Lab entry maps a laboratory label to a
# name and an address, either of which may be delimited by double
# quotes (removed here). Entries without both fields are ignored.
my $lab = {};
foreach my $label ( keys %{$data->{'Lab'}} ) {
  next unless ($data->{'Lab'}->{$label} =~ /([^\s\"]+|\"[^\"]+\")\s+([^\s\"]+|\"[^\"]+\")/);
  my %entry = ('name' => $1, 'address' => $2);
  foreach my $field ('name', 'address') {
    $entry{$field} = $1 if ($entry{$field} =~ /\"([^\"]+)\"/);
  }
  $lab->{$label} = \%entry;
}

# Parse processing section: each Processing entry maps a roll number to
# a laboratory label, a tag and a processing date (YYYY-MM-DD). Entries
# that do not match this layout are ignored.
my $proc = {};
my $p;
foreach my $rollnum ( keys %{$data->{'Processing'}} ) {
  my @fields = ($data->{'Processing'}->{$rollnum} =~ /(\S+)\s+(\S+)\s+(\d{4}-\d{2}-\d{2})/);
  next unless (@fields);
  my %entry;
  @entry{'lab', 'tag', 'date'} = @fields;
  $proc->{$rollnum} = \%entry;
}

# Parse exposure section: each subsection of Exposure is named by a roll
# number, and each of its entries maps a frame number to a shutter
# speed, an aperture ('f' followed by a number) and an optional exposure
# compensation. A shutter value x" denotes x seconds and is stored as x;
# a bare value x denotes 1/x seconds and is stored as "1/x".
my $exp = {};
foreach $r ( keys %{$data->{'Exposure'}} ) {
  # Entries placed directly in the section (outside any subsection) are
  # plain strings rather than hashes, and are ignored
  next if (ref($data->{'Exposure'}->{$r}) ne "HASH");
  foreach $f ( keys %{$data->{'Exposure'}->{$r}} ) {
    $exp->{$r} = {} if (!defined $exp->{$r});
    $l = $data->{'Exposure'}->{$r}->{$f};
    if ($l =~ /([\d\.]+\"?)\s+f([\d\.]+)(?:\s+(\-?\+?[\d\.]+))?/) {
      # Copy the captures immediately so that nothing below depends on
      # $1 surviving a later (failed) match
      my ($shutter, $aperture, $expcomp) = ($1, $2, $3);
      # Accept a decimal point in a shutter time given in seconds, so
      # that 2.5" is stored as 2.5 rather than as its trailing digits
      if ($shutter =~ /^([\d\.]+)\"$/) {
	$shutter = $1;
      } else {
	$shutter = "1/$shutter";
      }
      $exp->{$r}->{$f}->{'shutter'} = $shutter;
      $exp->{$r}->{$f}->{'aperture'} = $aperture;
      $exp->{$r}->{$f}->{'expcomp'} = $expcomp;
    }
  }
}

# Create a description file for each roll, taking the rolls in
# increasing numerical order. For each roll an intermediate <rolldata>
# XML document is generated; with the -x switch it is written directly
# to <gid>.xml, otherwise it is piped through xsltproc (using the
# create.xsl stylesheet), whose output is written to <gid>.xml.
my ($gid, $pml, $cattr, $f0, $f1, $f2, $nd);
foreach $r ( sort {$a <=> $b} keys %$rolls ) {
  my $roll = $rolls->{$r};
  # No gid can be computed for a roll without a load date; skip it
  # rather than writing a file with an empty gid
  if (!defined $roll->{'load'}) {
    warn "pmlcreate: skipping roll $r: no valid entry in Load section\n";
    next;
  }
  # If gid defined, get next gid, otherwise initialise it
  $gid = nextgid($roll->{'load'}, $gid, 'r', $initnum);
  # Open file after ensuring it doesn't already exist
  $pml = "$gid.xml";
  die "pmlcreate: file $pml exists\n" if (-f $pml);
  my $out;
  if ($xout) {
    open($out, '>', $pml) or
      die "pmlcreate: failed to open pipe or file for writing\n";
  } else {
    # The command is run by the shell (because of the redirection), so
    # single-quote the stylesheet path to protect any whitespace or
    # shell metacharacters in the installation directory name
    (my $qxsl = $xsl) =~ s/'/'\\''/g;
    open($out, '|-', "xsltproc '$qxsl' - > $pml") or
      die "pmlcreate: failed to open pipe or file for writing\n";
  }
  $cattr = (defined $collect)?" collect=\'$collect\'":'';
  print $out "<rolldata$cattr>\n";
  # Emit roll gid and load and unload dates
  print $out "  <roll number=\'$r\' gid=\'$gid\' ".
             "load=\'$roll->{'load'}\' ".
             "unload=\'$roll->{'unload'}\'>\n";
  # Emit camera body information
  $c = $roll->{'camera'};
  print $out "    <camera make=\"$cameras->{$c}->{'make'}\" ".
             "model=\"$cameras->{$c}->{'model'}\"/>\n";
  # Emit film information
  $f = $roll->{'film'};
  print $out "    <film make=\"$films->{$f}->{'make'}\" ".
             "name=\"$films->{$f}->{'name'}\" ".
             "speed=\"$films->{$f}->{'speed'}\" ".
             "expire=\"$roll->{'expire'}\"/>\n";
  # Emit film processing information (a lexical is used for the lab
  # entry instead of the sort variable $a)
  $p = $proc->{$r};
  my $labinfo = $lab->{$p->{'lab'}};
  print $out "    <processing name=\"$labinfo->{'name'}\" ".
             "address=\"$labinfo->{'address'}\" tag=\"$p->{'tag'}\" ".
             "date=\"$p->{'date'}\"/>\n";
  print $out "    <fnum>".$data->{'FN'}->{$r}."</fnum>\n";
  # Emit exposure information for each frame
  foreach $f ( sort {$a <=> $b} keys %{$exp->{$r}} ) {
    print $out "    <exposure frame=\"$f\" ".
               "aperture=\"$exp->{$r}->{$f}->{'aperture'}\" ".
               "shutter=\"$exp->{$r}->{$f}->{'shutter'}\" ".
               "exp-comp=\"$exp->{$r}->{$f}->{'expcomp'}\"/>\n";
  }
  # Compute and emit frame date information. $f0 is the first frame not
  # yet assigned to a date, $f1 the day-end frame count for the date
  # being handled, and $nd the date to use for any frames that remain
  # after the last DEFC entry for this roll.
  my $nframes = $roll->{'nframes'};
  $nd = $roll->{'load'};
  $f0 = 1;
  foreach $d ( sort keys %{$roll->{'defc'}} ) {
    $f1 = $roll->{'defc'}->{$d};
    if ($f1 eq '') {
      # An empty count marks a roll that was completed but still loaded
      # at the end of the day: all remaining frames belong to this date.
      # The string comparison keeps a count of 0 from being mistaken for
      # this marker, and nothing is emitted when no frames remain (for
      # example when the marker is repeated on a later date).
      if ($f0 <= $nframes) {
        print $out "    <frames date=\'$d\' first=\'$f0\' last=\'$nframes\'/>\n";
        $f0 = $nframes + 1; # Increment by one to avoid this frame being
                            # written again in last frame handling
                            # section below
      }
    } elsif ($f1 > $f0) {
      $f2 = $f1 - 1;
      print $out "    <frames date=\'$d\' first=\'$f0\' last=\'$f2\'/>\n";
      $f0 = $f1;
    }
    $nd = nextday($d);
  }
  # Handle last frames on roll (after last specified DEFC for that roll)
  if ($f0 <= $nframes) {
    print $out "    <frames date=\'$nd\' first=\'$f0\' last=\'$nframes\'/>\n";
  }
  print $out "  </roll>\n";
  print $out "</rolldata>\n";
  # Closing a pipe waits for the command and reports a non-zero exit
  # status, so this is where a failure of xsltproc becomes visible
  close($out) or
    warn "pmlcreate: error while writing $pml\n";
}

exit 0;



# Return the date of the day following the given date. The argument is
# parsed by Date::Manip, and the result is formatted as YYYY-MM-DD.
sub nextday {
  my ($date) = @_;

  return UnixDate(
    DateCalc(ParseDate($date), "+ 1 day"),
    "%Y-%m-%d"
  );
}


# Determine the gid for the next roll. A gid has the form
# <year><class-character><number>, the year being printed with four
# digits and the number with at least two.
#
# Arguments:
#   load - load date of the roll, beginning with "YYYY-"
#   gid0 - gid of the previous roll (undefined for the first roll)
#   c0   - class character used when there is no usable previous gid
#          (defaults to 'r')
#   n0   - number used when there is no usable previous gid (defaults
#          to 1)
#
# Returns undef when the load date does not begin with a four digit
# year. Otherwise, the number of the previous gid is incremented, unless
# the load year is later than the year of the previous gid, in which
# case the load year is used and numbering restarts at 1.
sub nextgid {
  my ($load, $gid0, $c0, $n0) = @_;

  # Without a load year no gid can be constructed
  my ($loadyear) = ($load =~ /^(\d{4})-/);
  return undef unless (defined $loadyear);

  my ($year, $class, $num);
  my @previous = ($gid0 =~ /^(\d{4})(.)(\d+)$/);
  if (@previous) {
    # Continue numbering from the previous gid
    ($year, $class, $num) = ($previous[0], $previous[1], $previous[2] + 1);
    # Restart numbering when the roll was loaded in a later year
    ($year, $num) = ($loadyear, 1) if ($loadyear > $year);
  } else {
    # No usable previous gid: start from the supplied initial values
    $year = $loadyear;
    $class = $c0;
    $class = 'r' unless (defined $class);
    $num = $n0;
    $num = 1 unless (defined $num);
  }

  return sprintf("%04d%s%02d", $year, $class, $num);
}


# Return the text of an example input file (printed when the -e switch
# is given). The text documents the input format through its own
# comment lines.
sub input_example {
  return <<EOF;
#
#                  Example pmlcreate input file
#
#
# The file consists of a number of sections, the start of which is
# indicated by +<section-name>+, and the end of which is indicated by
# the start of the next section. An optional subsection is indicated
# by ++<subsection-name>++, the end being indicated by the start of
# the next subsection or section. Within each section (and
# subsection), each significant line consists of a number of
# whitespace separated fields, the first playing the role of a lookup
# index for the remainder of the line. Any line beginning with a '#'
# character is a comment.
#
#


# The 'Config' section specifies global options. An initial roll
# number is specified by 'initnum', and a collection name is specified
# by 'collect'.

+Config+
initnum		07
collect		Mexico-2004


# Each line in the 'Camera' section defines a camera body label
# followed by the manufacturer and model names. Any of these fields
# containing whitespace should be delimited by double quote 
# characters '"'.

+Camera+
NikonF80	Nikon	  F80
YashicaD	Yashica	  D


# Each line in the 'Film' section defines a film label followed by the
# manufacturer, name, and speed. Any of these fields containing
# whitespace should be delimited by double quote characters '"'.

+Film+
Provia100F	Fuji	"Provia 100F"	100
Provia400F	Fuji	"Provia 400F"	400
IlfordFP4	Ilford	FP4+		125


# Each line of the 'Load' section defines a roll number, followed by
# the corresponding camera body label and film label, load and unload
# date, number of frames on that roll, and (optional) film expiry date.

+Load+
 1   NikonF80	Provia100F   2004-12-27	  2004-12-30   37  2005-10
 2   YashicaD	Provia100F   2004-12-27	  2004-12-31   12  2006-01
 3   NikonF80	Provia400F   2004-12-30	  2005-01-02   37
 4   YashicaD	IlfordFP4    2004-12-31	  2005-01-04   12  2006-04
 5   NikonF80	Provia100F   2005-01-03	  2005-01-05   37


# Each line of the 'DEFC' (Day End Frame Count) section specifies a
# date followed by a list of roll and frame count specifications for
# each camera body. Each specification has the form
# "<roll-number>,<frame-number>", or optionally
# "<camera-label>:<roll-number>,<frame-number>", where <frame-number>
# is the last frame counter index on the specified date. A camera body
# with a completed but loaded roll is indicated by
# "<camera-label>:<roll-number>," or "<roll-number>,".

+DEFC+
2004-12-27	NikonF80:1,5	YashicaD:2,4
# An equivalent specification for the above line would be
# 2004-12-27	1,5	        2,4
2004-12-28	NikonF80:1,12	YashicaD:2,7
2004-12-29	NikonF80:1,25	YashicaD:2,8
2004-12-30	NikonF80:3,8	YashicaD:2,11
2004-12-31	NikonF80:3,21	YashicaD:4,5
2005-01-01	NikonF80:3,37	YashicaD:4,8
2005-01-02			YashicaD:4,10
2005-01-03	NikonF80:5,5	YashicaD:4,
#                               ^^^^^^^^^^^
# Indicates that the roll was completed but still loaded at the end of
# the day
2005-01-04	NikonF80:5,19	


# Each line in the 'Lab' section defines a processing laboratory label
# followed by the laboratory name and address. Any of these fields
# containing whitespace should be delimited by double quote 
# characters '"'.

+Lab+
Carl	"Carl&apos;s Darkroom"	"Albuquerque, NM"
Desert	"Desert Photo"		"Albuquerque, NM"


# Each line of the 'Processing' section specifies a roll number
# followed by a processing laboratory label, double check tag number,
# and processing date.

+Processing+
 1	Carl	2701	2005-01-10
 2	Carl	2702	2005-01-10
 3	Carl	2703	2005-01-10
 4	Desert	7285	2005-01-11
 5	Carl	2704	2005-01-10


# Each line of the 'FN' (Frame Numbers) section specifies a roll
# number followed by a consecutive list of frame labels (present on
# the film itself).

+FN+
 1	1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 E
 2	1 3 5 6 8 9 11 12 14 15 17 19
 3	1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 E
 4	1 2 3 4 5 6 7 8 9 10 11 12
 5	1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 E


# Each subsection of the 'Exposure' section specifies a roll
# number. Each line of one of these subsections specifies a frame
# number followed by the shutter speed (either a number x representing
# 1/x seconds, or x" representing x seconds), aperture (the character
# 'f' followed by the relevant numerical value), and exposure
# compensation.

+Exposure+
++1++
2     90        f8      0.0
8     250       f11     0.0
14    20        f16     0.0
20    8         f22     0.0
26    45        f11     0.0
32    30        f13     0.0
++3++
2     90        f8      0.0
8     180       f8      0.0
14    350       f5.6    0.0
20    60        f8      0.0
26    3"        f16    -1.0
32    60        f8      0.0
++5++
2     350       f8      0.0
8     8         f5.6    0.0
14    15        f8      0.0
20    180       f5.6    0.0
26    60        f5.6    0.0
32    30        f19     0.0
EOF
}
