#!/usr/bin/perl -w

use XML::LibXML;
use Getopt::Long;
use iPE::Globals;
use strict;

# Project globals object; this script only reads its list of sequence types.
# Constructed with direct method-call syntax (indirect object syntax such as
# "new Class()" is ambiguous to the parser and discouraged).
my $g = iPE::Globals->new();

# One optional "[--<type> <type>-dir]" usage line per known sequence type.
my $dirstring = join("\n", map { "    [--$_ $_-dir]" } @{ $g->seqtypes });

# Usage/help text shown when the command line is invalid.  The heredoc
# interpolates $0 (script name) and $dirstring.
my $usage = <<"END";
$0 
    template-instance 
    annotation-dir 
    annotation-ext 
    dna-dir 
    [--ghmm-file ghmm-file]
    [--feature-map-file feature-map-file]
    [--output-dir output-dir]
    [--zhmm output-zhmm]

$dirstring

Creates an instance file based off of a set of annotations and a template 
instance file.  This script will attempt to create an instance with respect
to the file names of the annotations, attempting to find sequence files with
everything before the filename extension in them.

template-instance -- an instance that contains the options you want to run iPE
annotation-dir    -- where the annotations are
annotation-ext    -- extension to ignore when finding corresponding sequences
output-dir        -- where to put the output file(s)
output-zhmm       -- name of the zhmm file to output

X-dir             -- where the sequences of type X are

All directories can also be globs of files, however they must be
single-quoted in order to prevent your shell from globbing.  Example:

'/bio/db/Homo_sapiens/assembly/hg17/chr_seq/*.interspersed.fa'
END

# Getopt::Long specs: every sequence type becomes a "--<type> <string>" option.
my @seqdir_opts = map { "$_=s" } @{ $g->seqtypes };

#Getopt::Long::Configure('debug');

# Parse the options BEFORE reading the positional arguments.  GetOptions
# removes each recognised option (and its value) from @ARGV, so afterwards only
# the positional arguments are left, in order.  Reading @ARGV first, as this
# script used to, picked up an option as template-instance / annotation-dir /
# annotation-ext whenever options were given ahead of the positionals.
#
# A failed parse (unknown option, missing value) is now fatal instead of being
# silently ignored.  Note that a positional argument starting with '-' must be
# separated from the options with '--'.
my %opt;
GetOptions(\%opt, 'output-dir=s', 'zhmm=s', 'feature-map-file=s@', 'ghmm-file=s',
  @seqdir_opts) or die $usage;

# At least the three leading positional arguments are required.
@ARGV >= 3 or die $usage;

my ($inst_file, $ann_dir, $ann_ext) = @ARGV;


# XXX: Directory arguments may still contain single quotes when this script is
#      called from another script.  The function findFiles strips them.  A
#      separate 'fixDir' function could do that here instead, but it would make
#      it harder to slap new directories into this script.  So until findFiles
#      has been called on them, the directory strings must be considered
#      invalid.

# @anns collects the annotation file names; @ids collects, in the same order,
# the regex pattern used later to find each annotation's sequence files.
my (@anns, @dnas);
my @ids;

# Lexical directory handle instead of a global bareword handle.  The listing is
# sorted because readdir returns entries in no particular order; sorting makes
# the generated instance reproducible.
opendir(my $ann_dh, $ann_dir) or die "Could not open directory $ann_dir.\n";
my @files = sort readdir $ann_dh;
closedir $ann_dh;

my $id;
for my $file (@files) {
    # \Q...\E: the extension is literal text, not a pattern, so that e.g. the
    # dot in ".gtf" only matches a dot.
    next unless $file =~ m/(.*)\Q$ann_ext\E/;
    my $stem = $1;

    # The id is matched as a regex against sequence file names, so escape
    # every metacharacter in the stem (not only dots).
    $id = quotemeta($stem);

    # A stem without any dot gets a trailing literal dot, so that "chr1"
    # cannot match "chr10.fa".
    $id .= "\\." unless $stem =~ m/\./;

    push @ids, $id;
    push @anns, $file;
}

# For every sequence type that was given on the command line, look up one
# sequence file per annotation id.  The directory option is handed over by
# reference so that findFiles can clean it up in place.
my %seqs;
for my $seqtype (@{ $g->seqtypes }) {
    next unless defined($opt{$seqtype});
    $seqs{$seqtype} = findFiles(\@ids, \$opt{$seqtype}, $seqtype);
}

# Element after which the generated *_files elements will be inserted; it stays
# undefined when the template has no feature_map_files element.
my $cur_node;
my $parser = XML::LibXML->new;
my $inst = $parser->parse_file($inst_file);

my $inst_root = $inst->documentElement();
die "$inst_file is not an instance file.\n"
    unless($inst_root->nodeName eq "iPE_instance");

# Walk the top-level children of the template and apply the command-line
# overrides.
for my $child ($inst_root->childNodes) {
    my $name = $child->nodeName;

    if($name =~ m/gHMM_file/ && defined($opt{'ghmm-file'})) {
        # Replace the gHMM file; the old basedir no longer applies to it.
        changeText($child, $opt{'ghmm-file'});
        $child->removeAttribute('basedir');
    }
    elsif($name eq "feature_map_files") {
        $cur_node = $child;
        # Only drop basedir when the file list is actually replaced.  Removing
        # it unconditionally would leave the template's own file names
        # without the directory they are relative to.
        if(defined($opt{'feature-map-file'})) {
            changeText($child, join("\n", @{$opt{'feature-map-file'}}));
            $child->removeAttribute('basedir');
        }
    }
    elsif($name =~ m/_files$/) {
        # Every other *_files element of the template is discarded.
        $inst_root->removeChild($child);
    }
    elsif($name eq "options") {
        for my $gchild ($child->childNodes) {
            my $opt_name = $gchild->nodeName;
            if($opt_name eq "zoeOutputFile" && defined $opt{'zhmm'}) {
                changeText($gchild, $opt{'zhmm'});
            }
            elsif($opt_name eq "outputBaseDir"
                    && defined $opt{'output-dir'}) {
                changeText($gchild, $opt{'output-dir'});
            }
        }
    }
}

# Insert the annotation file list, then one seq_files element per sequence
# type that was looked up, each one after the previously inserted element.
$cur_node = addFilesElement($inst, $inst_root, $cur_node, "annotation_files",
    $ann_dir, \@anns);

# Follow the order of $g->seqtypes instead of hash order: "keys %seqs" is
# ordered differently from run to run, which made the output non-reproducible.
for my $seqtype (grep { exists $seqs{$_} } @{ $g->seqtypes }) {
  $cur_node = addFilesElement($inst, $inst_root, $cur_node, "seq_files",
    $opt{$seqtype}, $seqs{$seqtype}, { type => $seqtype });
}

# The finished instance document goes to standard output.
print $inst->toString();

# findFiles(\@ids, \$dir, $type)
#
# Finds, for every id pattern, the first file name that matches it.
#
#   \@ids  -- array ref of regex patterns, one per annotation
#   \$dir  -- scalar ref holding either a directory or a file glob; single
#             quotes are stripped from the referenced string IN PLACE, so the
#             caller's variable is cleaned up as well
#   $type  -- sequence type name, only used in the error message
#
# Returns an array ref of bare file names (no directory part), one per id and
# in the same order as @ids.  Dies if the glob matches nothing, if the
# directory cannot be opened, or if some id has no matching file.
sub findFiles {
    my ($ids_ref, $dir, $type) = @_;

    # single quotes can be left over from scripts
    $$dir =~ s/\'//g;

    my @f;
    if(-d $$dir) {
        # Lexical handle; sorted so that "first match" does not depend on the
        # unspecified order in which readdir returns entries.
        opendir(my $dh, $$dir) or die "Could not open directory $$dir\n";
        @f = sort readdir $dh;
        closedir $dh;
    }
    else {
        @f = glob $$dir;
        # Check for an empty result before looking at $f[0]; testing -e on an
        # undefined value triggers an "uninitialized" warning.
        die "No such file $$dir\n" unless(@f && -e $f[0]);
        # Keep only the last path component of every match.
        s/^.*\/([^\/]+)$/$1/ for @f;
    }

    my @return_files;
    for my $id (@$ids_ref) {
        my $pattern = qr/$id/;      # compile once per id
        my ($match) = grep { $_ =~ $pattern } @f;
        die "No file of type $type for id $id found.\n"
            unless defined $match;
        push @return_files, $match;
    }

    return \@return_files;
}

# addFilesElement($doc, $root, $prev_node, $tag, $basedir, \@files, \%attrs)
#
# Creates a new <$tag> element listing @files, inserts it into $root after
# $prev_node and returns it.
#
#   $doc       -- document used to create the element
#   $root      -- parent element that receives the new element
#   $prev_node -- sibling after which the new element is inserted
#   $tag       -- name of the new element
#   $basedir   -- directory of the files, or a file path / glob inside that
#                 directory; stored in the "basedir" attribute
#   \@files    -- file names, written one per line as the element's text
#   \%attrs    -- optional extra attributes (name => value)
sub addFilesElement {
    my ($doc, $root, $prev_node, $tag, $basedir, $files, $attrs) = @_;

    my $node = $doc->createElement($tag);
    $root->insertAfter($node, $prev_node);

    # When $basedir is not itself a directory it is a file path or a glob:
    # reduce it to its directory part.
    if(!-d $basedir) {
        if($basedir =~ m{/}) {
            $basedir =~ s/\/[^\/]+$//;
        }
        else {
            # No directory component at all (e.g. '*.fa'): the files are in
            # the current directory.  Previously the pattern itself was
            # written out as basedir.
            $basedir = '.';
        }
    }
    $node->setAttribute("basedir", $basedir);

    my $files_str = join("\n         ", @$files);
    $node->appendTextNode("\n         $files_str");

    # Extra attributes are applied last.
    if(defined($attrs)) {
        for my $attr (keys %$attrs) {
            $node->setAttribute($attr, $attrs->{$attr});
        }
    }

    return $node;
}

# changeText($node, $text)
#
# Replaces the text content of $node: every direct child that is a text node
# is removed, then $text is appended as a new text node.  Children of other
# node types are left in place.
sub changeText {
    my ($node, $text) = @_;

    my @old_text = grep { $_->nodeType == XML_TEXT_NODE } $node->childNodes;
    $node->removeChild($_) for @old_text;

    $node->appendTextNode($text);
}
