#!/usr/bin/perl
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
# bos720 src/bos/usr/bin/cdat/cdat-check.pl 1.5 
#  
# Licensed Materials - Property of IBM 
#  
# COPYRIGHT International Business Machines Corp. 2010,2011 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 
# @(#)21    1.5  src/bos/usr/bin/cdat/cdat-check.pl, cdat, bos720 7/14/11 20:14:19
use strict;
use warnings;
use Getopt::Long;
use Fcntl ':flock';	# LOCK_* constants
use XML::LibXML;
use cdat;
use messages;

#
# Constants.
#
# Name of the XML file that lists the collects; it is opened relative to
# the cdat directory path returned by cdat::odm_get_path().
my $XMLFILE   = 'cdat.xml';
# Location of the XML schema the parsed document is validated against.
my $XMLSCHEMA = 'file:///usr/lib/cdat/schemas/cdat.xsd';

######################################################################
# Function:	usage
# Purpose:	Show how the check subcommand is invoked.
# Tasks:	Write the usage text obtained through catgets() to STDERR,
#		then terminate the process with exit status 1.
# Input:	None (any arguments, e.g. those passed by GetOptions when
#		used as an option handler, are ignored)
# Output:	None (does not return)
######################################################################
sub usage
{
    # catgets() is called in list context, exactly as it would be when
    # used directly as the printf argument list.
    my @usage_text = catgets(MSG_CDAT_CHECK_USAGE,
	"Usage: cdat check -h\n".
	"       cdat check [-d]\n");

    printf(STDERR @usage_text);
    exit(1);
}

######################################################################
# Function:	main
# Purpose:	Entry point of the check subcommand.
# Tasks:	Parse the command line, lock and parse the repository XML
#		file, check every collect against the file system, rewrite
#		the XML file if collects were removed, then look for
#		directories that are not referenced by any collect.
# Input:	None (options are read from @ARGV; -d enables interactive
#		correction)
# Output:	None. Exits with status 1 on usage or setup errors and with
#		status 2 when the XML file cannot be opened or locked, or
#		when inconsistencies remain unfixed; returns normally
#		otherwise.
######################################################################
sub main
{
    my ($rc, $correct);
    my %referenced = ();	# directories referenced by a collect
    my $updatexml = 0;

    # Parse command line options
    Getopt::Long::Configure('bundling', 'no_ignore_case');
    $rc = GetOptions(
	'h' => \&usage,
	'd' => \$correct
	);
    if (!$rc || @ARGV != 0) {
	usage();
    }

    my $path = cdat::odm_get_path();
    if (!defined($path)) {
	printf(STDERR catgets(MSG_DIR_NOT_DEFINED,
	    "The cdat directory path is not defined.\n".
	    "Please, run 'cdat init' first.\n"));
	exit(1);
    }

    cdat::switch_user();

    my $fh;
    if (!open($fh, "+<", "$path/$XMLFILE")) {
	printf(STDERR catgets(MSG_CANNOT_OPEN,
	    "Cannot open %s.\n"), "$path/$XMLFILE");
	exit(2);
    }
    # Try to acquire exclusive write access to the XML file (but do not
    # block). The lock is held until the process exits.
    if (!flock($fh, LOCK_EX | LOCK_NB)) {
	printf(STDERR catgets(MSG_BUSY,
	    "Another instance of the cdat command is running.\n".
	    "Please wait until this instance terminates.\n"));
	exit(2);
    }

    my $parser = XML::LibXML->new();
    if (!defined($parser)) {
	printf(STDERR catgets(MSG_CANNOT_CREATE_XML_PARSER,
	    "Cannot create XML parser.\n"));
	exit(1);
    }
    $parser->keep_blanks(0);

    # The schema constructor dies on failure instead of returning undef,
    # so it has to run inside eval for the error branch to be reachable.
    my $xsd = eval { XML::LibXML::Schema->new(location => $XMLSCHEMA) };
    if (!defined($xsd)) {
	printf(STDERR catgets(MSG_CANNOT_OPEN_XML_SCHEMA,
	    "Cannot open XML schema definition %s.\n"), $XMLSCHEMA);
	exit(1);
    }

    # Recover mode 1: try to parse a damaged document and report the
    # problems as warnings.
    $parser->recover(1);

    # parse_fh dies on errors it cannot recover from; catch that so the
    # localized message and the documented exit status are used.
    my $tree = eval { $parser->parse_fh($fh) };
    my $root = defined($tree) ? $tree->getDocumentElement : undef;
    if (!defined($root)) {
	print(STDERR $@) if ($@);
	printf(STDERR catgets(MSG_CANNOT_PARSE,
	    "Cannot parse %s.\n"), "$path/$XMLFILE");
	exit(1);
    }

    # use eval, otherwise validate will die on errors
    eval { $xsd->validate($tree); };
    if ($@) {
	print(STDERR $@);
	printf(STDERR catgets(MSG_DOESNT_MATCH_SCHEMA,
	    "Document %s does not match XML schema, trying anyway...\n"),
	    "$path/$XMLFILE");
    }

    #
    # Check collects.
    #
    my $nerrors = 0;
    my $nfixed = 0;
    foreach my $collect ($root->getElementsByTagName('collect')) {
	my $id = $collect->getAttribute('id');
	next if (!defined($id));

	my @locations = $collect->getElementsByTagName('location');
	my $location = @locations ? $locations[0]->textContent : '';
	$location =~ s/\/+$//;	# remove trailing "/"
	if ($location eq '') {
	    # The <location> element is missing or empty. An empty location
	    # must never reach the checks below: "$path/" is the repository
	    # itself and would be offered for deletion.
	    $nerrors++;
	    printf(catgets(MSG_NO_ASSOCIATED_DIR,
		"Collect %s does not have any associated directory.\n"), $id);
	    if ($correct && drop_collect_on_request($collect, $id)) {
		$nfixed++;	# this error has been corrected
		$updatexml = 1;
	    }
	    next;
	}
	if (! -d "$path/$location") {
	    # the <location> element points to a directory that does not exist
	    $nerrors++;
	    printf(catgets(MSG_DIR_DOES_NOT_EXIST,
		"Directory %s does not exist for collect %s.\n"),
		"$path/$location", $id);
	    if ($correct && drop_collect_on_request($collect, $id)) {
		$nfixed++;	# this error has been corrected
		$updatexml = 1;
	    }
	    next;
	}
	if (! -f "$path/$location/collect.xml") {
	    # there is no collect.xml file under the directory pointed by
	    # the <location> element.
	    $nerrors++;
	    printf(catgets(MSG_FILE_DOES_NOT_EXIST,
		"File %s does not exist for collect %s.\n"),
		"$path/$location/collect.xml", $id);
	    my $dropped = 0;
	    if ($correct) {
		printf(catgets(MSG_REMOVE_COLLECT_AND_DIR,
		    "Remove collect %s and associated directory?"), $id);
		if (cdat::yes_no(0)) {
		    # A ".." component would let rm leave the repository;
		    # refuse to delete and report it as a removal failure.
		    my $escapes = grep { $_ eq '..' } split(/\/+/, $location);
		    if (!$escapes && remove_directory("$path/$location")) {
			$nfixed++;	# this error has been corrected
		    } else {
			printf(STDERR catgets(MSG_CANNOT_REMOVE_DIR,
			    "Cannot remove directory %s.\n"),
			    "$path/$location");
		    }
		    # the collect leaves the XML file even when the
		    # directory could not be removed
		    $collect->unbindNode();
		    $updatexml = 1;
		    $dropped = 1;
		}
	    }
	    # A collect that stays in the XML file still references its
	    # directory, so it must not be reported as an orphan below.
	    $referenced{$location} = 1 if (!$dropped);
	    next;
	}

	# mark the directory pointed by <location> as being referenced
	$referenced{$location} = 1;
    }

    if ($updatexml) {
	# rewrite the file in place through the locked handle
	printf(catgets(MSG_UPDATING, "Updating %s.\n"), "$path/$XMLFILE");
	seek($fh, 0, Fcntl::SEEK_SET);
	truncate($fh, 0);
	$tree->toFH($fh, 1);
    }

    #
    # Check for orphan directories in repository.
    #
    my $dh;
    if (!opendir($dh, $path)) {
	printf(STDERR catgets(MSG_CANNOT_LIST_DIR,
	    "Cannot list directory %s.\n"), $path);
	exit(1);
    }
    while (defined(my $dir = readdir($dh))) {
	next if ($dir eq '.' || $dir eq '..' || $dir eq "lost+found" ||
		 ! -d "$path/$dir");

	# check if directory is referenced from XML
	next if ($referenced{$dir});

	$nerrors++;
	printf(catgets(MSG_DANGLING_DIR,
	    "Directory %s is not referenced by any collect.\n"), "$path/$dir");
	if ($correct) {
	    printf(catgets(MSG_REMOVE_DIR,
		"Remove directory %s?"), "$path/$dir");
	    if (cdat::yes_no(0)) {
		if (remove_directory("$path/$dir")) {
		    $nfixed++;	# this error has been corrected
		} else {
		    printf(STDERR catgets(MSG_CANNOT_REMOVE_DIR,
			"Cannot remove directory %s.\n"), "$path/$dir");
		}
	    }
	}
    }
    closedir($dh);

    DEBUG(1, "$nerrors inconsistencies found, $nfixed fixed\n");

    # exit with status code 2 if inconsistencies were found (and not fixed)
    exit(2) if ($nerrors > $nfixed);

    if ($nerrors > 0) {
	# inconsistencies were found, but fixed
	printf(catgets(MSG_REPOSITORY_REPAIRED, "Repository repaired.\n"));
    } else {
	# no inconsistencies were found
	printf(catgets(MSG_REPOSITORY_OK, "Repository is valid.\n"));
    }
}

######################################################################
# Function:	drop_collect_on_request
# Purpose:	Ask whether a collect must be removed from the XML file.
# Tasks:	Print the confirmation prompt, read the answer with
#		cdat::yes_no() and detach the element when confirmed.
# Input:	$collect - the <collect> element
#		$id      - value of its id attribute (used in the prompt)
# Output:	1 if the element was detached from the document, else 0
######################################################################
sub drop_collect_on_request
{
    my ($collect, $id) = @_;

    printf(catgets(MSG_REMOVE_COLLECT,
	"Remove collect %s from XML file?"), $id);
    return 0 if (!cdat::yes_no(0));

    # unbindNode detaches the element from whatever its parent is, so it
    # also works when the collect is not a direct child of the root.
    $collect->unbindNode();
    return 1;
}

######################################################################
# Function:	remove_directory
# Purpose:	Recursively remove a directory.
# Tasks:	Run "rm -fr" on the directory without going through a
#		shell.
# Input:	$dir - path of the directory to remove
# Output:	True if rm exited with status 0, false otherwise
######################################################################
sub remove_directory
{
    my ($dir) = @_;

    # List form of system: the name is handed to rm as a single argument,
    # so quotes, "$" or backquotes in it are never interpreted.
    system('rm', '-fr', $dir);
    return ($? == 0);
}

# Run the check subcommand.
main;