#!/usr/bin/perl
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
# bos720 src/bos/usr/bin/cdat/cdat-collect.pl 1.8 
#  
# Licensed Materials - Property of IBM 
#  
# COPYRIGHT International Business Machines Corp. 2010,2011 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 
# @(#)22    1.8  src/bos/usr/bin/cdat/cdat-collect.pl, cdat, bos720 7/15/11 03:44:39
use warnings;
use strict;
use Getopt::Long;
use XML::LibXML;
use POSIX;		# needed for fork, exec, wait
use Fcntl ':flock';	# LOCK_* constants
use cdat;
use messages;

#
# Constants.
#
# Name of the repository index file, looked up under the cdat root
# directory.
my $XMLFILE = 'cdat.xml';
# Status of a node for the current collect type.  Any other value is the
# exit status reported for a collect script on that node.
use constant {
    SUCCESS     => 0,
    IGNORED     => 1,
    UNREACHABLE => 2,
};

#
# Globals.
#
# Collect phases, in the order do_collect runs them.
our @phases = ('check', 'init', 'execute', 'terminate', 'grab', 'clean');
# Nodes taking part in the collect, keyed by host name.
our %nodes;
# Collect type being processed and its option string.
our $collecttype;
our $collectopts;
# Phase being executed; read by the signal handler.
our $phase;
# Default verbosity level is set to 1
my $verbose = 1;
# Do not automatically extend the size of the filesystem by default
my $growfs;
# Default remote user (-u) and PMR number (-p).
my $user;
my $pmr;
# Directory of the current collect and directory holding the scripts of
# the current collect type.
my $dir;
my $scriptsdir;
# collect.xml document, <collect> element of cdat.xml and <node-list>
# element of collect.xml.
my $tree2;
my $collect_elt;
my $node_list_elt;
# True when the collect id does not exist yet in cdat.xml.
my $isnew;
# Known collect types, mapped to the directory holding their script.
my %scripts;

######################################################################
# Function:	usage
# Purpose:	Tell the user how "cdat collect" must be invoked.
# Tasks:	Write the usage text returned by catgets (an English
#		default is supplied) to stderr, then exit with status 1.
# Input:	None (arguments passed by GetOptions are ignored)
# Output:	None (does not return)
######################################################################
sub usage
{
    # English text handed to catgets as the default message
    my $default_text = join('',
        "Usage: cdat collect -h\n",
        "       cdat collect [-gqv] [-i Id] [-p PMR] [-m Comment] [-u User]\n",
        "                    -t Type ... -n Type:[User@]Node ...\n",
        "       cdat collect [-gqv] [-i Id] [-p PMR] [-m Comment] [-u User]\n",
        "                    -t Type ... -f File ...\n");

    printf(STDERR catgets(MSG_CDAT_COLLECT_USAGE, $default_text));
    exit(1);
}

######################################################################
# Function:	INT_handler
# Purpose:	Handler installed for the termination signals.
# Tasks:	If a collect phase is in progress, run the cleanup
#		phases on the nodes before exiting with status 2.
# Input:	None
# Output:	None (does not return)
######################################################################
sub INT_handler
{
    # give every signal routed here its default disposition back, so a
    # second signal received during cleanup is not handled by this code
    foreach my $signame (qw(INT KILL HUP TERM ABRT QUIT)) {
        $SIG{$signame} = 'DEFAULT';
    }

    # no phase is running: nothing has to be undone
    if (!defined($phase)) {
        exit(2);
    }

    printf(catgets(MSG_COLLECT_INTERRUPTED,
        "Collect interrupted during phase \"%s\" of collect type \"%s\".\n"),
        $phase, $collecttype);
    printf(catgets(MSG_WAITING_FOR_CLEANUP, "Waiting for cleanup...\n"));

    # "clean" is always run; "terminate" comes first, but only when the
    # "execute" phase was the one interrupted
    my @cleanup_phases = ('clean');
    if ($phase eq 'execute') {
        unshift(@cleanup_phases, 'terminate');
    }
    foreach my $cleanup_phase (@cleanup_phases) {
        do_phase($cleanup_phase);
    }

    printf(catgets(MSG_DONE, "Done.\n"));
    close(LOG);
    # leave now: the XML files are deliberately not updated
    exit(2);
}

######################################################################
# Function:	mlog
# Purpose:	Record a message.
# Tasks:	Append the message, prefixed with a local-time stamp,
#		to the LOG file handle and flush it; also print the
#		message on stderr when its debug level does not exceed
#		the verbosity level.
# Input:	Debug level, message to log (several pieces are joined
#		with a space)
# Output:	None
######################################################################
sub mlog
{
    my ($level, @pieces) = @_;
    my $text = "@pieces";

    # localtime fields: 0=sec 1=min 2=hour 3=mday 4=mon 5=year
    my @now = localtime(time);
    my $stamp = sprintf("%4d-%02d-%02d %02d:%02d:%02d",
        1900 + $now[5], 1 + $now[4], $now[3], $now[2], $now[1], $now[0]);

    print(LOG "$stamp: $text\n");
    LOG->flush();

    if ($level <= $verbose) {
        print(STDERR "$text\n");
    }
}

######################################################################
# Function:	do_collect
# Purpose:	Perform a collect of the type held in $collecttype.
# Tasks:	Split the "Type[,Options]" argument, create the collect
#		type and per-node directories, run every phase on all
#		nodes, record the per-node status in collect.xml and
#		print a status report.
# Input:	None (works on the globals $collecttype, %nodes, $dir,
#		$isnew, $collect_elt and $tree2)
# Output:	None
######################################################################
sub do_collect
{
    # Extract collect arguments (if any).  split() returns nothing for
    # an empty -t argument, so fall back on empty strings instead of
    # letting undef reach the messages and hash lookups below.
    my ($type, $opts) = split(/,/, $collecttype, 2);
    $collecttype = defined($type) ? $type : "";
    $collectopts = defined($opts) ? $opts : "";

    mlog(1, "Starting collect type \"$collecttype\"");

    # check that this collect type is defined
    $scriptsdir = $scripts{$collecttype};
    if (!defined($scriptsdir)) {
        mlog(1, "Collect type \"$collecttype\" not found, skipping");
        return;	# skip this collect type
    }

    if ($isnew || ! -d "$dir/$collecttype") {
        # create a subdirectory for this collect type
        mlog(2, "Creating directory \"$dir/$collecttype\"");
        if (!mkdir("$dir/$collecttype")) {
            # mlog appends the newline itself
            mlog(1, "could not create directory $dir/$collecttype: $!");
            return;	# skip this collect type
        }
        $collect_elt->appendTextChild('type', $collecttype);
    }

    # create one subdirectory per node and initialize nodes status
    while (my ($name, $node) = each(%nodes)) {
        # skip unreachable nodes
        next if ($node->{status} == UNREACHABLE);
        # initialize the status for this node/collect type to SUCCESS
        $node->{status} = SUCCESS;

        # create subdirectory if it does not already exist
        next if (!$isnew && -d "$dir/$collecttype/$name");
        mlog(2, "Creating directory \"$dir/$collecttype/$name\"");
        if (!mkdir("$dir/$collecttype/$name")) {
            mlog(1, "could not create directory for $name: $!");
            $node->{status} = IGNORED;	# skip this node
        }
    }

    # Run all phases.  The loop variable is the package global $phase so
    # that the signal handler can tell which phase was interrupted.
    foreach $phase (@phases) {
        do_phase($phase);
    }

    # All phases for this collect type are now terminated, complete the
    # node XML elements with the exit status information for each node.
    my @node_list_elt = $tree2->getElementsByTagName('node');
    foreach my $node_elt (@node_list_elt) {
        my $name = $node_elt->getAttribute('hostname');
        next if !defined($name);	# should not happen
        my $node = $nodes{$name};

        my $status_elt = $tree2->createElement('status');
        $status_elt->setAttribute('type', $collecttype);
        $status_elt->appendTextNode($node->{status});
        $node_elt->appendChild($status_elt);
    }

    printf(catgets(MSG_COLLECT_RESULTS,
        "Collect type \"%s\" done, see results in \"%s\".\n"),
        $collecttype, "$dir/$collecttype/");

    printf(catgets(MSG_STATUS_REPORT,
        "==============\n".
        "Status report:\n".
        "==============\n"));
    foreach my $name (sort(keys %nodes)) {
        my $status = $nodes{$name}{status};
        print("$name: ");
        if ($status == UNREACHABLE) {
            printf(catgets(MSG_UNREACHABLE, "UNREACHABLE\n"));
        } elsif ($status == IGNORED) {
            printf(catgets(MSG_IGNORED, "IGNORED\n"));
        } elsif ($status == SUCCESS) {
            printf(catgets(MSG_SUCCEEDED, "SUCCEEDED\n"));
        } else {
            # any other value is the status saved by do_phase
            printf(catgets(MSG_FAILED, "FAILED (%d)\n"), $status);
        }
    }
}

######################################################################
# Function:	do_phase
# Purpose:	Execute the specified phase for the current collect
#		type.
# Tasks:	Fork one child per eligible node, run the collect
#		script in it with the CDAT_* environment set and its
#		output appended to the node's logs.txt, then wait for
#		all children and record their status.
# Input:	Phase name
# Output:	None (updates the status and phase fields of %nodes)
######################################################################
sub do_phase
{
    my ($phase) = @_;
    my $rc;

    my %pid2host;
    # Iterate over a snapshot of the keys rather than with each(): this
    # function is also called from the signal handler, possibly while an
    # each() loop over %nodes is suspended half-way, and each() would
    # then resume from that position and skip nodes.
    foreach my $name (keys(%nodes)) {
        my $node = $nodes{$name};

        # skip this node/phase if the node is marked as IGNORE
        next if ($node->{status} == IGNORED || $node->{status} == UNREACHABLE);

        if ($node->{status} != SUCCESS) {
            # do the terminate phase only if the node failed in the
            # execute phase; always do the clean phase if the check
            # phase succeeded
            next if ($node->{phase} eq 'check');
            next if ($phase ne 'clean' &&
                ($phase ne 'terminate' || $node->{phase} ne 'execute'));
        }

        my $pid = fork();
        if (!defined($pid)) {
            printf(STDERR catgets(MSG_CANNOT_FORK,
                "Cannot run %s phase %s for %s (fork).\n"),
                $collecttype, $phase, $name);
            # ignore this node in next phases
            $node->{status} = IGNORED;

        } elsif ($pid == 0) {	# child process
            my $node_dir = "$dir/$collecttype/$name";

            # change working directory to the node subdirectory
            $rc = chdir($node_dir);
            if (!$rc) {
                printf(STDERR catgets(MSG_CANNOT_CHANGE_WORKING_DIR,
                    "Cannot change working directory to %s.\n"),
                    "$collecttype/$name");
                # exit from child with status code "IGNORE"
                exit(IGNORED);
            }

            # open the log file (three-argument form: the path is never
            # parsed for a mode)
            my $node_log;
            $rc = open($node_log, '>>', "$node_dir/logs.txt");
            if (!$rc) {
                printf(STDERR catgets(MSG_CANNOT_OPEN_LOG_FILE,
                    "Cannot open log file %s.\n"),
                    "$node_dir/logs.txt");
                # exit from child with status code "IGNORE"
                exit(IGNORED);
            }
            print {$node_log} "*** \"$phase\" phase ***\n";
            print {$node_log} "Running \"$scriptsdir/$collecttype $collectopts\"\n";

            # redirect stdout and stderr to the log file
            $rc = open(STDOUT, '>&', $node_log);
            if (!$rc) {
                printf(STDERR catgets(MSG_CANNOT_REDIRECT_STDOUT,
                    "Cannot redirect standard output.\n"));
                # exit from child with status code "IGNORE"
                exit(IGNORED);
            }
            $rc = open(STDERR, '>&', \*STDOUT);
            if (!$rc) {
                printf(STDERR catgets(MSG_CANNOT_REDIRECT_STDERR,
                    "Cannot redirect standard error output.\n"));
                # exit from child with status code "IGNORE"
                exit(IGNORED);
            }

            # set environment variables for script
            $ENV{CDAT_PHASE} = "$phase";
            $ENV{CDAT_TYPE} = "$node->{type}";
            $ENV{CDAT_HOST} = "$name";
            $ENV{CDAT_USER} = "$node->{user}";
            $ENV{CDAT_SRVC_DIR} = '/usr/lib/cdat/helpers';
            $ENV{CDAT_TYPE_DIR} = "$scriptsdir";
            $ENV{CDAT_DEST_DIR} = $node_dir;
            $ENV{CDAT_PMR} = $pmr if (defined($pmr));
            $ENV{CDAT_GROWFS} = '1' if (defined($growfs));

            # Run collect script.
            # NOTE(review): single-string exec, so perl hands the line
            # to the shell whenever it contains shell metacharacters;
            # the options given after "Type," are therefore split (and
            # interpreted) by the shell.
            exec("$scriptsdir/$collecttype $collectopts") or
                print {$node_log} "Exec failed.\n";
            # exit from child with status code "IGNORE"
            exit(IGNORED);

        } else {		# parent process
            mlog(2, "Starting \"$phase\" phase on $name ($node->{type}): pid $pid");
            $pid2host{$pid} = $name;
        }
    }

    # wait for all child processes forked above to terminate
    mlog(3, "Waiting for children to terminate") if (keys(%pid2host) != 0);
    while ((my $pid = wait()) != -1) {
        # save the wait status before anything else can overwrite $?
        my $wait_status = $?;

        # find the node corresponding to this PID
        my $name = $pid2host{$pid};
        next if !defined($name);	# should not happen
        my $node = $nodes{$name};

        # The low 7 bits of $? hold the signal that killed the child,
        # the high byte its exit status.  A killed child has an exit
        # status of 0 and must not be mistaken for a success: report it
        # as 128 + signal number, which cannot collide with SUCCESS,
        # IGNORED or UNREACHABLE.
        my $status;
        my $signo = $wait_status & 127;
        if ($signo != 0) {
            $status = 128 + $signo;
            mlog(3, "pid $pid ($name) terminated by signal $signo");
        } else {
            $status = $wait_status >> 8;
            mlog(3, "pid $pid ($name) terminated with exit status $status");
        }

        my $errors_file = "$dir/$collecttype/$name/errors.txt";
        if ($status != SUCCESS && -f $errors_file) {
            if (open(my $fh, '<', $errors_file)) {
                mlog(1, "Phase \"$phase\" of collect type \"$collecttype\" failed for node $name:");
                mlog(1, "### BEGIN REASON");
                while (my $errstr = <$fh>) {
                    chomp($errstr);
                    mlog(1, $errstr);
                }
                close($fh);
                mlog(1, "### END REASON");
                # consumed: do not report it again for a later phase
                unlink($errors_file);
            }
        }

        # save exit status for this node/collect type (if not in fail path)
        if ($node->{status} == SUCCESS) {
            $node->{status} = $status;
            $node->{phase} = $phase;
        }
        mlog(2, "Ignoring $name in next phases") if ($status == IGNORED);
    }
}

######################################################################
# Function:	create_node_info
# Purpose:	Describe one node in the collect.xml document.
# Tasks:	Build a <node> element for the node, fill it with what
#		cdat::collect_node_info returns and append it to the
#		node list.  A node for which no time zone is returned
#		is marked UNREACHABLE and left out of the list.
# Input:	Node name, node hash reference (type, user)
# Output:	None (sets the user and status fields of the node)
######################################################################
sub create_node_info
{
    my ($name, $node) = @_;

    # no user@ specified for this node, use -u (or default)
    $node->{user} = $user if (!defined($node->{user}));

    my $node_elt = $tree2->createElement('node');
    unless (defined($node_elt)) {
        printf(STDERR catgets(MSG_CANNOT_CREATE_XML_ELEMENT,
            "Cannot create XML element for node %s, skipping this node.\n"),
            $name);
        # ignore this node for all collect types!
        delete($nodes{$name});
        return;
    }

    $node_elt->setAttribute('hostname', $name);
    $node_elt->appendTextChild('node-type', $node->{type});
    $node_elt->appendTextChild('user', $node->{user});

    mlog(2, "Retrieving node information for $name");
    my %info = cdat::collect_node_info($node->{type}, $node->{user}, $name);

    # the time zone is the one mandatory piece of information
    if (!defined($info{tz})) {
        mlog(1, "Could not retrieve node information from $name");
        $node->{status} = UNREACHABLE;
        return;
    }
    $node_elt->appendTextChild('timezone', $info{tz});
    $node->{status} = SUCCESS;

    # optional fields (lpar_id is for LPAR/VIOS only)
    foreach my $field ('machine_id', 'lpar_id') {
        next if (!defined($info{$field}));
        $node_elt->appendTextChild($field, $info{$field});
    }

    $node_list_elt->appendChild($node_elt);
}

######################################################################
# Function:	main
# Purpose:	Entry point of the collect subcommand.
# Tasks:	Parse the command line, validate the PMR number, lock
#		and parse cdat.xml, create or reuse the collect
#		directory, gather node information, run every requested
#		collect type, then write collect.xml and the updated
#		cdat.xml.
# Input:	None (options are read from @ARGV)
# Output:	None (always terminates the process: exit status 0 on
#		completion, 1 on usage or input errors, 2 on setup
#		errors)
######################################################################
sub main
{
    my (@filenames, @rawnodes, @collecttypes);
    my ($rc, $comment, $collectid, $quiet);

    # A PMR reads "PMR#,BRANCH#,COUNTRY#".  The pattern is anchored at
    # both ends so that a valid number buried in other text is rejected
    # instead of being stored in the XML and exported to the scripts.
    my $pmr_re = qr/\A\d{5,}(?:,[0-9A-Za-z]{3,}){2}\z/;

    # Parse command line options
    Getopt::Long::Configure('bundling', 'no_ignore_case');
    $rc = GetOptions(
        'h'   => \&usage,
        'n=s' => \@rawnodes,
        't=s' => \@collecttypes,
        'i=s' => \$collectid,
        'p=s' => \$pmr,
        'm=s' => \$comment,
        'u=s' => \$user,
        'f=s' => \@filenames,
        'v+'  => \$verbose,
        'q'   => \$quiet,
        'g'   => \$growfs
        );
    if (!$rc || @collecttypes == 0 || @ARGV != 0) {
        usage();
    }

    # make sure we have -n or -f, but not both
    my $have_files = (@filenames != 0) ? 1 : 0;
    my $have_nodes = (@rawnodes != 0) ? 1 : 0;
    if ($have_files == $have_nodes) {
        usage();
    }

    cdat::switch_user();

    if ($have_files) {
        %nodes = cdat::read_nodes_from_files(@filenames);
    } else {
        %nodes = cdat::read_nodes_from_array(@rawnodes);
    }
    if (!%nodes) {
        printf(STDERR catgets(MSG_NO_NODES_TO_CONNECT,
            "No nodes to connect to.\n"));
        exit(1);
    }

    # Get the list of defined collect types
    %scripts = cdat::get_collecttypes();

    # $try is both the "prompt for a PMR" flag and the attempt counter
    my $try = 0;
    if (defined($pmr)) {
        # validate PMR number
        if ($pmr !~ $pmr_re) {
            printf(STDERR catgets(MSG_INVALID_PMR,
                "Invalid PMR '%s', format should be 'PMR#,BRANCH#,COUNTRY#'.\n"), $pmr);
            exit(1);
        }
    } elsif (!$quiet) {
        # option -p was not specified and no -q, ask user
        printf(catgets(MSG_IBM_SUPPORT, "Is the collect for IBM support?"));
        $try = cdat::yes_no(1);
    }
    while ($try) {
        printf(catgets(MSG_PLEASE_ENTER_PMR_NUMBER,
            "Please enter a PMR number: "));
        $pmr = <STDIN>;
        if (!defined($pmr)) {
            # end of input: nobody can answer the prompt any more
            printf(STDERR catgets(MSG_INVALID_PMR,
                "Invalid PMR '%s', format should be 'PMR#,BRANCH#,COUNTRY#'.\n"), '');
            exit(1);
        }
        chomp($pmr);

        # validate PMR number
        if ($pmr !~ $pmr_re) {
            printf(STDERR catgets(MSG_INVALID_PMR,
                "Invalid PMR '%s', format should be 'PMR#,BRANCH#,COUNTRY#'.\n"), $pmr);
            exit(1) if ($try++ == 3);
        } else {
            last;
        }
    }

    my $root_dir = cdat::odm_get_path();
    if (!defined($root_dir)) {
        printf(STDERR catgets(MSG_DIR_NOT_DEFINED,
            "The cdat directory path is not defined.\n".
            "Please, run 'cdat init' first.\n"));
        exit(2);
    }

    if (!defined($user)) {
        $user = cdat::odm_get_user();
    }

    my $fh;
    if (!open($fh, "+<", "$root_dir/$XMLFILE")) {
        printf(STDERR catgets(MSG_CANNOT_OPEN,
            "Cannot open %s.\n"), "$root_dir/$XMLFILE");
        exit(2);
    }
    # Try to acquire exclusive write access to the XML file (but do not block)
    if (!flock($fh, LOCK_EX | LOCK_NB)) {
        printf(STDERR catgets(MSG_BUSY,
            "Another instance of the cdat command is running.\n".
            "Please wait until this instance terminates.\n"));
        exit(2);
    }

    my $parser = XML::LibXML->new();
    if (!defined($parser)) {
        printf(STDERR catgets(MSG_CANNOT_CREATE_XML_PARSER,
            "Cannot create XML parser.\n"));
        exit(2);
    }
    $parser->keep_blanks(0);

    # The parser reports malformed input by throwing an exception; trap
    # it so that the user gets the regular error message.
    my $tree = eval { $parser->parse_fh($fh) };
    if (!defined($tree)) {
        printf(STDERR catgets(MSG_CANNOT_PARSE,
            "Cannot parse %s.\n"), "$root_dir/$XMLFILE");
        exit(2);
    }
    my $root = $tree->getDocumentElement;

    my $collect_dir;
    if (!defined($collectid)) {
        # no -i specified, generate a unique collect id (max of all
        # existing collect ids + 1)
        $collectid = 0;
        foreach my $elt ($root->getElementsByTagName('collect')) {
            my $id = $elt->getAttribute('id');
            next if (!defined($id));	# malformed entry, no id
            $collectid = $id if ($id =~ /^\d+$/ && $id > $collectid);
        }
        $collectid++;
        $collect_dir = sprintf("%08u", $collectid);
        $isnew = 1;
    } else {
        # check if the specified id already exists (recurrent collect)
        $isnew = 1;	# assume it does not
        foreach my $elt ($root->getElementsByTagName('collect')) {
            my $id = $elt->getAttribute('id');
            next if (!defined($id));	# malformed entry, no id
            if ($collectid eq $id) {
                $collect_elt = $elt;
                $isnew = 0;
                last;
            }
        }
        # refuse the creation of collect ids consisting in digits only
        # since these are already used by the framework.
        if ($isnew && $collectid =~ /^\d+$/) {
            printf(STDERR catgets(MSG_NO_COLLECT_FOUND_FOR_ID,
                "No collect found for id %s.\n"), $collectid);
            exit(2);
        }
        $collect_dir = $collectid;
    }

    $dir = "$root_dir/$collect_dir";
    if ($isnew) {
        if (-e "$dir") {
            printf(STDERR catgets(MSG_DIRECTORY_EXISTS,
                "Directory %s already exists, please run '%s' to repair the repository.\n"),
                $dir, "cdat check -d");
            exit(2);
        }
        # create collect directory
        $rc = mkdir($dir);
        if (!$rc) {
            printf(STDERR catgets(MSG_CANNOT_CREATE_DIR,
                "Cannot create directory %s.\n"), $dir);
            exit(2);
        }
    }

    # Open the log file (create it if necessary).  LOG stays a bareword
    # handle because mlog and INT_handler refer to it by that name.
    $rc = open(LOG, '>>', "$dir/logs.txt");
    if (!$rc) {
        printf(STDERR catgets(MSG_CANNOT_CREATE_LOG_FILE,
            "Cannot create log file %s.\n"), "$dir/logs.txt");
        rmdir($dir) if $isnew;
        exit(2);
    }

    printf(STDERR catgets(MSG_SEE_FILE_FOR_DETAILED_STATUS,
        "See file %s for detailed status.\n"), "$dir/logs.txt");

    #
    # Update the cdat.xml file (add a new collect element if necessary).
    #
    if ($isnew) {
        $collect_elt = $tree->createElement('collect');
        $collect_elt->setAttribute('id', $collectid);
        $collect_elt->appendTextChild('location', "$collect_dir/");
    }

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    my $localtime_str = sprintf("%4d-%02d-%02dT%02d:%02d:%02d",
        1900 + $year, 1 + $mon, $mday, $hour, $min, $sec);
    my $date_elt = $tree->createElement('date');
    $date_elt->appendTextNode($localtime_str);
    my $date_placed = 0;
    if (!$isnew) {
        # replace previous date
        foreach my $old_date ($collect_elt->getElementsByTagName('date')) {
            $collect_elt->replaceChild($date_elt, $old_date);
            $date_placed = 1;
        }
    }
    # New collect, or existing collect without any date: the element
    # must be attached since it anchors the insertions below.
    $collect_elt->appendChild($date_elt) if (!$date_placed);

    if (defined($pmr)) {
        my $pmr_elt = $tree->createElement('pmr');
        $pmr_elt->appendTextNode($pmr);
        if (!$isnew) {
            # remove previous PMR number if it exists
            foreach my $old_pmr ($collect_elt->getElementsByTagName('pmr')) {
                $collect_elt->removeChild($old_pmr);
            }
        }
        $collect_elt->insertBefore($pmr_elt, $date_elt);
    }

    if (defined($comment)) {
        my $comment_elt = $tree->createElement('description');
        $comment_elt->appendTextNode($comment);
        if (!$isnew) {
            # remove previous description if it exists
            foreach my $old_desc ($collect_elt->getElementsByTagName('description')) {
                $collect_elt->removeChild($old_desc);
            }
        }
        $collect_elt->insertAfter($comment_elt, $date_elt);
    }

    #
    # Create a new collect.xml file.
    #
    mlog(2, "Creating \"$dir/collect.xml\"");
    $tree2 = XML::LibXML->createDocument('1.0', cdat::locale_charmap);
    if (!defined($tree2)) {
        mlog(1, "Cannot create \"$dir/collect.xml\"");
        exit(2);
    }

    # sequentially collect information from the nodes
    $node_list_elt = $tree2->createElement('node-list');
    while (my ($name, $node) = each(%nodes)) {
        create_node_info($name, $node);
    }
    $tree2->setDocumentElement($node_list_elt);

    # Install signal handler before starting collects.  KILL cannot
    # actually be caught; it is listed to mirror what INT_handler resets.
    foreach my $signame (qw(INT KILL HUP TERM ABRT QUIT)) {
        $SIG{$signame} = \&INT_handler;
    }

    #
    # Start collects.  The loop variable is the package global
    # $collecttype, which do_collect, do_phase and INT_handler read.
    #
    foreach $collecttype (@collecttypes) {
        do_collect();
    }

    # Dump the collect XML document to the collect.xml file
    $tree2->toFile("$dir/collect.xml", 1);

    if ($isnew) {
        # Add the new XML collect element to the cdat.xml file
        $root->appendChild($collect_elt);
    }
    # Dump the cdat XML document to the cdat.xml file: rewind and empty
    # the locked handle first, and do not write if either step failed.
    if (!seek($fh, 0, Fcntl::SEEK_SET) || !truncate($fh, 0)) {
        mlog(1, "Cannot rewrite \"$root_dir/$XMLFILE\": $!");
        exit(2);
    }
    $tree->toFH($fh, 1);
    exit(0);
}

# Run the subcommand; main() terminates the process itself.
main();