#!/usr/bin/perl
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# bos720 src/bos/usr/bin/cdat/cdat-collect.pl 1.8
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2010,2011
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#)22 1.8 src/bos/usr/bin/cdat/cdat-collect.pl, cdat, bos720 7/15/11 03:44:39

use warnings;
use strict;

use Getopt::Long;
use XML::LibXML;
use POSIX;              # needed for fork, exec, wait
use Fcntl ':flock';     # LOCK_* constants

use cdat;
use messages;

#
# Constants.
#
my $XMLFILE = 'cdat.xml';

# status of collects
use constant {
    SUCCESS     => 0,
    IGNORED     => 1,
    UNREACHABLE => 2,
};

#
# Globals.
#
our @phases = qw(check init execute terminate grab clean);
our (%nodes, $collecttype, $collectopts, $phase);

# Default verbosity level is set to 1
my $verbose = 1;
# Do not automatically extend the size of the filesystem by default
my $growfs;
my ($user, $pmr, $dir, $scriptsdir, $tree2, $collect_elt, $node_list_elt);
my $isnew;
my %scripts;

######################################################################
# Function: usage
# Purpose:  Display usage.
# Tasks:    Write the (localized) usage text to stderr, then leave
#           the program with exit status 1.
# Input:    None
# Output:   None (does not return)
######################################################################
sub usage {
    # default text handed to catgets together with the message id
    my $default_text = join('',
        "Usage: cdat collect -h\n",
        " cdat collect [-gqv] [-i Id] [-p PMR] [-m Comment] [-u User]\n",
        " -t Type ... -n Type:[User@]Node ...\n",
        " cdat collect [-gqv] [-i Id] [-p PMR] [-m Comment] [-u User]\n",
        " -t Type ... -f File ...\n");

    my $text = catgets(MSG_CDAT_COLLECT_USAGE, $default_text);
    printf(STDERR $text);
    exit(1);
}

######################################################################
# Function: INT_handler
# Purpose:  Signal SIGINT handler.
# Tasks:    Make sure cleanup is performed on the remote nodes if
#           a collect is in progress.
# Input:    None
# Output:   None
######################################################################
sub INT_handler {
    # Put the default disposition back for every signal this handler
    # may have been installed on.
    $SIG{$_} = 'DEFAULT' for qw(INT KILL HUP TERM ABRT QUIT);

    # $phase is only defined while do_collect is looping over the
    # phases; outside of that there is nothing to clean up.
    if (!defined($phase)) {
        exit(2);
    }

    printf(catgets(MSG_COLLECT_INTERRUPTED,
        "Collect interrupted during phase \"%s\" of collect type \"%s\".\n"),
        $phase, $collecttype);
    printf(catgets(MSG_WAITING_FOR_CLEANUP, "Waiting for cleanup...\n"));

    # the "terminate" phase is only run when "execute" was interrupted
    if ($phase eq 'execute') {
        do_phase('terminate');
    }

    # the "clean" phase is always run
    do_phase('clean');

    printf(catgets(MSG_DONE, "Done.\n"));
    close(LOG);

    # exit without updating the XML
    exit(2);
}

######################################################################
# Function: mlog
# Purpose:  Log a message.
# Tasks:    Append the message, prefixed with a local timestamp, to
#           the LOG file handle and flush it; echo the message on
#           stderr as well when the verbosity level is greater than
#           or equal to the debug level of the message.
# Input:    Debug level, message to log (remaining arguments are
#           joined with a single space)
# Output:   None
######################################################################
sub mlog {
    my ($level, @words) = @_;

    my $text = "@words";
    # localtime fields: 0=sec 1=min 2=hour 3=mday 4=mon 5=year
    my @now = localtime(time);

    printf(LOG "%4d-%02d-%02d %02d:%02d:%02d: %s\n",
        1900 + $now[5], 1 + $now[4], $now[3],
        $now[2], $now[1], $now[0], $text);
    LOG->flush();

    if ($verbose >= $level) {
        print(STDERR "$text\n");
    }
}

######################################################################
# Function: do_collect
# Purpose:  Perform a collect of the specified type.
# Tasks:    Split the optional arguments off the collect type, create
#           the result directories, run every phase in order, then
#           record and print the per-node status.
# Input:    None (works on the global $collecttype)
# Output:   None
######################################################################
sub do_collect {
    # "type,options": everything after the first comma is handed to
    # the collect script as its options
    my @parts = split(/,/, $collecttype, 2);
    ($collecttype, $collectopts) = @parts;
    if (!defined($collectopts)) {
        $collectopts = "";
    }

    mlog(1, "Starting collect type \"$collecttype\"");

    # the collect type must be one of the known ones
    $scriptsdir = $scripts{$collecttype};
    unless (defined($scriptsdir)) {
        mlog(1, "Collect type \"$collecttype\" not found, skipping");
        return;     # skip this collect type
    }

    my $type_dir = "$dir/$collecttype";
    my $need_type_dir = ($isnew || !-d $type_dir);
    if ($need_type_dir) {
        # one subdirectory per collect type
        mlog(2, "Creating directory \"$type_dir\"");
        unless (mkdir($type_dir)) {
            mlog(1, "could not create directory $type_dir\n");
            return;     # skip this collect type
        }
        $collect_elt->appendTextChild('type', $collecttype);
    }

    # one subdirectory per node; reset the status of reachable nodes
    foreach my $name (keys(%nodes)) {
        my $node = $nodes{$name};

        # unreachable nodes stay unreachable for every collect type
        next if ($node->{status} == UNREACHABLE);

        # every other node starts this collect type as SUCCESS
        $node->{status} = SUCCESS;

        my $node_dir = "$type_dir/$name";
        # nothing to create when re-running into an existing directory
        next if (!$isnew && -d $node_dir);

        mlog(2, "Creating directory \"$node_dir\"");
        unless (mkdir($node_dir)) {
            mlog(1, "could not create directory for $name\n");
            $node->{status} = IGNORED;      # skip this node
        }
    }

    # Run all phases. The loop variable is the package global $phase
    # on purpose: INT_handler reads it to know what was interrupted.
    foreach $phase (@phases) {
        do_phase($phase);
    }

    # All phases are over for this collect type: add one <status>
    # child, tagged with the collect type, to each <node> element.
    foreach my $node_elt ($tree2->getElementsByTagName('node')) {
        my $name = $node_elt->getAttribute('hostname');
        next if !defined($name);    # should not happen

        my $status_elt = $tree2->createElement('status');
        $status_elt->setAttribute('type', $collecttype);
        $status_elt->appendTextNode($nodes{$name}->{status});
        $node_elt->appendChild($status_elt);
    }

    printf(catgets(MSG_COLLECT_RESULTS,
        "Collect type \"%s\" done, see results in \"%s\".\n"),
        $collecttype, "$type_dir/");
    printf(catgets(MSG_STATUS_REPORT,
        "==============\n".
        "Status report:\n".
        "==============\n"));

    # one line per node, sorted by node name
    foreach my $name (sort(keys %nodes)) {
        my $status = $nodes{$name}{status};

        print("$name: ");
        if ($status == SUCCESS) {
            printf(catgets(MSG_SUCCEEDED, "SUCCEEDED\n"));
        }
        elsif ($status == IGNORED) {
            printf(catgets(MSG_IGNORED, "IGNORED\n"));
        }
        elsif ($status == UNREACHABLE) {
            printf(catgets(MSG_UNREACHABLE, "UNREACHABLE\n"));
        }
        else {
            printf(catgets(MSG_FAILED, "FAILED (%d)\n"), $status);
        }
    }
}

######################################################################
# Function: do_phase
# Purpose:  Execute the specified phase for the current collect
#           type.
# Tasks:    Fork one child per eligible node, run the collect script
#           in each child with stdout/stderr appended to the node's
#           logs.txt, then wait for all children and record their
#           exit status in the node entry.
# Input:    Phase name
# Output:   None
# Notes:    A child that is terminated by a signal is recorded with
#           status 128 + signal number (shell convention) instead of
#           being mistaken for a successful run.
######################################################################
sub do_phase {
    my ($phase) = @_;
    my %pid2host;

    while (my ($name, $node) = each(%nodes)) {
        # skip this node/phase if the node is marked as IGNORED or
        # UNREACHABLE
        next if ($node->{status} == IGNORED
            || $node->{status} == UNREACHABLE);

        if ($node->{status} != SUCCESS) {
            # do the terminate phase only if the node failed in the
            # execute phase; always do the clean phase if the check
            # phase succeeded
            next if ($node->{phase} eq 'check');
            next if ($phase ne 'clean'
                && ($phase ne 'terminate' || $node->{phase} ne 'execute'));
        }

        my $node_dir = "$dir/$collecttype/$name";

        my $pid = fork();
        if (!defined($pid)) {
            printf(STDERR catgets(MSG_CANNOT_FORK,
                "Cannot run %s phase %s for %s (fork).\n"),
                $collecttype, $phase, $name);
            # ignore this node in next phases
            $node->{status} = IGNORED;
        }
        elsif ($pid == 0) {
            # child process

            # change working directory to the node subdirectory
            if (!chdir($node_dir)) {
                printf(STDERR catgets(MSG_CANNOT_CHANGE_WORKING_DIR,
                    "Cannot change working directory to %s.\n"),
                    "$collecttype/$name");
                # exit from child with status code "IGNORED"
                exit(IGNORED);
            }

            # open the per-node log file; three-argument open so the
            # path is never interpreted as a mode
            my $log_fh;
            if (!open($log_fh, '>>', "$node_dir/logs.txt")) {
                printf(STDERR catgets(MSG_CANNOT_OPEN_LOG_FILE,
                    "Cannot open log file %s.\n"),
                    "$node_dir/logs.txt");
                exit(IGNORED);
            }
            print {$log_fh} "*** \"$phase\" phase ***\n";
            print {$log_fh}
                "Running \"$scriptsdir/$collecttype $collectopts\"\n";

            # redirect stdout and stderr to the log file
            if (!open(STDOUT, '>&', $log_fh)) {
                printf(STDERR catgets(MSG_CANNOT_REDIRECT_STDOUT,
                    "Cannot redirect standard output.\n"));
                exit(IGNORED);
            }
            if (!open(STDERR, '>&', \*STDOUT)) {
                printf(STDERR catgets(MSG_CANNOT_REDIRECT_STDERR,
                    "Cannot redirect standard error output.\n"));
                exit(IGNORED);
            }

            # set environment variables for script
            $ENV{CDAT_PHASE}    = "$phase";
            $ENV{CDAT_TYPE}     = "$node->{type}";
            $ENV{CDAT_HOST}     = "$name";
            $ENV{CDAT_USER}     = "$node->{user}";
            $ENV{CDAT_SRVC_DIR} = '/usr/lib/cdat/helpers';
            $ENV{CDAT_TYPE_DIR} = "$scriptsdir";
            $ENV{CDAT_DEST_DIR} = "$node_dir";
            $ENV{CDAT_PMR}      = $pmr if (defined($pmr));
            $ENV{CDAT_GROWFS}   = '1' if (defined($growfs));

            # Run the collect script. The single-string form is kept so
            # that the options given after "Type," are still split into
            # separate arguments.
            # NOTE(review): the options come from the -t command line
            # argument and may be interpreted by a shell - confirm this
            # is acceptable.
            exec("$scriptsdir/$collecttype $collectopts")
                or print {$log_fh} "Exec failed.\n";
            # exec failed: exit from child with status code "IGNORED"
            exit(IGNORED);
        }
        else {
            # parent process
            mlog(2, "Starting \"$phase\" phase on $name ($node->{type}): pid $pid");
            $pid2host{$pid} = $name;
        }
    }

    # wait for all child processes forked above to terminate
    mlog(3, "Waiting for children to terminate") if (keys(%pid2host) != 0);
    while ((my $pid = wait()) != -1) {
        # save the raw wait status before anything else can change $?
        my $wait_status = $?;

        # WEXITSTATUS is only meaningful when the child exited by
        # itself; a child killed by a signal must count as a failure.
        my $signal;
        my $status;
        if (WIFEXITED($wait_status)) {
            $status = WEXITSTATUS($wait_status);
        }
        else {
            $signal = WTERMSIG($wait_status);
            $status = 128 + $signal;
        }

        # find the node corresponding to this PID
        my $name = $pid2host{$pid};
        next if !defined($name);    # should not happen
        my $node = $nodes{$name};

        mlog(3, "pid $pid ($name) terminated with exit status $status");
        mlog(3, "pid $pid ($name) was killed by signal $signal")
            if (defined($signal));

        my $errors_file = "$dir/$collecttype/$name/errors.txt";
        if ($status != SUCCESS && -f $errors_file) {
            # replay the reasons left by the script into the main log
            if (open(my $fh, '<', $errors_file)) {
                mlog(1, "Phase \"$phase\" of collect type \"$collecttype\" failed for node $name:");
                mlog(1, "### BEGIN REASON");
                while (my $errstr = <$fh>) {
                    chomp($errstr);
                    mlog(1, $errstr);
                }
                close($fh);
                mlog(1, "### END REASON");
                unlink($errors_file);
            }
        }

        # save exit status for this node/collect type (if not in fail path)
        if ($node->{status} == SUCCESS) {
            $node->{status} = $status;
            $node->{phase} = $phase;
        }
        mlog(2, "Ignoring $name in next phases") if ($status == IGNORED);
    }
}

######################################################################
# Function: create_node_info
# Purpose:  Create the XML node element with information about
#           the specified node.
# Tasks:    Build a <node> element for the node, ask
#           cdat::collect_node_info for its details and, when the
#           time zone could be retrieved, attach the element to the
#           node list. Sets the node status to SUCCESS or
#           UNREACHABLE accordingly.
# Input:    Node name, node (hash reference with type and user)
# Output:   None
######################################################################
sub create_node_info {
    my ($name, $node) = @_;

    # no user@ specified for this node: fall back on -u (or default)
    $node->{user} = $user unless defined($node->{user});

    my $node_elt = $tree2->createElement('node');
    unless (defined($node_elt)) {
        printf(STDERR catgets(MSG_CANNOT_CREATE_XML_ELEMENT,
            "Cannot create XML element for node %s, skipping this node.\n"),
            $name);
        # ignore this node for all collect types!
        delete($nodes{$name});
        return;
    }

    $node_elt->setAttribute('hostname', $name);
    foreach my $field (['node-type', $node->{type}],
                       ['user',      $node->{user}]) {
        $node_elt->appendTextChild($field->[0], $field->[1]);
    }

    mlog(2, "Retrieving node information for $name");
    my %info = cdat::collect_node_info($node->{type}, $node->{user}, $name);

    # without a time zone the node is considered unreachable and its
    # element is not added to the node list
    if (!defined($info{tz})) {
        mlog(1, "Could not retrieve node information from $name");
        $node->{status} = UNREACHABLE;
        return;
    }

    $node_elt->appendTextChild('timezone', $info{tz});
    $node->{status} = SUCCESS;

    # optional details (lpar_id is for LPAR/VIOS only)
    foreach my $key (qw(machine_id lpar_id)) {
        next if (!defined($info{$key}));
        $node_elt->appendTextChild($key, $info{$key});
    }

    $node_list_elt->appendChild($node_elt);
}

######################################################################
# Function: main
# Purpose:  Entry point of the collect subcommand.
# Tasks:    Parse command line and run collects on remote nodes.
# Input:    None (reads @ARGV, and stdin when asking for a PMR number)
# Output:   None (exits with 0 on success, 1 on usage/input errors,
#           2 on repository errors)
######################################################################
sub main {
    my (@filenames, @rawnodes, @collecttypes);
    my ($rc, $comment, $collectid, $quiet);

    # Parse command line options
    Getopt::Long::Configure('bundling', 'no_ignore_case');
    $rc = GetOptions(
        'h'   => \&usage,
        'n=s' => \@rawnodes,
        't=s' => \@collecttypes,
        'i=s' => \$collectid,
        'p=s' => \$pmr,
        'm=s' => \$comment,
        'u=s' => \$user,
        'f=s' => \@filenames,
        'v+'  => \$verbose,
        'q'   => \$quiet,
        'g'   => \$growfs
    );
    if (!$rc || @collecttypes == 0 || @ARGV != 0) {
        usage();
    }

    # make sure we have -n or -f, but not both
    my $have_files = (@filenames != 0) ? 1 : 0;
    my $have_nodes = (@rawnodes != 0) ? 1 : 0;
    if ($have_files == $have_nodes) {
        usage();
    }

    cdat::switch_user();

    if ($have_files) {
        %nodes = cdat::read_nodes_from_files(@filenames);
    }
    else {
        %nodes = cdat::read_nodes_from_array(@rawnodes);
    }

    if (!%nodes) {
        printf(STDERR catgets(MSG_NO_NODES_TO_CONNECT,
            "No nodes to connect to.\n"));
        exit(1);
    }

    # Get the list of defined collect types
    %scripts = cdat::get_collecttypes();

    # PMR number format: PMR#,BRANCH#,COUNTRY#
    # NOTE(review): the pattern is not anchored, so extra text around a
    # valid PMR number is accepted - confirm whether that is intended.
    my $pmr_re = qr/\d{5,}(,[0-9A-Za-z]{3,}){2}/;

    my $try = 0;
    if (defined($pmr)) {
        # validate PMR number
        if ($pmr !~ $pmr_re) {
            printf(STDERR catgets(MSG_INVALID_PMR,
                "Invalid PMR '%s', format should be 'PMR#,BRANCH#,COUNTRY#'.\n"),
                $pmr);
            exit(1);
        }
    }
    elsif (!$quiet) {
        # option -p was not specified and no -q, ask user
        printf(catgets(MSG_IBM_SUPPORT, "Is the collect for IBM support?"));
        $try = cdat::yes_no(1);
    }

    # $try doubles as the attempt counter for the prompt below
    while ($try) {
        printf(catgets(MSG_PLEASE_ENTER_PMR_NUMBER,
            "Please enter a PMR number: "));
        $pmr = <STDIN>;
        if (!defined($pmr)) {
            # end of input: asking again cannot succeed
            printf(STDERR catgets(MSG_INVALID_PMR,
                "Invalid PMR '%s', format should be 'PMR#,BRANCH#,COUNTRY#'.\n"),
                '');
            exit(1);
        }
        chomp($pmr);

        # validate PMR number
        last if ($pmr =~ $pmr_re);

        printf(STDERR catgets(MSG_INVALID_PMR,
            "Invalid PMR '%s', format should be 'PMR#,BRANCH#,COUNTRY#'.\n"),
            $pmr);
        exit(1) if ($try++ == 3);
    }

    my $root_dir = cdat::odm_get_path();
    if (!defined($root_dir)) {
        printf(STDERR catgets(MSG_DIR_NOT_DEFINED,
            "The cdat directory path is not defined.\n".
            "Please, run 'cdat init' first.\n"));
        exit(2);
    }

    if (!defined($user)) {
        $user = cdat::odm_get_user();
    }

    my $fh;
    if (!open($fh, "+<", "$root_dir/$XMLFILE")) {
        printf(STDERR catgets(MSG_CANNOT_OPEN,
            "Cannot open %s.\n"), "$root_dir/$XMLFILE");
        exit(2);
    }

    # Try to acquire exclusive write access to the XML file (but do not block)
    if (!flock($fh, LOCK_EX | LOCK_NB)) {
        printf(STDERR catgets(MSG_BUSY,
            "Another instance of the cdat command is running.\n".
            "Please wait until this instance terminates.\n"));
        exit(2);
    }

    my $parser = XML::LibXML->new();
    if (!defined($parser)) {
        printf(STDERR catgets(MSG_CANNOT_CREATE_XML_PARSER,
            "Cannot create XML parser.\n"));
        exit(2);
    }
    $parser->keep_blanks(0);

    # the parser throws on malformed input; trap that so that the
    # message below is printed instead of a raw exception
    my $tree = eval { $parser->parse_fh($fh) };
    if (!defined($tree)) {
        printf(STDERR catgets(MSG_CANNOT_PARSE,
            "Cannot parse %s.\n"), "$root_dir/$XMLFILE");
        exit(2);
    }

    my $root = $tree->getDocumentElement;
    my $collect_dir;

    if (!defined($collectid)) {
        # no -i specified, generate a unique collect id (max of all
        # existing collect ids + 1)
        $collectid = 0;
        foreach my $elt ($root->getElementsByTagName('collect')) {
            my $id = $elt->getAttribute('id');
            $collectid = $id if ($id =~ /^\d+$/ && $id > $collectid);
        }
        $collectid++;
        $collect_dir = sprintf("%08u", $collectid);
        $isnew = 1;
    }
    else {
        # check if the specified id already exists (recurrent collect)
        $isnew = 1;     # assume it does not
        foreach my $elt ($root->getElementsByTagName('collect')) {
            my $id = $elt->getAttribute('id');
            if ($collectid eq $id) {
                $collect_elt = $elt;
                $isnew = 0;
                last;
            }
        }
        # refuse the creation of collect ids consisting in digits only
        # since these are already used by the framework.
        if ($isnew && $collectid =~ /^\d+$/) {
            printf(STDERR catgets(MSG_NO_COLLECT_FOUND_FOR_ID,
                "No collect found for id %s.\n"), $collectid);
            exit(2);
        }
        $collect_dir = $collectid;
    }

    $dir = "$root_dir/$collect_dir";

    if ($isnew) {
        if (-e "$dir") {
            printf(STDERR catgets(MSG_DIRECTORY_EXISTS,
                "Directory %s already exists, please run '%s' to repair the repository.\n"),
                $dir, "cdat check -d");
            exit(2);
        }

        # create collect directory
        $rc = mkdir($dir);
        if (!$rc) {
            printf(STDERR catgets(MSG_CANNOT_CREATE_DIR,
                "Cannot create directory %s.\n"), $dir);
            exit(2);
        }
    }

    # open the log file (create it if necessary); LOG stays a bareword
    # handle because mlog and INT_handler use it by that name
    $rc = open(LOG, '>>', "$dir/logs.txt");
    if (!$rc) {
        printf(STDERR catgets(MSG_CANNOT_CREATE_LOG_FILE,
            "Cannot create log file %s.\n"), "$dir/logs.txt");
        rmdir($dir) if $isnew;
        exit(2);
    }
    printf(STDERR catgets(MSG_SEE_FILE_FOR_DETAILED_STATUS,
        "See file %s for detailed status.\n"), "$dir/logs.txt");

    #
    # Update the cdat.xml file (add a new collect element if necessary).
    #
    if ($isnew) {
        $collect_elt = $tree->createElement('collect');
        $collect_elt->setAttribute('id', $collectid);
        $collect_elt->appendTextChild('location', "$collect_dir/");
    }

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    my $localtime_str = sprintf("%4d-%02d-%02dT%02d:%02d:%02d",
        1900 + $year, 1 + $mon, $mday, $hour, $min, $sec);

    my $date_elt = $tree->createElement('date');
    $date_elt->appendTextNode($localtime_str);
    my $date_attached = 0;
    if (!$isnew) {
        # replace previous date
        foreach my $old ($collect_elt->getElementsByTagName('date')) {
            $collect_elt->replaceChild($date_elt, $old);
            $date_attached = 1;
        }
    }
    if (!$date_attached) {
        # new collect, or existing collect without any date element:
        # the date must be a child before pmr/description are
        # positioned relative to it
        $collect_elt->appendChild($date_elt);
    }

    if (defined($pmr)) {
        my $pmr_elt = $tree->createElement('pmr');
        $pmr_elt->appendTextNode($pmr);
        if (!$isnew) {
            # remove previous PMR number if it exists
            foreach my $old ($collect_elt->getElementsByTagName('pmr')) {
                $collect_elt->removeChild($old);
            }
        }
        $collect_elt->insertBefore($pmr_elt, $date_elt);
    }

    if (defined($comment)) {
        my $comment_elt = $tree->createElement('description');
        $comment_elt->appendTextNode($comment);
        if (!$isnew) {
            # remove previous description if it exists
            foreach my $old ($collect_elt->getElementsByTagName('description')) {
                $collect_elt->removeChild($old);
            }
        }
        $collect_elt->insertAfter($comment_elt, $date_elt);
    }

    #
    # Create a new collect.xml file.
    #
    mlog(2, "Creating \"$dir/collect.xml\"");

    $tree2 = XML::LibXML->createDocument('1.0', cdat::locale_charmap);
    if (!defined($tree2)) {
        mlog(1, "Cannot create \"$dir/collect.xml\"");
        exit(2);
    }

    # sequentially collect information from the nodes
    $node_list_elt = $tree2->createElement('node-list');
    while (my ($name, $node) = each(%nodes)) {
        create_node_info($name, $node);
    }
    $tree2->setDocumentElement($node_list_elt);

    # install signal handler before starting collects
    # NOTE(review): SIGKILL cannot normally be caught; the entry is
    # kept to mirror the list that INT_handler resets.
    $SIG{'INT'}  = \&INT_handler;
    $SIG{'KILL'} = \&INT_handler;
    $SIG{'HUP'}  = \&INT_handler;
    $SIG{'TERM'} = \&INT_handler;
    $SIG{'ABRT'} = \&INT_handler;
    $SIG{'QUIT'} = \&INT_handler;

    #
    # Start collects. The loop variable is the package global
    # $collecttype, which do_collect and do_phase read.
    #
    foreach $collecttype (@collecttypes) {
        do_collect();
    }

    # Dump the collect XML document to the collect.xml file
    $tree2->toFile("$dir/collect.xml", 1);

    if ($isnew) {
        # Add the new XML collect element to the cdat.xml file
        $root->appendChild($collect_elt);
    }

    # Dump the cdat XML document to the cdat.xml file. Rewriting in
    # place is only safe if the handle was rewound and emptied first.
    if (!seek($fh, 0, Fcntl::SEEK_SET) || !truncate($fh, 0)) {
        mlog(1, "Cannot rewrite \"$root_dir/$XMLFILE\"");
        exit(2);
    }
    $tree->toFH($fh, 1);

    exit(0);
}

main();