#!/usr/bin/perl -w

#
# dbdump.pl
#
# This makes a complete dump of an entire MySQL database, using the mysqldump utility.  
# By default, it will dump all the tables for all the databases on localhost. It stores 
# the dumps in a user-defined directory, and keeps a log of MD5 checksums for all the 
# dumps it has made and stored.  The logs are unique for each host which is dumped.  It
# compares the sum from the last dump made with the sum for the current dump to see if 
# the contents of the two are the same.  If they are, then the database contents are 
# already archived, so the script discards the dump it just made.  If they sums are 
# different, then it gzips the current file and records the MD5 sum, dump file name, and
# what databases were dumped in the log file.
#
# Note that the sum log for each host MUST be in the same directory as the dump files 
# themselves!  Moving to the log file is akin to starting from scratch as far as the 
# script is concerned.
#
# OPTIONS:
#
# You can dump just one (or more) database(s) when by giving it the "-n database_name" 
# option. If you want to dump more than one DB, seperate the names with spaces and 
# enclose everything in quotes, like "-n 'db1 db2 .. dbN'". 
#
# Dump databases on other hosts by specifying the "-h hostname" switch.  Default is 
# 'localhost'.
#
# Specify a username and password with the -u and -p switches respectively.  The user
# given must have at least SELECT access on all the tables in the databases specified 
# by the -n switch. If -n is omitted, the user must be able to read every table in the
# database. The defaults are 'dbdump' and 'dbdump';
#
# This script was meant to be run from cron, but can also be run manually. If running 
# from cron specify a output path with the -o switch. If no path is given, cwd is used,
# which is probably not what you want if you are not running interactively.  This path 
# is where both the log files and the dumps are stored.
#
# If you want to keep incremental files (with each new dump getting saved with a unique 
# filename), then pass the -i switch.  The script will incorporate epoch seconds into 
# the file names.  This is useful if you want to keep a "history", see what's changed, 
# etc.
#
# Run with -v for verbose mode, including output from mysqldump.
# 
# Copyright (C) 2003 Wm. Rhodes
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation at: <http://www.gnu.org/copyleft/gpl.html>.
#

use strict;
use Cwd;
use Digest::MD5;
use File::Basename;
use Time::HiRes;
use Getopt::Std;
use Sys::Hostname;


my ($archive_file, $lastline, $dbprintlist);
my $start_time = Time::HiRes::time;

# Change these two to match the system with the db. This should be fine for any Linux system.
my $mysqldump = "/usr/bin/mysqldump";
my $gzip = "/bin/gzip -f";

# Get options
getopts("vn:h:u:p:o:i");
our ($opt_v, $opt_n, $opt_h, $opt_u, $opt_p, $opt_o, $opt_i);

# Any optional database name, or default to all databases
my $dbnames;
if ($opt_n) {
	$dbnames = "--databases $opt_n";
	$dbprintlist = $opt_n;
} else {
	$dbnames = "-A";
	$dbprintlist = "All";
}

# Define this to print out informative messages to STDOUT
my $debug = $opt_v ? "-v" : '';

# Default to localhost.  MySQL uses the string 'localhost' to grant local access, but we 
# don't want to use 'localhost' in file names since it's not portable. 
my $hostprint = $opt_h ? $opt_h : hostname();
$opt_h ||= "localhost";

# All the files we create have these perms. I don't know how tightly we want to restrict 
# the output files, so I might want this to be 644 or something.
my $mode = 0600;

# This should be something other than cwd if run from cron
$opt_o ||= cwd;
$opt_o =~ s/\/$//;	#/
unless (-e $opt_o && -d $opt_o) {
	die "Could not find output directory '$opt_o'. Quitting.\n";
}

# The path and file of our dump, using epoch secs for uniqueness if need be. 
my $outputfile = $opt_o ."/". $hostprint;
$outputfile .= "_". time() if ($opt_i);
$outputfile .="_mysqldump.sql";

# This is a file with our dump file names and their MD5 sums. 
my $script = basename($0);
$script =~ s/(.+)\.\w+/$1/;
my $sumlog = $opt_o ."/". $hostprint ."_". $script .".log";
print "DEBUG: Using $sumlog for sum log file.\n" if $debug;

# This should be a user that can select from the entire DB. You could also use 
# the mysql root user and put this script in root's crontab file.
$opt_u ||= "dbdump";
$opt_p ||= "dbdump";

# Add whatever mysqldump options you want here.
my $cmdline = "$mysqldump -u $opt_u --password=$opt_p -q -c $debug --host=$opt_h -r $outputfile $dbnames";
print "DEBUG: Using command line:\n      $cmdline\n" if $debug;

# Do the dump
ForkCommand($cmdline);

# See if new file differs from the last file we made
print "DEBUG: Created new dump file $outputfile\n" if $debug;
my $oldsum;
my $fh;
my $md5 = Digest::MD5->new;
open($fh, "< $outputfile") || die "Couldn't open $outputfile: $!\n";
$md5->reset;
$md5->addfile($fh);
my $newsum = $md5->hexdigest;	
close($fh);
print "DEBUG: Sum for $outputfile is $newsum\n" if $debug;

# We compare $newsum to the sum from the last dump file and if they are different, we 
# archive the file. If they are the same, we unlink the file we just dumped.
# If we have no log file, then obviously we record the current sum and save our file.
if (-e $sumlog) {
	open(LOG, "+<$sumlog") || die $!;
	while (<LOG>) {
		$lastline = $_;
	}
	chomp($lastline);
	print "DEBUG: Last log file line found is '$lastline'\n"  if $debug;
	$oldsum = (split(/\t/, $lastline))[0];
	print "DEBUG: Old sum = $oldsum\nDEBUG: New sum = $newsum.\n" if $debug;
} else {
	print "DEBUG: Log file $sumlog doesn't exist, creating...\n" if $debug;
	open(LOG, ">$sumlog") || die $!;
	$oldsum = 1;
}

# Save our dump and write to log, or quit and get rid of dump file. 
if ($newsum ne $oldsum) {
	print "DEBUG: Old and new sums do not match, will save DB dump file.\n" if $debug;
	print LOG "$newsum\t$outputfile\t". time() ."\t$dbprintlist\n";
	close(LOG);

	print "DEBUG: Zipping dump file $outputfile\n" if $debug;  
	ForkCommand("$gzip $outputfile");
	my $zipped_file = "$outputfile.gz";
	chmod($mode, $zipped_file);
	chmod($mode, $sumlog);

	printf "Run completed in %.2f seconds.", Time::HiRes::time - $start_time;
	print "  Dump file saved as $zipped_file\n";
} else {
	print "DEBUG: Old and new sums match, will not save DB dump file.\n" if $debug;
	unlink($outputfile);
	print "No new MySQL data found, dump file not created.\n";
}


#
# END
#


# Just because I was lazy, and I also kept changing the die message.
sub ForkCommand {
	my $cmdline = shift;
	unless (system($cmdline) == 0) {
		unlink($outputfile);
		die "system call $cmdline failed: $?";
	}
}