#!/usr/bin/perl -w
#!perl
#!/usr/bin/perl
#############################################################################
# Copyright 2009 #
# Purpose: Generate large text file. #
# #
#############################################################################
#############################################################################
# PIL #
# Software Development Group #
# Peter Mortensen #
# E-mail: mortens.spammaygohere@drmortensen.eu #
# WWW: http://www.pil.sdu.dk/ #
# #
# Program for generating a large text file of random numbers and #
# reporting the achieved write rate while doing so. #
# #
# FILENAME: generate.pl #
# #
# CREATED: PM 2009-08-29 Vrs. 1.0. #
# UPDATED: PM 2009-09-03 Cleaned up. #
# PM 2009-09-05 Added detection and output for delays in #
# writing to output file. #
# PM 2009-09-07 Now samples time with better than 1 second #
# resolution. Numbers in output rounded to 0.1 #
# #
# #
#############################################################################
#Future:
# 1. Also round $writeRate at the end of the script.
#require 5.002;
require 5.008; #Perl 5.8 and later: for built-in Time::HiRes module. This requirement
#can be lowered, but then CPAN module Time::HiRes must be
#downloaded and installed.
use strict;
use warnings;
use diagnostics;
#Time::HiRes:
#
use Time::HiRes qw ( time); #Higher resolution replacement for
#the time function provided with
#core Perl.
##############################################################
# Function: getTimes
#
#   Samples the clock once and reports how much time has passed
#   relative to two reference time stamps supplied by the caller.
#
#   Parameters (positional):
#     1. time stamp of the previous sample.
#     2. time stamp taken when the run started.
#
#   Returns a three-element list:
#     1. the time stamp just sampled,
#     2. the sampled time minus the first argument,
#     3. the sampled time minus the second argument.
#
#   The unit is whatever time() returns. This file imports
#   time from Time::HiRes, so values are fractional seconds.
##############################################################
sub getTimes
{
    my ($previousSample, $runStart) = @_;

    #Sample the clock exactly once so that both differences are
    #computed against the same instant.
    my $now = time();

    return ($now, $now - $previousSample, $now - $runStart);
}
#Start-up banner, followed by reading the run parameters from the
#environment:
#
#   MBSIZE  - approximate size of the output file, in megabytes.
#   OUTFILE - name of the output file.
#
#The script dies if either parameter is missing or if MBSIZE is not a
#positive number.
print "\n\n--------------------- start ----------------------------------\n";

my $MBtargetSize;
my $outPutFileName;

#For using parameters from AppleScript and Windows NT
if ( ! $ENV{MBSIZE} )
{
    print "\n\n\nNo environment variables detected.... Bailing out. Bye-bye!\n\n\n";
    die;
}

$MBtargetSize = $ENV{'MBSIZE'};

#MBSIZE is later compared numerically against the number of megabytes
#written. Reject anything that is not a plain positive decimal number
#up front, instead of letting it silently numify to something else.
if ( $MBtargetSize !~ m{ \A \s* [+]? (?: \d+ (?: [.] \d* )? | [.] \d+ ) (?: [eE] [+-]? \d+ )? \s* \z }x
     || $MBtargetSize <= 0 )
{
    die "Environment variable MBSIZE must be a positive number of megabytes (got '$MBtargetSize')\n\n";
}

#MBSIZE alone is not enough; without a file name there is nothing to open.
$outPutFileName = $ENV{'OUTFILE'};
if ( ! defined $outPutFileName || $outPutFileName eq '' )
{
    die "Environment variable OUTFILE (name of the output file) is not set\n\n";
}

print "Approximate output file size: $MBtargetSize MB\n";
print "Output file: $outPutFileName\n";
print "\n";
#Open (create or truncate) the output file for writing.
#
#The three-argument form of open keeps the mode separate from the file
#name. The name comes from the environment, and with the two-argument
#form a leading '>' or '&', or surrounding white space, would be
#interpreted by open instead of being taken as part of the name.
#$! is included so the reason for a failure is visible.
open ( NUMBERS_OUTFILE, '>', $outPutFileName)
    or die "Could not open output file $outPutFileName: $!\n\n";
#Loop control flag and running totals; all start at zero.
my ($end2, $lineCount, $sizeInBytes, $sampleLineLength) = (0, 0, 0, 0);

#Number of doubling passes used to build each output line. The line is
#doubled on every pass, so its length grows exponentially with this
#number. Observations from earlier runs:
#
# 4: approx 250 characters
# 6: approx 1000 characters 38 MB/secs until 2 GB written.
# Long delay and then continues at 38 MB/secs.
# 8: approx 4100 characters 74 MB/secs until 2 GB written.
# 9: approx 8500 characters 97 MB/secs until 1 GB written.
# 11 secs, then 23 secs delay.
# 10: approx 17000 characters 117 MB/secs until 2.3 GB written.
# 20 secs, then 56 secs delay.
# 11: approx 34000 characters 130 MB/secs until 1.2 GB written.
# 9 secs, then 31 secs delay.
# 130 MB/secs until 2.7 GB written.
my $linePower = 11;

#Number of loop passes between two progress reports. The value is
#adjusted dynamically during the run. This start value is based on
#experience from one particular system. It should not be set too high,
#otherwise the first few progress lines will be too far apart.
my $downCounterStartValue = 2000;
my $downCounter = $downCounterStartValue;

#Reference time stamps: start of the run and time of the latest sample.
my $startTime = time();
my $oldTime = $startTime;
my $oldProgressTime = $startTime;

#Wanted time between progress reports. Unit: seconds.
my $progressInterval = 1.6;

#Fixed seed: the sequence of random numbers is the same in every run,
#which makes for a better comparison between runs.
srand( 43152332);
#Main loop.
#
#While $downCounter is positive, each pass builds one long line of
#random numbers and writes it to the output file. When the counter has
#run down, one pass is spent on bookkeeping instead: the time is
#sampled, the batch size ($downCounterStartValue) is tuned towards one
#progress report per $progressInterval seconds, a progress report is
#printed, and the loop is ended once more than $MBtargetSize megabytes
#have been produced.

#Smallest allowed batch size. The counter is decremented once directly
#after it has been reloaded (at the bottom of the loop), so a reload
#value of 1 or 0 would never reach the write branch again and the loop
#would spin forever without writing anything.
my $minDownCounterStartValue = 2;

while (! $end2)
{
    if ($downCounter > 0)
    {
        #Build one line.
        my $line = "";
        for (1 .. $linePower)
        {
            my $rn = rand 10;

            #Append and double line (in order to get very long line for minimum overhead).
            $line = $line . $line . "$rn ";
        } #for
        $line .= "\n";

        #Sample start of $line:
        #
        #2.94036865234375 2.94036865234375 3.19793701171875 2.94036865234375 2.94036865234375 3.19793701171875 5.66619873046875
        #

        my $lineLength = length( $line);
        $sizeInBytes += $lineLength;
        $sampleLineLength = $lineLength;

        #Count once per line written, not once per doubling pass.
        $lineCount++;

        #Output the line to the file. Stop on a write error instead
        #of continuing to count bytes that never reached the file.
        print NUMBERS_OUTFILE $line
            or die "Could not write to output file $outPutFileName: $!\n\n";
    }
    else
    {
        my ($currentTime, $timeSinceLastZeroCount, $totalTimeElapsed) =
            getTimes( $oldTime, $startTime);

        #Adjust $downCounterStartValue depending on the actual time consumed.
        if ( $timeSinceLastZeroCount < $progressInterval)
        {
            #Operation time was too low (too high overhead for system
            #calls), increase the batch size by a third. For very small
            #values the integer truncation would leave the value
            #unchanged, so always grow by at least one.
            my $grown = int( int( $downCounterStartValue * 4) / 3);
            $downCounterStartValue = ($grown > $downCounterStartValue)
                                   ? $grown
                                   : $downCounterStartValue + 1;
        }
        elsif ( $timeSinceLastZeroCount > $progressInterval)
        {
            #The batch took too long, reduce the batch size by a fifth,
            #but never below the smallest value that still writes lines.
            my $shrunk = int( int( $downCounterStartValue * 4) / 5);
            $downCounterStartValue = ($shrunk < $minDownCounterStartValue)
                                   ? $minDownCounterStartValue
                                   : $shrunk;
        }
        #Otherwise: right on the money, keep using the current value.
        #With a high resolution version of time() this will probably
        #never happen.

        $oldTime = $currentTime;

        print "downCounterStartValue: $downCounterStartValue. " .
              "Sample line length: $sampleLineLength.\n";

        #The write rate can only be computed once some time has passed.
        if ($totalTimeElapsed > 0)
        {
            my $sizeInMegaBytes = $sizeInBytes / 1024 / 1024;
            my $writeRate = $sizeInMegaBytes / $totalTimeElapsed;

            printf(
                "After %4.1f seconds: %d lines written (%4.1f MB). %4.1f MB/s\n\n",
                $totalTimeElapsed, $lineCount, $sizeInMegaBytes, $writeRate);

            #A batch that took more than twice the wanted interval is
            #reported as a delay in writing to the output file.
            if ($timeSinceLastZeroCount > 2 * $progressInterval)
            {
                printf("*********************** Long delay: %4.1f seconds **********************************\n\n", $timeSinceLastZeroCount);
            }

            if ( $sizeInMegaBytes > $MBtargetSize)
            {
                $end2 = 1;
            }
        }

        $downCounter = $downCounterStartValue;
    } #Downcounter reached zero.

    $downCounter--;
} #while
#Close the output file and print the totals for the whole run.
#
#Output is buffered, so a write error (for example a full disk) may only
#surface when the file is closed; the result of close is therefore
#checked. The final time sample is taken after the close.
close( NUMBERS_OUTFILE)
    or die "Could not close output file $outPutFileName: $!\n\n";

#Only the total elapsed time is needed from the final sample.
my (undef, undef, $totalTimeElapsed) = getTimes( $oldTime, $startTime);

#Guard the division in case no time is measured as having passed.
my $writeRate = ($totalTimeElapsed > 0)
              ? $sizeInBytes / 1024 / 1024 / $totalTimeElapsed
              : 0;

print "Totals:\n\n";
print "$totalTimeElapsed seconds. $lineCount lines written. $writeRate MB/s\n\n";
print "\n\n--------------------- end ----------------------------------\n";