#!/usr/bin/perl # # compcor.pl [,...] # computes the correlation between columns 1 and 5 # in the specified files, ignoring lines starting with # # # Author: Austin Parker # This code in the public domain -- feel free to modify to your hearts content. # If you find a bug, tell me. # # The author makes no guarantees about the quality or effectiveness of this # code, and bears no responsibility should anything go wrong. # $printOnlyCor = 0; $c1 = shift @ARGV or die "usage: compcor.pl [,...]"; if ($c1 =~ "-p") { $printOnlyCor = 1; $c1 = shift @ARGV or die "usage: compcor.pl [,...]"; } $c2 = shift @ARGV or die "usage: compcor.pl [,...]"; $sum_sq_x = 0; $sum_sq_y = 0; $sum_coproduct = 0; $mean_x = 0; $mean_y = 0; $n = 0; foreach $file (@ARGV) { print "\nWorking on file $file\n" if ($printOnlyCor == 0); open IN, "<$file"; while () { next if /^#/; # get x and y. @vals = split /\s+/; $x = $vals[$c1]; $y = $vals[$c2]; $n++; # print; # print (join "\t",@vals); # print "\nLine $n: $x,$y\n";; if ($n == 0) { $mean_x = $x; $mean_y = $y; } else { $sweep = ($n - 1) / $n; $delta_x = $x - $mean_x; $delta_y = $y - $mean_y; $sum_sq_x += $delta_x * $delta_x * $sweep; $sum_sq_y += $delta_y * $delta_y * $sweep; $sum_coproduct += $delta_x * $delta_y * $sweep; $mean_x += $delta_x / $n; $mean_y += $delta_y / $n; } } close IN; } $pop_sd_x = sqrt( $sum_sq_x / $n ); $pop_sd_y = sqrt( $sum_sq_y / $n ); $cov_x_y = $sum_coproduct / $n; $correlation = 0; $correlation = $cov_x_y / ($pop_sd_x * $pop_sd_y) if ($pop_sd_x != 0 && $pop_sd_y != 0); if ($printOnlyCor == 0) { print "no standard deviation in data!\n" if ($pop_sd_x == 0 || $pop_sd_y == 0); print "\nCorrelation is $correlation\n"; } else { print "$correlation"; }