Email address extractor
A Ruby script that extracts email addresses from a text file. The name of the text file to read is the first and only command-line argument. The script picks up all portions of text that match the RFC 2822/RFC 2821 definition of an email address.
# Matches one candidate address: a local part of 1-64 permitted characters,
# an '@', then a domain made of at least two dot-separated labels.
# The groups are non-capturing so String#scan yields whole matches.
EMAIL_PATTERN = /[\w=!#&~+%*?|^{}`'\.\/\$-]{1,64}@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]{1,50})+/

# Checks the dot rule for the local part (the text before '@').
#
# @param local [String] local part of a candidate address
# @return [Boolean] false when '.' starts or ends the local part, or when
#   two dots appear in a row; true otherwise
def valid_local_part?(local)
  !(local.start_with?('.') || local.end_with?('.') || local.include?('..'))
end

# Extracts every email address found in a piece of text.
#
# Candidates that break the dot rule are dropped individually; scanning
# continues with the text right after them, so later addresses in the same
# text are still reported.
#
# @param text [String] text to search
# @return [Array<String>] matching addresses in order of appearance
def extract_emails(text)
  text.scan(EMAIL_PATTERN).select do |email|
    # The pattern allows exactly one '@', so everything before it is the local part.
    valid_local_part?(email.partition('@').first)
  end
end

if ARGV.empty?
  # With no argument, report usage on stderr rather than raising from a nil path.
  warn "Usage: #{File.basename($PROGRAM_NAME)} FILE"
else
  # File.foreach closes the file itself, even if an error interrupts the loop.
  # Addresses cannot span lines (the pattern matches no newline), so
  # line-by-line scanning sees every match while keeping memory use flat.
  File.foreach(ARGV[0]) do |line|
    extract_emails(line).each { |email| puts email }
  end
end