The Easiest Way to Save and Share Code Snippets on the web

speak.pl

perl

posted: Feb, 8th 2012 | jump to bottom

  1. #!/usr/bin/perl
  2.  
  3. #--------------------------------------------------
  4. # Usage:
  5. # echo "Hello world" | ./speak.pl en speech.mp3
  6. # cat file.txt | ./speak.pl en speech.mp3
  7. #
  8. #
  9. # Prerequisites:
  10. # sudo apt-get install libwww-perl sox libsox-fmt-mp3
  11. #
  12. # Compiling sox:
  13. # Older versions of sox package might not have support for mp3 codec,
  14. # so just download sox from http://sox.sourceforge.net/
  15. # install packages libmp3lame-dev libmad0-dev
  16. # and compile sox
  17. #--------------------------------------------------
  18.  
  19. use LWP;
  20. use strict;
  21.  
  # Exactly two arguments are required: the language code and the output MP3
  # path. On any other count, print the usage text to STDERR and stop with a
  # non-zero exit status instead of running on with undefined arguments.
  if (scalar(@ARGV) != 2) {
      print STDERR "Usage: $0 LANGUAGE OUT.mp3\n";
      print STDERR "\n";
      print STDERR "Examples: \n";
      print STDERR " echo \"Hello world\" | ./speak.pl en speech.mp3\n";
      print STDERR " cat file.txt | ./speak.pl en speech.mp3\n";
      exit 1;
  }
  29.  
  # Command-line arguments: language code passed to the TTS service (for
  # example "en" or "sk") and the path of the final MP3 file.
  my $language = $ARGV[0];
  my $all_mp3_out = $ARGV[1];

  # Scratch directory for the per-sentence and silence MP3 segments; it is
  # placed next to the output file and named after it.
  my $TMP_DIR = "$all_mp3_out.tmp";

  # Page opened in the local web browser when the service answers with an
  # HTML page instead of audio, and the delay between retries while waiting.
  my $RECAPTCHA_URL = "http://www.google.com/sorry/?continue=http%3A%2F%2Ftranslate.google.com%2Ftranslate_tts%3Ftl=en%26q=Your+identity+was+successfuly+confirmed.";
  my $RECAPTCHA_SLEEP_SECONDS = 5;
  my $SYSTEM_WEBBROWSER = "firefox";

  # Create the scratch directory unless it is already there. Every segment
  # file is written into it, so fail right away with the system error rather
  # than let each later step fail on a missing directory.
  unless (-d $TMP_DIR) {
      mkdir($TMP_DIR)
          or die "ERROR: cannot create temporary directory '$TMP_DIR': $!\n";
  }
  37.  
  # Length of the pause, in seconds, inserted after each kind of break that
  # the main loop recognises in the input text.
  my $silence_duration_paragraphs = 0.8;     # blank input line
  my $silence_duration_sentences  = 0.2;     # word ending in '.', '!' or '?'
  my $silence_duration_comma      = 0.1;     # word ending in ','
  my $silence_duration_brace      = 0.1;     # word ending in ')'
  my $silence_duration_semicolon  = 0.2;     # word ending in ';'
  my $silence_duration_words      = 0.05;    # length-forced break or end of line

  # Header name/value pairs passed along with every text-to-speech request.
  my @headers = (
      'Host',            'translate.google.com',
      'User-Agent',      'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5',
      'Accept',          'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language', 'en-us,en;q=0.5',
      'Accept-Encoding', 'gzip,deflate',
      'Accept-Charset',  'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
      'Keep-Alive',      '300',
      'Connection',      'keep-alive',
  );

  # The single user agent object shared by all requests.
  my $browser = LWP::UserAgent->new();
  57.  
  # Ordered list of MP3 segment files (speech and silence) that make up the
  # final recording, and the running index used to name those files.
  my @all_mp3s = ();
  my $sentence_idx = 0;

  # Read the text from STDIN line by line. A blank line is a paragraph break;
  # any other line is cut into chunks of fewer than 100 characters, preferably
  # at punctuation, and each chunk is spoken and followed by a short pause.
  while (my $line = <STDIN>)
  {
      chomp($line);
      print "line: $line\n";

      # Blank line: paragraph separator, emit a long pause only.
      if ($line =~ /^\s*$/) {
          push @all_mp3s, SilenceToMp3($sentence_idx++, $silence_duration_paragraphs);
          next;
      }

      # split ' ' skips leading whitespace, so no empty first word is produced.
      # A word of 99 or more characters can never fit into a chunk (each word
      # is stored with one leading space and a chunk must stay below 100
      # characters). Such a word is cut into pieces of at most 98 characters
      # up front; otherwise the "one word back" step below would retry the
      # same word forever.
      my @words = map { length($_) >= 99 ? ($_ =~ /(.{1,98})/gs) : $_ }
                  split(' ', $line);

      my $sentence = "";
      for (my $i = 0; $i < scalar(@words); $i++)
      {
          my $word = $words[$i];
          my $last_char = substr($word, -1);
          $sentence .= " $word"; # add another word to the sentence

          my $say = 0;
          my $silence_duration = 0.0;
          if (length($sentence) >= 100) {
              # Too long: drop the word just added, speak what was collected
              # before it, and process the same word again as the start of
              # the next chunk.
              $sentence = substr($sentence, 0, length($sentence) - length($word) - 1);
              $say = 1;
              $silence_duration = $silence_duration_words;
              $i--; # one word back
          }
          # The word ends a sentence
          elsif ($last_char =~ /[.!?]/) {
              $say = 1;
              $silence_duration = $silence_duration_sentences;
          }
          elsif ($last_char eq ",") {
              $say = 1;
              $silence_duration = $silence_duration_comma;
          }
          elsif ($last_char eq ";") {
              $say = 1;
              $silence_duration = $silence_duration_semicolon;
          }
          elsif ($last_char eq ")") {
              $say = 1;
              $silence_duration = $silence_duration_brace;
          }
          # No more words on this line
          elsif ($i == scalar(@words) - 1) {
              $say = 1;
              $silence_duration = $silence_duration_words;
          }

          if ($say) {
              # An empty path means no audio was produced for this chunk;
              # keep it out of the list of files to join.
              my $mp3 = TrimSilence( SentenceToMp3($sentence, $sentence_idx++) );
              push @all_mp3s, $mp3 if defined($mp3) && $mp3 ne "";
              push @all_mp3s, SilenceToMp3($sentence_idx++, $silence_duration);
              $sentence = ""; # start a new sentence
          }
      }
  }
  116.  
  # Show the segment list, then merge all segments into the output file,
  # reporting progress on STDOUT before and after the merge.
  print "Concatenate: " . join(" ", @all_mp3s) . "\n";
  print "Writing output to ", $all_mp3_out, "...";
  JoinMp3s(\@all_mp3s, $all_mp3_out);
  print "done\n";
  121.  
  # Concatenate MP3 segments into a single file with sox.
  #
  # Arguments:
  #   $mp3s_ref - reference to an array of input file paths, in playback
  #               order; undefined or empty entries are ignored
  #   $mp3_out  - path of the file to write
  #
  # Returns the status of the sox run as reported by system() (0 on success),
  # or -1 when there was nothing to join.
  #
  # The sub is declared without the former empty prototype "()", because it
  # takes two arguments. sox is started through the list form of system(), so
  # paths containing spaces or shell metacharacters reach it unchanged.
  sub JoinMp3s {
      my $mp3s_ref = shift;
      my $mp3_out = shift;

      my @inputs = grep { defined($_) && $_ ne "" } @{$mp3s_ref};
      if (!@inputs) {
          warn "WARNING: no audio segments to join, '$mp3_out' was not written\n";
          return -1;
      }

      my $status = system("sox", @inputs, $mp3_out);
      if ($status != 0) {
          warn "WARNING: sox failed to write '$mp3_out' (status $status)\n";
      }
      return $status;
  }
  128.  
  # Generate an MP3 file containing silence.
  #
  # Arguments:
  #   $idx      - segment index used in the file name
  #   $duration - length of the silence in seconds
  #
  # Returns the path of the generated file, "<TMP_DIR>/<idx>_sil.mp3" with the
  # index zero-padded to four digits.
  #
  # The file name is built from the $idx argument. The earlier code read the
  # global $sentence_idx instead, which the caller has already incremented by
  # the time this sub runs. The sub is declared without the former empty
  # prototype "()", because it takes two arguments.
  sub SilenceToMp3 {
      my $idx = shift;
      my $duration = shift;

      # $TMP_DIR goes in as a %s argument so a '%' in the directory name is
      # not interpreted as a format directive.
      my $mp3_out = sprintf("%s/%04d_sil.mp3", $TMP_DIR, $idx);

      # Single-quote the path for the shell (embedded quotes are escaped) so
      # a directory name with spaces does not split the command line.
      (my $quoted_out = $mp3_out) =~ s/'/'\\''/g;
      Exec("sox -n -r 22050 '$quoted_out' trim 0.0 $duration");
      return $mp3_out;
  }
  137.  
  # Fetch the spoken form of a piece of text from the Google Translate
  # text-to-speech URL and store it as an MP3 file.
  #
  # Arguments:
  #   $sentence     - text to speak, at most 100 characters
  #   $sentence_idx - segment index used in the file name
  #
  # Returns the path of the written file ("<TMP_DIR>/<idx>.mp3", index
  # zero-padded to four digits), or "" when no audio was obtained (empty
  # response or failed request).
  #
  # Dies when the text is longer than 100 characters or the file cannot be
  # written.
  #
  # A response whose body contains "<!DOCTYPE" is treated as the captcha
  # page: the browser is opened once through ReCaptcha() and the request is
  # retried every $RECAPTCHA_SLEEP_SECONDS seconds until audio comes back.
  #
  # The sub is declared without the former empty prototype "()", because it
  # takes two arguments.
  sub SentenceToMp3 {
      my $sentence = shift;
      my $sentence_idx = shift;

      print "sentence: $sentence\n";

      # Build the query value from the raw text: percent-encode everything
      # except unreserved characters, then turn spaces into '+'. Without this
      # a '&', '#', '%' or '+' in the text would corrupt or truncate the query.
      my $query = $sentence;
      utf8::encode($query) if utf8::is_utf8($query);   # encode bytes, not code points
      $query =~ s/([^A-Za-z0-9\-_.~ ])/sprintf("%%%02X", ord($1))/ge;
      $query =~ tr/ /+/;

      $sentence =~ s/ /+/g;
      if (length($sentence) > 100) {
          die ("ERROR: sentence has more than 100 characters: '$sentence'");
      }

      # $TMP_DIR goes in as a %s argument so a '%' in the directory name is
      # not interpreted as a format directive.
      my $mp3_out = sprintf("%s/%04d.mp3", $TMP_DIR, $sentence_idx);
      my $url = "http://translate.google.com/translate_tts?tl=$language&q=$query";

      my $recaptcha_waiting = 0;
      my $resp;
      my $body;
      while (1) {
          $resp = $browser->get($url, @headers);

          # The request headers advertise gzip/deflate, so undo any content
          # encoding before looking at the body. charset => 'none' keeps the
          # result as bytes; fall back to the raw body if decoding fails.
          $body = $resp->decoded_content(charset => 'none');
          $body = $resp->content unless defined $body;

          if ($body =~ "<!DOCTYPE") {
              if (!$recaptcha_waiting) {
                  $recaptcha_waiting = 1;
                  ReCaptcha();
              }
          } else {
              last;
          }
          sleep($RECAPTCHA_SLEEP_SECONDS);
          PrintWaitingDot();
      }

      # A failed request carries an error message, not audio; do not save it
      # as an MP3 segment.
      if (!$resp->is_success) {
          warn "ERROR: request for '$sentence' failed: " . $resp->status_line . "\n";
          return "";
      }
      if (length($body) == 0) {
          print "EMPTY SENTENCE: '$sentence'\n";
          return "";
      }

      open(my $fh, '>', $mp3_out)
          or die "ERROR: cannot open '$mp3_out' for writing: $!\n";
      binmode($fh);   # MP3 data is binary
      print {$fh} $body;
      close($fh)
          or die "ERROR: cannot write '$mp3_out': $!\n";
      return $mp3_out;
  }
  178.  
  # Print one progress dot on STDOUT. STDOUT is made the selected handle and
  # its autoflush flag is switched on right after the print, which also
  # flushes the dot that was just written.
  sub PrintWaitingDot() {
      select(STDOUT);
      print('.');
      $| = 1;
  }
  184.  
  # Tell the user that a captcha has to be solved, show the URL, and open
  # that URL with the configured web browser command. Returns the status of
  # the browser command as reported by system().
  sub ReCaptcha() {
      my $rule = "\n\n--------------------------------------------------\n\n";
      print join("",
          $rule,
          " RE-CAPTCHA",
          $rule,
          "This URL is going to be opened in your browser:\n",
          "$RECAPTCHA_URL\n",
          "\n",
          "Please, enter the captcha code in your browser's window.\n",
          "This program will continue automatically then.\n",
      );
      return system("$SYSTEM_WEBBROWSER '$RECAPTCHA_URL'");
  }
  196.  
  # Strip leading and trailing silence from an MP3 file using sox.
  #
  # Argument:
  #   $mp3 - path of the input file; "" (or undef) means "no audio"
  #
  # Returns the path of the trimmed file, which is the input path with a
  # trailing ".mp3" replaced by "_trim.mp3", or "" when there was no input.
  #
  # The sub is declared without the former empty prototype "()", because it
  # takes one argument.
  sub TrimSilence {
      my $mp3 = shift;

      if (!defined($mp3) || $mp3 eq "") {
          return "";
      }

      my $mp3_out = $mp3;
      $mp3_out =~ s/\.mp3$/_trim.mp3/;

      # Single-quote both paths for the shell (embedded quotes are escaped)
      # so names with spaces or metacharacters do not break the pipeline.
      (my $quoted_in  = $mp3)     =~ s/'/'\\''/g;
      (my $quoted_out = $mp3_out) =~ s/'/'\\''/g;

      # Trim the start, reverse, trim the start again (the former end), and
      # reverse back into the output file.
      Exec(join(" | ",
          "sox '$quoted_in' -p silence 1 0.1 -40d",
          "sox -p -p reverse",
          "sox -p -p silence 1 0.1 -40d",
          "sox -p '$quoted_out' reverse",
      ));
      return $mp3_out;
  }
  214.  
  # Run a command line with system() and report failures on STDERR.
  #
  # Argument:
  #   $cmd - the command line, passed to system() as a single string
  #
  # Returns the value of system(): 0 on success, -1 when the command could
  # not be started, otherwise the wait status (exit code in the high byte).
  #
  # The sub is declared without the former empty prototype "()", because it
  # takes one argument.
  sub Exec {
      my $cmd = shift;
      # print "exec $cmd\n";
      my $status = system($cmd);
      if ($status == -1) {
          warn "ERROR: could not run command: $!\n  $cmd\n";
      }
      elsif ($status != 0) {
          warn sprintf("WARNING: command failed (exit code %d, wait status %d):\n  %s\n",
                       $status >> 8, $status, $cmd);
      }
      return $status;
  }
46 views