// chrome/browser/resources/network_speech_synthesis/tts_extension.js - chromium/src - Git at Google

 // Copyright 2013 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 /**
  * @fileoverview
  * This is a component extension that implements a text-to-speech (TTS)
  * engine powered by Google's speech synthesis API.
  *
  * This is an "event page", so it's not loaded when the API isn't being used,
  * and doesn't waste resources. When a web page or web app makes a speech
  * request and the parameters match one of the voices in this extension's
  * manifest, it makes a request to Google's API using Chrome's private key
  * and plays the resulting speech using HTML5 audio.
  */

 /**
  * The main class for this extension. Adds listeners to
  * chrome.ttsEngine.onSpeak and chrome.ttsEngine.onStop and implements
  * them using Google's speech synthesis API.
  * @constructor
  */
 function TtsExtension() {}

 TtsExtension.prototype = {
   /**
    * The url prefix of the speech server, including static query
    * parameters that don't change.
    * @type {string}
    * @const
    * @private
    */
   SPEECH_SERVER_URL_: 'https://www.google.com/speech-api/v2/synthesize?' +
       'enc=mpeg&client=chromium',

   /**
    * The maximum number of UTF-16 code units of an utterance that is sent
    * to the speech server. Longer utterances are truncated by onSpeak_.
    * @type {number}
    * @const
    * @private
    */
   MAX_UTTERANCE_LENGTH_: 32768,

   /**
    * A mapping from language and gender to voice name, hardcoded for now
    * until the speech synthesis server capabilities response provides this.
    * The key of this map is of the form '<lang>-<gender>'.
    * @type {Object<string>}
    * @private
    */
   LANG_AND_GENDER_TO_VOICE_NAME_: {
     'en-gb-male': 'rjs',
     'en-gb-female': 'fis',
   },

   /**
    * The arguments passed to the onSpeak event handler for the utterance
    * that's currently being spoken. Should be null when no object is
    * pending.
    *
    * @type {?{utterance: string, options: Object, callback: Function}}
    * @private
    */
   currentUtterance_: null,

   /**
    * The HTML5 audio element we use for playing the sound served by the
    * speech server. Created by init().
    * @type {HTMLAudioElement}
    * @private
    */
   audioElement_: null,

   /**
    * A mapping from voice name to language and gender, derived from the
    * manifest file.  This is used in case the speech synthesis request
    * specifies a voice name but doesn't specify a language code or gender.
    * This prototype-level value is only an empty default; init() installs
    * a fresh map on the instance so the prototype object is never mutated.
    * @type {Object<{lang: string, gender: string}>}
    * @private
    */
   voiceNameToLangAndGender_: {},

   /**
    * This is the main function called to initialize this extension.
    * Initializes data structures and adds event listeners.
    */
   init: function() {
     // Build the voice map on the instance rather than writing into the
     // object literal that lives on the shared prototype.
     this.voiceNameToLangAndGender_ = {};

     // Get voices from manifest.
     const voices = chrome.app.getDetails().tts_engine.voices;
     for (const voice of voices) {
       this.voiceNameToLangAndGender_[voice.voice_name] = {
         lang: voice.lang,
         gender: voice.gender
       };
     }

     // Initialize the audio element and event listeners on it.
     this.audioElement_ = document.createElement('audio');
     document.body.appendChild(this.audioElement_);
     this.audioElement_.addEventListener(
         'ended', this.onStop_.bind(this), false);
     this.audioElement_.addEventListener(
         'canplaythrough', this.onStart_.bind(this), false);

     // Install event listeners for the ttsEngine API.
     chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
     chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
     chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
     chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
   },

   /**
    * Handler for the chrome.ttsEngine.onSpeak interface.
    * Gets Chrome's Google API key and then uses it to generate a request
    * url for the requested speech utterance. Sets that url as the source
    * of the HTML5 audio element.
    * @param {string} utterance The text to be spoken.
    * @param {Object} options Options to control the speech, as defined
    *     in the Chrome ttsEngine extension API.
    * @param {Function} callback Function that receives the event objects
    *     ('start', 'end', 'error') produced for this utterance.
    * @private
    */
   onSpeak_: function(utterance, options, callback) {
     const text = this.truncateUtterance_(utterance);

     try {
       // First, stop any pending audio.
       this.onStop_();

       const pending = {
         utterance: text,
         options: options,
         callback: callback
       };
       this.currentUtterance_ = pending;

       let lang = options.lang;
       let gender = options.gender;
       if (options.voiceName) {
         // An unknown voice name makes this lookup undefined; the resulting
         // TypeError is reported through the catch block below.
         const voice = this.voiceNameToLangAndGender_[options.voiceName];
         lang = voice.lang;
         gender = voice.gender;
       }

       if (!lang)
         lang = navigator.language;

       // Look up the specific voice name for this language and gender.
       // If it's not in the map, it doesn't matter - the language will
       // be used directly. This is only used for languages where more
       // than one gender is actually available.
       const langCode = lang.toLowerCase();
       const voiceName =
           this.LANG_AND_GENDER_TO_VOICE_NAME_[langCode + '-' + gender];

       chrome.systemPrivate.getApiKey((apiKey) => {
         // The utterance may have been stopped or replaced while the key
         // was being fetched; in that case there is nothing left to load.
         if (this.currentUtterance_ !== pending)
           return;

         // This callback runs outside the enclosing try block, so it needs
         // its own handler; otherwise a failure here would never produce an
         // event for the utterance.
         try {
           // Every dynamic value is escaped so that it cannot add or
           // override query parameters.
           let url = this.SPEECH_SERVER_URL_;
           url += '&key=' + encodeURIComponent(apiKey);
           url += '&text=' + encodeURIComponent(text);
           url += '&lang=' + encodeURIComponent(langCode);

           if (voiceName)
             url += '&name=' + encodeURIComponent(voiceName);

           if (options.rate) {
             // Input rate is between 0.1 and 10.0 with a default of 1.0.
             // Output speed is between 0.0 and 1.0 with a default of 0.5.
             url += '&speed=' + (options.rate / 2.0);
           }

           if (options.pitch) {
             // Input pitch is between 0.0 and 2.0 with a default of 1.0.
             // Output pitch is between 0.0 and 1.0 with a default of 0.5.
             url += '&pitch=' + (options.pitch / 2.0);
           }

           // This begins loading the audio but does not play it.
           // When enough of the audio has loaded to begin playback,
           // the 'canplaythrough' handler will call this.onStart_,
           // which begins playing audio and sends a start event to the
           // ttsEngine callback.
           this.audioElement_.src = url;
         } catch (err) {
           this.reportError_(callback, err);
         }
       });
     } catch (err) {
       this.reportError_(callback, err);
     }
   },

   /**
    * Limits an utterance to MAX_UTTERANCE_LENGTH_ UTF-16 code units. Both
    * Chrome's tts extension api and the web speech api specify 32k as the
    * maximum limit for an utterance.
    * @param {string} utterance The text to be spoken.
    * @return {string} The utterance, shortened if it was too long.
    * @private
    */
   truncateUtterance_: function(utterance) {
     let end = this.MAX_UTTERANCE_LENGTH_;
     if (utterance.length <= end)
       return utterance;

     // Don't cut a surrogate pair in half: a trailing lone high surrogate
     // would make encodeURIComponent throw a URIError later on.
     const lastUnit = utterance.charCodeAt(end - 1);
     if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF)
       end--;
     return utterance.slice(0, end);
   },

   /**
    * Logs an error, sends it to the given ttsEngine callback as an 'error'
    * event and clears the current utterance.
    * @param {Function} callback The callback passed to onSpeak_.
    * @param {*} err The value that was thrown.
    * @private
    */
   reportError_: function(callback, err) {
     const message = String(err);
     console.error(message);
     callback({'type': 'error', 'errorMessage': message});
     this.currentUtterance_ = null;
   },

   /**
    * Handler for the chrome.ttsEngine.onStop interface.
    * Called either when the ttsEngine API requests us to stop, or when
    * we reach the end of the audio stream. Pause the audio element to
    * silence it, and send a callback to the ttsEngine API to let it know
    * that we've completed. Note that the ttsEngine API manages callback
    * messages and will automatically replace the 'end' event with a
    * more specific callback like 'interrupted' when sending it to the
    * TTS client.
    * @private
    */
   onStop_: function() {
     if (this.currentUtterance_) {
       this.audioElement_.pause();
       this.currentUtterance_.callback({
         'type': 'end',
         'charIndex': this.currentUtterance_.utterance.length
       });
     }
     this.currentUtterance_ = null;
   },

   /**
    * Handler for the canplaythrough event on the audio element.
    * Called when the audio element has buffered enough audio to begin
    * playback. Applies the requested volume (if any), begins playing the
    * audio element and then sends the 'start' event to the ttsEngine
    * callback.
    * @private
    */
   onStart_: function() {
     if (this.currentUtterance_) {
       if (this.currentUtterance_.options.volume !== undefined) {
         // Both APIs use the same range for volume, between 0.0 and 1.0.
         this.audioElement_.volume = this.currentUtterance_.options.volume;
       }
       this.audioElement_.play();
       this.currentUtterance_.callback({'type': 'start', 'charIndex': 0});
     }
   },

   /**
    * Handler for the chrome.ttsEngine.onPause interface.
    * Pauses audio if we're in the middle of an utterance.
    * @private
    */
   onPause_: function() {
     if (this.currentUtterance_) {
       this.audioElement_.pause();
     }
   },

   /**
    * Handler for the chrome.ttsEngine.onResume interface.
    * Resumes audio if we're in the middle of an utterance.
    * @private
    */
   onResume_: function() {
     if (this.currentUtterance_) {
       this.audioElement_.play();
     }
   }

 };

 // Create the extension object and run its initialization.
 new TtsExtension().init();
	// Copyright 2013 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	/**
	* @fileoverview
	* This is a component extension that implements a text-to-speech (TTS)
	* engine powered by Google's speech synthesis API.
	*
	* This is an "event page", so it's not loaded when the API isn't being used,
	* and doesn't waste resources. When a web page or web app makes a speech
	* request and the parameters match one of the voices in this extension's
	* manifest, it makes a request to Google's API using Chrome's private key
	* and plays the resulting speech using HTML5 audio.
	*/

	/**
	* The main class for this extension. Adds listeners to
	* chrome.ttsEngine.onSpeak and chrome.ttsEngine.onStop and implements
	* them using Google's speech synthesis API.
	* @constructor
	*/
	function TtsExtension() {}

	TtsExtension.prototype = {
	  /**
	   * The url prefix of the speech server, including static query
	   * parameters that don't change.
	   * @type {string}
	   * @const
	   * @private
	   */
	  SPEECH_SERVER_URL_: 'https://www.google.com/speech-api/v2/synthesize?' +
	      'enc=mpeg&client=chromium',

	  /**
	   * The maximum number of UTF-16 code units of an utterance that is sent
	   * to the speech server. Longer utterances are truncated by onSpeak_.
	   * @type {number}
	   * @const
	   * @private
	   */
	  MAX_UTTERANCE_LENGTH_: 32768,

	  /**
	   * A mapping from language and gender to voice name, hardcoded for now
	   * until the speech synthesis server capabilities response provides this.
	   * The key of this map is of the form '<lang>-<gender>'.
	   * @type {Object<string>}
	   * @private
	   */
	  LANG_AND_GENDER_TO_VOICE_NAME_: {
	    'en-gb-male': 'rjs',
	    'en-gb-female': 'fis',
	  },

	  /**
	   * The arguments passed to the onSpeak event handler for the utterance
	   * that's currently being spoken. Should be null when no object is
	   * pending.
	   *
	   * @type {?{utterance: string, options: Object, callback: Function}}
	   * @private
	   */
	  currentUtterance_: null,

	  /**
	   * The HTML5 audio element we use for playing the sound served by the
	   * speech server. Created by init().
	   * @type {HTMLAudioElement}
	   * @private
	   */
	  audioElement_: null,

	  /**
	   * A mapping from voice name to language and gender, derived from the
	   * manifest file. This is used in case the speech synthesis request
	   * specifies a voice name but doesn't specify a language code or gender.
	   * This prototype-level value is only an empty default; init() installs
	   * a fresh map on the instance so the prototype object is never mutated.
	   * @type {Object<{lang: string, gender: string}>}
	   * @private
	   */
	  voiceNameToLangAndGender_: {},

	  /**
	   * This is the main function called to initialize this extension.
	   * Initializes data structures and adds event listeners.
	   */
	  init: function() {
	    // Build the voice map on the instance rather than writing into the
	    // object literal that lives on the shared prototype.
	    this.voiceNameToLangAndGender_ = {};

	    // Get voices from manifest.
	    const voices = chrome.app.getDetails().tts_engine.voices;
	    for (const voice of voices) {
	      this.voiceNameToLangAndGender_[voice.voice_name] = {
	        lang: voice.lang,
	        gender: voice.gender
	      };
	    }

	    // Initialize the audio element and event listeners on it.
	    this.audioElement_ = document.createElement('audio');
	    document.body.appendChild(this.audioElement_);
	    this.audioElement_.addEventListener(
	        'ended', this.onStop_.bind(this), false);
	    this.audioElement_.addEventListener(
	        'canplaythrough', this.onStart_.bind(this), false);

	    // Install event listeners for the ttsEngine API.
	    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
	    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
	    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
	    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
	  },

	  /**
	   * Handler for the chrome.ttsEngine.onSpeak interface.
	   * Gets Chrome's Google API key and then uses it to generate a request
	   * url for the requested speech utterance. Sets that url as the source
	   * of the HTML5 audio element.
	   * @param {string} utterance The text to be spoken.
	   * @param {Object} options Options to control the speech, as defined
	   *     in the Chrome ttsEngine extension API.
	   * @param {Function} callback Function that receives the event objects
	   *     ('start', 'end', 'error') produced for this utterance.
	   * @private
	   */
	  onSpeak_: function(utterance, options, callback) {
	    const text = this.truncateUtterance_(utterance);

	    try {
	      // First, stop any pending audio.
	      this.onStop_();

	      const pending = {
	        utterance: text,
	        options: options,
	        callback: callback
	      };
	      this.currentUtterance_ = pending;

	      let lang = options.lang;
	      let gender = options.gender;
	      if (options.voiceName) {
	        // An unknown voice name makes this lookup undefined; the resulting
	        // TypeError is reported through the catch block below.
	        const voice = this.voiceNameToLangAndGender_[options.voiceName];
	        lang = voice.lang;
	        gender = voice.gender;
	      }

	      if (!lang)
	        lang = navigator.language;

	      // Look up the specific voice name for this language and gender.
	      // If it's not in the map, it doesn't matter - the language will
	      // be used directly. This is only used for languages where more
	      // than one gender is actually available.
	      const langCode = lang.toLowerCase();
	      const voiceName =
	          this.LANG_AND_GENDER_TO_VOICE_NAME_[langCode + '-' + gender];

	      chrome.systemPrivate.getApiKey((apiKey) => {
	        // The utterance may have been stopped or replaced while the key
	        // was being fetched; in that case there is nothing left to load.
	        if (this.currentUtterance_ !== pending)
	          return;

	        // This callback runs outside the enclosing try block, so it needs
	        // its own handler; otherwise a failure here would never produce an
	        // event for the utterance.
	        try {
	          // Every dynamic value is escaped so that it cannot add or
	          // override query parameters.
	          let url = this.SPEECH_SERVER_URL_;
	          url += '&key=' + encodeURIComponent(apiKey);
	          url += '&text=' + encodeURIComponent(text);
	          url += '&lang=' + encodeURIComponent(langCode);

	          if (voiceName)
	            url += '&name=' + encodeURIComponent(voiceName);

	          if (options.rate) {
	            // Input rate is between 0.1 and 10.0 with a default of 1.0.
	            // Output speed is between 0.0 and 1.0 with a default of 0.5.
	            url += '&speed=' + (options.rate / 2.0);
	          }

	          if (options.pitch) {
	            // Input pitch is between 0.0 and 2.0 with a default of 1.0.
	            // Output pitch is between 0.0 and 1.0 with a default of 0.5.
	            url += '&pitch=' + (options.pitch / 2.0);
	          }

	          // This begins loading the audio but does not play it.
	          // When enough of the audio has loaded to begin playback,
	          // the 'canplaythrough' handler will call this.onStart_,
	          // which begins playing audio and sends a start event to the
	          // ttsEngine callback.
	          this.audioElement_.src = url;
	        } catch (err) {
	          this.reportError_(callback, err);
	        }
	      });
	    } catch (err) {
	      this.reportError_(callback, err);
	    }
	  },

	  /**
	   * Limits an utterance to MAX_UTTERANCE_LENGTH_ UTF-16 code units. Both
	   * Chrome's tts extension api and the web speech api specify 32k as the
	   * maximum limit for an utterance.
	   * @param {string} utterance The text to be spoken.
	   * @return {string} The utterance, shortened if it was too long.
	   * @private
	   */
	  truncateUtterance_: function(utterance) {
	    let end = this.MAX_UTTERANCE_LENGTH_;
	    if (utterance.length <= end)
	      return utterance;

	    // Don't cut a surrogate pair in half: a trailing lone high surrogate
	    // would make encodeURIComponent throw a URIError later on.
	    const lastUnit = utterance.charCodeAt(end - 1);
	    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF)
	      end--;
	    return utterance.slice(0, end);
	  },

	  /**
	   * Logs an error, sends it to the given ttsEngine callback as an 'error'
	   * event and clears the current utterance.
	   * @param {Function} callback The callback passed to onSpeak_.
	   * @param {*} err The value that was thrown.
	   * @private
	   */
	  reportError_: function(callback, err) {
	    const message = String(err);
	    console.error(message);
	    callback({'type': 'error', 'errorMessage': message});
	    this.currentUtterance_ = null;
	  },

	  /**
	   * Handler for the chrome.ttsEngine.onStop interface.
	   * Called either when the ttsEngine API requests us to stop, or when
	   * we reach the end of the audio stream. Pause the audio element to
	   * silence it, and send a callback to the ttsEngine API to let it know
	   * that we've completed. Note that the ttsEngine API manages callback
	   * messages and will automatically replace the 'end' event with a
	   * more specific callback like 'interrupted' when sending it to the
	   * TTS client.
	   * @private
	   */
	  onStop_: function() {
	    if (this.currentUtterance_) {
	      this.audioElement_.pause();
	      this.currentUtterance_.callback({
	        'type': 'end',
	        'charIndex': this.currentUtterance_.utterance.length
	      });
	    }
	    this.currentUtterance_ = null;
	  },

	  /**
	   * Handler for the canplaythrough event on the audio element.
	   * Called when the audio element has buffered enough audio to begin
	   * playback. Applies the requested volume (if any), begins playing the
	   * audio element and then sends the 'start' event to the ttsEngine
	   * callback.
	   * @private
	   */
	  onStart_: function() {
	    if (this.currentUtterance_) {
	      if (this.currentUtterance_.options.volume !== undefined) {
	        // Both APIs use the same range for volume, between 0.0 and 1.0.
	        this.audioElement_.volume = this.currentUtterance_.options.volume;
	      }
	      this.audioElement_.play();
	      this.currentUtterance_.callback({'type': 'start', 'charIndex': 0});
	    }
	  },

	  /**
	   * Handler for the chrome.ttsEngine.onPause interface.
	   * Pauses audio if we're in the middle of an utterance.
	   * @private
	   */
	  onPause_: function() {
	    if (this.currentUtterance_) {
	      this.audioElement_.pause();
	    }
	  },

	  /**
	   * Handler for the chrome.ttsEngine.onResume interface.
	   * Resumes audio if we're in the middle of an utterance.
	   * @private
	   */
	  onResume_: function() {
	    if (this.currentUtterance_) {
	      this.audioElement_.play();
	    }
	  }

	};

	// Create the extension object and run its initialization.
	new TtsExtension().init();