Web Speech APIで絵本を読み聞かせ – てまりのユニバーサルデザイン

子供への絵本読み聞かせが大変で、AIに読んでもらうWebアプリを作っています。Cloud Vision APIのOCRで文字は読み取れました。次は合成音声で読み上げます。読み上げに一番手軽なWeb Speech APIを使うことにしました。

ブラウザだけで音声読み上げ「Web Speech API」

早くアプリを作りたいという理由で、クラウドを使った合成音声サービスを使わず、使用経験のある「Web Speech API」を採用しました。

数行のJavaScriptを書くだけで、ブラウザがテキストを合成音声でしゃべってくれます。

Web Speech APIを試してみよう

手元の絵本の表紙から読み取った「はしれはしごしゃ　作絵間瀬なおかた」という文字を合成音声で読み上げてみます。

Google Chrome バージョン83での読み上げです。

まだまだ、カタコトかな。

OCR+読み上げのコード

現状のコードを掲載します。

<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="utf-8">
<title>OCR + Web Speech APIサンプル</title>
<style>
  canvas {
    width: 640px;
    height: 480px;
    background-color: #EEE;
    margin: 10px 0;
  }
  textarea{
    width: 640px;
    height: 240px;
    box-sizing: border-box;
  }
</style>

</head>
<body>
  <h1>OCR + Web Speech APIサンプル</h1>
  <div>
    <input type="file" id="file" accept="image/*">
  </div>
  <div>
    <canvas id="canvas"></canvas>
  </div>
  <div>
    <textarea id="textarea"></textarea>
  </div>

<script>
// Shared namespace object for the OCR and speech modules of this page.
var VISION = {};

/**
 * OCR front end.
 *
 * Wires the file input (#file) so that a chosen image is previewed on the
 * canvas (#canvas), sent to the Cloud Vision images:annotate endpoint with a
 * TEXT_DETECTION feature, and the detected text is written to #textarea,
 * outlined on the canvas and read aloud through VISION.SPEECH().
 *
 * Takes no arguments and returns nothing; calling it registers the
 * 'change' listener on the file input.
 */
VISION.OCR = function(){
  "use strict";
  var API_KEY = 'あなたのAPIのキー',
      API_URL = 'https://vision.googleapis.com/v1/images:annotate',
      canvas = document.getElementById('canvas'),
      ctx = canvas.getContext('2d'),
      canvasW = 640,
      canvasH = 480,
      // Offset and zoom used to fit the image into the canvas; written by
      // setCanvas and read by drawCanvas to map image pixels to canvas pixels.
      originX = 0,
      originY = 0,
      scale = 1;

  // Entry point: only has to hook up the file input.
  var init = function(){
    setUploadBtn();
  },

  // Registers the 'change' handler that reads the selected file.
  setUploadBtn = function(){
    var file = document.getElementById('file');

    var loadLocalImage = function(e){
      var fileData = e.target.files[0],
          reader;
      // No entry in the file list (e.g. the dialog was cancelled): nothing to do.
      if(!fileData){
        return false;
      }
      if(!/^image\//.test(fileData.type)) {
        alert('画像ではありません。');
        return false;
      }
      reader = new FileReader();
      reader.onload = function() {
        setCanvas(reader.result);
        // The API expects bare base64, so drop the "data:<mime>;base64,"
        // header whatever the image MIME type is (the input accepts image/*).
        sendAPI(reader.result.replace(/^data:[^,]*;base64,/, ''));
      };
      reader.readAsDataURL(fileData);
    };
    file.addEventListener('change', loadLocalImage, false);
  },

  // Draws the image (given as a data URL) centred and scaled to fit the canvas.
  setCanvas = function(src){
    var img = new Image();
    canvas.width = canvasW;
    canvas.height = canvasH;
    ctx.clearRect(0, 0, canvasW, canvasH);
    // Attach the handler before assigning src so the load event cannot be missed.
    img.onload = function() {
      scale = Math.min(canvasW / img.width, canvasH / img.height);
      originX = (canvasW / 2) - (img.width / 2) * scale;
      originY = (canvasH / 2) - (img.height / 2) * scale;
      ctx.drawImage(img, originX, originY, img.width * scale, img.height * scale);
    };
    img.src = src;
  },

  // Outlines the axis-aligned box enclosing the given vertices
  // (objects with optional numeric x / y, in image pixels).
  drawCanvas = function(data){
    var xs = [],
        ys = [],
        i, left, top, right, bottom;
    if(!data || !data.length){
      return;
    }
    for(i = 0; i < data.length; i++){
      // A missing coordinate is treated as 0 so the arithmetic never yields NaN.
      xs.push(data[i].x || 0);
      ys.push(data[i].y || 0);
    }
    // Use min/max over all vertices so the result does not depend on vertex order.
    left = Math.min.apply(null, xs);
    right = Math.max.apply(null, xs);
    top = Math.min.apply(null, ys);
    bottom = Math.max.apply(null, ys);
    ctx.beginPath();
    ctx.rect(left * scale + originX, top * scale + originY,
             (right - left) * scale, (bottom - top) * scale);
    ctx.strokeStyle = "#76FF03";
    ctx.lineWidth = 2;
    ctx.stroke();
  },

  // Posts the base64 image to the API and forwards the text annotations to setResult.
  sendAPI = function(img){
    var httpObj = new XMLHttpRequest(),
        body = {
          "requests": [
            {
              "image": {
                "content": img
              },
              "features": [
                {
                  "type":"TEXT_DETECTION"
                }
              ]
            }
          ]
        };
    httpObj.open('post', API_URL + '?key=' + encodeURIComponent(API_KEY), true);
    httpObj.setRequestHeader('Content-Type', 'application/json');
    httpObj.onreadystatechange = function(){
      var data, first;
      if(httpObj.readyState !== 4){
        return;
      }
      // Tell the user about a failed request instead of silently doing nothing.
      if(httpObj.status !== 200){
        alert('文字の読み取りに失敗しました。(HTTP ' + httpObj.status + ')');
        return;
      }
      data = JSON.parse(httpObj.responseText);
      first = data.responses && data.responses[0];
      setResult(first ? first.textAnnotations : undefined);
    };
    httpObj.send(JSON.stringify(body));
  },

  // Shows, outlines and speaks the first annotation; alerts when there is none.
  setResult = function(data){
    var text, poly;
    if(!data || !data.length){
      alert('文字を検出できませんでした。');
      return;
    }
    text = data[0].description;
    poly = data[0].boundingPoly;
    // Update the page before speaking so a speech failure cannot hide the OCR result.
    document.getElementById('textarea').value = text;
    if(poly && poly.vertices){
      drawCanvas(poly.vertices);
    }
    VISION.SPEECH().speak(text);
  };
  init();
};

/**
 * Speech helper built on the Web Speech API.
 *
 * Each call creates one SpeechSynthesisUtterance, sets its volume, rate and
 * pitch to 1 and its language to 'ja-JP', and returns an object whose
 * speak(text) puts the text on that utterance and hands it to
 * window.speechSynthesis.
 *
 * @returns {{speak: function(string): void}}
 */
VISION.SPEECH = function(){
  "use strict";
  var utterance = new SpeechSynthesisUtterance();

  // Fixed voice settings applied once, right after construction.
  function applyDefaults(){
    utterance.volume = 1;
    utterance.rate = 1;
    utterance.pitch = 1;
    utterance.lang = 'ja-JP';
  }

  // Reuses the same utterance object for every call on this helper.
  function speak(text){
    utterance.text = text;
    window.speechSynthesis.speak(utterance);
  }

  applyDefaults();

  return {
    speak : speak
  };
};
// Start the OCR module; its init attaches the 'change' handler to the file input.
VISION.OCR();
// Builds a speech helper, but the returned object is discarded here;
// the OCR result handler calls VISION.SPEECH() itself when it needs to speak.
VISION.SPEECH();
</script>
</body>
</html>

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

<!DOCTYPE html>

<head>

<title>OCR + Web Speech APIサンプル</title>

<style>

canvas {

width: 640px;

height: 480px;

background-color: #EEE;

margin: 10px 0;

}

textarea{

width: 640px;

height: 240px;

box-sizing: border-box;

}

</style>

</head>

<body>

<h1>OCR + Web Speech APIサンプル</h1>

<div>

</div>

<div>

</div>

<div>

</div>

var VISION = {};

VISION.OCR = function(){

"use strict";

var API_KEY = 'あなたのAPIのキー',

API_URL = 'https://vision.googleapis.com/v1/images:annotate',

canvas = document.getElementById('canvas'),

ctx = canvas.getContext('2d'),

canvasW = 640,

canvasH = 480,

originX = 0,

originY = 0,

scale = 1;

var init = function(){

setUploadBtn();

setUploadBtn = function(){

var file = document.getElementById('file');

var loadLocalImage = function(e){

var fileData = e.target.files[0],

reader,

img;

if(!fileData.type.match('image.*')) {

alert('画像ではありません。');

return false;

}

reader = new FileReader();

reader.onload = function() {

setCanvas(reader.result);

sendAPI(reader.result.replace(/^data:image\/(png|jpeg);base64,/, ''));

}

reader.readAsDataURL(fileData);

}

file.addEventListener('change', loadLocalImage, false);

setCanvas = function(src){

var img = new Image();

canvas.width = canvasW;

canvas.height = canvasH;

ctx.clearRect(0, 0, canvasW, canvasH);

img.src = src;

img.onload = function() {

scale = Math.min(canvas.width / this.width, canvas.height / this.height);

originX = (canvasW / 2) - (this.width / 2) * scale;

originY = (canvasH / 2) - (this.height / 2) * scale;

ctx.drawImage(img, originX, originY, this.width * scale, this.height * scale);

}

drawCanvas = function(data){

var x = data[0].x * scale + originX,

y = data[0].y * scale + originY,

w = (data[1].x - data[0].x) * scale,

h = (data[2].y - data[0].y) * scale;

ctx.beginPath () ;

ctx.rect(x, y, w, h) ;

ctx.strokeStyle = "#76FF03";

ctx.lineWidth = 2;

ctx.stroke();

sendAPI = function(img){

"use strict";

var httpObj = new XMLHttpRequest(),

data,

body = {

"requests": [

{

"image": {

"content": img

"features": [

{

"type":"TEXT_DETECTION",

}

]

}

]

};

httpObj.open('post', API_URL + '?key=' + API_KEY, true);

httpObj.setRequestHeader('Content-Type', 'application/json');

httpObj.onreadystatechange = function(){

if(this.readyState === 4 && this.status === 200){

data = JSON.parse(httpObj.responseText);

setResult(data.responses[0].textAnnotations);

}

};

httpObj.send(JSON.stringify(body));

setResult = function(data){

if(typeof data === 'undefined'){

alert('文字を検出できませんでした。')

}else{

VISION.SPEECH().speak(data[0].description);

document.getElementById('textarea').value = data[0].description;

drawCanvas(data[0].boundingPoly.vertices);

}

};

init();

};

VISION.SPEECH = function(){

"use strict";

var uttr = new SpeechSynthesisUtterance();

var init = function(){

uttr.volume = 1;

uttr.rate = 1;

uttr.pitch = 1;

uttr.lang = 'ja-JP';

speak = function(text){

uttr.text = text;

window.speechSynthesis.speak(uttr);

};

init();

return {

speak : speak

}

};

VISION.OCR();

VISION.SPEECH();

</script>

</body>

</html>

Web Speech APIの記述は、VISION.SPEECH()の中にまとめています。

あとはカメラ機能を実装すれば、絵本読み聞かせWebアプリとなるでしょう。

ブラウザだけで音声読み上げ「Web Speech API」

Web Speech APIを試してみよう

OCR+読み上げのコード

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル