// -*- mode: java; -*-

// Indian Language Converter - transliterates from Roman scripts to
// Indic scripts.

// Copyright (C) 2005, 2006 Vijay Lakshminarayanan <liyer.vijay@gmail.com>

// Indian Language Converter is free software; you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// 02110-1301, USA.

// 	$Id: converter.js,v 1.1.1.1 2006-05-26 11:49:07 vijay Exp $	
// 	Author: Vijay Lakshminarayanan	
// 	$Date: 2006-05-26 11:49:07 $	

// Constructor for the transliteration engine.
//
// Each instance carries three pieces of data:
//   vowels       - regular-expression source (a string, not a RegExp)
//                  matching one Roman vowel token.
//   consonants   - regular-expression source matching one Roman
//                  consonant token.
//   letter_codes - map from a syllable token to the decimal HTML
//                  character reference ("&#NNNN;") that replaces it.
//
// The code points in letter_codes lie in the Unicode Kannada block
// (U+0C80-U+0CFF, decimal 3200-3327).
var unicodeEngine = function()
{
	this.vowels = "(A)|(E)|(H)|(I)|(M)|(O)|(a((a)|(e)|(i)|(u))?)|(e(e)?)|(i)|(o((a)|(o))?)|(R((u)|(U))?)|(R)|(u)|(U)";
	this.consonants = "(B(h)?)|(Ch)|(D(h)?)|(J(h)?)|(K(h)?)|(G(h)?)|(L)|(N)|(S(h)?)|(T(h)?)|(b(h)?)|(ch)|(d(h)?)|(f)|(g(h)?)|(h)|(j(h)?)|(k(h)?)|(l)|(m)|(n((G)|(Y))?)|(p(h)?)|(r(R)?)|(s(h)?)|(t(h)?)|(v)|(w)|(y)";
	this.letter_codes = {
	// Keys starting with "~" are what split_word emits for a vowel that
	// starts a word or follows another vowel; they map to the
	// independent vowel letters.
	"~a" : "&#3205;",
	"~aa" : "&#3206;",
	"~A" : "&#3206;",
	"~i" : "&#3207;",
	"~ee" : "&#3208;",
	"~I" : "&#3208;",
	"~u" : "&#3209;",
	"~oo" : "&#3210;",
	"~U" : "&#3210;",
	"~e" : "&#3214;",
	"~ae" : "&#3215;",
	"~E" : "&#3215;",
	"~ai" : "&#3216;",
	"~o" : "&#3218;",
	"~oa" : "&#3219;",
	"~O" : "&#3219;",
	"~au" : "&#3220;",
	"~Ru" : "&#3211;",
	"~RU" : "&#3296;",
	// A bare "R" is matched by the vowel pattern, but is rendered with
	// the same code as the consonant "r".
	"~R" : "&#3248;",
	"~r" : "&#3248;",
	// Unprefixed vowel keys are used after a consonant and map to the
	// dependent vowel signs.  "a" maps to the empty string: nothing is
	// appended to the consonant for it.
	"a" : "",
	// 81 is ASCII "Q".  split_word never produces a "ki" token, so this
	// entry looks unused -- NOTE(review): confirm before removing.
	"ki" : "&#81;",
	"aa" : "&#3262;",
	"A" : "&#3262;",
	"i" : "&#3263;",
	"ee" : "&#3264;",
	"I" : "&#3264;",
	"u" : "&#3265;",
	"oo" : "&#3266;",
	"U" : "&#3266;",
	"e" : "&#3270;",
	"ae" : "&#3271;",
	"E" : "&#3271;",
	"ai" : "&#3272;",
	"o" : "&#3274;",
	"oa" : "&#3275;",
	"O" : "&#3275;",
	"au" : "&#3276;",
	"Ru" : "&#3267;",
	"RU" : "&#3268;",
	// Consonants.
	"k" : "&#3221;",
	"K" : "&#3222;",
	"kh" : "&#3222;",
	"Kh" : "&#3222;",
	"g" : "&#3223;",
	"gh" : "&#3224;",
	"G" : "&#3224;",
	"Gh" : "&#3224;",
	"nG" : "&#3225;",
	"ch" : "&#3226;",
	"Ch" : "&#3227;",
	"j" : "&#3228;",
	"jh" : "&#3229;",
	"J" : "&#3229;",
	"Jh" : "&#3229;",
	"nY" : "&#3230;",
	"t" : "&#3236;",
	"T" : "&#3231;",
	"d" : "&#3238;",
	"N" : "&#3235;",
	"th" : "&#3237;",
	"Th" : "&#3232;",
	"dh" : "&#3239;",
	"D" : "&#3233;",
	// Was 3239 (identical to "dh"), which left 3234 -- the slot between
	// "D" (3233) and "N" (3235) -- impossible to produce.  "Dh" now
	// follows the same pattern as "T"/"Th".
	"Dh" : "&#3234;",
	"n" : "&#3240;",
	"p" : "&#3242;",
	"ph" : "&#3243;",
	"b" : "&#3244;",
	"B" : "&#3245;",
	"bh" : "&#3245;",
	"Bh" : "&#3245;",
	"m" : "&#3246;",
	"y" : "&#3247;",
	"r" : "&#3248;",
	"rR" : "&#3248;",
	"l" : "&#3250;",
	"L" : "&#3251;",
	"v" : "&#3253;",
	"w" : "&#3253;",
	"sh" : "&#3254;",
	"Sh" : "&#3255;",
	"s" : "&#3256;",
	"S" : "&#3254;",
	"h" : "&#3257;",
	"f" : "&#3243;",
	// "M" and "H" are matched by the vowel pattern; both the prefixed
	// and unprefixed forms map to the same sign.
	"M" : "&#3202;",
	"H" : "&#3203;",
	"~M" : "&#3202;",
	"~H" : "&#3203;",
	// "*" is the marker split_word inserts after a consonant that is not
	// followed by a vowel (virama).
	"*" : "&#3277;"
	};
};


// Break a Roman word into the syllable tokens that letter_codes is
// keyed by.
//
//   word    - string to tokenise.
//   returns - array of tokens, in order:
//               "~" + vowel  for a vowel at the start of the word or
//                            directly after another vowel,
//               vowel        for a vowel seen after a consonant,
//               consonant    for a consonant,
//               "*"          after a consonant not followed by a vowel,
//               single char  for anything matching neither pattern.
unicodeEngine.prototype.split_word = function(word)
{
  // Anchored copies of the two patterns, built once per call.  A match
  // of the anchored pattern is exactly "the pattern matches at index 0".
  var vowel_re = new RegExp("^(?:" + this.vowels + ")");
  var consonant_re = new RegExp("^(?:" + this.consonants + ")");

  var syllables = [];
  var vowel_start_p = true;
  var found;

  while (word.length) {
    found = vowel_re.exec(word);
    if (found) {
      syllables.push(vowel_start_p ? "~" + found[0] : found[0]);
      vowel_start_p = true;
      word = word.substring(found[0].length);
      continue;
    }

    found = consonant_re.exec(word);
    if (found) {
      syllables.push(found[0]);
      vowel_start_p = false;
      word = word.substring(found[0].length);

      // Look ahead: a consonant that is not immediately followed by a
      // vowel (including one at the very end) gets the "*" marker.
      if (!vowel_re.test(word)) {
        syllables.push('*');
      }
      continue;
    }

    // Neither a vowel nor a consonant: pass the character through
    // unchanged.  vowel_start_p is deliberately left as it was.
    syllables.push(word.charAt(0));
    word = word.substring(1);
  }
  return syllables;
}



// Look up the replacement for one syllable token.
//
//   syllable_mcc - token as produced by split_word.
//   returns      - the letter_codes entry for the token (which may be
//                  the empty string), or the token itself when the
//                  table has no entry for it.
unicodeEngine.prototype.match_code = function(syllable_mcc)
{
  // Own-property check so that names inherited from Object.prototype
  // ("toString", "constructor", ...) are not mistaken for table entries.
  if (Object.prototype.hasOwnProperty.call(this.letter_codes, syllable_mcc)) {
    var matched = this.letter_codes[syllable_mcc];
    if (matched != null) return matched;
  }
  return syllable_mcc;
}

// Transliterate a single word: tokenise it with split_word, replace
// every token through match_code, and glue the pieces back together.
//
//   word_ow - the word to convert.
//   returns - the converted string; "" when word_ow is empty or missing.
unicodeEngine.prototype.one_word = function(word_ow)
{
  var engine = this;

  if (word_ow) {
    var pieces = engine.split_word(word_ow);
    var converted = pieces.map(function (piece) {
      return engine.match_code(piece);
    });
    return converted.join("");
  }
  return "";
}


// Convert a whole passage.
//
// The input is consumed from the left; at each step the first rule
// that applies wins:
//   1. "``" + letters        -> a literal "`" followed by the converted
//                               word.
//   2. "`"  + letters        -> the letters, copied through unconverted.
//   3. letters               -> the word converted by one_word.
//   4. "<lang=eng>" ...      -> everything up to the next "<lang=kan>"
//                               (or to the end of the input) is copied
//                               unconverted inside
//                               <span class="english">...</span>.
//   5. "<lang=kan>"          -> the tag is dropped.
//   6. anything else         -> one character is copied through.
// "letters" is a run of one or more vowel/consonant tokens.
//
//   sentence - string to convert.
//   returns  - the converted string.
unicodeEngine.prototype.many_words = function(sentence)
{
  var letters = "((" + this.vowels + ")|(" + this.consonants + "))+";

  // Built once per call instead of on every loop iteration.  None of
  // them uses the "g" flag, so exec() keeps no state between calls.
  var ticked_word_re = new RegExp("^``" + letters);
  var raw_word_re = new RegExp("^`" + letters);
  var word_re = new RegExp("^" + letters);

  var eng_tag = '<lang=eng>';
  var kan_tag = '<lang=kan>';

  var words = [];
  var match;
  var pos;

  while (sentence.length >= 1) {
    match = ticked_word_re.exec(sentence);
    if (match != null) {
      match = match[0];
      words.push("`");
      words.push(this.one_word(match.substring(2)));
      sentence = sentence.substring(match.length);
      continue;
    }

    match = raw_word_re.exec(sentence);
    if (match != null) {
      match = match[0];
      words.push(match.substring(1));
      sentence = sentence.substring(match.length);
      continue;
    }

    match = word_re.exec(sentence);
    if (match != null) {
      match = match[0];
      words.push(this.one_word(match));
      sentence = sentence.substring(match.length);
      continue;
    }

    if (sentence.indexOf(eng_tag) == 0) {
      pos = sentence.indexOf(kan_tag);
      if (pos != -1) {
        words.push('<span class="english">' + sentence.substring(eng_tag.length, pos) + '</span>');
        sentence = sentence.substring(pos + kan_tag.length);
      } else {
        // No closing tag: the rest of the input is English.
        words.push('<span class="english">' + sentence.substring(eng_tag.length) + '</span>');
        sentence = '';
      }
    } else if (sentence.indexOf(kan_tag) == 0) {
      sentence = sentence.substring(kan_tag.length);
    } else {
      words.push(sentence.charAt(0));
      sentence = sentence.substring(1);
    }
  }
  return words.join("");
}

// Convert the text of an input element and show it in an output
// element.
//
//   inputEle  - object whose .value holds the Roman text.
//   outputEle - object whose .innerHTML receives the result.
//   returns   - nothing.
//
// Steps:
//   1. "^" is swapped for a pilcrow (U+00B6) before conversion and the
//      pilcrow is stripped afterwards, so "^" produces no output of its
//      own.  It still sits between the letters during conversion, where
//      many_words copies it through as an ordinary character.
//   2. "~j" is replaced by "Q" before conversion.
//   3. The text is converted by many_words.
//   4. Every decimal character reference "&#NNNN;" is replaced by the
//      character it names.
//   5. Newlines become "<br/>".
//
// NOTE(review): the result is assigned to innerHTML without escaping,
// so markup typed into the input is rendered as markup.  many_words
// itself emits <span> tags, so this is presumably intended -- confirm
// the input is never attacker-controlled.
unicodeEngine.prototype.print_many_words = function(inputEle, outputEle)
{
  var text_pmw = inputEle.value;

  text_pmw = text_pmw.replace(/\^/g, '\u00B6');
  text_pmw = text_pmw.replace(/~j/g, 'Q');
  text_pmw = this.many_words(text_pmw);

  // Decode the numeric character references in a single left-to-right
  // pass; decoded output is not rescanned.
  var ans = text_pmw.replace(/&#([0-9]+);/g, function (entity, digits) {
    return String.fromCharCode(parseInt(digits, 10));
  });

  ans = ans.replace(/\u00B6/g, '');
  ans = ans.replace(/\n/g, '<br/>');
  outputEle.innerHTML = ans;
}
