Tuesday, March 12, 2013

JavaScript function to decode HTML escape sequences.

I found a few samples that decode the escape sequences in an HTML string like the one below, but none quite took my fancy.  (Not to be confused with unescape, which decodes URL escape sequences.)

Here is the version I wrote:


// Example: the input contains two escape sequences and decodes to
// "T&His i£s a test."
HtmlDecode("T&amp;His i&pound;s a test.");

/**
 * Decodes HTML character references in a string.
 *
 * Handles the named entities listed below (the HTML 4 set, e.g. "amp",
 * "pound", "sup2") and numeric references in decimal ("#39") or
 * hexadecimal ("#x27") form. Entity names are case-sensitive ("Dagger" and
 * "dagger" are different characters). Anything that looks like a reference
 * but is not recognised is left in the output unchanged rather than removed.
 *
 * The string is scanned once, so text produced by decoding is never decoded
 * a second time.
 *
 * @param {string} s - Text containing HTML escape sequences.
 * @returns {string} The text with recognised escape sequences replaced by
 *     the characters they represent.
 */
function HtmlDecode(s) {
  // Maps an entity name to its code point, or -1 when the name is unknown.
  // Names are matched WITHOUT the surrounding '&' and ';' so that this source
  // contains no literal escape sequences that an HTML renderer could decode
  // (and thereby corrupt) when the code is published on a web page.
  function namedCodePoint(name) {
    switch (name) {
    case 'quot': return 0x0022;
    case 'amp': return 0x0026;
    case 'lt': return 0x003c;
    case 'gt': return 0x003e;
    case 'nbsp': return 0x00a0;
    case 'iexcl': return 0x00a1;
    case 'cent': return 0x00a2;
    case 'pound': return 0x00a3;
    case 'curren': return 0x00a4;
    case 'yen': return 0x00a5;
    case 'brvbar': return 0x00a6;
    case 'sect': return 0x00a7;
    case 'uml': return 0x00a8;
    case 'copy': return 0x00a9;
    case 'ordf': return 0x00aa;
    case 'laquo': return 0x00ab;
    case 'not': return 0x00ac;
    case 'shy': return 0x00ad;
    case 'reg': return 0x00ae;
    case 'macr': return 0x00af;
    case 'deg': return 0x00b0;
    case 'plusmn': return 0x00b1;
    case 'sup2': return 0x00b2;
    case 'sup3': return 0x00b3;
    case 'acute': return 0x00b4;
    case 'micro': return 0x00b5;
    case 'para': return 0x00b6;
    case 'middot': return 0x00b7;
    case 'cedil': return 0x00b8;
    case 'sup1': return 0x00b9;
    case 'ordm': return 0x00ba;
    case 'raquo': return 0x00bb;
    case 'frac14': return 0x00bc;
    case 'frac12': return 0x00bd;
    case 'frac34': return 0x00be;
    case 'iquest': return 0x00bf;
    case 'Agrave': return 0x00c0;
    case 'Aacute': return 0x00c1;
    case 'Acirc': return 0x00c2;
    case 'Atilde': return 0x00c3;
    case 'Auml': return 0x00c4;
    case 'Aring': return 0x00c5;
    case 'AElig': return 0x00c6;
    case 'Ccedil': return 0x00c7;
    case 'Egrave': return 0x00c8;
    case 'Eacute': return 0x00c9;
    case 'Ecirc': return 0x00ca;
    case 'Euml': return 0x00cb;
    case 'Igrave': return 0x00cc;
    case 'Iacute': return 0x00cd;
    case 'Icirc': return 0x00ce;
    case 'Iuml': return 0x00cf;
    case 'ETH': return 0x00d0;
    case 'Ntilde': return 0x00d1;
    case 'Ograve': return 0x00d2;
    case 'Oacute': return 0x00d3;
    case 'Ocirc': return 0x00d4;
    case 'Otilde': return 0x00d5;
    case 'Ouml': return 0x00d6;
    case 'times': return 0x00d7;
    case 'Oslash': return 0x00d8;
    case 'Ugrave': return 0x00d9;
    case 'Uacute': return 0x00da;
    case 'Ucirc': return 0x00db;
    case 'Uuml': return 0x00dc;
    case 'Yacute': return 0x00dd;
    case 'THORN': return 0x00de;
    case 'szlig': return 0x00df;
    case 'agrave': return 0x00e0;
    case 'aacute': return 0x00e1;
    case 'acirc': return 0x00e2;
    case 'atilde': return 0x00e3;
    case 'auml': return 0x00e4;
    case 'aring': return 0x00e5;
    case 'aelig': return 0x00e6;
    case 'ccedil': return 0x00e7;
    case 'egrave': return 0x00e8;
    case 'eacute': return 0x00e9;
    case 'ecirc': return 0x00ea;
    case 'euml': return 0x00eb;
    case 'igrave': return 0x00ec;
    case 'iacute': return 0x00ed;
    case 'icirc': return 0x00ee;
    case 'iuml': return 0x00ef;
    case 'eth': return 0x00f0;
    case 'ntilde': return 0x00f1;
    case 'ograve': return 0x00f2;
    case 'oacute': return 0x00f3;
    case 'ocirc': return 0x00f4;
    case 'otilde': return 0x00f5;
    case 'ouml': return 0x00f6;
    case 'divide': return 0x00f7;
    case 'oslash': return 0x00f8;
    case 'ugrave': return 0x00f9;
    case 'uacute': return 0x00fa;
    case 'ucirc': return 0x00fb;
    case 'uuml': return 0x00fc;
    case 'yacute': return 0x00fd;
    case 'thorn': return 0x00fe;
    case 'yuml': return 0x00ff;
    case 'OElig': return 0x0152;
    case 'oelig': return 0x0153;
    case 'Scaron': return 0x0160;
    case 'scaron': return 0x0161;
    case 'Yuml': return 0x0178;
    case 'fnof': return 0x0192;
    case 'circ': return 0x02c6;
    case 'tilde': return 0x02dc;
    case 'Alpha': return 0x0391;
    case 'Beta': return 0x0392;
    case 'Gamma': return 0x0393;
    case 'Delta': return 0x0394;
    case 'Epsilon': return 0x0395;
    case 'Zeta': return 0x0396;
    case 'Eta': return 0x0397;
    case 'Theta': return 0x0398;
    case 'Iota': return 0x0399;
    case 'Kappa': return 0x039a;
    case 'Lambda': return 0x039b;
    case 'Mu': return 0x039c;
    case 'Nu': return 0x039d;
    case 'Xi': return 0x039e;
    case 'Omicron': return 0x039f;
    case 'Pi': return 0x03a0;
    case 'Rho': return 0x03a1;
    case 'Sigma': return 0x03a3;
    case 'Tau': return 0x03a4;
    case 'Upsilon': return 0x03a5;
    case 'Phi': return 0x03a6;
    case 'Chi': return 0x03a7;
    case 'Psi': return 0x03a8;
    case 'Omega': return 0x03a9;
    case 'alpha': return 0x03b1;
    case 'beta': return 0x03b2;
    case 'gamma': return 0x03b3;
    case 'delta': return 0x03b4;
    case 'epsilon': return 0x03b5;
    case 'zeta': return 0x03b6;
    case 'eta': return 0x03b7;
    case 'theta': return 0x03b8;
    case 'iota': return 0x03b9;
    case 'kappa': return 0x03ba;
    case 'lambda': return 0x03bb;
    case 'mu': return 0x03bc;
    case 'nu': return 0x03bd;
    case 'xi': return 0x03be;
    case 'omicron': return 0x03bf;
    case 'pi': return 0x03c0;
    case 'rho': return 0x03c1;
    case 'sigmaf': return 0x03c2;
    case 'sigma': return 0x03c3;
    case 'tau': return 0x03c4;
    case 'upsilon': return 0x03c5;
    case 'phi': return 0x03c6;
    case 'chi': return 0x03c7;
    case 'psi': return 0x03c8;
    case 'omega': return 0x03c9;
    case 'thetasym': return 0x03d1;
    case 'upsih': return 0x03d2;
    case 'piv': return 0x03d6;
    case 'ensp': return 0x2002;
    case 'emsp': return 0x2003;
    case 'thinsp': return 0x2009;
    case 'zwnj': return 0x200c;
    case 'zwj': return 0x200d;
    case 'lrm': return 0x200e;
    case 'rlm': return 0x200f;
    case 'ndash': return 0x2013;
    case 'mdash': return 0x2014;
    case 'lsquo': return 0x2018;
    case 'rsquo': return 0x2019;
    case 'sbquo': return 0x201a;
    case 'ldquo': return 0x201c;
    case 'rdquo': return 0x201d;
    case 'bdquo': return 0x201e;
    case 'dagger': return 0x2020;
    case 'Dagger': return 0x2021;
    case 'bull': return 0x2022;
    case 'hellip': return 0x2026;
    case 'permil': return 0x2030;
    case 'prime': return 0x2032;
    case 'Prime': return 0x2033;
    case 'lsaquo': return 0x2039;
    case 'rsaquo': return 0x203a;
    case 'oline': return 0x203e;
    case 'frasl': return 0x2044;
    case 'euro': return 0x20ac;
    case 'image': return 0x2111;
    case 'weierp': return 0x2118;
    case 'real': return 0x211c;
    case 'trade': return 0x2122;
    case 'alefsym': return 0x2135;
    case 'larr': return 0x2190;
    case 'uarr': return 0x2191;
    case 'rarr': return 0x2192;
    case 'darr': return 0x2193;
    case 'harr': return 0x2194;
    case 'crarr': return 0x21b5;
    case 'lArr': return 0x21d0;
    case 'uArr': return 0x21d1;
    case 'rArr': return 0x21d2;
    case 'dArr': return 0x21d3;
    case 'hArr': return 0x21d4;
    case 'forall': return 0x2200;
    case 'part': return 0x2202;
    case 'exist': return 0x2203;
    case 'empty': return 0x2205;
    case 'nabla': return 0x2207;
    case 'isin': return 0x2208;
    case 'notin': return 0x2209;
    case 'ni': return 0x220b;
    case 'prod': return 0x220f;
    case 'sum': return 0x2211;
    case 'minus': return 0x2212;
    case 'lowast': return 0x2217;
    case 'radic': return 0x221a;
    case 'prop': return 0x221d;
    case 'infin': return 0x221e;
    case 'ang': return 0x2220;
    case 'and': return 0x2227;
    case 'or': return 0x2228;
    case 'cap': return 0x2229;
    case 'cup': return 0x222a;
    case 'int': return 0x222b;
    case 'there4': return 0x2234;
    case 'sim': return 0x223c;
    case 'cong': return 0x2245;
    case 'asymp': return 0x2248;
    case 'ne': return 0x2260;
    case 'equiv': return 0x2261;
    case 'le': return 0x2264;
    case 'ge': return 0x2265;
    case 'sub': return 0x2282;
    case 'sup': return 0x2283;
    case 'nsub': return 0x2284;
    case 'sube': return 0x2286;
    case 'supe': return 0x2287;
    case 'oplus': return 0x2295;
    case 'otimes': return 0x2297;
    case 'perp': return 0x22a5;
    case 'sdot': return 0x22c5;
    case 'lceil': return 0x2308;
    case 'rceil': return 0x2309;
    case 'lfloor': return 0x230a;
    case 'rfloor': return 0x230b;
    // NOTE(review): these are the HTML 4 code points for lang/rang; HTML5
    // appears to map them to U+27E8/U+27E9 instead -- confirm which is wanted.
    case 'lang': return 0x2329;
    case 'rang': return 0x232a;
    case 'loz': return 0x25ca;
    case 'spades': return 0x2660;
    case 'clubs': return 0x2663;
    case 'hearts': return 0x2665;
    case 'diams': return 0x2666;
    default: return -1;
    }
  }

  // Three alternatives inside the capture group: hex reference, decimal
  // reference, or a name. Names may contain digits after the first letter
  // (sup2, frac12, there4, ...).
  return s.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, function(entity, body) {
    var code;

    if (body.charAt(0) === '#') {
      var isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
      code = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);

      // Leave NUL, lone surrogates and out-of-range values untouched.
      if (!(code >= 1 && code <= 0x10ffff) || (code >= 0xd800 && code <= 0xdfff)) {
        return entity;
      }
      if (code > 0xffff) {
        // String.fromCharCode works on UTF-16 units, so astral code points
        // have to be emitted as a surrogate pair.
        code -= 0x10000;
        return String.fromCharCode(0xd800 + (code >> 10), 0xdc00 + (code & 0x3ff));
      }
      return String.fromCharCode(code);
    }

    code = namedCodePoint(body);
    // Unknown names are kept verbatim instead of being deleted from the text.
    return code < 0 ? entity : String.fromCharCode(code);
  });
}

No comments: