DCSIMG
Determine if char is Hebrew (2) - Shimmy on .NET

Determine if char is Hebrew (2)

About 2 weeks ago I posted a function that determines whether a given char is Hebrew:

// Code points are spelled numerically so the source file stays free of non-ASCII characters.
private const char FirstHebChar = (char)0x05D0; // U+05D0 (decimal 1488), Alef
private const char LastHebChar = (char)0x05EA; // U+05EA (decimal 1514), Tav

/// <summary>
/// Tells whether <paramref name="c"/> lies in the inclusive range
/// <see cref="FirstHebChar"/>..<see cref="LastHebChar"/>.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> when the character is inside the range; otherwise <c>false</c>.</returns>
private static bool IsHebrew(this char c)
{
  // Guard clause: anything below the first letter is out of range.
  if (c < FirstHebChar)
  {
    return false;
  }

  return c <= LastHebChar;
}

Schabse Laks commented that it won't include punctuation marks / vowels etc.

This week I needed a more sophisticated function that does the job, so I ended up with the extension methods below (view raw code here).
Note that I needed the function for a one-time-use application, so I didn't optimize the code very well; any improvements are welcome, as always, and especially now.

namespace System
{
  using System.Linq;
  using System.Collections.Generic;
 
  public static class HebrewCharsExtensions
  {
    public static bool IsHebrew(this char c)
    {
      return IsHebrew(cHebrewCharTypes.All);
    }
 
    public static bool IsHebrew(this char cHebrewCharTypes charTypes)
    {
      foreach (var kv in Map.Where(ct => charTypes.HasFlag(ct.Key)))
      {
        var locMap = kv.Value;
        for (int i = 0; i <= locMap.GetUpperBound(0); i++)
          if (c >= locMap[i, 0] && c <= locMap[i, 1]) return true;
      }
      return false;
    }
 
    public static HebrewCharTypes GetHebrewCharType(this char c)
    {
      foreach (var kv in Map)
      {
        var locMap = kv.Value;
        for (int i = 0; i <= locMap.GetUpperBound(0); i++)
          if (c >= locMap[i, 0] && c <= locMap[i, 1]) return kv.Key;
      }
      return HebrewCharTypes.None;
    }
 
    public static IEnumerable<charGetHebChars()
    {
      return GetHebChars(HebrewCharTypes.All);
    }
 
    public static IEnumerable<charGetHebChars(HebrewCharTypes charTypes)
    {
      foreach (var kv in Map.Where(ct => charTypes.HasFlag(ct.Key)))
      {
        var locMap = kv.Value;
        for (int i = 0; i <= locMap.GetUpperBound(0); i++)
        {
          var start = locMap[i, 0];
          var end = locMap[i, 1];
          foreach (var ch in Enumerable.Range(startend - start + 1))
            yield return (char)ch;
        }
      }
    }
 
    private static readonly Dictionary<HebrewCharTypesint[,]> Map = new Dictionary<HebrewCharTypesint[,]>
    {
       {HebrewCharTypes.Letter,   new int[,] { { 0x05d0, 0x05ea } } },
       {HebrewCharTypes.Vowel,    new int[,] { { 0x05b0, 0x05b9 }, { 0x05bb, 0x05c4 }, { 0xfb2a, 0xfb36 }, { 0xfb38, 0xfb3c }, { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb43, 0xfb44 }, { 0xfb46, 0xfb4e} } },
       {HebrewCharTypes.Biblicalnew int[,] { { 0x0591, 0x05a1 }, { 0x05a3, 0x05af }, { 0xfb1e, 0xfb1e }, { 0xfb4f, 0xfb4f } } },
       {HebrewCharTypes.Yiddish,  new int[,] { { 0x05f0, 0x05f2 }, { 0xfb1f, 0xfb1f } } },
       {HebrewCharTypes.Special,  new int[,] { { 0x05f3, 0x05f4 }, { 0xfb20, 0xfb29 } } }
    };
 
  }
 
  [Flags]
  public enum HebrewCharTypes
  {
    /// <summary>
    /// Not a Hebrew character.
    /// </summary>
    None = 0,
    /// <summary>
    /// Alef to Tav.
    /// </summary>
    Letter = 1,
    /// <summary>
    /// Qamats to Hataf Segol.
    /// </summary>
    Vowel = 2,
    /// <summary>
    /// Kadma Munah Pashta etc.
    /// </summary>
    Biblical = 4,
    /// <summary>
    /// Double Yod etc.
    /// </summary>
    Yiddish = 8,
    /// <summary>
    /// Other special characters (wide chars and more).
    /// </summary>
    Special = 16,
    All = Letter | Vowel | Biblical | Yiddish | Special
  }
}
Published Friday, February 03, 2012 2:52 AM by Shimmy
תגים:, , , , ,

Comments

No Comments

Leave a Comment

(required) 
(required) 
(optional)
(required) 

Enter the numbers above:
Powered by Community Server (Commercial Edition), by Telligent Systems