編碼系列--Base32編碼

0、寫在前面
這裏討論的編碼主要的目的是將不可顯示的二進制數組轉變爲可顯示的字符串,包括其逆運算。通過特定的協議傳輸數據,或者加密解密的時候都會用到類似的方法。
在這類運算中用的比較多的是Base64,比如MIME中,DotNET中更是直接提供了Base64 Encode和Decode的方法,相當方便。但是Base64通常由“a-z”、“A-Z”、0-9以及“+”和“/”這些符號組成(另以“=”作爲填充符),當中包含了很多容易混淆的字符,例如“1”、“I”和“l”,“0”和“O”或者“2”和“Z”,看起來總是不爽。特別是當作爲序列號編碼時,是不應該包含容易混淆的字母的,所以有另一種編碼形式叫做Base24,用過MS產品的兄弟們一定非常熟悉。但是Base24在實現上還要多繞一個彎,先放一放,我們先說Base32,一種能夠基本滿足要求、又非常直觀的編碼方式。
 
1、編碼原理
Base32的原理和Base64一模一樣,所以先看一下Base64編碼是怎麼一回事。
Base64顧名思義就是用64個可顯示字符表示所有的ASC字符,64也就是6Bits,而ASC字符一共有256個,也就是8Bits,很簡單了,取一下6和8的最小公倍數,24位,言下之意就是用4個Base64的字符來表示3個ASC字符。即在編碼時,3個一組ASC字符,產生4個Base64字符,解碼時4個一組,還原3個ASC字符。根據這個原理Base64編碼之後的字符串應該比原先增加1/3的長度。
這裏所謂的編碼就是一次取6Bits,換算出來的值作爲索引號,利用這個索引數,到預先定義的長度爲64的字符數組中取相應的字符替換即可;解碼就是逆運算,根據字符取在預定義數組中的索引值,然後按8Bits一組還原ASC字符。
Base32和Base64相比只有一個區別就是,用32個字符表示256個ASC字符,32也就是5Bits,5和8的最小公倍數是40位,也就是說5個ASC字符一組可以生成8個Base32字符,反之亦然。
 
2、源代碼
對於實現這樣的功能的代碼,毫無難度,如果不考慮效率的,只要細心點,肯定不會錯。有人寫一次就夠了,所以拿出來和大家一起分享。
下面這個類還額外提供了一個功能就是可以自定義編碼的字符串,注意Base32Map這個屬性。
 
/// *******************************************************************************
/// Name: CBase32.cs
/// Module: CBase32
/// Author: Mittermeyer
/// Comment:
///  
/// History:
///  2004-08-31 Mittermeyer Create
///  
///  Copyright 1995-2004 by Mittermeyer.All rights reserved.
/// *******************************************************************************
 
using System;
using System.Text;
 
namespace Mittermeyer
{
 /// <summary>
 /// Summary description for CBase32.
 /// </summary>
 public class CBase32
 {
  private const String DefaultBase32Map = "ABCDEFGHIJKLMNPQRSTUVWXYZ3456789";
  private const Int32  Base32MapLength = 32;
  
  private static Char[] m_acBase32Map = null;
 
  static CBase32()
  {
   m_acBase32Map = DefaultBase32Map.ToCharArray();
  }
  public CBase32()
  {
  }
 
  public static String Base32Map
  {
   get
   {
    return m_acBase32Map.ToString();
   }
   set
   {
    if (value != null && value.Length >= Base32MapLength)
    {
     m_acBase32Map = value.ToCharArray();
    }
    else
    {
     m_acBase32Map = DefaultBase32Map.ToCharArray();
    }
   }
  }
 
  public static Char GetCharacter(Int32 dwIndex)
  {
   Char cMappingData = '/0';
 
   if (m_acBase32Map != null && dwIndex >= 0 && dwIndex < m_acBase32Map.Length)
   {
    cMappingData = m_acBase32Map[dwIndex];
   }
   return cMappingData;
  }
 

  public static Int32 GetCharIndex(Char cData)
  {
   Int32 dwIndex = -1,dwLoop = 0;
 
   if (m_acBase32Map != null)
   {
    for (dwLoop = 0;dwLoop < m_acBase32Map.Length;dwLoop++)
    {
     if (m_acBase32Map[dwLoop] == cData)
     {
      dwIndex = dwLoop;
      break;
     }
    }
   }
   return dwIndex;
  }
 
  public static String Encode(Byte[] abData)
  {
   Int32 dwLoop = 0,dwCharIndex = 0,dwCharCount;
   Char[] acPart = null;
   StringBuilder sbOutput = null;
 
   if (abData == null || m_acBase32Map == null || m_acBase32Map.Length < Base32MapLength)
    return null;
 
   try
   {
    dwCharCount = (Int32) (abData.Length / 5f * 8f) + 1;
    sbOutput = new StringBuilder(dwCharCount);
    acPart = new Char[8];
   }
   catch (Exception)
   {
   }
   if (acPart == null || sbOutput == null)
    return null;
 
   dwCharCount = 0;
   for(dwLoop = 0;dwLoop < abData.Length;dwLoop += 5)
   {
    // every 5 bytes is a unit,can convert to 8 chars
    // data format:
    //   AAAAABBB BBCCCCCD DDDDEEEE EFFFFFGG GGGHHHHH
    switch (abData.Length - dwLoop)
    {
     case 1:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];  
      dwCharIndex = (abData[dwLoop] & 0x7) << 2;  // BBB00
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharCount = 2;
      break;
 
     case 2:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = abData[dwLoop + 1] & 0x1;   // D0000
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharCount = 4;
      break;
 
     case 3:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 1] & 0x1) << 4) + (abData[dwLoop + 2] >> 4);// DDDDD
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 2] & 0xF) << 1; // EEEE0
      acPart[4] = m_acBase32Map[dwCharIndex];
      dwCharCount = 5;
      break;
 
     case 4:
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 1] & 0x1) << 4) + (abData[dwLoop + 2] >> 4);// DDDDD
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 2] & 0xF) << 1) + (abData[dwLoop + 3] >> 7);// EEEEE
      acPart[4] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 3] & 0x7F) >> 2; // FFFFF
      acPart[5] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 3] & 0x3) << 3; // GG000
      acPart[6] = m_acBase32Map[dwCharIndex];
      dwCharCount = 7;
      break;
 
     default:  // >= 5
      dwCharIndex = abData[dwLoop] >> 3;    // AAAAA
      acPart[0] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop] & 0x7) << 2) + (abData[dwLoop + 1] >> 6); // BBBBB
      acPart[1] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 1] & 0x3F) >> 1; // CCCCC
      acPart[2] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 1] & 0x1) << 4) + (abData[dwLoop + 2] >> 4);// DDDDD
      acPart[3] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 2] & 0xF) << 1) + (abData[dwLoop + 3] >> 7);// EEEEE
      acPart[4] = m_acBase32Map[dwCharIndex];
      dwCharIndex = (abData[dwLoop + 3] & 0x7F) >> 2; // FFFFF
      acPart[5] = m_acBase32Map[dwCharIndex];
      dwCharIndex = ((abData[dwLoop + 3] & 0x3) << 3) + (abData[dwLoop + 4] >> 5);// GGGGG
      acPart[6] = m_acBase32Map[dwCharIndex];
      dwCharIndex = abData[dwLoop + 4] & 0x1F;  // HHHHH
      acPart[7] = m_acBase32Map[dwCharIndex];
      dwCharCount = 8;
      break;
    }
    
    sbOutput.Append(acPart,0,dwCharCount);
   }
 
   return sbOutput.ToString();
  }
 
  public static Byte[] Decode(String sData)
  {
   Int32 dwLoop = 0,dwLength = 0;
   Int32[] dwCharIndex = null;
   Byte[] abOutput = null;
   Char[] acInput = null;
 
   if (sData == null || sData == String.Empty)
    return null;
 
   acInput = sData.ToCharArray();
   if (acInput == null)
    return null;
 
   try
   {
    dwLength = (acInput.Length / 8 * 5) + 1;
    abOutput = new Byte[dwLength];
    dwCharIndex = new Int32[8];
   }
   catch (Exception)
   {
   }
   if (acInput == null)
    return null;
 
   dwLength = 0;
   for (dwLoop = 0;dwLoop < acInput.Length;dwLoop += 8)
   {
    switch (acInput.Length - dwLoop)
    {
     case 1:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3);
      break;
 
     case 2:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6);
      break;
 
     case 3:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1);
      break;
 
     case 4:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4);
      break;
 
     case 5:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7);
      break;
 
     case 6:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
      dwCharIndex[5] = GetCharIndex(acInput[dwLoop + 5]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7 + dwCharIndex[5] << 2);
      break;
      
     case 7:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
      dwCharIndex[5] = GetCharIndex(acInput[dwLoop + 5]);
      dwCharIndex[6] = GetCharIndex(acInput[dwLoop + 6]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7 + dwCharIndex[5] << 2 + dwCharIndex[6] >> 3);
      abOutput[dwLength + 4] = (Byte) ((dwCharIndex[6] & 0x7) << 5);
      break;
 
     default:
      dwCharIndex[0] = GetCharIndex(acInput[dwLoop]);
      dwCharIndex[1] = GetCharIndex(acInput[dwLoop + 1]);
      dwCharIndex[2] = GetCharIndex(acInput[dwLoop + 2]);
      dwCharIndex[3] = GetCharIndex(acInput[dwLoop + 3]);
      dwCharIndex[4] = GetCharIndex(acInput[dwLoop + 4]);
      dwCharIndex[5] = GetCharIndex(acInput[dwLoop + 5]);
      dwCharIndex[6] = GetCharIndex(acInput[dwLoop + 6]);
      dwCharIndex[7] = GetCharIndex(acInput[dwLoop + 7]);
 
      abOutput[dwLength] = (Byte) (dwCharIndex[0] << 3 + dwCharIndex[1] >> 2);
      abOutput[dwLength + 1] = (Byte) ((dwCharIndex[1] & 0x3) << 6 + dwCharIndex[2] << 1 + dwCharIndex[3] >> 4);
      abOutput[dwLength + 2] = (Byte) ((dwCharIndex[3] & 0xF) << 4 + dwCharIndex[4] >> 1);
      abOutput[dwLength + 3] = (Byte) ((dwCharIndex[4] & 0x1) << 7 + dwCharIndex[5] << 2 + dwCharIndex[6] >> 3);
      abOutput[dwLength + 4] = (Byte) ((dwCharIndex[6] & 0x7) << 5 + dwCharIndex[8]);
      break;
    }
    dwLength += 5;
   }
 
   return abOutput;
  }
 }
}
 
3、後記
其實筆者真正想寫的是Base24,至於Base24的代碼是什麼,賣個關子,過幾天再說。當然可能很多老兄們已經知道是怎麼回事了......

 
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章