C標準庫string.h源碼六:strspn/strcspn/strpbrk/strtok

​size_t strspn(const char *str, const char * accept);

【函數說明】strspn() 從參數 str 字符串的開頭計算連續的字符,而這些字符都完全是 accept 所指字符串中的字符。簡單的說,若 strspn() 返回的數值爲n,則代表字符串 str 開頭連續有 n 個字符都是屬於字符串 accept 內的字符。

【返回值】返回字符串 str 開頭連續包含字符串 accept 內的字符數目。

一個例子:
 

#include <stdio.h>
#include <string.h>
int main ()
{
  /* String to inspect and the set of characters allowed in its prefix. */
  char text[] = "123th789";
  char digits[] = "1234567890";

  /* Length of the leading run of `text` made up only of characters
     from `digits` (the size_t result is narrowed to int for %d). */
  int prefix_len = strspn(text, digits);

  printf("str 前 %d 個字符都屬於 accept\n", prefix_len);
  return 0;
}
//執行結果str 前 3 個字符都屬於 accept

實現:
 

/* Compute the length of the longest prefix of S in which every
   character also occurs somewhere in ACCEPT.  The terminating null
   of ACCEPT is never treated as a match, so the scan always stops
   at the end of S at the latest.  */
size_t strspn(const char *s, const char *accept)
{
  size_t matched = 0;

  while (s[matched] != '\0')
  {
      const char *candidate = accept;

      /* Search ACCEPT for the current character of S.  */
      while (*candidate != '\0' && *candidate != s[matched])
          ++candidate;

      /* Ran off the end of ACCEPT: this character is not allowed,
         so the prefix ends here.  */
      if (*candidate == '\0')
          break;

      ++matched;
  }

  return matched;
}

 //位圖(bitmap)查表實現:用 256 位的表標記字符集合,查找每個字符只需 O(1)

/*
int strspn(string, control) - find init substring of control chars

Purpose:
       Finds the index of the first character in string that does NOT
       belong to the set of characters specified by control.  This is
       equivalent to the length of the initial substring of string that
       consists entirely of characters from control.  The '\0' character
       that terminates control is not considered in the matching process.
Entry:
       string  - null-terminated string to scan
       control - null-terminated set of accepted characters
Exit:
       returns index of first char in string not in control
       (the length of string if every character is in control)
*/
int strspn (const char *string, const char *control)
{
    /* Work on unsigned bytes: with a signed plain char, bytes >= 0x80
       would turn into negative indexes into map. */
    const unsigned char *str = (const unsigned char *)string;
    const unsigned char *ctrl = (const unsigned char *)control;

    /* 32 bytes = 256 bits, one bit per possible byte value.  For
       control = "*aA", bits 42 ('*'), 65 ('A') and 97 ('a') are set
       and every other bit stays 0. */
    unsigned char map[32] = {0};

    /* Must start at zero; it is the value returned to the caller. */
    int count = 0;

    /* Set the bit of every character in control. */
    while (*ctrl)
    {
        map[*ctrl >> 3] |= (unsigned char)(1 << (*ctrl & 7));
        ctrl++;
    }

    /* Walk string while the current character's bit is set.  Bit 0 is
       never set, so the loop also stops at the terminating '\0'. */
    while (map[*str >> 3] & (1 << (*str & 7)))
    {
        count++;
        str++;
    }
    return count;
}


size_t strcspn(const char *str, const char * reject);
【函數說明】若 strcspn() 返回的數值爲n,則代表字符串 str 開頭連續有 n 個字符都不屬於字符串 reject 內的字符。
【返回值】返回字符串 str 開頭連續不包含字符串 reject 內任何字符的字符數目。
 

/* Compute the length of the longest prefix of S that contains none of
   the characters listed in ACCEPT (which acts as the reject set here).
   If no such character occurs in S, the result is the length of S.  */
size_t strcspn(const char *s, const char *accept)
{
  size_t clean = 0;

  for (; s[clean] != '\0'; ++clean)
  {
      const char *candidate;

      /* The first character of S that appears in the reject set
         ends the prefix.  */
      for (candidate = accept; *candidate != '\0'; ++candidate)
          if (*candidate == s[clean])
              return clean;
  }

  return clean;
}

 

/*
int strcspn(string, control) - search for init substring w/o control chars

Purpose:
       returns the index of the first character in string that belongs
       to the set of characters specified by control.  This is equivalent
       to the length of the initial substring of string composed entirely
       of characters not in control.  Null chars not considered.
Entry:
       string  - null-terminated string to scan
       control - null-terminated set of rejected characters
Exit:
       returns the index of the first char in string
       that is in the set of characters specified by control
       (the length of string if there is no such character).
*/
int strcspn (const char *string, const char *control)
{
    /* Work on unsigned bytes: with a signed plain char, bytes >= 0x80
       would turn into negative indexes into map. */
    const unsigned char *str = (const unsigned char *)string;
    const unsigned char *ctrl = (const unsigned char *)control;

    /* 256-bit table, one bit per possible byte value. */
    unsigned char map[32] = {0};

    /* Must start at zero; it is the value returned to the caller. */
    int count = 0;

    /* Set the bit of every character in control. */
    while (*ctrl)
    {
        map[*ctrl >> 3] |= (unsigned char)(1 << (*ctrl & 7));
        ctrl++;
    }

    /* Also mark '\0' so the scan below stops at the end of string. */
    map[0] |= 1;

    /* Count characters until one whose bit is set is reached. */
    while (!(map[*str >> 3] & (1 << (*str & 7))))
    {
        count++;
        str++;
    }
    return count;
}


char *strpbrk(const char *s, const char *accept);
【返回值】strpbrk函數返回一個指針,它指向字符串accept中的任意字符第一次出現在字符串s中的位置,如果s沒有與accept中相同的字符,則返回NULL
 

​/* Find the first ocurrence in S of any character in ACCEPT.    */
/* S is the string to scan and ACCEPT the set of characters to look
   for.  Returns a pointer to the first character of S that also occurs
   in ACCEPT, or NULL when S contains none of them.  The result points
   into S; the const qualifier is cast away explicitly because the
   standard signature returns a plain char *.  */
char *strpbrk(const char *s, const char *accept)
{
  const char *p;
  const char *a;

  for (p = s; *p != '\0'; ++p)
  {
      for (a = accept; *a != '\0'; ++a)
      {
          if (*p == *a)
              return (char *) p;
      }
  }

  return NULL;
}
/*
char *strpbrk(string, control) - scans string for a character from control

Purpose:
       Finds the first occurrence in string of any character from
       the control string.
Entry:
       string  - null-terminated string to scan
       control - null-terminated set of characters to look for
Exit:
       returns a pointer to the first character from control found in string.
       returns NULL if string and control have no characters in common.
*/
char *strpbrk (const char *string, const char *control)
{
    /* Work on unsigned bytes: with a signed plain char, bytes >= 0x80
       would turn into negative indexes into map. */
    const unsigned char *str = (const unsigned char *)string;
    const unsigned char *ctrl = (const unsigned char *)control;

    /* 256-bit table, one bit per possible byte value. */
    unsigned char map[32] = {0};

    /* Set the bit of every character in control. */
    for (; *ctrl; ctrl++)
        map[*ctrl >> 3] |= (unsigned char)(1 << (*ctrl & 7));

    /* The first character of string whose bit is set is the answer. */
    for (; *str; str++)
    {
        if (map[*str >> 3] & (1 << (*str & 7)))
            return (char *)str;   /* explicit cast: drops const */
    }
    return NULL;
}

char *strtok (char *s, const char *delim); //字符分割函數
 

/*
 * Split a string into tokens separated by any of the characters in
 * control.
 *
 * string  - the string to tokenize on the first call (it is modified:
 *           the delimiter that ends each token is overwritten with
 *           '\0'); NULL to continue with the string from the previous
 *           call.
 * control - null-terminated set of delimiter characters; it may differ
 *           from one call to the next.
 *
 * Returns a pointer to the next token, or NULL when no token is left
 * (also when string is NULL and there is no saved position).
 */
char *strtok (char * string, const char * control)
{
	unsigned char *str;
	const unsigned char *ctrl = (const unsigned char *)control;

	/* Scan position saved between calls.  Being a plain static it is
	 * shared by all callers, so this version is not reentrant; the
	 * original article notes that real C runtimes keep this state per
	 * thread. */
	static unsigned char *next_token = NULL;

	/* 256-bit delimiter table, one bit per possible byte value. */
	unsigned char map[32] = {0};

	/* Set bits in the delimiter table.  This has to happen on EVERY
	 * call, including continuation calls with string == NULL, because
	 * the table starts out empty and control may change between calls.
	 * The do/while also marks the terminating '\0' (bit 0). */
	do {
		map[*ctrl >> 3] |= (unsigned char)(1 << (*ctrl & 7));
	} while (*ctrl++);

	/* If string is NULL, continue breaking tokens out of the string
	 * from the last strtok call. */
	str = string ? (unsigned char *)string : next_token;

	/* Nothing to continue from: no previous call supplied a string. */
	if (str == NULL)
		return NULL;

	/* Find beginning of token (skip over leading delimiters).  The
	 * explicit *str test is needed because '\0' is marked in the table
	 * too.  There is no token iff this loop leaves str pointing at the
	 * terminating null. */
	while ((map[*str >> 3] & (1 << (*str & 7))) && *str)
		str++;

	string = (char *)str;

	/* Find the end of the token.  If it is not the end of the string,
	 * put a null there and step past it. */
	for ( ; *str; str++) {
		if (map[*str >> 3] & (1 << (*str & 7))) {
			*str++ = '\0';
			break;
		}
	}

	/* Remember where the next call has to resume. */
	next_token = str;

	/* str did not move past the token start: no token was found. */
	if (string == (char *)str)
		return NULL;
	return string;
}
/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, the last string strtok() was called with is
   used.  Returns the next token, or NULL when there is none (also
   when S is NULL and no position has been saved yet).  For example:
	char s[] = "-abc-=-def";
	x = strtok(s, "-");		// x = "abc"
	x = strtok(NULL, "-=");		// x = "def"
	x = strtok(NULL, "=");		// x = NULL
	// s = "-abc\0=-def\0"
*/
/* Position at which the next strtok (NULL, ...) call resumes.  */
static char *olds = NULL;
char *strtok (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* No string given and nothing saved by an earlier call.  */
  if (s == NULL)
    return NULL;

  /* Scan leading delimiters.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      /* Save the end position so that a following strtok (NULL, ...)
         does not resume inside a string from an earlier call.  */
      olds = s;
      return NULL;
    }

  /* Find the end of the token.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* This token finishes the string: point OLDS at its terminating
       null (strchr with '\0' finds the terminator).  */
    olds = strchr (token, '\0');
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章