先看題目:
輸入一個表示整數的字符串,把該字符串轉換成整數並輸出,例如輸入字符串"345",則輸出整數345。
請完成函數StrToInt,實現字符串轉換成整數的功能,不得用庫函數atoi。
我們來一步一步分析,直至寫出第一份準確的代碼:
1、本題考查的實際上就是字符串轉換成整數的問題,或者說是要你自行實現atoi函數。那如何實現把表示整數的字符串正確地轉換成整數呢?以"345"作爲例子:
- 當我們掃描到字符串的第一個字符'3'時,由於我們知道這是第一位,所以得到數字3。
- 當掃描到第二個數字'4'時,而之前我們知道前面有一個3,所以便在後面加上一個數字4,那前面的3相當於30,因此得到數字:3*10+4=34。
- 繼續掃描到字符'5','5'的前面已經有了34,由於前面的34相當於340,加上後面掃描到的5,最終得到的數是:34*10+5=345。
因此,此題的思路便是:每掃描到一個字符,我們便把在之前得到的數字乘以10,然後再加上當前字符表示的數字。
2、思路有了,有一些細節需要注意,如zhedahht所說:
- “由於整數可能不僅僅只含有數字,還有可能以'+'或者'-'開頭,表示整數的正負。因此我們需要把這個字符串的第一個字符做特殊處理。如果第一個字符是'+'號,則不需要做任何操作;如果第一個字符是'-'號,則表明這個整數是個負數,在最後的時候我們要把得到的數值變成負數。
- 接着我們試着處理非法輸入。由於輸入的是指針,在使用指針之前,我們要做的第一件事是判斷這個指針是不是爲空。如果試着去訪問空指針,將不可避免地導致程序崩潰。
- 另外,輸入的字符串中可能含有不是數字的字符。每當碰到這些非法的字符,我們就沒有必要再繼續轉換。
- 最後一個需要考慮的問題是溢出問題。由於輸入的數字是以字符串的形式輸入,因此有可能輸入一個很大的數字轉換之後會超過能夠表示的最大的整數而溢出。”
- //copyright@zhedahht 2007
- enum Status {kValid = 0, kInvalid}; // result codes reported through g_nStatus
- int g_nStatus = kValid; // status of the most recent StrToInt call; lets a caller tell a parsed 0 from a failure
- // Convert a string into an integer: optional '+'/'-' followed by decimal digits only; returns 0 with g_nStatus == kInvalid for NULL, a non-digit character, or a magnitude above INT_MAX
- int StrToInt(const char* str)
- {
- g_nStatus = kInvalid; // pessimistic default; switched to kValid only after the whole string was consumed
- long long num = 0; // wider than int so the overflow test below can observe values past INT_MAX
- if(str != NULL)
- {
- const char* digit = str;
- // the first char in the string may be '+' or '-'
- bool minus = false;
- if(*digit == '+')
- digit ++;
- else if(*digit == '-')
- {
- digit ++;
- minus = true;
- }
- // the remaining chars in the string
- while(*digit != '\0')
- {
- if(*digit >= '0' && *digit <= '9')
- {
- num = num * 10 + (*digit - '0');
- // overflow: the magnitude is tested before the sign is applied, so "-2147483648" is rejected as well
- if(num > std::numeric_limits<int>::max())
- {
- num = 0;
- break; // digit still points at a non-terminator, so the status stays kInvalid
- }
- digit ++;
- }
- // if the char is not a digit, invalid input
- else
- {
- num = 0;
- break; // e.g. "1a" yields 0 here rather than 1
- }
- }
- if(*digit == '\0') // true only when no break happened; also true for "", "+" and "-", which therefore report a valid 0
- {
- g_nStatus = kValid;
- if(minus)
- num = 0 - num;
- }
- }
- return static_cast<int>(num); // num is within int range here: either 0 or a checked magnitude
- }
兩個問題:
- 當輸入的字符串不是數字,而是字符的時候,比如“1a”,上述程序直接返回了0(而正確的結果應該是得到1):
- // if the char is not a digit, invalid input
- else
- {
- num = 0;
- break;
- }
- 處理溢出時,有問題。
- //copyright@SP_daiyq 2013/5/29
// Converts the leading integer in `str` to an int, atoi-style.
//
// Leading whitespace is skipped, one optional '+' or '-' is accepted, and
// digits are consumed until the first non-digit character (so "1a" -> 1).
// Out-of-range values saturate: INT_MAX for positive input, INT_MIN for
// negative input. A null pointer or a string without digits yields 0.
int StrToInt(const char* str)
{
    if (!str)
        return 0;

    int i = 0;  // index of the character being examined

    // skip leading whitespace; the cast keeps isspace() defined for chars >= 0x80
    while (isspace(static_cast<unsigned char>(str[i])))
        i++;

    // optional sign
    bool negative = false;
    if (str[i] == '+' || str[i] == '-')
    {
        negative = (str[i] == '-');
        i++;
    }

    // The value is accumulated as a NEGATIVE number because |INT_MIN| is one
    // larger than INT_MAX: every representable result, including INT_MIN
    // itself, then fits in an int while it is being built.
    const int limit = negative ? INT_MIN : -INT_MAX;  // most negative value allowed
    int res = 0;                                      // invariant: limit <= res <= 0

    while (str[i] >= '0' && str[i] <= '9')
    {
        const int cur = str[i] - '0';

        // res * 10 - cur would drop below limit  <=>  res < (limit + cur) / 10.
        // The division truncates toward zero, which is exactly the rounding
        // needed for a negative bound, and nothing here can overflow.
        if (res < (limit + cur) / 10)
            return negative ? INT_MIN : INT_MAX;

        res = res * 10 - cur;
        i++;
    }

    // res >= -INT_MAX whenever the input was positive, so negating is safe
    return negative ? res : -res;
}
- //copyright@fuwutu 2013/5/29
// Converts the leading integer in `str` to an int, atoi-style.
//
// Leading spaces and tabs are skipped, one optional '+' or '-' is accepted,
// and digits are consumed until the first non-digit character. The value is
// accumulated in a long long and clamped to the 32-bit int range
// [-2147483648, 2147483647]. A null pointer yields 0.
int StrToInt(const char* str)
{
    // guard first: every statement below dereferences str
    if (!str)
    {
        return 0;
    }

    while (*str == ' ' || *str == '\t')
    {
        ++str;
    }

    bool negative = false;
    if (*str == '-')
    {
        negative = true;
        ++str;
    }
    else if (*str == '+')
    {
        ++str;
    }

    const long long kMin = -2147483648LL;
    const long long kMax = 2147483647LL;
    long long result = 0;

    for (; *str >= '0' && *str <= '9'; ++str)
    {
        const int n = *str - '0';

        // Build the value with its final sign so that -2147483648 is reachable.
        result = negative ? result * 10 - n : result * 10 + n;

        // Once out of range, further digits cannot bring the value back, so
        // return the clamped bound right away; result never grows past one
        // extra decimal digit and thus cannot overflow long long.
        if (result < kMin)
        {
            return static_cast<int>(kMin);
        }
        if (result > kMax)
        {
            return static_cast<int>(kMax);
        }
    }

    return static_cast<int>(result);
}
- long long result = 0;
- //atol函數
- //Copyright (c) 1989-1997, Microsoft Corporation. All rights reserved.
/*
 * atol - convert the leading decimal number in a string to a long.
 *
 * nptr: NUL-terminated string; leading whitespace (per isspace) is skipped,
 *       then one optional '-' or '+' is accepted, then digits are consumed
 *       up to the first non-digit character.
 *
 * Returns the accumulated value, negated when the sign character was '-'.
 * No overflow detection is performed, and nptr is dereferenced without a
 * null check.
 */
long __cdecl atol(
        const char *nptr
        )
{
    int c;              /* current char */
    long total;         /* current total */
    int sign;           /* if '-', then negative, otherwise positive */

    /* skip whitespace */
    while ( isspace((int)(unsigned char)*nptr) )
        ++nptr;

    c = (int)(unsigned char)*nptr++;
    sign = c;           /* save sign indication */
    if (c == '-' || c == '+')
        c = (int)(unsigned char)*nptr++;    /* skip sign */

    total = 0;

    while (isdigit(c)) {
        total = 10 * total + (c - '0');     /* accumulate digit */
        c = (int)(unsigned char)*nptr++;    /* get next char */
    }

    if (sign == '-')
        return -total;
    else
        return total;   /* return result, negated if necessary */
}
/*
 * isspace - report whether x is a whitespace character.
 *
 * Recognises the six whitespace characters of the "C" locale: space,
 * horizontal tab, newline, vertical tab, form feed and carriage return.
 * The escapes must be written with a backslash: a forward slash ('/t')
 * forms a multi-character constant that no single character ever equals.
 *
 * Returns 1 for whitespace, 0 otherwise.
 */
int isspace(int x)
{
    if (x == ' ' || x == '\t' || x == '\n' || x == '\v' || x == '\f' || x == '\r')
        return 1;
    else
        return 0;
}
/*
 * isdigit - report whether x is one of the decimal digits '0'..'9'.
 *
 * Returns 1 for a digit, 0 otherwise. The return type is spelled out
 * because an implicit int return type is not valid C++ (nor C99).
 */
int isdigit(int x)
{
    if (x <= '9' && x >= '0')
        return 1;
    else
        return 0;
}
- //atoi調用上述的atol
- int __cdecl atoi(
- const char *nptr
- )
- {
- // Overflow is not detected. Because of this, we can just use atol() and narrow its long result to int with a cast.
- return (int)atol(nptr);
- }
但很遺憾的是,上述atoi標準代碼依然返回的是long:
- long total; /* current total */
- if (sign == '-')
- return -total;
- else
- return total; /* return result, negated if necessary */
再者,下面這裏定義成long的total與10相乘,即total*10很容易溢出:
- long total; /* current total */
- total = 10 * total + (c - '0'); /* accumulate digit */
- simple_strtol,把一個字符串轉換爲一個有符號長整數;
- simple_strtoll,把一個字符串轉換爲一個有符號長長整數;
- simple_strtoul,把一個字符串轉換爲一個無符號長整數;
- simple_strtoull,把一個字符串轉換爲一個無符號長長整數
- //linux/lib/vsprintf.c
- //Copyright (C) 1991, 1992 Linus Torvalds
- //simple_strtol - convert a string to a signed long; only a leading '-' is treated as a sign here
- long simple_strtol(const char *cp, char **endp, unsigned int base)
- {
- if (*cp == '-') // minus sign: parse the text after it as unsigned, then negate
- return -simple_strtoul(cp + 1, endp, base);
- return simple_strtoul(cp, endp, base); // no minus sign: forward the arguments unchanged
- }
- EXPORT_SYMBOL(simple_strtol);
- //simple_strtoul - convert a string to an unsigned long by delegating to simple_strtoull
- unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
- {
- return simple_strtoull(cp, endp, base); // the unsigned long long result is converted to the unsigned long return type
- }
- EXPORT_SYMBOL(simple_strtoul);
- //simple_strtoll - convert a string to a signed long long; only a leading '-' is treated as a sign here
- long long simple_strtoll(const char *cp, char **endp, unsigned int base)
- {
- if (*cp == '-') // minus sign: parse the text after it as unsigned, then negate
- return -simple_strtoull(cp + 1, endp, base);
- return simple_strtoull(cp, endp, base); // no minus sign: forward the arguments unchanged
- }
- EXPORT_SYMBOL(simple_strtoll);
- //simple_strtoull - convert a string to an unsigned long long; if endp is non-null it receives the position after the consumed characters
- unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
- {
- unsigned long long result;
- unsigned int rv;
- cp = _parse_integer_fixup_radix(cp, &base); // may update base and move cp forward
- rv = _parse_integer(cp, base, &result); // consumed-character count, possibly or-ed with KSTRTOX_OVERFLOW
- /* FIXME */
- cp += (rv & ~KSTRTOX_OVERFLOW); // clear the overflow bit so only the character count advances cp
- if (endp)
- *endp = (char *)cp;
- return result; // rv's overflow bit is not examined, so the value is returned as-is
- }
- EXPORT_SYMBOL(simple_strtoull);
- “真正的處理邏輯主要是在_parse_integer裏面,關於溢出的處理,_parse_integer處理的很優美,
- 而_parse_integer_fixup_radix是用來自動根據字符串判斷進制的”。
- //lib/kstrtox.c, line 39
- //Convert non-negative integer string representation in explicitly given radix to an integer.
- //Return number of characters consumed maybe or-ed with overflow bit.
- //If overflow occurs, result integer (incorrect) is still returned.
- unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
- {
- unsigned long long res; // value accumulated so far
- unsigned int rv; // number of characters consumed
- int overflow; // set to 1 once the accumulation is found to exceed ULLONG_MAX
- res = 0;
- rv = 0;
- overflow = 0;
- while (*s) {
- unsigned int val; // numeric value of the current character
- if ('0' <= *s && *s <= '9')
- val = *s - '0';
- else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f') // letters are compared after _tolower, so 'A'..'F' are accepted too
- val = _tolower(*s) - 'a' + 10;
- else
- break; // not a digit character in any supported base: stop parsing
- if (val >= base)
- break; // digit is not valid in the requested base: stop parsing
- /*
- * Check for overflow only if we are within range of
- * it in the max base we support (16)
- */
- if (unlikely(res & (~0ull << 60))) { // true only when one of the top four bits of res is set
- if (res > div_u64(ULLONG_MAX - val, base)) // res * base + val would exceed ULLONG_MAX
- overflow = 1;
- }
- res = res * base + val; // performed even after overflow; the value wraps
- rv++;
- s++;
- }
- *p = res;
- if (overflow)
- rv |= KSTRTOX_OVERFLOW; // report overflow through a flag bit in the returned count
- return rv;
- }
- 上頭出現了個unlikely,其實unlikely和likely經常出現在linux相關內核源碼中
- if(likely(value)){
- // equivalent to: if(likely(value)) == if(value)
- }
- else{
- }
- //include/linux/compiler.h: likely(x)/unlikely(x) wrap a condition; each is defined only if not already defined
- # ifndef likely
- # define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) // constant x collapses to !!(x); otherwise x is passed to __branch_check__ with 1 (presumably the expected outcome; __branch_check__ is not shown here)
- # endif
- # ifndef unlikely
- # define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) // same shape as likely(x), but passes 0 to __branch_check__
- # endif
- 呈現下div_u64的代碼:
- //include/linux/math64.h
- //div_u64 - divide a u64 dividend by a u32 divisor and return only the quotient
- static inline u64 div_u64(u64 dividend, u32 divisor)
- {
- u32 remainder; // filled in by div_u64_rem and then discarded
- return div_u64_rem(dividend, divisor, &remainder);
- }
- //div_u64_rem - divide a u64 dividend by a u32 divisor; returns the quotient and stores the remainder through *remainder
- static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
- {
- *remainder = dividend % divisor;
- return dividend / divisor;
- }
- //lib/kstrtox.c, line 23: when *base is 0, choose the radix from the string's prefix; returns s advanced past a "0x" prefix when the radix is 16
- const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
- {
- if (*base == 0) { // radix not given by the caller: infer it
- if (s[0] == '0') {
- if (_tolower(s[1]) == 'x' && isxdigit(s[2])) // "0x"/"0X" followed by a hex digit
- *base = 16;
- else
- *base = 8; // any other leading '0' selects octal
- } else
- *base = 10;
- }
- if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') // also applies when the caller passed base 16 explicitly
- s += 2;
- return s;
- }
第三十一章、帶通配符的字符串匹配問題
字符串匹配問題,給定一串字符串,按照指定規則對其進行匹配,並將匹配的結果保存至output數組中,多個匹配項用空格間隔,最後一個不需要空格。
要求:
- 匹配規則中包含通配符?和*,其中?表示匹配任意一個字符,*表示匹配任意多個(>=0)字符。
- 匹配規則要求匹配最大的字符子串,例如a*d,匹配abbdd而非abbd,即最大匹配子串。
- 匹配後的輸入串不再進行匹配,從當前匹配後的字符串重新匹配其他字符串。
請實現函數:char* my_find(char input[], char rule[])
舉例說明
input:abcadefg
rule:a?c
output:abc
input :newsadfanewfdadsf
rule: new
output: new new
input :breakfastfood
rule: f*d
output:fastfood
注意事項:
- 自行實現函數my_find,勿在my_find函數裏夾雜輸出,且不準用C、C++庫,和Java的String對象;
- 請注意代碼的時間,空間複雜度,及可讀性,簡潔性;
- input=aaa,rule=aa時,返回一個結果aa,即可。
1、本題與上述第三十章的題不同,上題字符串轉換成整數更多考察對思維的全面性和對細節的處理,本題則更多的是編程技巧。閒話不多說,直接上代碼:
- //copyright@cao_peng 2013/4/23
// Returns the number of characters before the terminating NUL in `a`.
// A null pointer is treated as an empty string and yields 0.
int str_len(char *a) {
    int count = 0;
    if (a != 0) {
        while (a[count] != 0) {
            ++count;
        }
    }
    return count;
}
// Copies the first `len` characters of `b` into `a` and NUL-terminates `a`.
// `a` must have room for len + 1 characters; when len <= 0 only the
// terminator is written.
void str_copy(char *a, const char *b, int len) {
    int i = 0;
    while (i < len) {
        a[i] = b[i];
        ++i;
    }
    a[i] = 0;
}
- char *str_join(char *a,const char *b,int lenb) { //連接字符串 第一個字符串被回收
- char *t;
- if (a == 0) {
- t = (char *) malloc(sizeof(char) * (lenb + 1));
- str_copy(t, b, lenb);
- return t;
- }
- else {
- int lena = str_len(a);
- t = (char *) malloc(sizeof(char) * (lena + lenb + 2));
- str_copy(t, a, lena);
- *(t + lena) = ' ';
- str_copy(t + lena + 1, b, lenb);
- free(a);
- return t;
- }
- }
// Returns the length of the longest prefix of `input` that matches the whole
// of `rule`, or -1 when no prefix matches.
// In `rule`, '?' matches exactly one character and '*' matches any number of
// characters (including none). An empty rule matches the empty prefix (0).
int canMatch(char *input, char *rule) {
    if (*rule == 0) {
        return 0;
    }

    int best = -1;

    if (*rule == '*') {
        // '*' consumes nothing: continue with the rest of the rule
        best = canMatch(input, rule + 1);
        if (*input != 0) {
            // '*' consumes this character and stays active
            const int viaStar = canMatch(input + 1, rule);
            if (viaStar >= 0 && viaStar + 1 > best) {
                best = viaStar + 1;
            }
        }
    }

    // a single-character match needs a character to consume
    if (*input != 0 && (*rule == '?' || *rule == *input)) {
        const int viaChar = canMatch(input + 1, rule + 1);
        if (viaChar >= 0 && viaChar + 1 > best) {
            best = viaChar + 1;
        }
    }

    return best;
}
- char * my_find(char input[], char rule[]) {
- int len = str_len(input);
- int *match = (int *) malloc(sizeof(int) * len); //input第i位最多能匹配多少位 匹配不上是-1
- int i,max_pos = - 1;
- char *output = 0;
- for (i = 0; i < len; ++i) {
- match[i] = canMatch(input + i, rule);
- if ((max_pos < 0) || (match[i] > match[max_pos])) {
- max_pos = i;
- }
- }
- if ((max_pos < 0) || (match[max_pos] <= 0)) { //不匹配
- output = (char *) malloc(sizeof(char));
- *output = 0; // \0
- return output;
- }
- for (i = 0; i < len;) {
- if (match[i] == match[max_pos]) { //找到匹配
- output = str_join(output, input + i, match[i]);
- i += match[i];
- }
- else {
- ++i;
- }
- }
- free(match);
- return output;
- }
2、本題也可以直接寫出DP方程,如下代碼所示:
- //copyright@chpeih 2013/4/23
// Finds the longest substrings of `input` that match `rule` and returns them
// joined by single spaces.
//
// In `rule`, '?' matches exactly one character and '*' matches any number of
// characters, including none. Only matches of the maximum length are
// reported; they are taken left to right and never overlap (input "aaa" with
// rule "aa" yields a single "aa").
//
// Returns a NUL-terminated buffer allocated with new[] that the caller must
// release with delete[]. When nothing matches, the buffer holds an empty
// string. The function produces no output of its own.
char* my_find(char input[], char rule[])
{
    int len1 = 0;
    while (input[len1]) ++len1;
    int len2 = 0;
    while (rule[len2]) ++len2;

    // Rolling rows of the DP table. row[j] is the length of the longest
    // substring ending at the current input position that matches
    // rule[0..j), or -1 if there is none. A match may start anywhere, so
    // row[0] is always 0.
    int *prev = new int[len2 + 1];
    int *cur = new int[len2 + 1];
    // endLen[i]: longest full-rule match ending right before input[i]
    int *endLen = new int[len1 + 1];

    // Before any input is consumed only a rule prefix made of '*' can match.
    prev[0] = 0;
    for (int j = 1; j <= len2; ++j)
        prev[j] = (rule[j - 1] == '*') ? prev[j - 1] : -1;
    endLen[0] = prev[len2];

    int longest = 0;
    for (int i = 1; i <= len1; ++i)
    {
        cur[0] = 0;
        for (int j = 1; j <= len2; ++j)
        {
            const char r = rule[j - 1];
            int best = -1;
            if (r == '*')
            {
                // '*' matches nothing ...
                best = cur[j - 1];
                // ... or swallows input[i-1] on top of what it already covered
                if (prev[j] != -1 && prev[j] + 1 > best)
                    best = prev[j] + 1;
            }
            else if ((r == '?' || r == input[i - 1]) && prev[j - 1] != -1)
            {
                best = prev[j - 1] + 1;
            }
            cur[j] = best;
        }
        endLen[i] = cur[len2];
        if (endLen[i] > longest)
            longest = endLen[i];

        int *tmp = prev;
        prev = cur;
        cur = tmp;
    }
    delete[] prev;
    delete[] cur;

    // Keep only non-overlapping matches of the maximum length, earliest end
    // first; discarded candidates are marked with -1. A zero-length "match"
    // (empty rule) is not reported.
    int matches = 0;
    if (longest > 0)
    {
        int lastEnd = 0;
        for (int i = 1; i <= len1; ++i)
        {
            if (endLen[i] == longest && i - longest >= lastEnd)
            {
                ++matches;
                lastEnd = i;
            }
            else
            {
                endLen[i] = -1;
            }
        }
    }

    // Each match needs `longest` characters plus one separator or terminator.
    char *result = new char[matches > 0 ? matches * (longest + 1) : 1];
    int out = 0;
    if (matches > 0)
    {
        for (int i = 1; i <= len1; ++i)
        {
            if (endLen[i] != longest)
                continue;
            if (out > 0)
                result[out++] = ' ';
            for (int k = i - longest; k < i; ++k)
                result[out++] = input[k];
        }
    }
    result[out] = '\0';

    delete[] endLen;
    return result;
}