How to compare ends of strings in C?
I want to make sure my string ends with ".foo". I am using C, a language I am not totally familiar with. The best way I have found to do it is below. Any C gurus want to make sure I'm doing this elegantly and wisely?
/*
 * Report whether str ends with the literal suffix ".foo".
 * Returns 1 when it does and 0 otherwise. str must be a valid
 * NUL-terminated string.
 */
int EndsWithFoo(char *str)
{
    /* A string shorter than the suffix cannot end with it. */
    if (strlen(str) < strlen(".foo"))
        return 0;

    /* Compare the trailing strlen(".foo") characters against the suffix. */
    return strcmp(str + strlen(str) - strlen(".foo"), ".foo") == 0 ? 1 : 0;
}
Solution 1:[1]
Don't call strlen more than once per string.
/*
 * Return non-zero when str ends with suffix, 0 otherwise.
 * A NULL str or a NULL suffix yields 0; an empty suffix matches any string.
 */
int EndsWith(const char *str, const char *suffix)
{
    size_t str_len;
    size_t suffix_len;

    if (str == NULL || suffix == NULL)
        return 0;

    str_len = strlen(str);
    suffix_len = strlen(suffix);

    /* Only compare when the suffix fits inside str. */
    if (suffix_len <= str_len) {
        const char *tail = str + (str_len - suffix_len);
        return strncmp(tail, suffix, suffix_len) == 0;
    }
    return 0;
}
/* Convenience wrapper: passes str and the fixed suffix ".foo" to EndsWith. */
int EndsWithFoo(const char *str)
{
    return EndsWith(str, ".foo");
}
EDIT: added NULL check for the pedantic. For the ultra pedantic, debate whether it should return non-zero if str and suffix are both NULL.
Solution 2:[2]
/*
 * Compare everything from the last '.' in string against ".foo".
 * Follows the strcmp convention: returns 0 when the string ends with
 * ".foo", the non-zero strcmp result when the text from the last dot
 * differs, and -1 when the string contains no '.' at all.
 */
int EndsWithFoo( char *string )
{
    char *last_dot = strrchr(string, '.');

    if( last_dot == NULL )
        return -1;
    return strcmp(last_dot, ".foo");
}
Will return 0 if ending with ".foo".
Solution 3:[3]
I don't have access to a compiler right now, so could someone tell me if this works?
#include <stdio.h>
#include <string.h>
int EndsWithFoo(const char* s);
/* Print the value EndsWithFoo returns for "whatever.foo". */
int main(void)
{
    int has_suffix = EndsWithFoo("whatever.foo");

    printf("%d\n", has_suffix);
    return 0;
}
/*
 * Return 1 if s is non-NULL and its last four characters are ".foo",
 * otherwise 0.
 */
int EndsWithFoo(const char* s)
{
    size_t len;

    if (s == NULL)
        return 0;

    len = strlen(s);
    if (len < 4)
        return 0;

    /* Look at the last four characters one by one. */
    const char *tail = s + len - 4;
    return tail[0] == '.' && tail[1] == 'f' && tail[2] == 'o' && tail[3] == 'o';
}
Anyway, be sure to qualify the parameter as const; it tells everyone (including the compiler) that you don't intend to modify the string.
Solution 4:[4]
If you can change the signature of your function, then try changing it to
int EndsWith(char const * str, char const * suffix, int lenstr, int lensuf);
This will result in a safer, more reusable and more efficient code:
- The added const qualifiers will make sure you don't mistakenly alter the input strings. This function is a predicate, so I assume it is never meant to have side-effects.
- The suffix to compare against is passed in as a parameter, so you can save this function for later reuse with other suffixes.
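- This signature will give you the opportunity to pass the lengths of the strings in if you already know them; pass a negative length to have the function compute it with strlen. We call this dynamic programming (strictly speaking it is simply reuse of an already-computed value).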
We can define the function like so:
/*
 * Report whether str ends with suffix.
 *
 * str, suffix  NUL-terminated strings; either may be NULL.
 * lenstr       length of str, or a negative value to have it computed.
 * lensuf       length of suffix, or a negative value to have it computed.
 *
 * Returns 1 if both pointers are NULL, 0 if exactly one is NULL, and
 * otherwise 1 when str ends with suffix and 0 when it does not.
 * Lengths supplied by the caller are expected to equal the real string
 * lengths.
 */
int EndsWith(char const * str, char const * suffix, int lenstr, int lensuf)
{
    if( ! str && ! suffix ) return 1;
    if( ! str || ! suffix ) return 0;
    if( lenstr < 0 ) lenstr = (int)strlen(str);
    if( lensuf < 0 ) lensuf = (int)strlen(suffix);
    /* A suffix longer than the string cannot match; without this guard the
       pointer computed below would land before the start of str. */
    if( lensuf > lenstr ) return 0;
    return strcmp(str + lenstr - lensuf, suffix) == 0;
}
The obvious counter-argument for the extra parameters is that they imply more noise in the code, or a less expressive code.
Solution 5:[5]
The strlen(".foo") calls are not required. If you really wanted to have it flexible you could use sizeof ".foo" - 1 -- a compile time constant.
Also, a null string check would be good.
Solution 6:[6]
Tested code, includes the test:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Return 1 when the text starting at the last '.' in str is exactly
 * ".foo", and 0 otherwise (including when str contains no '.').
 */
int ends_with_foo(const char *str)
{
    const char *last_dot = strrchr(str, '.');

    if (last_dot != NULL && strcmp(last_dot, ".foo") == 0)
        return 1;
    return 0;
}
/* Run ends_with_foo over the sample strings and print one verdict per line. */
int main (int argc, const char * argv[])
{
    char *samples[] = { "something", "anotherthing.foo" };
    char **cur = samples;
    char **end = samples + sizeof(samples) / sizeof(samples[0]);

    for (; cur != end; ++cur) {
        const char *negation = ends_with_foo(*cur) ? "" : "not ";
        printf("'%s' ends %sin '.foo'\n", *cur, negation);
    }
    return 0;
}
Solution 7:[7]
This is the most efficient (for a computer) answer you will find here.
/*
 * Return 1 if string ends with tail, 0 otherwise.
 * An empty tail matches every string. Both arguments must be valid
 * NUL-terminated strings.
 */
int endsWith(const char *string,const char *tail)
{
    const char *s1 = string;
    const char *s2 = tail;

    if (!*tail)
        return 1;
    if (!*string)
        return 0;

    /* Advance both cursors to their terminating NUL. */
    while (*s1)
        ++s1;
    while (*s2)
        ++s2;

    if (s1 - string < s2 - tail)
        return 0;

    /* Walk backwards while tail still has unchecked characters. The bound is
       tested before any dereference, so neither pointer is ever moved or
       read before the start of its string. */
    while (s2 != tail) {
        if (*--s1 != *--s2)
            return 0;
    }
    return 1;
}
Solution 8:[8]
Here is a general solution returning the same values as Python's str.endswith(), using memcmp(). Not checking str / suffix for NULL is intentional; other libc str functions don't check for NULL either:
/*
 * Python-style endswith: return 1 when str ends with suffix, else 0.
 * An empty suffix always matches. Neither argument is checked for NULL.
 */
int ends_with(const char *str, const char *suffix) {
    const size_t n = strlen(str);
    const size_t m = strlen(suffix);

    /* A suffix longer than the string can never match. */
    if (m > n)
        return 0;
    return memcmp(str + (n - m), suffix, m) == 0;
}
Test C:
/* Each call prints the value ends_with returns; the expected output is noted per line. */
printf("%i\n", ends_with("", "")); /* 1 */
printf("%i\n", ends_with("", "foo")); /* 0 */
printf("%i\n", ends_with("foo", "")); /* 1 */
printf("%i\n", ends_with("foo", "foo")); /* 1 */
printf("%i\n", ends_with("foo", "foobar")); /* 0 */
printf("%i\n", ends_with("foo", "barfoo")); /* 0 */
printf("%i\n", ends_with("foobar", "foo")); /* 0 */
printf("%i\n", ends_with("barfoo", "foo")); /* 1 */
printf("%i\n", ends_with("foobarfoo", "foo")); /* 1 */
Result C:
1
0
1
1
0
0
0
1
1
Test Python:
# Reference run with Python's built-in str.endswith; expected output noted per line.
print("".endswith(""))  # True
print("".endswith("foo"))  # False
print("foo".endswith(""))  # True
print("foo".endswith("foo"))  # True
print("foo".endswith("foobar"))  # False
print("foo".endswith("barfoo"))  # False
print("foobar".endswith("foo"))  # False
print("barfoo".endswith("foo"))  # True
print("foobarfoo".endswith("foo"))  # True
Result Python:
True
False
True
True
False
False
False
True
True
Solution 9:[9]
#include <assert.h>
#include <string.h>
/*
 * Return 1 if str ends with suf, 0 otherwise. An empty suf matches any
 * string. Both pointers must be non-NULL (checked with assert).
 */
int string_has_suffix(const char *str, const char *suf)
{
    assert(str && suf);

    /* Start both cursors on the terminating NUL and walk backwards. */
    const char *p = str + strlen(str);
    const char *q = suf + strlen(suf);

    for (;;) {
        if (q == suf)
            return 1;   /* every character of suf has been matched */
        if (p == str)
            return 0;   /* str ran out before suf did */
        if (*--p != *--q)
            return 0;   /* mismatch inside the candidate suffix */
    }
}
// Test Unit
// Self-test for string_has_suffix: every assert must hold for the program to
// reach "return 0". argc and argv are not used.
int main (int argc, char *argv[])
{
// Empty and single-character strings.
assert( string_has_suffix("", ""));
assert(!string_has_suffix("", "a"));
assert( string_has_suffix("a", ""));
assert( string_has_suffix("a", "a"));
assert(!string_has_suffix("a", "b"));
assert(!string_has_suffix("a", "ba"));
// Suffix as long as, or longer than, the string; mismatches at various positions.
assert( string_has_suffix("abc", "abc"));
assert(!string_has_suffix("abc", "eeabc"));
assert(!string_has_suffix("abc", "xbc"));
assert(!string_has_suffix("abc", "axc"));
assert(!string_has_suffix("abcdef", "abcxef"));
assert(!string_has_suffix("abcdef", "abxxef"));
// Short string containing a dot.
assert( string_has_suffix("b.a", ""));
assert( string_has_suffix("b.a", "a"));
assert( string_has_suffix("b.a", ".a"));
assert( string_has_suffix("b.a", "b.a"));
assert(!string_has_suffix("b.a", "x"));
// String with several dot-separated parts, suffixes of growing length.
assert( string_has_suffix("abc.foo.bar", ""));
assert( string_has_suffix("abc.foo.bar", "r"));
assert( string_has_suffix("abc.foo.bar", "ar"));
assert( string_has_suffix("abc.foo.bar", "bar"));
assert(!string_has_suffix("abc.foo.bar", "xar"));
assert( string_has_suffix("abc.foo.bar", ".bar"));
assert( string_has_suffix("abc.foo.bar", "foo.bar"));
assert(!string_has_suffix("abc.foo.bar", "xoo.bar"));
assert(!string_has_suffix("abc.foo.bar", "foo.ba"));
assert( string_has_suffix("abc.foo.bar", ".foo.bar"));
assert( string_has_suffix("abc.foo.bar", "c.foo.bar"));
assert( string_has_suffix("abc.foo.bar", "abc.foo.bar"));
assert(!string_has_suffix("abc.foo.bar", "xabc.foo.bar"));
assert(!string_has_suffix("abc.foo.bar", "ac.foo.bar"));
// The suffix text occurs twice in the string.
assert( string_has_suffix("abc.foo.foo", ".foo"));
assert( string_has_suffix("abc.foo.foo", ".foo.foo"));
// String without any dot.
assert( string_has_suffix("abcdefgh", ""));
assert(!string_has_suffix("abcdefgh", " "));
assert( string_has_suffix("abcdefgh", "h"));
assert( string_has_suffix("abcdefgh", "gh"));
assert( string_has_suffix("abcdefgh", "fgh"));
assert(!string_has_suffix("abcdefgh", "agh"));
assert( string_has_suffix("abcdefgh", "abcdefgh"));
return 0;
}
// $ gcc -Wall string_has_suffix.c && ./a.out
Solution 10:[10]
Sorry I'm a little late to the party. Couldn't you do something with some simple pointer math?
// Compare the last x characters of str with ".foo" by pointing into str.
char* str = "hello.foo"; //this would be string given
int x = 4; //.foo has 4 characters
// NOTE(review): nothing here checks that str has at least x characters; for a
// shorter string n is not a valid index and &str[n] points outside the string.
// Verify strlen(str) >= x before using test.
int n = strlen(str)- x; //where x is equal to suffix length
char* test = &str[n]; //do some pointer math to find the last characters
if(strcmp(test, ".foo") == 0){
//do some stuff
}// end if
Character pointers work by pointing to the first character in their array. So when you do this you set test's first character as the '.' in '.foo' (if that's what it contains). That's also why you don't need to allocate memory for it since it is just pointing at the already existing array of characters.
Solution 11:[11]
Any C gurus want to make sure I'm doing this elegantly and wisely?
Your solution works correctly as long as the argument is a valid null terminated string. This is the most important and in this regard, you are doing this wisely. More complicated solutions posted as answers do not meet this goal.
The compiler will inline strlen(".foo") and should be able to determine that both instances of strlen(str) return the same value, hence generate a single call (as clang and gcc do).
Yet it would be more elegant IMHO to compute the lengths once and use memcmp() instead of strcmp(), which needs more work and is not inlined. You should also define str as a const char * to achieve const correctness and prevent warnings when calling your function with constant strings or string literals.
Testing for a specific ".foo" suffix is a special case of a more general problem: testing that a string is a suffix of another string.
Here is a simple and efficient solution:
#include <string.h>
/*
 * Test whether suff is a suffix of s.
 * Returns 1 if s ends with suff (an empty suff always matches), 0 otherwise.
 * Both arguments must be valid NUL-terminated strings.
 */
int strEndsWith(const char *s, const char *suff) {
    size_t s_size = strlen(s);
    size_t suff_size = strlen(suff);

    if (suff_size > s_size) {
        return 0;
    }
    /* Compare only the trailing suff_size bytes of s. */
    return memcmp(s + (s_size - suff_size), suff, suff_size) == 0;
}
/* Specialization of strEndsWith for the fixed suffix ".foo". */
int strEndsWithFoo(const char *s) {
    const char *foo_suffix = ".foo";

    return strEndsWith(s, foo_suffix);
}
The code is very simple and generic, yet modern compilers will inline strEndsWithFoo very efficiently. As can be verified on GodBolt's compiler explorer, clang 12.0.0 computes the length of ".foo" at compile time and inlines memcmp() as a single cmp instruction, generating just 12 x86_64 instructions:
# clang output for strEndsWithFoo (AT&T syntax).
strEndsWithFoo: # @strEndsWithFoo
pushq %rbx
# Keep a copy of %rdi in %rbx across the call (presumably the argument s,
# assuming the System V x86-64 calling convention).
movq %rdi, %rbx
callq strlen
# %rcx = value left in %rax by strlen (presumably the length of s).
movq %rax, %rcx
xorl %eax, %eax
# Length below 4: jump past the comparison with %eax still 0.
cmpq $4, %rcx
jb .LBB1_2
xorl %eax, %eax
# Inlined memcmp: compare the 4 bytes at %rbx + %rcx - 4 with 0x6F6F662E,
# i.e. the bytes 2E 66 6F 6F = ".foo" when stored little-endian.
cmpl $1869571630, -4(%rbx,%rcx) # imm = 0x6F6F662E
# %al = 1 when the four bytes were equal, 0 otherwise.
sete %al
.LBB1_2:
popq %rbx
retq
gcc 11.2 generates very similar code, also 12 instructions:
# gcc output for strEndsWithFoo (AT&T syntax).
strEndsWithFoo:
pushq %rbx
# Keep a copy of %rdi in %rbx across the call (presumably the argument s,
# assuming the System V x86-64 calling convention).
movq %rdi, %rbx
call strlen
xorl %r8d, %r8d
# %rax (presumably the length returned by strlen) of 3 or less: jump past
# the comparison with %r8d still 0.
cmpq $3, %rax
jbe .L7
xorl %r8d, %r8d
# Inlined memcmp: compare the 4 bytes at %rbx + %rax - 4 with 1869571630
# (0x6F6F662E, the bytes of ".foo" stored little-endian).
cmpl $1869571630, -4(%rbx,%rax)
# %r8b = 1 when the four bytes were equal, 0 otherwise.
sete %r8b
.L7:
# Move the result into %eax before returning.
movl %r8d, %eax
popq %rbx
ret
Intel's ICC compiler generates a long and complex set of SIMD instructions, much more difficult to follow and possibly less efficient even on Intel processors. The performance depends heavily on the efficiency of the strlen() library function, so benchmarks should include various distributions of string lengths.
There is no absolute answer to the question of what the most efficient solution is, but simplicity does not preclude efficiency, and simple straightforward code is easier to validate. When it combines simplicity, correctness and efficiency, elegance is achieved.
Quoting Brian Kernighan:
Controlling complexity is the essence of computer programming.
Software Tools (1976), p. 319 (with P. J. Plauger).
Everyone knows that debugging is twice as hard as writing a program in the first place. So if you're as clever as you can be when you write it, how will you ever debug it?
"The Elements of Programming Style", 2nd edition, chapter 2.
Solution 12:[12]
If there's always something beyond the dot, we could indulge in some pointer arithmetic:
/*
 * Return 1 if the text following the last '.' in str is exactly "foo",
 * and 0 otherwise (including when str contains no '.').
 */
int EndsWithFoo (char *str)
{
    const char *last_dot = strrchr (str, '.');

    if (last_dot == NULL)
        return 0;

    /* Skip the dot itself and compare the extension only. */
    return strcmp (last_dot + 1, "foo") == 0;
}
Of course you would probably want to add a little strlen there to check there is something beyond the dot :-)
NB - I didn't run this to check it, but it looks OK to me.
Solution 13:[13]
I would like to use my version:
/*
 * Check whether filename ends with ext.
 *
 * filename  NUL-terminated string to inspect.
 * ext       NUL-terminated suffix to look for; an empty ext always matches.
 *
 * Returns true when the last strlen(ext) characters of filename equal ext,
 * false otherwise (including when ext is longer than filename).
 */
bool endsWith(const char *filename, const char *ext) {
    /* size_t instead of the non-standard uint: it is the type strlen
       returns, so no length is truncated and no extra header is needed. */
    const size_t len = strlen(filename);
    const size_t extLen = strlen(ext);

    if (len < extLen) {
        return false;
    }
    return memcmp(filename + (len - extLen), ext, extLen) == 0;
}
Solution 14:[14]
I always check the glib string functions, they have all sorts of useful bits. A suffix checking function already exists.
gchar * str; /* the string to test; must point to a valid NUL-terminated string */
/* g_str_has_suffix takes two arguments: the string and the suffix to look for. */
if (!g_str_has_suffix(str, ".foo")) {
    return FALSE;
}
I'm kinda new at C, so I apologize if this isn't 100%... but it looks like a solid guard clause to me!
Solution 15:[15]
My take on this:
/*
 * Return 1 if string ends with suffix, 0 otherwise.
 * A NULL string or NULL suffix yields 0; an empty suffix matches any string.
 */
int string_has_suffix(const char* string, const char* suffix) {
    if (string && suffix) {
        size_t string_len = strlen(string);
        size_t suffix_len = strlen(suffix);

        if (string_len >= suffix_len) {
            /* Compare at the exact position where the suffix must start.
               Searching for the last occurrence of suffix[0] instead picks
               the wrong position whenever the suffix repeats its first
               character (e.g. "aba" or ".foo.foo"). */
            return memcmp(string + (string_len - suffix_len), suffix, suffix_len) == 0;
        }
    }
    return 0;
}
Solution 16:[16]
On POSIX systems you can match the end of a string using glob patterns
#include <fnmatch.h>
/* fnmatch returns 0 when my_string matches the pattern, non-zero otherwise. */
if (fnmatch("*.foo", my_string, 0) == 0) {
    /* match */
}
Solution 17:[17]
Maybe...
/*
 * Return true if str ends with tail, false otherwise.
 * An empty tail matches every string. Both arguments must be valid
 * NUL-terminated strings.
 *
 * Implemented with strlen and memcmp only: strrstr is not part of the C
 * standard library, so relying on it leaves the function unbuildable on
 * most platforms.
 */
bool endswith (const char *str, const char *tail)
{
    const size_t strlength = strlen (str);
    const size_t taillength = strlen (tail);

    if (taillength > strlength)
        return false;

    /* The only place tail can sit as a suffix is the last taillength bytes. */
    return memcmp (str + (strlength - taillength), tail, taillength) == 0;
}
endswith (str, ".foo");
By the way, the solution in the original question looks fine other than the repeated strlen calls.
Solution 18:[18]
I only wrote this because someone said something about "most optimized".
#include <stdint.h>
/*
 * Return 1 if str ends with ".foo", 0 otherwise.
 *
 * The string is scanned once from the front. The four most recently read
 * characters are kept in cur, and when the terminating NUL is reached they
 * are compared with ".foo" as a single 32-bit value. str must be a valid
 * NUL-terminated string.
 */
int_fast8_t EndsWithFoo(const char *str) {
    char c;
    union {
        uint32_t u;
        char s[4];
    } sfx = { .s = { '.','f','o','o'} },
      cur = { .u = 0 };

    /* Prime the window with the first four characters; a shorter string
       cannot end with ".foo". */
    c = *str;
    if (0 == c) { return 0; }
    cur.s[0] = c;
    c = *++str;
    if (0 == c) { return 0; }
    cur.s[1] = c;
    c = *++str;
    if (0 == c) { return 0; }
    cur.s[2] = c;
    c = *++str;
    if (0 == c) { return 0; }
    cur.s[3] = c;

    while (1) {
        c = *++str;
        if (0 == c) {
            /* End of string: the window holds the last four characters. */
            return cur.u == sfx.u;
        }
        /* Slide the window one character to the right. */
        cur.s[0] = cur.s[1];
        cur.s[1] = cur.s[2];
        cur.s[2] = cur.s[3];
        cur.s[3] = c;
    }
}
No bytes are loaded from memory more than once (unless your target processor has hardly any registers). The char/byte copies in the loop should be turned into a single logical shift by the compiler on any 32 bit or larger word target processor, but I coded it the way I did so that the C code didn't have to be endian aware. The sfx (suffix) is turned into an integer constant by the compiler, and the equality suffix test is a single 32 bit integer equality test. Each new byte has to be tested for 0. While there are some bit twiddling ways to test for 0 as a byte within a word, they wouldn't safeguard against reading past the memory that we're supposed to have access to (assuming that str points to a properly terminated character string).
Solution 19:[19]
Or...
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
/*
 * Return true when haystack ends with needle, false otherwise.
 * An empty needle matches any haystack. Both arguments must be valid
 * NUL-terminated strings.
 */
bool strendscmp(const char* haystack, const char* needle) {
    const size_t hay_len = strlen(haystack);
    const size_t needle_len = strlen(needle);

    /* A needle longer than the haystack cannot be its ending. */
    if (needle_len > hay_len)
        return false;
    return strcmp(haystack + (hay_len - needle_len), needle) == 0;
}
//SOME TESTS
/* Print "true" or "false" for each haystack/needle pair, one result per line. */
int main(int argc, char** argv) {
    static const char *const cases[][2] = {
        { "abc", "bc" },            //true
        { "abc", "d" },             //false
        { "abc", "" },              //true
        { "sumo", "omo" },          //false
        { "babbbba", "bbaabaab" },  //false
        { "dadaab", "bdadaab" },    //false
    };

    for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        printf("%s\n", strendscmp(cases[i][0], cases[i][1]) ? "true" : "false");
    }
}
Solution 20:[20]
You can also generalize like this:
/*
 * Return 1 if text ends with extn, 0 otherwise.
 * A text that is exactly equal to extn counts as a match, and an empty
 * extn matches any text. Both arguments must be valid NUL-terminated
 * strings.
 */
int endsWith(const char* text, const char* extn)
{
    size_t len = strlen(text);
    size_t exprLen = strlen(extn);

    /* Reject only when text is strictly shorter than extn. Using > here
       would wrongly reject a text that equals the suffix. */
    if (len < exprLen)
        return 0;

    return memcmp(text + (len - exprLen), extn, exprLen) == 0;
}
Solution 21:[21]
/*
 * Return 1 if str ends with end, 0 otherwise.
 * Both arguments must be valid NUL-terminated strings; neither is modified.
 */
int strends(const char* str, const char* end){
    size_t str_len = strlen(str);
    size_t end_len = strlen(end);

    /* Without this guard the subtraction below would move the pointer
       before the start of str whenever end is the longer string. */
    if (end_len > str_len)
        return 0;
    return strcmp(str + (str_len - end_len), end) == 0;
}
I found this is the simplest way to achieve the result.
Solution 22:[22]
General solution with one strlen(needle), strstr() and test for '\0':
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/*
 * Return true when haystack ends with needle.
 * Returns false if either pointer is NULL, if needle is empty, or if no
 * occurrence of needle reaches the end of haystack.
 */
bool endsWith(const char* haystack, const char* needle)
{
    if (haystack == NULL || needle == NULL)
        return false;

    const size_t needle_len = strlen(needle);
    if (needle_len == 0)
        return false;

    /* Visit every occurrence of needle; it is a suffix only if one of them
       is immediately followed by the terminating NUL. */
    for (const char* hit = strstr(haystack, needle);
         hit != NULL;
         hit = strstr(hit + 1, needle)) {
        if (hit[needle_len] == '\0')
            return true;
    }
    return false;
}
/* Print the result of endsWith for a handful of sample haystack/needle pairs. */
int main (int argc, char * argv[])
{
    const char *gz_file = "file1.gz";
    const char *gz_tail = "1.gz";
    const char *null_tail = NULL;
    const char *tail_then_more = "1.gzabc";
    const char *tail_twice = "1.gzabc1.gz";
    const char *empty = "";
    const char *overlapping = "rbrbr";
    const char *overlap_tail = "rbr";

    printf("endsWith:\n");
    printf("'%s' '%s' = %d\n", gz_file, gz_tail, endsWith(gz_file, gz_tail));
    printf("'%s' NULL = %d\n", gz_file, endsWith(gz_file, null_tail));
    printf("'%s' '%s' = %d\n", tail_then_more, gz_tail, endsWith(tail_then_more, gz_tail));
    printf("'%s' '%s' = %d\n", tail_twice, gz_tail, endsWith(tail_twice, gz_tail));
    printf("'%s' '%s' = %d\n", tail_twice, empty, endsWith(tail_twice, empty));
    printf("'%s' '%s' = %d\n", overlapping, overlap_tail, endsWith(overlapping, overlap_tail));
    return 0;
}
Solution 23:[23]
I would do it like this:
/**
 * Check whether the string haystack ends with the string needle.
 *
 * The comparison is made at the position where the suffix has to start.
 * Searching with strstr would return NULL when needle is absent and would
 * stop at the first occurrence when needle appears more than once.
 *
 * @param haystack the string to be analyzed (may be NULL)
 * @param needle the suffix string (may be NULL); an empty needle matches
 * @return 0 if the string haystack ends with the string needle, 1 if not
 *         or if either argument is NULL
 */
int strbcmp(const char *haystack, const char *needle) {
    size_t haystack_length;
    size_t length;

    if (!haystack || !needle) return 1;

    haystack_length = strlen(haystack);
    length = strlen(needle);
    if (haystack_length < length) return 1;

    return memcmp(haystack + (haystack_length - length), needle, length) == 0 ? 0 : 1;
}
The test program is:
#include <stdio.h>
#include <string.h>
/*
 * Return 0 if haystack ends with needle, 1 if it does not or if either
 * argument is NULL. An empty needle matches.
 *
 * The comparison is made at the position where the suffix has to start.
 * Searching with strstr would return NULL when needle is absent and would
 * stop at the first occurrence when needle appears more than once.
 */
int strbcmp(const char *haystack, const char *needle) {
    size_t haystack_length;
    size_t length;

    if (!haystack || !needle) return 1;

    haystack_length = strlen(haystack);
    length = strlen(needle);
    if (haystack_length < length) return 1;

    return memcmp(haystack + (haystack_length - length), needle, length) == 0 ? 0 : 1;
}
/* Exercise strbcmp with a matching suffix, a NULL suffix and a non-matching one. */
int main (int argc, char * argv[]){
    char *a = "file1.gz";
    char *b = "1.gz";
    char *c = NULL;
    char *d = "1.gzabc";

    printf("%s %s = %d\n",a,b,strbcmp(a,b));
    /* Passing a NULL pointer for %s is undefined behaviour, so print a
       placeholder text for the NULL suffix instead. */
    printf("%s %s = %d\n",a,c ? c : "(null)",strbcmp(a,c));
    printf("%s %s = %d\n",d,b,strbcmp(d,b));
    return 0;
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow