Split a text using different delimiters

It has been a long time since I last coded in C, and I'm in a bit of trouble.

I get text from a file formatted like this:

# 3 10
P1 16 3
P2 8  1
P3 10 2

Starting from the second line, every line represents a process with its own attributes separated by spaces. I need to take every attribute and put them into an array of structs like this one:

/*
 * One process as described by a single line of the input file
 * (every line after the first, e.g. "P1 16 3").
 *
 * The type can be named either as `struct process` or, through the
 * typedef, simply as `process`.
 */
typedef struct process
{
    char *name;     /* process label, e.g. "P1" */
    int priority;   /* priority of the process */
    int duration;   /* duration of the process (unit not specified) */
} process;

to represent the processes. Any hints?

I've tried to use strtok with \n as the delimiter to isolate the lines, and then strtok with " " as the delimiter inside the while loop of the first strtok, but obviously this is not working.



Solution 1:[1]

As already pointed out in the comments section, nesting of strtok is not allowed, but you can use the function fgets to get the individual lines, and then use strtok to tokenize the contents of these lines.

If you want to solve the problem only with strtok and without fgets, then this is possible, too.

On POSIX (e.g. Linux) systems, you can simply use strtok_r instead of strtok, which allows nesting.

On non-POSIX, since nesting is not allowed, you will first have to perform a sequence of strtok calls to get pointers to the start of the individual lines, and you will have to remember all of these pointers. After finishing the first sequence of strtok calls, you can then use strtok to tokenize the contents of the individual lines:

Here is an example of such a non-POSIX solution:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINES 100

/*
 * Demonstrates tokenizing a multi-line text with plain strtok by doing it
 * in two passes: first split the whole buffer into lines and remember where
 * each line starts, then split every remembered line into space-separated
 * tokens. The two passes must not be interleaved because each new
 * strtok( str, ... ) call abandons the previous tokenization.
 */
int main( void )
{
    char input[] =
        "# 3 10\n"
        "P1 16 3\n"
        "P2 8  1\n"
        "P3 10 2\n";

    char *lines[MAX_LINES];
    int num_lines = 0;

    //pass 1: cut the buffer at every newline and store the line starts
    for ( char *line = strtok( input, "\n" ); line != NULL; line = strtok( NULL, "\n" ) )
    {
        //refuse to write past the end of the lines array
        if ( num_lines == MAX_LINES )
        {
            fprintf( stderr, "Too many lines!\n" );
            exit( EXIT_FAILURE );
        }

        lines[num_lines++] = line;
    }

    //pass 2: tokenize each stored line on its own
    for ( int i = 0; i < num_lines; i++ )
    {
        char *token = strtok( lines[i], " " );
        int token_no = 0;

        printf( "Line %d:\n", i + 1 );

        while ( token != NULL )
        {
            token_no++;
            printf( "Token #%d: %s\n", token_no, token );
            token = strtok( NULL, " " );
        }

        printf( "\n" );
    }
}

This program has the following output:

Line 1:
Token #1: #
Token #2: 3
Token #3: 10

Line 2:
Token #1: P1
Token #2: 16
Token #3: 3

Line 3:
Token #1: P2
Token #2: 8
Token #3: 1

Line 4:
Token #1: P3
Token #2: 10
Token #3: 2

How to write the individual tokens into a struct process is a completely different issue.

In order to convert the strings of the individual tokens to numbers, you can use the function strtol. Afterwards, you can write these numbers to the individual struct members.

In order to write to the name member of struct process, you can use the function strdup, if your platform already supports it. Otherwise, you will have to malloc sufficient space for the copy of the string, and then use strcpy. You could also make name point directly inside the input buffer, if that input buffer is not used for something else and the lifetime of that buffer is guaranteed to be at least as long as the lifetime of the struct process object. In that case, it is not necessary to copy the string.

Solution 2:[2]

To read the input line by line, first use fgets(3) function:

while (fgets(line, sizeof line, f)) {
    ....
}

This allows you to read the file line by line and to process each line as it is read.

Second, strtok(3) can be used perfectly, as in:

char *name = strtok(line, delim),
     *prio = strtok(NULL, delim),
     *dur  = strtok(NULL, delim);

This allows you to identify empty lines (name is NULL), comment lines (name starts with the '#' character), and lines with missing fields (prio or dur is NULL).

Third, use strdup(3) to allocate memory for the name string. Because a single fixed line buffer is used for every line, its contents are overwritten as soon as the next record is read, so the name must be copied into separately allocated memory. (The allocation is postponed to the last operation, so that if there is no space left or the format is bad, no memory has been allocated for this slot.)

Finally, the code is this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One process record, corresponding to one non-comment input line
 * such as "P1 2 3". */
struct process {
    char *name;     /* process name; read_processes stores a strdup() copy here */
    int priority;   /* priority, second field of the input line */
    int duration;   /* duration, third field of the input line */
};

/* Capacity of the global process table below. */
#define MAX_PROC 100
/* File-scope table that main() passes to read_processes(). */
struct process processes[MAX_PROC];

/* Parse process records from f into proc (at most count entries);
 * returns the number of records stored or one of the negative E_* codes. */
ssize_t read_processes(FILE *f, struct process *proc, size_t count);
/* Write one process record to out in a three-line human-readable form. */
void print_process(FILE *out, struct process *proc);

/* Negative error codes returned by read_processes(). */
#define E_NOSPACE       (-1)    /* more records in the input than array slots */
#define E_BAD_FORMAT    (-2)    /* missing or non-numeric field */
#define E_MALLOC        (-3)    /* strdup() failed */

/*
 * Read process records from the stream `f` into the array `proc`.
 *
 * Every input line is split on spaces, tabs and newlines into up to three
 * fields: name, priority and duration. Lines without any field and lines
 * whose first field starts with '#' are skipped.
 *
 * Parameters:
 *   f      stream to read from
 *   proc   array that receives the parsed records
 *   count  number of elements available in `proc`
 *
 * Returns the number of records stored, or a negative error code:
 *   E_BAD_FORMAT  priority or duration is missing or is not an integer
 *   E_NOSPACE     the input holds more records than `count`
 *   E_MALLOC      strdup() could not allocate the copy of the name
 *
 * Each stored name is a strdup() copy; releasing it with free() is up to
 * the caller. On error, records stored before the offending line keep
 * their names, and the slot of the offending line is left untouched.
 *
 * Lines are read through a 256-byte buffer, so a longer line is seen as
 * several shorter ones.
 */
ssize_t read_processes(FILE *f, struct process *proc, size_t count)
{
    char        line[256];          /* temporary storage for one line */
    const char *delim    = " \n\t"; /* delimiter string for strtok */
    size_t      ret_val  = 0;       /* records stored so far */

    while (fgets(line, sizeof line, f)) {
        char *name = strtok(line, delim),
             *prio = strtok(NULL, delim),
             *dur  = strtok(NULL, delim);

        if (!name || name[0] == '#')
            continue; /* empty line or comment */

        if (!prio || !dur)
            return E_BAD_FORMAT;

        if (count == 0) /* no space left in array */
            return E_NOSPACE;

        /* Convert both numeric fields into locals first, so the slot is
         * only written once the whole record is known to be valid. */
        int priority;
        int duration;

        if (sscanf(prio, "%d", &priority) != 1)
            return E_BAD_FORMAT;

        if (sscanf(dur, "%d", &duration) != 1)
            return E_BAD_FORMAT;

        /* Allocate last, so nothing has to be freed on a format error.
         * The copy is needed because `line` is reused for the next record. */
        char *name_copy = strdup(name);
        if (name_copy == NULL)
            return E_MALLOC;

        proc->name     = name_copy;
        proc->priority = priority;
        proc->duration = duration;

        proc++;
        count--;
        ret_val++;
    }
    return (ssize_t)ret_val;
}

/*
 * Read process records from standard input into the global `processes`
 * table and print each one to standard output. If read_processes()
 * reports an error, print a message to standard error and exit with
 * status 1.
 */
int main()
{
    ssize_t count = read_processes(stdin, processes, MAX_PROC);

    if (count < 0) {
        /* Translate the negative error code into a readable message. */
        char *msg;

        if (count == E_BAD_FORMAT)
            msg = "bad format in string";
        else if (count == E_NOSPACE)
            msg = "no space in array";
        else if (count == E_MALLOC)
            msg = "cannot allocate memory";
        else
            msg = "<<OTHER ERROR>> (this shouldn't happen)";

        fprintf(stderr, "Error: %s\n", msg);
        exit(1);
    }

    /* Print the records in the order they were read. */
    for (ssize_t i = 0; i < count; i++) {
        print_process(stdout, &processes[i]);
    }
}

/*
 * Write one process record to `out` as three lines: the name, then the
 * indented priority and duration.
 */
void print_process(FILE *out, struct process *proc)
{
    /* Adjacent string literals are joined by the compiler, so this single
     * call emits the same three lines as three separate calls would. */
    fprintf(out,
            "Process name: %s\n"
            "    priority: %d\n"
            "    duration: %d\n",
            proc->name,
            proc->priority,
            proc->duration);
}

and when run, it produces this output:

$ a.out <<EOF
# This is a comment
# This is another comment
# below is an empty line.

P1 2 3
P6 4 5
EOF
Process name: P1
    priority: 2
    duration: 3
Process name: P6
    priority: 4
    duration: 5
$ _

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2