/* src/cut.c - coreutils-8.23
   Global variables defined
   Data types defined
   Functions defined
   Macros defined
   Source code */
#include <config.h>
#include <stdio.h>
#include <assert.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "getndelim2.h"
#include "hash.h"
#include "quote.h"
#include "xstrndup.h"
/* Name of this program; handed to case_GETOPT_VERSION_CHAR in main.  */
#define PROGRAM_NAME "cut"
/* Author list; handed to case_GETOPT_VERSION_CHAR in main.  */
#define AUTHORS \
proper_name ("David M. Ihnat"), \
proper_name ("David MacKenzie"), \
proper_name ("Jim Meyering")
/* Report Message through error () with an exit status argument of 0 and no
   errno value, then call usage (EXIT_FAILURE), which ends the process.
   NOTE(review): Message is passed in the format-string position of error (),
   so it must not contain stray '%' conversions.  */
#define FATAL_ERROR(Message) \
do \
{ \
error (0, 0, (Message)); \
usage (EXIT_FAILURE); \
} \
while (0)
/* One selected range of byte or field positions.  LO and HI are both
   inclusive and positions count from 1.  HI == SIZE_MAX is used by
   set_fields for an open-ended range ("N-").  */
struct range_pair
{
size_t lo;
size_t hi;
};
/* Array of selected ranges, built by add_range_pair.  After set_fields it is
   sorted by LO, overlapping entries are merged, and a final sentinel entry
   with lo == hi == SIZE_MAX is appended.  */
static struct range_pair *rp;
/* Cursor into RP used while scanning a line: next_item advances it and
   print_kth / is_range_start_index consult it.  Reset to RP at the start of
   every line.  */
static struct range_pair *current_rp;
/* Number of entries of RP currently in use.  */
static size_t n_rp;
/* Number of entries allocated for RP; grown through X2NREALLOC.  */
static size_t n_rp_allocated;
static void
add_range_pair (size_t lo, size_t hi)
{
if (n_rp == n_rp_allocated)
rp = X2NREALLOC (rp, &n_rp_allocated);
rp[n_rp].lo = lo;
rp[n_rp].hi = hi;
++n_rp;
}
/* Buffer that cut_fields hands to getndelim2 to hold the first field of a
   line; freed and reset to NULL there when the read fails.  */
static char *field_1_buffer;
/* Size argument that accompanies FIELD_1_BUFFER in the getndelim2 call.  */
static size_t field_1_bufsize;
/* What kind of list was requested on the command line.  */
enum operating_mode
{
/* No -b, -c or -f option seen yet.  */
undefined_mode,
/* Selected by -b and also by -c (main maps both to this mode).  */
byte_mode,
/* Selected by -f.  */
field_mode
};
/* Mode chosen by main; cut_stream dispatches on it.  */
static enum operating_mode operating_mode;
/* Set by -s: in field mode, lines without a delimiter are not printed.  */
static bool suppress_non_delimited;
/* Set by --complement: set_fields inverts the selected ranges through
   complement_rp.  */
static bool complement;
/* Input field delimiter: the -d argument, or TAB when -d was not given.  */
static unsigned char delim;
/* True once --output-delimiter has been seen.  */
static bool output_delimiter_specified;
/* Number of bytes of OUTPUT_DELIMITER_STRING that are written between
   selected pieces.  main sets it to 1 for an empty --output-delimiter
   argument, so the terminating NUL byte is written in that case.  */
static size_t output_delimiter_length;
/* Bytes written between selected pieces; defaults to the input delimiter
   when --output-delimiter was not given.  */
static char *output_delimiter_string;
/* Set by cut_file when "-" was processed, so main knows to close stdin.  */
static bool have_read_stdin;
/* Return values for long options that have no single-letter form.  They
   start at CHAR_MAX + 1 so they cannot collide with any option character
   returned by getopt_long.  */
enum
{
OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
COMPLEMENT_OPTION
};
/* Long-option table passed to getopt_long in main; the last entry of all
   zeros/NULLs terminates the table.  */
static struct option const longopts[] =
{
{"bytes", required_argument, NULL, 'b'},
{"characters", required_argument, NULL, 'c'},
{"fields", required_argument, NULL, 'f'},
{"delimiter", required_argument, NULL, 'd'},
{"only-delimited", no_argument, NULL, 's'},
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
{"complement", no_argument, NULL, COMPLEMENT_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS only the short hint from emit_try_help ()
   is produced; otherwise the full help text is written to stdout.
   This function never returns: it always ends in exit (STATUS).  */
void
usage (int status)
{
if (status != EXIT_SUCCESS)
emit_try_help ();
else
{
/* Synopsis line; %s is the invoked program name.  */
printf (_("\
Usage: %s OPTION... [FILE]...\n\
"),
program_name);
fputs (_("\
Print selected parts of lines from each FILE to standard output.\n\
"), stdout);
emit_mandatory_arg_note ();
/* Option descriptions, split into several translatable chunks.  */
fputs (_("\
-b, --bytes=LIST select only these bytes\n\
-c, --characters=LIST select only these characters\n\
-d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
"), stdout);
fputs (_("\
-f, --fields=LIST select only these fields; also print any line\n\
that contains no delimiter character, unless\n\
the -s option is specified\n\
-n (ignored)\n\
"), stdout);
fputs (_("\
--complement complement the set of selected bytes, characters\n\
or fields\n\
"), stdout);
fputs (_("\
-s, --only-delimited do not print lines not containing delimiters\n\
--output-delimiter=STRING use STRING as the output delimiter\n\
the default is to use the input delimiter\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
/* Explanation of the LIST syntax accepted by -b, -c and -f.  */
fputs (_("\
\n\
Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
range, or many ranges separated by commas. Selected input is written\n\
in the same order that it is read, and is written exactly once.\n\
"), stdout);
fputs (_("\
Each range is one of:\n\
\n\
N N'th byte, character or field, counted from 1\n\
N- from N'th byte, character or field, to end of line\n\
N-M from N'th to M'th (included) byte, character or field\n\
-M from first to M'th (included) byte, character or field\n\
\n\
With no FILE, or when FILE is -, read standard input.\n\
"), stdout);
emit_ancillary_info ();
}
exit (status);
}
/* qsort comparison function: order two struct range_pair objects by
   ascending LO.

   The LO members are compared as size_t.  Narrowing them to int first would
   truncate positions above INT_MAX and could order such ranges incorrectly,
   which in turn breaks the merge pass that relies on the array being
   sorted.

   Returns a negative value, zero, or a positive value when A's LO is
   respectively less than, equal to, or greater than B's LO.  */
static int
compare_ranges (const void *a, const void *b)
{
  const struct range_pair *ra = a;
  const struct range_pair *rb = b;

  return (ra->lo > rb->lo) - (ra->lo < rb->lo);
}
/* When --complement is in effect, replace the global range array RP by its
   complement: every position from 1 to SIZE_MAX that is not covered by the
   current ranges.  Does nothing when COMPLEMENT is false.

   The existing array is expected to be sorted by LO with overlapping ranges
   already merged, which is the state set_fields leaves it in before calling
   this function.  */
static void
complement_rp (void)
{
  if (!complement)
    return;

  /* Detach the current array so add_range_pair starts a fresh one.  */
  struct range_pair *old = rp;
  size_t old_count = n_rp;

  rp = NULL;
  n_rp = 0;
  n_rp_allocated = 0;

  /* Positions in front of the first range.  */
  if (1 < old[0].lo)
    add_range_pair (1, old[0].lo - 1);

  /* Gaps between consecutive ranges; directly adjacent ranges leave none.  */
  for (size_t k = 1; k < old_count; k++)
    {
      size_t gap_lo = old[k - 1].hi + 1;

      if (gap_lo != old[k].lo)
        add_range_pair (gap_lo, old[k].lo - 1);
    }

  /* Positions after the last range, unless it already extends to the end.  */
  size_t last_hi = old[old_count - 1].hi;
  if (last_hi != SIZE_MAX)
    add_range_pair (last_hi + 1, SIZE_MAX);

  free (old);
}
/* Parse FIELDSTR, the LIST argument of -b, -c or -f, into the global range
   array RP.

   FIELDSTR is a sequence of ranges separated by commas or blanks.  Each
   range is "N", "N-", "N-M" or "-M"; positions count from 1.

   After parsing, the ranges are sorted by starting position, overlapping
   ranges are merged, the set is complemented when --complement was given,
   and a sentinel entry with lo == hi == SIZE_MAX is appended.

   Returns true when at least one range was found.  Malformed input is
   reported through FATAL_ERROR (which calls usage (EXIT_FAILURE)); a number
   that is too large is reported with error () followed by
   exit (EXIT_FAILURE).  */
static bool
set_fields (const char *fieldstr)
{
/* INITIAL is the low end of the range being parsed, VALUE the number
   currently being accumulated; LHS_SPECIFIED / RHS_SPECIFIED record whether
   digits were seen before / after the dash of the current range.  */
size_t initial = 1; size_t value = 0; bool lhs_specified = false;
bool rhs_specified = false;
/* DASH_FOUND: a '-' was seen in the current range.
   FIELD_FOUND: at least one range has been added.  */
bool dash_found = false; bool field_found = false;
size_t i;
/* True while the previous character was a digit; used to remember where
   the current number starts.  */
bool in_digits = false;
while (true)
{
if (*fieldstr == '-')
{
in_digits = false;
/* A second dash in the same range is an error.  */
if (dash_found)
FATAL_ERROR (_("invalid byte, character or field list"));
dash_found = true;
fieldstr++;
/* An explicit left endpoint of 0 is rejected.  */
if (lhs_specified && !value)
FATAL_ERROR (_("fields and positions are numbered from 1"));
/* "-M" starts at position 1.  */
initial = (lhs_specified ? value : 1);
value = 0;
}
else if (*fieldstr == ','
|| isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
{
/* End of one range: record it.  */
in_digits = false;
if (dash_found)
{
dash_found = false;
/* A lone "-" has neither endpoint.  */
if (!lhs_specified && !rhs_specified)
FATAL_ERROR (_("invalid range with no endpoint: -"));
if (!rhs_specified)
{
/* "N-": open-ended range, represented by hi == SIZE_MAX.  */
add_range_pair (initial, SIZE_MAX);
field_found = true;
}
else
{
/* "N-M" or "-M": the right endpoint must not be below the left one.  */
if (value < initial)
FATAL_ERROR (_("invalid decreasing range"));
add_range_pair (initial, value);
field_found = true;
}
value = 0;
}
else
{
/* A single position "N".  VALUE is also 0 here when no digits were seen
   at all (for example an empty list or two adjacent separators).  */
if (value == 0)
FATAL_ERROR (_("fields and positions are numbered from 1"));
add_range_pair (value, value);
value = 0;
field_found = true;
}
if (*fieldstr == '\0')
break;
fieldstr++;
lhs_specified = false;
rhs_specified = false;
}
else if (ISDIGIT (*fieldstr))
{
/* Start of the number currently being read, kept for the "too large"
   diagnostic below.  */
static char const *num_start;
if (!in_digits || !num_start)
num_start = fieldstr;
in_digits = true;
if (dash_found)
rhs_specified = 1;
else
lhs_specified = 1;
/* Accumulate the digit into VALUE.  SIZE_MAX itself is rejected as well
   because it is reserved for open-ended ranges and the sentinel.
   NOTE(review): DECIMAL_DIGIT_ACCUMULATE presumably yields false on
   overflow -- confirm against its definition.  */
if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
|| value == SIZE_MAX)
{
/* Quote the whole offending number, not just the digits consumed so far.  */
size_t len = strspn (num_start, "0123456789");
char *bad_num = xstrndup (num_start, len);
if (operating_mode == byte_mode)
error (0, 0,
_("byte offset %s is too large"), quote (bad_num));
else
error (0, 0,
_("field number %s is too large"), quote (bad_num));
free (bad_num);
exit (EXIT_FAILURE);
}
fieldstr++;
}
else
FATAL_ERROR (_("invalid byte, character or field list"));
}
/* Sort the ranges by starting position.  */
qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
/* Merge every range that starts inside (or at the end of) an earlier one
   into that earlier range.  Ranges that are merely adjacent stay
   separate.  */
for (i = 0; i < n_rp; ++i)
{
for (size_t j = i + 1; j < n_rp; ++j)
{
if (rp[j].lo <= rp[i].hi)
{
rp[i].hi = MAX (rp[j].hi, rp[i].hi);
/* Remove entry J by shifting the tail down, then re-examine index J.  */
memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
n_rp--;
j--;
}
else
break;
}
}
/* No-op unless --complement was given.  */
complement_rp ();
/* Append the sentinel entry so that current_rp can always be dereferenced
   after it has been advanced past the last real range.  */
++n_rp;
rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
return field_found;
}
/* Advance *ITEM_IDX to the next byte or field position.  When the new
   position lies beyond the end of the range CURRENT_RP points at, move
   CURRENT_RP on to the following range.  */
static inline void
next_item (size_t *item_idx)
{
  size_t advanced = *item_idx + 1;

  *item_idx = advanced;
  if (current_rp->hi < advanced)
    ++current_rp;
}
/* Return true when position K is at or past the start of the range that
   CURRENT_RP currently points at.  */
static inline bool
print_kth (size_t k)
{
  return k >= current_rp->lo;
}
/* Return true when position K is exactly the first position of the range
   that CURRENT_RP currently points at.  */
static inline bool
is_range_start_index (size_t k)
{
  return current_rp->lo == k;
}
/* Read STREAM and write the selected bytes of every line to stdout.

   Each input line produces one output line.  When --output-delimiter was
   given, the output delimiter is written in front of every selected range
   except the first one printed on a line.  If the input ends without a
   trailing newline, a newline is still written after the final partial
   line.  */
static void
cut_bytes (FILE *stream)
{
  size_t pos = 0;               /* 1-based index of the last byte read on this line */
  bool emitted_on_line = false; /* something was already printed on this line */

  current_rp = rp;

  for (;;)
    {
      int ch = getc (stream);

      if (ch == EOF)
        {
          /* Terminate an unfinished last line.  */
          if (pos != 0)
            putchar ('\n');
          return;
        }

      if (ch == '\n')
        {
          /* End of line: emit it and restart the range scan.  */
          putchar ('\n');
          pos = 0;
          emitted_on_line = false;
          current_rp = rp;
          continue;
        }

      next_item (&pos);
      if (!print_kth (pos))
        continue;

      if (output_delimiter_specified)
        {
          if (emitted_on_line && is_range_start_index (pos))
            fwrite (output_delimiter_string, sizeof (char),
                    output_delimiter_length, stdout);
          emitted_on_line = true;
        }

      putchar (ch);
    }
}
/* Read STREAM and write the selected fields of every line to stdout.
   Fields are separated by DELIM on input and by OUTPUT_DELIMITER_STRING on
   output.  A line that contains no delimiter is printed unchanged, unless
   SUPPRESS_NON_DELIMITED is set, in which case it is dropped.  */
static void
cut_fields (FILE *stream)
{
int c;
/* 1-based index of the field currently being read.  */
size_t field_idx = 1;
/* True once a selected field has been handled on the current line; decides
   whether an output delimiter precedes the next selected field.  */
bool found_any_selected_field = false;
bool buffer_first_field;
current_rp = rp;
/* Peek at the first character so that empty input produces no output.  */
c = getc (stream);
if (c == EOF)
return;
ungetc (c, stream);
c = 0;
/* The first field is read into a buffer when exactly one of these holds:
   -s was given, or field 1 is not selected.  */
buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
while (1)
{
if (field_idx == 1 && buffer_first_field)
{
ssize_t len;
size_t n_bytes;
/* NOTE(review): getndelim2 presumably reads up to and including the first
   DELIM or newline -- confirm against its documentation.  */
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
GETNLINE_NO_LIMIT, delim, '\n', stream);
if (len < 0)
{
free (field_1_buffer);
field_1_buffer = NULL;
/* A read error or end of input ends processing; any other failure is
   reported through xalloc_die.  */
if (ferror (stream) || feof (stream))
break;
xalloc_die ();
}
n_bytes = len;
assert (n_bytes != 0);
c = 0;
/* The data read does not end in DELIM: treat the line as having no
   delimiter.  */
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
{
if (suppress_non_delimited)
{
/* -s: print nothing for this line.  */
}
else
{
/* Print the line as is, adding a newline when the input lacked one.  */
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
if (field_1_buffer[n_bytes - 1] != '\n')
putchar ('\n');
c = '\n';
}
/* FIELD_IDX is still 1, so the next iteration reads the next line.  */
continue;
}
if (print_kth (1))
{
/* Write field 1 without its trailing delimiter.  */
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
if (delim == '\n')
{
/* With a newline delimiter, only count field 1 as printed (so that an
   output delimiter follows it) when more input remains.  */
int last_c = getc (stream);
if (last_c != EOF)
{
ungetc (last_c, stream);
found_any_selected_field = true;
}
}
else
found_any_selected_field = true;
}
next_item (&field_idx);
}
/* PREV_C tracks the last character consumed before the field terminator;
   it is consulted below when deciding whether to add a final newline.  */
int prev_c = c;
if (print_kth (field_idx))
{
/* Selected field: separate it from the previously printed one, then copy
   it to stdout up to the next delimiter, newline or EOF.  */
if (found_any_selected_field)
{
fwrite (output_delimiter_string, sizeof (char),
output_delimiter_length, stdout);
}
found_any_selected_field = true;
while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
{
putchar (c);
prev_c = c;
}
}
else
{
/* Unselected field: consume it without printing.  */
while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
{
prev_c = c;
}
}
/* When the delimiter is a newline, a delimiter that is the very last byte
   of the input is treated as EOF.  */
if (delim == '\n' && c == delim)
{
int last_c = getc (stream);
if (last_c != EOF)
ungetc (last_c, stream);
else
c = last_c;
}
if (c == delim)
next_item (&field_idx);
else if (c == '\n' || c == EOF)
{
/* End of line: finish the output line unless the whole line is being
   suppressed (-s and no delimiter was seen, so FIELD_IDX is still 1).  */
if (found_any_selected_field
|| !(suppress_non_delimited && field_idx == 1))
{
/* At EOF, skip the newline when the previous character already was one
   (unless the delimiter itself is a newline).  */
if (c == '\n' || prev_c != '\n' || delim == '\n')
putchar ('\n');
}
if (c == EOF)
break;
/* Reset the per-line state for the next line.  */
field_idx = 1;
current_rp = rp;
found_any_selected_field = false;
}
}
}
static void
cut_stream (FILE *stream)
{
if (operating_mode == byte_mode)
cut_bytes (stream);
else
cut_fields (stream);
}
/* Process the file named FILE, or standard input when FILE is "-".

   Returns true on success.  On failure to open, read or close the file a
   diagnostic naming FILE is printed with error () and false is returned.

   A stream opened here is always closed before returning, including when a
   read error occurred; returning early on a read error would leak the FILE
   object for every failing input.  Standard input is never closed here (main
   does that once at the end); its error and EOF indicators are cleared
   instead.  */
static bool
cut_file (char const *file)
{
  FILE *stream;
  bool is_stdin = STREQ (file, "-");

  if (is_stdin)
    {
      have_read_stdin = true;
      stream = stdin;
    }
  else
    {
      stream = fopen (file, "r");
      if (stream == NULL)
        {
          error (0, errno, "%s", file);
          return false;
        }
    }

  fadvise (stream, FADVISE_SEQUENTIAL);

  cut_stream (stream);

  /* Save errno now: clearerr/fclose below may overwrite it.  */
  int saved_errno = errno;
  bool failed = ferror (stream) != 0;

  if (is_stdin)
    clearerr (stream);
  else if (fclose (stream) == EOF && !failed)
    {
      /* Report the close failure only when no read error came first.  */
      saved_errno = errno;
      failed = true;
    }

  if (failed)
    {
      error (0, saved_errno, "%s", file);
      return false;
    }
  return true;
}
/* Entry point: parse the command line, build the list of selected ranges,
   then process every FILE operand (or standard input when there is none).

   Exits with EXIT_SUCCESS when every input was processed without error and
   with EXIT_FAILURE otherwise.  Invalid option combinations are reported
   through FATAL_ERROR, which prints the message and calls
   usage (EXIT_FAILURE).  */
int
main (int argc, char **argv)
{
  int optc;
  bool ok;
  bool delim_specified = false;
  char *spec_list_string IF_LINT ( = NULL);

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  operating_mode = undefined_mode;

  /* By default, lines without a delimiter are printed in field mode.  */
  suppress_non_delimited = false;

  delim = '\0';
  have_read_stdin = false;

  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
    {
      switch (optc)
        {
        case 'b':
        case 'c':
          /* -b and -c are handled identically: both select bytes.  */
          if (operating_mode != undefined_mode)
            FATAL_ERROR (_("only one type of list may be specified"));
          operating_mode = byte_mode;
          spec_list_string = optarg;
          break;

        case 'f':
          if (operating_mode != undefined_mode)
            FATAL_ERROR (_("only one type of list may be specified"));
          operating_mode = field_mode;
          spec_list_string = optarg;
          break;

        case 'd':
          /* An empty argument is accepted and selects the NUL byte.  */
          if (optarg[0] != '\0' && optarg[1] != '\0')
            FATAL_ERROR (_("the delimiter must be a single character"));
          delim = optarg[0];
          delim_specified = true;
          break;

        case OUTPUT_DELIMITER_OPTION:
          output_delimiter_specified = true;
          /* An empty argument means a one-byte delimiter: the NUL byte that
             terminates the (empty) string.  */
          output_delimiter_length = (optarg[0] == '\0'
                                     ? 1 : strlen (optarg));
          output_delimiter_string = xstrdup (optarg);
          break;

        case 'n':
          /* Accepted and ignored.  */
          break;

        case 's':
          suppress_non_delimited = true;
          break;

        case COMPLEMENT_OPTION:
          complement = true;
          break;

        case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:
          usage (EXIT_FAILURE);
        }
    }

  if (operating_mode == undefined_mode)
    FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));

  /* The message is kept as a single literal: splitting it with a
     backslash-newline whose continuation line starts without a space
     produces "onlywhen".  */
  if (delim_specified && operating_mode != field_mode)
    FATAL_ERROR (_("an input delimiter may be specified only when operating on fields"));

  if (suppress_non_delimited && operating_mode != field_mode)
    FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\tonly when operating on fields"));

  if (! set_fields (spec_list_string))
    {
      if (operating_mode == field_mode)
        FATAL_ERROR (_("missing list of fields"));
      else
        FATAL_ERROR (_("missing list of positions"));
    }

  /* TAB is the default input delimiter.  */
  if (!delim_specified)
    delim = '\t';

  /* Without --output-delimiter, the output delimiter is the input one.  */
  if (output_delimiter_string == NULL)
    {
      static char dummy[2];
      dummy[0] = delim;
      dummy[1] = '\0';
      output_delimiter_string = dummy;
      output_delimiter_length = 1;
    }

  if (optind == argc)
    ok = cut_file ("-");
  else
    for (ok = true; optind < argc; optind++)
      ok &= cut_file (argv[optind]);

  /* cut_file never closes stdin, so close it once here and report a
     failure to do so.  */
  if (have_read_stdin && fclose (stdin) == EOF)
    {
      error (0, errno, "-");
      ok = false;
    }

  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}