aboutsummaryrefslogtreecommitdiff
path: root/src/wc.c
diff options
context:
space:
mode:
author Charles Cabergs <me@cacharle.xyz> 2021-07-07 16:04:20 +0200
committer Charles Cabergs <me@cacharle.xyz> 2021-07-07 16:04:20 +0200
commit b807fc8105686a6a36531dd2404d8cf1b6945b0f (patch)
tree aa55f192a74f93ea2d4e9d811794cb7ef2b8f5e7 /src/wc.c
parent ea770b8419686f9c8f71c20d275a832efc1b3f48 (diff)
download coreutils-b807fc8105686a6a36531dd2404d8cf1b6945b0f.tar.gz
coreutils-b807fc8105686a6a36531dd2404d8cf1b6945b0f.tar.bz2
coreutils-b807fc8105686a6a36531dd2404d8cf1b6945b0f.zip
Added wc (HEAD, master)
Diffstat (limited to 'src/wc.c')
-rw-r--r-- src/wc.c 159
1 files changed, 159 insertions, 0 deletions
diff --git a/src/wc.c b/src/wc.c
new file mode 100644
index 0000000..c8aa31b
--- /dev/null
+++ b/src/wc.c
@@ -0,0 +1,159 @@
+#define _POSIX_C_SOURCE 2
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+// Bit flags naming the tallies wc can report. Each flag occupies its own bit
+// so any combination can be OR-ed together into a single mask.
+enum active_counters
+{
+    COUNTER_BYTES = 0x1, // selected by -c
+    COUNTER_CHARS = 0x2, // selected by -m
+    COUNTER_LINES = 0x4, // selected by -l
+    COUNTER_WORDS = 0x8, // selected by -w
+};
+
+// Mask of COUNTER_* flags currently enabled. Starts empty; main fills it in
+// from the command-line options, and the counting/printing code reads it.
+static enum active_counters active_counters = 0;
+
+// Tallies for one input. The same structure also accumulates the sums that
+// are printed on the "total" line.
+struct counters
+{
+ size_t bytes; // reported when COUNTER_BYTES is active
+ size_t chars; // reported when COUNTER_CHARS is active
+ size_t lines; // number of L'\n' characters seen
+ size_t words; // number of runs of non-whitespace characters
+};
+
+// Capacity, in elements, of the read buffer that count_file declares.
+#define BUF_SIZE 4096
+
+// Try to learn the size of `file` by seeking to its end.
+// Returns true and stores the end offset in *size on success. Returns false,
+// leaving *size untouched, when fseek or ftell fails (for example on a pipe
+// or a terminal).
+static bool stream_size(FILE *file, size_t *size)
+{
+    if (fseek(file, 0, SEEK_END) != 0)
+        return false;
+    long end = ftell(file);
+    if (end < 0)
+        return false;
+    *size = (size_t)end;
+    return true;
+}
+
+// Count the tallies selected in active_counters for the stream `file`.
+//
+// chars, lines and words are added to the values already in *counters;
+// bytes is overwritten. Only the tallies whose flag is set are updated.
+//
+// The input is decoded one wide character at a time, so a word is never
+// split by a buffer boundary and embedded NUL characters do not truncate
+// the scan. The byte count prefers the stream's end offset; when the stream
+// cannot be seeked (stdin from a pipe), it falls back to counting what was
+// actually read.
+static void count_file(FILE *file, struct counters *counters)
+{
+    if (active_counters == COUNTER_BYTES)
+    {
+        // Fast path: no decoding needed, the size alone answers the question.
+        if (stream_size(file, &counters->bytes))
+            return;
+        // Not seekable: the stream has no orientation yet, so plain byte
+        // reads are allowed here.
+        char chunk[BUF_SIZE];
+        size_t total = 0;
+        size_t got;
+        while ((got = fread(chunk, 1, sizeof chunk, file)) > 0)
+            total += got;
+        counters->bytes = total;
+        return;
+    }
+
+    const bool want_bytes = (active_counters & COUNTER_BYTES) != 0;
+    size_t encoded_bytes = 0;      // fallback byte tally for unseekable input
+    mbstate_t encode_state = {0};  // conversion state for wcrtomb
+    char encoded[MB_LEN_MAX];
+    bool in_word = false;
+    wint_t wc;
+
+    while ((wc = fgetwc(file)) != WEOF)
+    {
+        if (active_counters & COUNTER_CHARS)
+            counters->chars++;
+        if ((active_counters & COUNTER_LINES) && wc == L'\n')
+            counters->lines++;
+        if (active_counters & COUNTER_WORDS)
+        {
+            // A word starts at the first non-space character after a space
+            // (or after the start of the input).
+            if (iswspace(wc))
+                in_word = false;
+            else if (!in_word)
+            {
+                in_word = true;
+                counters->words++;
+            }
+        }
+        if (want_bytes)
+        {
+            // Re-encode to learn how many bytes this character occupies.
+            // NOTE(review): for stateful encodings the re-encoded length may
+            // differ from the input; this tally is only used when seeking fails.
+            size_t len = wcrtomb(encoded, (wchar_t)wc, &encode_state);
+            if (len != (size_t)-1)
+                encoded_bytes += len;
+        }
+    }
+    // WEOF without end-of-file means a read or decoding error stopped the
+    // scan early; say so instead of silently printing partial tallies.
+    if (!feof(file))
+        fprintf(stderr, "wc: %s\n", strerror(errno));
+
+    if (want_bytes && !stream_size(file, &counters->bytes))
+        counters->bytes = encoded_bytes;
+}
+
+// Write one result line to stdout: the active tallies from *counters in the
+// fixed order lines, words, chars, bytes, separated by single spaces, then a
+// space, `name`, and a newline.
+static void print_counters(struct counters *counters, char *name)
+{
+    // Output columns in display order, each paired with the flag enabling it.
+    const struct
+    {
+        int    flag;
+        size_t value;
+    } columns[] = {
+        {COUNTER_LINES, counters->lines},
+        {COUNTER_WORDS, counters->words},
+        {COUNTER_CHARS, counters->chars},
+        {COUNTER_BYTES, counters->bytes},
+    };
+    const char *separator = ""; // becomes " " once a column has been printed
+
+    for (size_t col = 0; col < sizeof columns / sizeof columns[0]; col++)
+    {
+        if (!(active_counters & columns[col].flag))
+            continue;
+        printf("%s%zu", separator, columns[col].value);
+        separator = " ";
+    }
+    fputc(' ', stdout);
+    fputs(name, stdout);
+    fputc('\n', stdout);
+}
+
+// Count one operand and print its result line.
+// `filepath` names the file to read; the single character "-" selects stdin.
+// On an open failure a diagnostic goes to stderr and nothing is counted.
+// Otherwise the tallies are printed and added into *total_counters.
+static void count_filepath(char *filepath, struct counters *total_counters)
+{
+    const bool from_stdin = strcmp(filepath, "-") == 0;
+    FILE *stream = from_stdin ? stdin : fopen(filepath, "r");
+
+    if (stream == NULL)
+    {
+        fprintf(stderr, "wc: %s: %s\n", filepath, strerror(errno));
+        return;
+    }
+
+    struct counters current = {0};
+    count_file(stream, &current);
+    print_counters(&current, filepath);
+
+    total_counters->lines += current.lines;
+    total_counters->words += current.words;
+    total_counters->chars += current.chars;
+    total_counters->bytes += current.bytes;
+
+    // stdin is not ours to close; only release streams opened here.
+    if (!from_stdin)
+        fclose(stream);
+}
+
+// Entry point. Options -c, -l, -m and -w enable the bytes, lines, chars and
+// words tallies; with none given, bytes, lines and words are enabled.
+// Every remaining argument is counted as a file ("-" or no argument at all
+// means stdin), and a "total" line follows when more than one operand is given.
+// Returns 0 after processing the operands, 1 on a locale or usage error.
+int main(int argc, char *argv[])
+{
+    // The wide-character stream functions decode multibyte input (e.g. UTF-8)
+    // according to the locale; "" selects it from the environment variables.
+    // With glibc, fopen(x, "r,ccs=utf-8") would also work but is an extension.
+    if (setlocale(LC_ALL, "") == NULL)
+    {
+        fputs("wc: cannot set locale from the environment\n", stderr);
+        exit(1);
+    }
+    int option;
+    while ((option = getopt(argc, argv, "clmw")) != -1)
+    {
+        switch (option)
+        {
+            case 'c': active_counters |= COUNTER_BYTES; break;
+            case 'l': active_counters |= COUNTER_LINES; break;
+            case 'm': active_counters |= COUNTER_CHARS; break;
+            case 'w': active_counters |= COUNTER_WORDS; break;
+            default:
+                // Unknown option: stop rather than count with options the
+                // user did not ask for. getopt reports the offending option
+                // itself as long as opterr is left at its default.
+                fputs("usage: wc [-clmw] [file...]\n", stderr);
+                return 1;
+        }
+    }
+    if (active_counters == 0)
+        active_counters = COUNTER_BYTES | COUNTER_LINES | COUNTER_WORDS;
+    struct counters total_counters = {0};
+    if (argc - optind == 0)
+        count_filepath("-", &total_counters);
+    for (int i = optind; i < argc; i++)
+        count_filepath(argv[i], &total_counters);
+    if (argc - optind > 1)
+        print_counters(&total_counters, "total");
+    // NOTE(review): count_filepath returns nothing, so a file that failed to
+    // open does not influence the exit status here.
+    return 0;
+}