/* main.c: rewrite a file in place with CRLF line endings converted to LF. */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
/* Size in bytes of the staging buffer between conversion and write(2). */
enum { OUT_BUFFER_SIZE = 512 * 1024 };

/* Number of input bytes examined at once on the fast path. */
enum { WORD_SIZE = sizeof(uint64_t) };

/*
 * Write all `len` bytes of `buf` to `fd`, retrying after short writes and
 * EINTR. Returns 0 on success, -1 if write(2) reports any other error.
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t written = write(fd, buf, len);
        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        buf += written;
        len -= (size_t)written;
    }
    return 0;
}

/*
 * Return non-zero if any of the eight bytes in `word` is '\r'.
 *
 * XOR-ing with a word of repeated '\r' turns every '\r' byte into zero;
 * the remaining expression is the usual "does this word contain a zero
 * byte" bit trick, which is exact when used as a yes/no answer.
 */
static int has_cr(uint64_t word)
{
    const uint64_t ones = UINT64_C(0x0101010101010101);
    const uint64_t highs = UINT64_C(0x8080808080808080);
    uint64_t x = word ^ (ones * (uint64_t)'\r');

    return ((x - ones) & ~x & highs) != 0;
}

/*
 * Copy `len` bytes from `src` to `out_fd`, replacing every "\r\n" pair with
 * a single "\n". A '\r' that is not followed by '\n' is copied unchanged.
 *
 * `buf` is a caller-provided staging buffer of `buf_size` bytes; `buf_size`
 * must be at least WORD_SIZE. Returns 0 on success, -1 on a write failure.
 */
static int convert_crlf(const char *src, size_t len, int out_fd,
                        char *buf, size_t buf_size)
{
    size_t r = 0; /* read index into src */
    size_t w = 0; /* number of bytes currently staged in buf */

    while (r < len) {
        /* Flush first: either path below stages at most WORD_SIZE bytes. */
        if (buf_size - w < WORD_SIZE) {
            if (write_all(out_fd, buf, w) != 0) {
                return -1;
            }
            w = 0;
        }

        size_t remaining = len - r;

        /* Fast path: a full word with no '\r' in it is copied verbatim. */
        if (remaining >= WORD_SIZE) {
            uint64_t word;
            memcpy(&word, src + r, sizeof word);
            if (!has_cr(word)) {
                memcpy(buf + w, &word, sizeof word);
                r += WORD_SIZE;
                w += WORD_SIZE;
                continue;
            }
        }

        /*
         * Slow path: walk up to WORD_SIZE bytes one at a time. Every input
         * position is bounds-checked, so a trailing '\r' never reads past
         * the end, and `r` may step one byte beyond `stop` when a CRLF pair
         * straddles it (the outer loop copes with any alignment).
         */
        size_t stop = r + (remaining < WORD_SIZE ? remaining : WORD_SIZE);
        while (r < stop) {
            if (src[r] == '\r' && r + 1 < len && src[r + 1] == '\n') {
                buf[w++] = '\n';
                r += 2;
            } else {
                buf[w++] = src[r++];
            }
        }
    }

    return write_all(out_fd, buf, w);
}

/*
 * Usage: <program> <in_file>
 *
 * Converts CRLF line endings in <in_file> to LF. The converted data is
 * written to "<in_file>_tmp", which is then renamed over <in_file>. A
 * leading UTF-8 byte order mark, if present, is not copied to the output.
 * An empty input file is left untouched.
 *
 * Returns 0 on success and 1 on any failure; on failure the original file
 * is left in place and the temporary file is removed.
 */
int main(int argc, char **argv)
{
    int rc = 1;
    int in_fd = -1;
    int out_fd = -1;
    int tmp_exists = 0;
    int name_len;
    char *mem = MAP_FAILED;
    char *buffer = NULL;
    size_t file_size = 0;
    size_t start = 0;
    char tmp_name[PATH_MAX];
    struct stat desc;

    if (argc != 2) {
        printf("Expected <in_file>\n");
        return 1;
    }

    /* O_RDWR (rather than O_RDONLY) so read-only files are refused early. */
    in_fd = open(argv[1], O_RDWR);
    if (in_fd < 0) {
        printf("Unable to open %s to read\n", argv[1]);
        return 1;
    }

    if (fstat(in_fd, &desc) == -1) {
        printf("Unable to stat %s\n", argv[1]);
        goto out;
    }
    file_size = (size_t)desc.st_size;
    if (file_size == 0) {
        /* Nothing to convert; do not create a temporary file at all. */
        rc = 0;
        goto out;
    }

    name_len = snprintf(tmp_name, sizeof tmp_name, "%s_tmp", argv[1]);
    if (name_len < 0 || (size_t)name_len >= sizeof tmp_name) {
        printf("Path too long: %s\n", argv[1]);
        goto out;
    }

    out_fd = open(tmp_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
    if (out_fd < 0) {
        printf("Unable to open %s to write\n", tmp_name);
        goto out;
    }
    tmp_exists = 1;

    /* The mapping is only ever read, so request PROT_READ explicitly. */
    mem = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, in_fd, 0);
    if (mem == MAP_FAILED) {
        printf("Failed to map %s\n", argv[1]);
        goto out;
    }

    buffer = malloc(OUT_BUFFER_SIZE);
    if (buffer == NULL) {
        printf("Out of memory\n");
        goto out;
    }

    /* Skip over the byte order mark if we recognize it (UTF-8 only). */
    if (file_size >= 3 &&
        (unsigned char)mem[0] == 0xEF &&
        (unsigned char)mem[1] == 0xBB &&
        (unsigned char)mem[2] == 0xBF) {
        start = 3;
    }

    if (convert_crlf(mem + start, file_size - start, out_fd,
                     buffer, OUT_BUFFER_SIZE) != 0) {
        printf("Failed to write %s\n", tmp_name);
        goto out;
    }

    /* Close before renaming so a deferred write error is not missed. */
    {
        int close_rc = close(out_fd);
        out_fd = -1;
        if (close_rc != 0) {
            printf("Failed to write %s\n", tmp_name);
            goto out;
        }
    }

    if (rename(tmp_name, argv[1]) != 0) {
        printf("Failed to move tmp!\n");
        goto out;
    }
    tmp_exists = 0; /* the temporary file now is the input file */
    rc = 0;

out:
    free(buffer);
    if (mem != MAP_FAILED) {
        munmap(mem, file_size);
    }
    if (out_fd >= 0) {
        close(out_fd);
    }
    if (tmp_exists) {
        remove(tmp_name);
    }
    close(in_fd);
    return rc;
}