No Description
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mddiff.c 31KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176
  1. //
  2. // maildir diff (mddiff) computes the delta from an old status of a maildir
  3. // (previously recorded in a support file) and the current status, generating
  4. // a set of commands (a diff) that a third party software can apply to
  5. // synchronize a (remote) copy of the maildir.
  6. //
  7. // Absolutely no warranties, released under GNU GPL version 3 or at your
  8. // option any later version.
  9. // Copyright Enrico Tassi <gares@fettunta.org>
  10. #define _BSD_SOURCE
  11. #define _GNU_SOURCE
  12. #include <dirent.h>
  13. #include <sys/mman.h>
  14. #include <sys/stat.h>
  15. #include <stdio.h>
  16. #include <stdlib.h>
  17. #include <unistd.h>
  18. #include <sys/types.h>
  19. #include <limits.h>
  20. #include <errno.h>
  21. #include <string.h>
  22. #include <fcntl.h>
  23. #include <getopt.h>
  24. #include <fnmatch.h>
  25. #include <glib.h>
  26. #include "smd-config.h"
  // O_NOATIME is not available on every platform: when missing it becomes a
  // no-op flag, so it can always be OR-ed into the flags given to open(2)
  #ifndef O_NOATIME
  # define O_NOATIME 0
  #endif

  // C99 has a printf length modifier for size_t; older compilers print the
  // value as an unsigned long. The macro argument is always parenthesized so
  // that expressions such as SIZE_T_CAST(a * 2) are converted as a whole.
  #if __STDC_VERSION__ >= 199901L
  #define SIZE_T_FMT "%zu"
  #define SIZE_T_CAST(x) (x)
  #else
  #define SIZE_T_FMT "%lu"
  #define SIZE_T_CAST(x) ((unsigned long)(x))
  #endif

  #define STATIC static

  #define SHA_DIGEST_LENGTH 20

  // two-step stringification: the argument is macro-expanded before being
  // turned into a string literal
  #define __tostring(x) #x
  #define tostring(x) __tostring(x)

  // Reports a fatal error on stderr and, prefixed by "ERROR ", on stdout,
  // then terminates the program. The first variadic argument must be a
  // printf format string literal; all the arguments are evaluated twice.
  // The expansion is a braced block, no trailing ';' is required.
  #define ERROR(cause, ...) { \
      fprintf(stderr, "error [" tostring(cause) "]: " __VA_ARGS__);\
      fprintf(stdout, "ERROR " __VA_ARGS__);\
      exit(EXIT_FAILURE);\
      }

  // Reports a non fatal problem on stderr
  #define WARNING(cause, ...) \
      fprintf(stderr, "warning [" tostring(cause) "]: " __VA_ARGS__)

  // Debug output on stderr, printed only if the global 'verbose' is set.
  // Both expand to a bare 'if': take care when using them before an 'else'.
  #define VERBOSE(cause, ...) \
      if (verbose) fprintf(stderr,"debug [" tostring(cause) "]: " __VA_ARGS__)
  #define VERBOSE_NOH(...) \
      if (verbose) fprintf(stderr,__VA_ARGS__)

  // default numbers for static memory allocation
  // (plain integer literals: some of them are stringified with tostring)
  #define DEFAULT_FILENAME_LEN 100
  #define DEFAULT_MAIL_NUMBER 500000
  #define MAX_EMAIL_NAME_LEN 1024
  57. // int -> hex
  58. STATIC char hexalphabet[] =
  59. {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
  60. STATIC int hex2int(char c){
  61. switch(c){
  62. case '0': case '1': case '2': case '3': case '4':
  63. case '5': case '6': case '7': case '8': case '9': return c - '0';
  64. case 'a': case 'b': case 'c':
  65. case 'd': case 'e': case 'f': return c - 'a' + 10;
  66. }
  67. ERROR(hex2int,"Invalid hex character: %c\n",c);
  68. }
  69. // temporary buffers used to store sha1 sums in ASCII hex
  70. STATIC char tmpbuff_1[SHA_DIGEST_LENGTH * 2 + 1];
  71. STATIC char tmpbuff_2[SHA_DIGEST_LENGTH * 2 + 1];
  72. STATIC char tmpbuff_3[SHA_DIGEST_LENGTH * 2 + 1];
  73. STATIC char tmpbuff_4[SHA_DIGEST_LENGTH * 2 + 1];
  74. // temporary buffers used to URL encode mail names
  75. STATIC char tmpbuff_5[MAX_EMAIL_NAME_LEN];
  76. STATIC char tmpbuff_6[MAX_EMAIL_NAME_LEN];
  77. STATIC char* txtsha(unsigned char *sha1, char* outbuff){
  78. int fd;
  79. for (fd = 0; fd < 20; fd++){
  80. outbuff[fd*2] = hexalphabet[sha1[fd]>>4];
  81. outbuff[fd*2+1] = hexalphabet[sha1[fd]&0x0f];
  82. }
  83. outbuff[40] = '\0';
  84. return outbuff;
  85. }
  86. STATIC void shatxt(const char string[41], unsigned char outbuff[]) {
  87. int i;
  88. for(i=0; i < SHA_DIGEST_LENGTH; i++){
  89. outbuff[i] = hex2int(string[2*i]) * 16 + hex2int(string[2*i+1]);
  90. }
  91. }
  // URL encodes string into outbuff: ' ' becomes "%20" and '%' becomes "%25",
  // any other character is copied verbatim. outbuff must be
  // MAX_EMAIL_NAME_LEN bytes long; encoding silently stops when less than
  // 5 bytes of it are left. Returns outbuff, always NUL terminated.
  STATIC char* URLtxt(const char string[], char outbuff[]) {
      const char *src = string;
      size_t used = 0;

      while (*src != '\0' && used + 4 < MAX_EMAIL_NAME_LEN) {
          const char c = *src++;
          if (c == ' ' || c == '%') {
              // '%' followed by the two hex digits of the character code
              snprintf(&outbuff[used], 4, "%%%X", c);
              used += 3;
          } else {
              outbuff[used++] = c;
          }
      }
      outbuff[used] = '\0';
      return outbuff;
  }
  // value of a hexadecimal digit (either case), -1 if c is not a hex digit
  STATIC int txtURL_hexdigit(char c) {
      if (c >= '0' && c <= '9') return c - '0';
      if (c >= 'a' && c <= 'f') return c - 'a' + 10;
      if (c >= 'A' && c <= 'F') return c - 'A' + 10;
      return -1;
  }

  // URL decodes string into outbuff: every '%' followed by two hex digits is
  // replaced by the character having that code. A '%' that is not followed
  // by two hex digits is not an escape sequence and is copied verbatim,
  // together with what follows it. outbuff must be MAX_EMAIL_NAME_LEN bytes
  // long; decoding silently stops when less than 5 bytes of it are left.
  // Returns outbuff, always NUL terminated.
  STATIC char* txtURL(const char* string, char* outbuff) {
      size_t i,j;
      size_t len = strlen(string);

      for(i=0, j=0; i < len && j + 4 < MAX_EMAIL_NAME_LEN; i++, j++) {
          int high = -1, low = -1;

          // both digits must be inside the string to have an escape
          if (string[i] == '%' && i + 2 < len) {
              high = txtURL_hexdigit(string[i+1]);
              low = txtURL_hexdigit(string[i+2]);
          }
          if (high >= 0 && low >= 0) {
              outbuff[j] = (char)(high * 16 + low);
              i+=2; // the two digits have been consumed
          } else {
              outbuff[j] = string[i];
          }
      }
      outbuff[j] = '\0';
      return outbuff;
  }
  // 'what' is assigned 'to' only if it currently equals 'from'
  // ('what' is evaluated more than once)
  #define PROMOTE(what,from,to) ((what) = ((what) == (from)) ? (to) : (what))

  // flags used to mark every struct mail during the scan, so that at the end
  // of the scan the commands to output are chosen looking at that flag
  enum sight {
      SEEN     = 0,
      NOT_SEEN = 1,
      MOVED    = 2,
      CHANGED  = 3
  };

  // printable names, indexed by the value of enum sight
  STATIC char* sightalphabet[]={
      "SEEN",
      "NOT_SEEN",
      "MOVED",
      "CHANGED"
  };

  // printable name of s; s must be one of the enum sight values
  STATIC const char* strsight(enum sight s){
      const char* label = sightalphabet[s];
      return label;
  }
  132. // since the mails and names buffers may be reallocated,
  133. // hashtables cannot record pointers to a struct mail or char.
  134. // they record the offset w.r.t. the base pointer of the buffers.
  135. // we define a type for them, so that the compiler complains loudly
  136. typedef size_t name_t;
  137. typedef size_t mail_t;
  138. // mail metadata structure
  139. struct mail {
  140. unsigned char bsha[SHA_DIGEST_LENGTH]; // body hash value
  141. unsigned char hsha[SHA_DIGEST_LENGTH]; // header hash value
  142. name_t __name; // file name, do not use directly
  143. enum sight seen; // already seen?
  144. };
  145. // memory pool for mail file names
  146. STATIC char *names;
  147. STATIC name_t curname, max_curname, old_curname;
  148. // memory pool for mail metadata
  149. STATIC struct mail* mails;
  150. STATIC mail_t mailno, max_mailno;
  151. // hash tables for fast comparison of mails given their name/body-hash
  152. STATIC GHashTable *bsha2mail;
  153. STATIC GHashTable *filename2mail;
  154. STATIC time_t lastcheck;
  155. // program options
  156. STATIC int verbose;
  157. STATIC int dry_run;
  158. STATIC int only_list_subfolders;
  159. STATIC int only_generate_symlinks;
  160. STATIC int only_sha1sum_args;
  161. STATIC int only_mkdirp;
  162. STATIC int only_mkfifo;
  163. STATIC int n_excludes;
  164. STATIC char **excludes;
  165. STATIC int no_delete;
  166. STATIC int no_move;
  167. // ============================ helpers =====================================
  168. // mail da structure accessors
  169. STATIC struct mail* mail(mail_t mail_idx) {
  170. return &mails[mail_idx];
  171. }
  172. STATIC char* mail_name(mail_t mail_idx) {
  173. return &names[mails[mail_idx].__name];
  174. }
  175. STATIC void set_mail_name(mail_t mail_idx, name_t name) {
  176. mails[mail_idx].__name = name;
  177. }
  // predicates for assert_all_are: non zero if sb describes a directory,
  // respectively a regular file
  STATIC int directory(struct stat sb){
      return S_ISDIR(sb.st_mode);
  }

  STATIC int regular_file(struct stat sb){
      return S_ISREG(sb.st_mode);
  }
  181. // stats and asserts pred on argv[optind] ... argv[argc-optind]
  182. STATIC void assert_all_are(
  183. int(*predicate)(struct stat), char* description, char*argv[], int argc)
  184. {
  185. struct stat sb;
  186. int c, rc;
  187. VERBOSE(input, "Asserting all input paths are: %s\n", description);
  188. for(c = 0; c < argc; c++) {
  189. const char * argv_c = txtURL(argv[c], tmpbuff_5);
  190. rc = stat(argv_c, &sb);
  191. if (rc != 0) {
  192. ERROR(stat,"unable to stat %s\n",argv_c);
  193. } else if ( ! predicate(sb) ) {
  194. ERROR(stat,"%s in not a %s\n", argv_c,description);
  195. }
  196. VERBOSE(input, "%s is a %s\n", argv_c, description);
  197. }
  198. }
  199. #define ASSERT_ALL_ARE(what,v,c) assert_all_are(what,tostring(what),v,c)
  200. // open a file in read only mode trying to use O_NOATIME
  201. STATIC int open_rdonly_noatime(const char *fname) {
  202. int fd = open(fname, O_RDONLY | O_NOATIME);
  203. // if the file is not owned by the euid of the process, then
  204. // it cannot be opened using the O_NOATIME flag (man 2 open)
  205. if (fd == -1 && errno == EPERM) {
  206. fd = open(fname, O_RDONLY);
  207. }
  208. return fd;
  209. }
  // Looks for the first "\n\n" in the size bytes starting at addr.
  // Returns a pointer to the byte following it (possibly addr + size), or
  // NULL if there is no such sequence.
  STATIC unsigned char * find_endof_header(unsigned char *addr, size_t size) {
      size_t pos;

      // two bytes at least are needed for a match
      if (size < 2) return NULL;

      for (pos = 0; pos < size - 1; pos++) {
          if (addr[pos] == '\n' && addr[pos + 1] == '\n')
              return addr + pos + 2;
      }
      return NULL;
  }
  222. // =========================== memory allocator ============================
  223. STATIC mail_t alloc_mail(){
  224. mail_t m = mailno;
  225. mailno++;
  226. if (mailno >= max_mailno) {
  227. mails = realloc(mails, sizeof(struct mail) * max_mailno * 2);
  228. if (mails == NULL){
  229. ERROR(realloc,"allocation failed for " SIZE_T_FMT " mails\n",
  230. SIZE_T_CAST(max_mailno * 2));
  231. }
  232. max_mailno *= 2;
  233. }
  234. return m;
  235. }
  236. STATIC void dealloc_mail(){
  237. mailno--;
  238. }
  239. STATIC char *next_name(){
  240. return &names[curname];
  241. }
  242. STATIC name_t alloc_name(){
  243. name_t name = curname;
  244. size_t len = strlen(&names[name]);
  245. old_curname = curname;
  246. curname += len + 1;
  247. if (curname + MAX_EMAIL_NAME_LEN > max_curname) {
  248. names = realloc(names, max_curname * 2);
  249. max_curname *= 2;
  250. }
  251. return name;
  252. }
  253. STATIC void dealloc_name(){
  254. curname = old_curname;
  255. }
  256. // =========================== global variables setup ======================
  257. // convenience casts to be used with glib hashtables
  258. #define MAIL(t) ((mail_t)(t))
  259. #define GPTR(t) ((gpointer)(t))
  260. STATIC guint bsha_hash(gconstpointer key){
  261. mail_t m = MAIL(key);
  262. unsigned char * k = (unsigned char *) mail(m)->bsha;
  263. return k[0] + (k[1] << 8) + (k[2] << 16) + (k[3] << 24);
  264. }
  265. STATIC gboolean bsha_equal(gconstpointer k1, gconstpointer k2){
  266. mail_t m1 = MAIL(k1);
  267. mail_t m2 = MAIL(k2);
  268. if(!memcmp(mail(m1)->bsha,mail(m2)->bsha,SHA_DIGEST_LENGTH)) return TRUE;
  269. else return FALSE;
  270. }
  271. STATIC gboolean hsha_equal(gconstpointer k1, gconstpointer k2){
  272. mail_t m1 = MAIL(k1);
  273. mail_t m2 = MAIL(k2);
  274. if(!memcmp(mail(m1)->hsha,mail(m2)->hsha,SHA_DIGEST_LENGTH)) return TRUE;
  275. else return FALSE;
  276. }
  277. STATIC guint name_hash(gconstpointer key){
  278. mail_t m = MAIL(key);
  279. return g_str_hash(mail_name(m));
  280. }
  281. STATIC gboolean name_equal(gconstpointer k1, gconstpointer k2){
  282. mail_t m1 = MAIL(k1);
  283. mail_t m2 = MAIL(k2);
  284. return g_str_equal(mail_name(m1), mail_name(m2));
  285. }
  286. // wc -l, returning 0 on error
  287. STATIC unsigned long int wc_l(const char* dbfile){
  288. int unsigned long mno = 0;
  289. struct stat sb;
  290. unsigned char *addr, *next;
  291. int fd;
  292. if ((fd = open(dbfile, O_RDONLY | O_NOATIME)) == -1) goto err_open;
  293. if (fstat(fd, &sb) == -1) goto err_mmap;
  294. if ((addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
  295. == MAP_FAILED) goto err_mmap;
  296. for(next = addr; next < addr + sb.st_size; next++){
  297. if (*next == '\n') mno++;
  298. }
  299. munmap(addr, sb.st_size);
  300. close(fd);
  301. return mno;
  302. err_mmap:
  303. close(fd);
  304. err_open:
  305. return 0;
  306. }
  307. // setup memory pools and hash tables
  308. STATIC void setup_globals(
  309. const char *dbfile, unsigned long int mno, unsigned int fnlen){
  310. // we try to guess a reasonable number of email, to avoid asking the
  311. // allocator an unnnecessarily big chunk whose allocation may fail if there
  312. // is too few memory. We compute the number of entries in the db-file and
  313. // we add 1000 speculating not more than 1000 mails will be received.
  314. if (mno == 0){
  315. if ((mno = wc_l(dbfile)) == 0) mno = DEFAULT_MAIL_NUMBER;
  316. else mno += 1000;
  317. VERBOSE(setup_globals, "guessing we need space for %lu mails\n", mno);
  318. }
  319. // allocate space for mail metadata
  320. mails = malloc(sizeof(struct mail) * mno);
  321. if (mails == NULL) ERROR(malloc,"allocation failed for %lu mails\n",mno);
  322. mailno=1; // 0 is reserved for NULL
  323. max_mailno = mno;
  324. // allocate space for mail filenames
  325. names = malloc(mno * fnlen);
  326. if (names == NULL)
  327. ERROR(malloc, "memory allocation failed for " SIZE_T_FMT
  328. " mails with an average filename length of %u\n",
  329. SIZE_T_CAST(mailno),fnlen);
  330. curname=0;
  331. max_curname=mno * fnlen;
  332. // allocate hashtables for detection of already available mails
  333. bsha2mail = g_hash_table_new(bsha_hash,bsha_equal);
  334. if (bsha2mail == NULL) ERROR(bsha2mail,"hashtable creation failure\n");
  335. filename2mail = g_hash_table_new(name_hash,name_equal);
  336. if (filename2mail == NULL)
  337. ERROR(filename2mail,"hashtable creation failure\n");
  338. }
  339. // =========================== cache (de)serialization ======================
  340. // dump to file the mailbox status
  341. STATIC void save_db(const char* dbname, time_t timestamp){
  342. mail_t m;
  343. FILE * fd;
  344. char new_dbname[PATH_MAX];
  345. snprintf(new_dbname,PATH_MAX,"%s.new",dbname);
  346. fd = fopen(new_dbname,"w");
  347. if (fd == NULL) ERROR(fopen,"unable to save db file '%s'\n",new_dbname);
  348. for(m=1; m < mailno; m++){
  349. if (mail(m)->seen == SEEN) {
  350. fprintf(fd,"%s %s %s\n",
  351. txtsha(mail(m)->hsha,tmpbuff_1),
  352. txtsha(mail(m)->bsha,tmpbuff_2),
  353. mail_name(m));
  354. }
  355. }
  356. fclose(fd);
  357. snprintf(new_dbname,PATH_MAX,"%s.mtime.new",dbname);
  358. fd = fopen(new_dbname,"w");
  359. if (fd == NULL) ERROR(fopen,"unable to save db file '%s'\n",new_dbname);
  360. fprintf(fd,"%lu",timestamp);
  361. fclose(fd);
  362. }
  363. // load from disk a mailbox status and index mails with hashtables
  364. STATIC void load_db(const char* dbname){
  365. FILE* fd;
  366. int fields;
  367. int line=0;
  368. char new_dbname[PATH_MAX];
  369. snprintf(new_dbname,PATH_MAX,"%s.mtime",dbname);
  370. fd = fopen(new_dbname,"r");
  371. if (fd == NULL){
  372. WARNING(fopen,"unable to open db file '%s'\n",new_dbname);
  373. lastcheck = 0L;
  374. } else {
  375. fields = fscanf(fd,"%1$lu",&lastcheck);
  376. if (fields != 1)
  377. ERROR(fscanf,"malformed db file '%s', please remove it\n",
  378. new_dbname);
  379. fclose(fd);
  380. }
  381. fd = fopen(dbname,"r");
  382. if (fd == NULL) {
  383. WARNING(fopen,"unable to open db file '%s'\n",dbname);
  384. return;
  385. }
  386. for(;;) {
  387. // allocate a mail entry
  388. mail_t m = alloc_mail();
  389. // read one entry
  390. fields = fscanf(fd,
  391. "%1$40s %2$40s %3$" tostring(MAX_EMAIL_NAME_LEN) "[^\n]\n",
  392. tmpbuff_1, tmpbuff_2, next_name());
  393. line++;
  394. if (fields == EOF) {
  395. // deallocate mail entry
  396. dealloc_mail();
  397. break;
  398. }
  399. // sanity checks
  400. if (fields != 3)
  401. ERROR(fscanf, "%s: malformed line %d: %d != 3 fields."
  402. " Please remove this db file.\n", dbname, line, fields);
  403. shatxt(tmpbuff_1, mail(m)->hsha);
  404. shatxt(tmpbuff_2, mail(m)->bsha);
  405. // allocate a name string
  406. set_mail_name(m,alloc_name());
  407. // not seen file, may be deleted
  408. mail(m)->seen=NOT_SEEN;
  409. // store it in the hash tables
  410. g_hash_table_insert(bsha2mail,GPTR(m),
  411. g_slist_prepend(g_hash_table_lookup(bsha2mail,GPTR(m)),GPTR(m)));
  412. g_hash_table_insert(filename2mail,GPTR(m),GPTR(m));
  413. }
  414. fclose(fd);
  415. }
  416. // =============================== commands ================================
  // The commands are printed on stdout, one per line. m and n are mail
  // indexes; file names are always URL encoded, so that a name never
  // contains a space, and hashes are printed in hex. The macros use the
  // tmpbuff_* scratch buffers.

  // not a command: the mail is untouched (debug output only)
  #define COMMAND_SKIP(m) \
      VERBOSE(skip,"%s\n",mail_name(m))

  #define COMMAND_ADD(m) \
      fprintf(stdout,"ADD %s %s %s\n", URLtxt(mail_name(m),tmpbuff_5),\
          txtsha(mail(m)->hsha,tmpbuff_1),\
          txtsha(mail(m)->bsha, tmpbuff_2))

  #define COMMAND_COPY(m,n) \
      fprintf(stdout, "COPY %s %s %s TO %s\n", URLtxt(mail_name(m),tmpbuff_5),\
          txtsha(mail(m)->hsha, tmpbuff_1),\
          txtsha(mail(m)->bsha, tmpbuff_2),\
          URLtxt(mail_name(n),tmpbuff_6))

  #define COMMAND_MOVE(m,n) \
      fprintf(stdout, "MOVE %s %s %s TO %s\n", URLtxt(mail_name(m),tmpbuff_5),\
          txtsha(mail(m)->hsha, tmpbuff_1),\
          txtsha(mail(m)->bsha, tmpbuff_2),\
          URLtxt(mail_name(n),tmpbuff_6))

  #define COMMAND_COPYBODY(m,n) \
      fprintf(stdout, "COPYBODY %s %s TO %s %s\n",\
          URLtxt(mail_name(m),tmpbuff_5),txtsha(mail(m)->bsha, tmpbuff_1),\
          URLtxt(mail_name(n),tmpbuff_6),txtsha(mail(n)->hsha, tmpbuff_2))

  #define COMMAND_DELETE(m) \
      fprintf(stdout,"DELETE %s %s %s\n", URLtxt(mail_name(m),tmpbuff_5), \
          txtsha(mail(m)->hsha, tmpbuff_1), txtsha(mail(m)->bsha, tmpbuff_2))

  #define COMMAND_REPLACE(m,n) \
      fprintf(stdout, "REPLACE %s %s %s WITH %s %s\n",\
          URLtxt(mail_name(m),tmpbuff_5),txtsha(mail(m)->hsha,tmpbuff_1),\
          txtsha(mail(m)->bsha,tmpbuff_2),\
          txtsha(mail(n)->hsha,tmpbuff_3),txtsha(mail(n)->bsha,tmpbuff_4))

  #define COMMAND_REPLACE_HEADER(m,n) \
      fprintf(stdout, "REPLACEHEADER %s %s %s WITH %s\n",\
          URLtxt(mail_name(m),tmpbuff_5),txtsha(mail(m)->hsha,tmpbuff_1),\
          txtsha(mail(m)->bsha,tmpbuff_2), \
          txtsha(mail(n)->hsha,tmpbuff_3))
  450. STATIC int is_old_file_still_there(const char* file){
  451. int fd;
  452. struct stat sb;
  453. mail_t alias, m;
  454. m = alloc_mail();
  455. snprintf(next_name(), MAX_EMAIL_NAME_LEN,"%s",file);
  456. set_mail_name(m,alloc_name());
  457. fd = open_rdonly_noatime(mail_name(m));
  458. if (fd == -1) {
  459. goto err_alloc_cleanup;
  460. }
  461. if (fstat(fd, &sb) == -1) {
  462. goto err_alloc_fd_cleanup;
  463. }
  464. alias = MAIL(g_hash_table_lookup(filename2mail,GPTR(m)));
  465. if (alias != 0 && lastcheck >= sb.st_mtime) {
  466. // we cache that it has been seen already
  467. mail(alias)->seen=SEEN;
  468. close(fd);
  469. dealloc_name();
  470. dealloc_mail();
  471. return 1;
  472. }
  473. err_alloc_fd_cleanup:
  474. close(fd);
  475. err_alloc_cleanup:
  476. dealloc_name();
  477. dealloc_mail();
  478. return 0;
  479. }
  480. // the heart
  481. STATIC void analyze_file(const char* dir,const char* file) {
  482. unsigned char *addr,*next;
  483. int fd;
  484. struct stat sb;
  485. mail_t alias, m;
  486. GChecksum* ctx;
  487. gsize ctx_len;
  488. GSList *bodyaliases = NULL, *bodyaliases_orig = NULL;
  489. m = alloc_mail();
  490. snprintf(next_name(), MAX_EMAIL_NAME_LEN,"%s/%s",dir,file);
  491. set_mail_name(m,alloc_name());
  492. fd = open_rdonly_noatime(mail_name(m));
  493. if (fd == -1) {
  494. WARNING(open,"unable to open file '%s': %s\n", mail_name(m),
  495. strerror(errno));
  496. WARNING(open,"ignoring '%s'\n", mail_name(m));
  497. goto err_alloc_cleanup;
  498. }
  499. if (fstat(fd, &sb) == -1) {
  500. WARNING(fstat,"unable to stat file '%s'\n",mail_name(m));
  501. goto err_alloc_cleanup;
  502. }
  503. alias = MAIL(g_hash_table_lookup(filename2mail,GPTR(m)));
  504. // check if the cache lists a file with the same name and the same
  505. // mtime. If so, this is an old, untouched, message we can skip
  506. if (alias != 0 && lastcheck > sb.st_mtime) {
  507. mail(alias)->seen=SEEN;
  508. COMMAND_SKIP(alias);
  509. goto err_alloc_fd_cleanup;
  510. }
  511. addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  512. if (addr == MAP_FAILED){
  513. if (sb.st_size == 0)
  514. // empty file, we do not consider them emails
  515. goto err_alloc_fd_cleanup;
  516. else
  517. // mmap failed
  518. ERROR(mmap, "unable to load '%s'\n",mail_name(m));
  519. }
  520. next = find_endof_header(addr, sb.st_size);
  521. if ( next == NULL ) {
  522. WARNING(parse, "malformed file '%s', no header\n",mail_name(m));
  523. munmap(addr, sb.st_size);
  524. goto err_alloc_fd_cleanup;
  525. }
  526. // calculate sha1
  527. ctx = g_checksum_new(G_CHECKSUM_SHA1);
  528. ctx_len = SHA_DIGEST_LENGTH;
  529. g_checksum_update(ctx, addr, next - addr);
  530. g_checksum_get_digest(ctx, mail(m)->hsha, &ctx_len);
  531. g_checksum_free(ctx);
  532. ctx = g_checksum_new(G_CHECKSUM_SHA1);
  533. ctx_len = SHA_DIGEST_LENGTH;
  534. g_checksum_update(ctx, next, sb.st_size - (next - addr));
  535. g_checksum_get_digest(ctx, mail(m)->bsha, &ctx_len);
  536. g_checksum_free(ctx);
  537. munmap(addr, sb.st_size);
  538. close(fd);
  539. if (alias != 0) {
  540. if(bsha_equal(GPTR(alias),GPTR(m))) {
  541. if (hsha_equal(GPTR(alias), GPTR(m))) {
  542. mail(alias)->seen = SEEN;
  543. goto err_alloc_fd_cleanup;
  544. } else {
  545. COMMAND_REPLACE_HEADER(alias,m);
  546. mail(m)->seen=SEEN;
  547. mail(alias)->seen=CHANGED;
  548. return;
  549. }
  550. } else {
  551. COMMAND_REPLACE(alias,m);
  552. mail(m)->seen=SEEN;
  553. mail(alias)->seen=CHANGED;
  554. return;
  555. }
  556. }
  557. bodyaliases_orig = bodyaliases = g_hash_table_lookup(bsha2mail,GPTR(m));
  558. // some messages with the same body are there
  559. if (bodyaliases != NULL) {
  560. mail_t firstalias = MAIL(bodyaliases->data);
  561. for(; bodyaliases != NULL; bodyaliases = g_slist_next(bodyaliases)) {
  562. mail_t bodyalias = MAIL(bodyaliases->data);
  563. if (hsha_equal(GPTR(bodyalias), GPTR(m))) {
  564. // this one has the same header too
  565. // absurd, see the else case
  566. g_assert(mail(bodyalias)->seen != MOVED || no_move);
  567. if (mail(bodyalias)->seen == SEEN ||
  568. is_old_file_still_there(mail_name(bodyalias)) ||
  569. no_move) {
  570. // a real copy
  571. COMMAND_COPY(bodyalias,m);
  572. PROMOTE(mail(bodyalias)->seen, NOT_SEEN, MOVED);
  573. mail(m)->seen=SEEN;
  574. } else {
  575. // a real move
  576. COMMAND_MOVE(bodyalias,m);
  577. // the new file is the source for such body so that if the
  578. // file was copied twice and then removed we generate a
  579. // MOVE x -> y and a COPY y -> z
  580. g_hash_table_insert(bsha2mail,GPTR(m),
  581. g_slist_prepend(bodyaliases_orig,GPTR(m)));
  582. mail(bodyalias)->seen=MOVED;
  583. mail(m)->seen=SEEN;
  584. }
  585. return;
  586. }
  587. }
  588. // no full alias, we just recycle the body
  589. COMMAND_COPYBODY(firstalias,m);
  590. mail(m)->seen=SEEN;
  591. return;
  592. }
  593. // we should add that file
  594. COMMAND_ADD(m);
  595. mail(m)->seen=SEEN;
  596. return;
  597. // error handlers, status cleanup
  598. err_alloc_fd_cleanup:
  599. close(fd);
  600. err_alloc_cleanup:
  601. dealloc_name();
  602. dealloc_mail();
  603. }
  604. // recursively analyze a directory and its sub-directories
  605. STATIC void analyze_dir(const char* path){
  606. DIR* dir;
  607. struct dirent *dir_entry;
  608. int inside_cur_or_new = 0;
  609. int i, rc;
  610. // skip excluded paths
  611. for(i = 0; i < n_excludes; i++){
  612. if ( (rc = fnmatch(excludes[i], path, 0)) == 0 ) {
  613. VERBOSE(analyze_dir,
  614. "skipping '%s' because excluded by pattern '%s'\n",
  615. path, excludes[i]);
  616. return;
  617. }
  618. if ( rc != FNM_NOMATCH ){
  619. ERROR(fnmatch,"processing pattern '%s': %s",excludes[i],
  620. strerror(errno))
  621. }
  622. }
  623. // detect if inside cur/ or new/
  624. #ifdef __GLIBC__
  625. const char* bname = basename(path);
  626. #else
  627. gchar* bname = g_path_get_basename(path);
  628. #endif
  629. if ( !strcmp(bname,"cur") || !strcmp(bname,"new") ) {
  630. inside_cur_or_new = 1;
  631. if ( only_list_subfolders ) {
  632. fprintf(stdout, "%s\n", path);
  633. return;
  634. }
  635. }
  636. #ifndef __GLIBC__
  637. g_free(bname);
  638. #endif
  639. dir = opendir(path);
  640. if (dir == NULL) ERROR(opendir, "Unable to open directory '%s'\n", path);
  641. while ( (dir_entry = readdir(dir)) != NULL) {
  642. if (DT_REG == dir_entry->d_type) {
  643. if ( inside_cur_or_new && !only_list_subfolders ) {
  644. analyze_file(path,dir_entry->d_name);
  645. } else {
  646. VERBOSE(analyze_dir,"skipping '%s/%s', outside maildir\n",
  647. path,dir_entry->d_name);
  648. }
  649. } else if ((DT_DIR == dir_entry->d_type ||
  650. DT_LNK == dir_entry->d_type) &&
  651. strcmp(dir_entry->d_name,"tmp") &&
  652. strcmp(dir_entry->d_name,".") &&
  653. strcmp(dir_entry->d_name,"..")){
  654. int len = strlen(path) + 1 + strlen(dir_entry->d_name) + 1;
  655. char * newdir = malloc(len);
  656. snprintf(newdir,len,"%s/%s",path,dir_entry->d_name);
  657. analyze_dir(newdir);
  658. free(newdir);
  659. }
  660. }
  661. closedir(dir);
  662. }
  663. STATIC void analyze_dirs(char* paths[], int no){
  664. int i;
  665. for(i=0; i<no; i++){
  666. // we remove a trailing '/' if any
  667. char *data = strdup(txtURL(paths[i],tmpbuff_5));
  668. if (data[strlen(data)-1] == '/') data[strlen(data)-1] = '\0';
  669. analyze_dir(data);
  670. free(data);
  671. }
  672. }
  673. // at the end of the analysis phase, look at the mails data structure to
  674. // identify mails that are not available anymore and should be removed
  675. STATIC void generate_deletions(){
  676. size_t m;
  677. for(m=1; m < mailno; m++){
  678. if (!no_delete &&
  679. (mail(m)->seen == NOT_SEEN || (no_move && mail(m)->seen == MOVED)))
  680. // normally moved or removed mails are deleted
  681. COMMAND_DELETE(m);
  682. else if (no_delete && no_move && mail(m)->seen == MOVED)
  683. // if --no-delete only moved mails should be deleted
  684. COMMAND_DELETE(m);
  685. else
  686. VERBOSE(seen,"STATUS OF %s %s %s IS %s\n",
  687. mail_name(m),txtsha(mail(m)->hsha,tmpbuff_1),
  688. txtsha(mail(m)->bsha,tmpbuff_2),strsight(mail(m)->seen));
  689. }
  690. }
  // removes one trailing '\n', if any, modifying the string in place
  STATIC void rm_trailing_n(char *src_name){
      char *last;

      if (*src_name == '\0') return;

      last = src_name + strlen(src_name) - 1;
      if (*last == '\n') *last = '\0';
  }
  696. STATIC void extra_sha_file(const char* file) {
  697. unsigned char *addr,*next;
  698. int fd;
  699. struct stat sb;
  700. gchar* sha1;
  701. fd = open_rdonly_noatime(file);
  702. if (fd == -1) ERROR(open,"unable to open file '%s'\n",file);
  703. if (fstat(fd, &sb) == -1) ERROR(fstat,"unable to stat file '%s'\n",file);
  704. if (! S_ISREG(sb.st_mode)) {
  705. ERROR(fstat,"not a regular file '%s'\n",file);
  706. }
  707. addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  708. if (addr == MAP_FAILED) ERROR(mmap, "unable to load '%s'\n",file);
  709. next = find_endof_header(addr, sb.st_size);
  710. if ( next == NULL ) ERROR(parse, "malformed file '%s', no header\n",file);
  711. // calculate sha1
  712. fprintf(stdout, "%s ",
  713. sha1 = g_compute_checksum_for_data(G_CHECKSUM_SHA1, addr, next - addr));
  714. g_free(sha1);
  715. fprintf(stdout, "%s\n",
  716. sha1 = g_compute_checksum_for_data(G_CHECKSUM_SHA1,
  717. next, sb.st_size - (next - addr)));
  718. g_free(sha1);
  719. munmap(addr, sb.st_size);
  720. close(fd);
  721. }
  722. STATIC void extra_mkdir_ln(char* src_name, char* tgt_name) {
  723. gchar* dir_tgt = g_path_get_dirname(tgt_name);
  724. if ( g_mkdir_with_parents(dir_tgt, 0770) ){
  725. ERROR(mkdir,"unable to create dir %s: %s\n",
  726. dir_tgt, strerror(errno));
  727. exit(EXIT_FAILURE);
  728. }
  729. if ( symlink(src_name, tgt_name) != 0 ){
  730. ERROR(symlink,"unable to symlink %s to %s: %s\n",
  731. src_name, tgt_name, strerror(errno));
  732. exit(EXIT_FAILURE);
  733. }
  734. fprintf(stdout,"OK\n");
  735. g_free(dir_tgt);
  736. }
  737. STATIC void extra_sha1sum_file(const char* file) {
  738. unsigned char *addr;
  739. int fd;
  740. struct stat sb;
  741. gchar* sha1;
  742. fd = open_rdonly_noatime(file);
  743. if (fd == -1) ERROR(open,"unable to open file '%s'\n",file);
  744. if (fstat(fd, &sb) == -1) ERROR(fstat,"unable to stat file '%s'\n",file);
  745. addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  746. if (addr == MAP_FAILED){
  747. if (sb.st_size == 0)
  748. // empty file
  749. ;
  750. else
  751. // mmap failed
  752. ERROR(mmap, "unable to load '%s'\n",file);
  753. }
  754. // calculate sha1
  755. fprintf(stdout, "%s %s\n",
  756. sha1 = g_compute_checksum_for_data(G_CHECKSUM_SHA1, addr, sb.st_size),
  757. file);
  758. g_free(sha1);
  759. if (addr != MAP_FAILED) munmap(addr, sb.st_size);
  760. close(fd);
  761. }
  762. // ============================ main =====================================
  // Codes returned by getopt_long() for the long options that have no
  // single-character form. All values are larger than any single char, so
  // they cannot be confused with the short option letters.
  enum {
      OPT_MAX_MAILNO = 300,
      OPT_DB_FILE    = 301,
      OPT_EXCLUDE    = 302,
      OPT_SHA1SUM    = 303,
      OPT_MKDIRP     = 304,
      OPT_MKFIFO     = 305,
      OPT_NOMOVE     = 306
  };
  770. // command line options
  771. STATIC struct option long_options[] = {
  772. {"max-mailno", required_argument, NULL, OPT_MAX_MAILNO},
  773. {"db-file" , required_argument, NULL, OPT_DB_FILE},
  774. {"exclude" , required_argument, NULL, OPT_EXCLUDE},
  775. {"sha1sum" , no_argument , NULL, OPT_SHA1SUM},
  776. {"mkdir-p" , no_argument , NULL, OPT_MKDIRP},
  777. {"mkfifo" , no_argument , NULL, OPT_MKFIFO},
  778. {"list" , no_argument , NULL, 'l'},
  779. {"symlink" , no_argument , NULL, 's'},
  780. {"verbose" , no_argument , NULL, 'v'},
  781. {"dry-run" , no_argument , NULL, 'd'},
  782. {"no-delete" , no_argument , NULL, 'n'},
  783. {"no-move" , no_argument , NULL, OPT_NOMOVE},
  784. {"help" , no_argument , NULL, 'h'},
  785. {NULL , no_argument , NULL, 0},
  786. };
  787. // command line options documentation
  788. STATIC const char* long_options_doc[] = {
  789. " number Estimation of max mail message number (defaults to the"
  790. "\n "
  791. "number of messages in the db-file + 1000 or "
  792. tostring(DEFAULT_MAIL_NUMBER)
  793. "\n "
  794. "if there is no db-file). You may want to decrease it"
  795. "\n "
  796. "for the first run on small systems. It is anyway"
  797. "\n "
  798. "increased automatically when needed",
  799. "path Name of the cache for the endpoint (default db.txt)",
  800. "glob Exclude paths matching the given glob expression",
  801. "behave as sha1sum",
  802. "behave as mkdir -p",
  803. "behave as mkfifo",
  804. "Only list subfolders (short -l)",
  805. "Symbolic Link generation mode (short -s)",
  806. "Increase program verbosity (printed on stderr, short -v)",
  807. "Do not generate a new db file (short -d)",
  808. "Do not track deletions (short -n)",
  809. "This help screen",
  810. NULL
  811. };
  812. // print help and bail out
  813. STATIC void help(char* argv0){
  814. int i;
  815. char *bname = g_path_get_basename(argv0);
  816. fprintf(stdout,"\nUsage: %s [options] (paths...|fifo)\n",bname);
  817. for (i=0;long_options[i].name != NULL;i++) {
  818. if ( long_options[i].has_arg == required_argument )
  819. fprintf(stdout," --%-8s%s\n",
  820. long_options[i].name,long_options_doc[i]);
  821. else
  822. fprintf(stdout," --%-18s%s\n",
  823. long_options[i].name,long_options_doc[i]);
  824. }
  825. fprintf(stdout,"\n\
  826. If paths is a single fifo, %s reads from it file names and outputs the\n\
  827. sha1 of their header and body separated by space.\n\n\
  828. If paths is a list of directories, %s outputs a list of actions a client\n\
  829. has to perform to syncronize a copy of the same maildirs. This set of actions\n\
  830. is relative to a previous status of the maildir stored in the db file.\n\
  831. The input directories are traversed recursively, and every file encountered\n\
  832. inside directories named cur/ and new/ is a potential mail message (if it\n\
  833. contains no \\n\\n it is skipped).\n\n\
  834. Every client must use a different db-file, and the db-file is strictly\n\
  835. related with the set of directories given as arguments, and should not\n\
  836. be used with a different directory set. Adding items to the directory\n\
  837. set is safe, while removing them may not do what you want (delete actions\n\
  838. are generated).\n\n", bname, bname);
  839. fprintf(stdout, "Copyright %s\n",SMD_CONF_COPYRIGHT);
  840. fprintf(stdout, "Version %s, ",SMD_CONF_VERSION);
  841. fprintf(stdout, "released under the terms of GPLv3, no waranties\n\n");
  842. }
  843. int main(int argc, char *argv[]) {
  844. char *data;
  845. char *dbfile="db.txt";
  846. unsigned long int mailno = 0;
  847. unsigned int filenamelen = DEFAULT_FILENAME_LEN;
  848. struct stat sb;
  849. int c = 0;
  850. int option_index = 0;
  851. time_t bigbang;
  852. glib_check_version(2,16,0);
  853. g_assert(MAIL(NULL) == 0);
  854. g_assert(GPTR(0) == NULL);
  855. g_assert(MAIL(GPTR(1)) == 1);
  856. for(;;) {
  857. c = getopt_long(argc, argv, "vhndls", long_options, &option_index);
  858. if (c == -1) break; // no more args
  859. switch (c) {
  860. case OPT_MAX_MAILNO:
  861. mailno = strtoul(optarg,NULL,10);
  862. break;
  863. case OPT_DB_FILE:
  864. dbfile = strdup(optarg);
  865. break;
  866. case OPT_EXCLUDE:
  867. excludes = realloc(excludes, sizeof(char*) * (n_excludes + 1));
  868. excludes[n_excludes] = strdup(txtURL(optarg,tmpbuff_5));
  869. n_excludes++;
  870. break;
  871. case OPT_SHA1SUM:
  872. only_sha1sum_args = 1;
  873. break;
  874. case OPT_MKDIRP:
  875. only_mkdirp = 1;
  876. break;
  877. case OPT_MKFIFO:
  878. only_mkfifo = 1;
  879. break;
  880. case OPT_NOMOVE:
  881. no_move = 1;
  882. break;
  883. case 'v':
  884. verbose = 1;
  885. break;
  886. case 'd':
  887. dry_run = 1;
  888. break;
  889. case 'n':
  890. no_delete = 1;
  891. break;
  892. case 'l':
  893. only_list_subfolders = 1;
  894. break;
  895. case 's':
  896. only_generate_symlinks = 1;
  897. break;
  898. case 'h':
  899. help(argv[0]);
  900. exit(EXIT_SUCCESS);
  901. break;
  902. default:
  903. help(argv[0]);
  904. exit(EXIT_FAILURE);
  905. break;
  906. }
  907. }
  908. if (optind >= argc) {
  909. help(argv[0]);
  910. exit(EXIT_FAILURE);
  911. }
  912. if ( only_mkdirp ) {
  913. int i;
  914. for (i = optind; i < argc; i++) {
  915. if ( g_mkdir_with_parents(argv[i], 0770) ) {
  916. ERROR(mkdir,"unable to create dir %s: %s\n", argv[i],
  917. strerror(errno));
  918. exit(EXIT_FAILURE);
  919. }
  920. }
  921. exit(EXIT_SUCCESS);
  922. }
  923. if ( only_sha1sum_args ) {
  924. int i;
  925. ASSERT_ALL_ARE(regular_file, &argv[optind], argc - optind);
  926. for (i = optind; i < argc; i++) {
  927. extra_sha1sum_file(argv[i]);
  928. }
  929. exit(EXIT_SUCCESS);
  930. }
  931. if ( only_mkfifo ) {
  932. int i;
  933. for (i = optind; i < argc; i++) {
  934. if ( mknod(argv[i], 0600 | S_IFIFO, 0) ) {
  935. ERROR(mknod,"Unable to create fifo %s: %s\n", argv[i],
  936. strerror(errno));
  937. exit(EXIT_FAILURE);
  938. }
  939. }
  940. exit(EXIT_SUCCESS);
  941. }
  942. // remaining args is the dirs containing the data or the files to hash
  943. data = strdup(txtURL(argv[optind],tmpbuff_5));
  944. // check if data is a directory or a regular file
  945. c = stat(data, &sb);
  946. if (c != 0) ERROR(stat,"unable to stat %s\n",data);
  947. if ( S_ISFIFO(sb.st_mode) && argc - optind == 1){
  948. FILE *in = fopen(data,"r");
  949. if (in == NULL) {
  950. ERROR(fopen,"unable to open fifo %s\n",data);
  951. exit(EXIT_FAILURE);
  952. }
  953. if ( only_generate_symlinks ) {
  954. /* symlink */
  955. char src_name[MAX_EMAIL_NAME_LEN];
  956. char tgt_name[MAX_EMAIL_NAME_LEN];
  957. while (!feof(in)) {
  958. if(fgets(src_name,MAX_EMAIL_NAME_LEN,in) != NULL &&
  959. fgets(tgt_name,MAX_EMAIL_NAME_LEN,in) != NULL) {
  960. rm_trailing_n(src_name);
  961. rm_trailing_n(tgt_name);
  962. extra_mkdir_ln(src_name, tgt_name);
  963. fflush(stdout);
  964. }
  965. }
  966. } else {
  967. /* sha1 mail */
  968. char name[MAX_EMAIL_NAME_LEN];
  969. while (!feof(in)) {
  970. if(fgets(name,MAX_EMAIL_NAME_LEN,in) != NULL){
  971. rm_trailing_n(name);
  972. extra_sha_file(name);
  973. fflush(stdout);
  974. }
  975. }
  976. }
  977. exit(EXIT_SUCCESS);
  978. } else if ( ! S_ISDIR(sb.st_mode) ) {
  979. ERROR(stat, "given path is not a fifo nor a directory: %s\n",data);
  980. }
  981. free(data);
  982. // regular case, hash the content of maildirs rooted in the
  983. // list of directories specified at command line
  984. ASSERT_ALL_ARE(directory, &argv[optind], argc - optind);
  985. if ( only_list_subfolders ) {
  986. analyze_dirs(&argv[optind], argc - optind);
  987. exit(EXIT_SUCCESS);
  988. }
  989. // allocate memory
  990. setup_globals(dbfile, mailno, filenamelen);
  991. load_db(dbfile);
  992. bigbang = time(NULL);
  993. analyze_dirs(&argv[optind], argc - optind);
  994. generate_deletions();
  995. if (!dry_run) save_db(dbfile, bigbang);
  996. exit(EXIT_SUCCESS);
  997. }
  998. // vim:set ts=4: