/*
 * Copyright 2017 Yann Weber
 *
 * This file is part of Ttail.
 *
 * Ttail is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * Ttail is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Ttail. If not, see <http://www.gnu.org/licenses/>.
 */
#include "ttail_search_files.h"

/*
 * Locate, among all opened logfiles, the positions matching the requested
 * date-min and/or date-max and store them in the file search session.
 *
 * For each opened file the first and last dates are collected, the file
 * list is passed to _ttail_file_sort(), then one search is run per
 * requested bound (t->flag & TTAIL_FLAG_DATE_MIN / TTAIL_FLAG_DATE_MAX).
 * A file in which no valid date can be found is closed and its slot in
 * t->logfile set to NULL; this is fatal unless ttail_permissive(t).
 *
 * Returns -1 on error. On success returns the status of the last step
 * that was run (0 after a successful date search).
 */
int _ttail_search_closest_files(ttail_t* t)
{
	int ret, status;
	size_t i;
	struct tm **ftm; /* t->logfile_sz entries : NULL or a struct tm[2] */

	status = -1;
	/* Zeroed so that the cleanup below can free every slot blindly.
	 * At least one slot is requested so that an empty file list is not
	 * mistaken for an allocation failure. */
	ftm = calloc(t->logfile_sz ? t->logfile_sz : 1, sizeof(*ftm));
	if(!ftm)
	{
		perror("Unable to allocate memory");
		return -1;
	}

	/* Storing min & max of each file in ftm */
	ret = 0; /* to avoid may be used uninitialized error */
	for(i=0; i<t->logfile_sz; i++)
	{
		if(!t->logfile[i])
		{
			continue; /* ftm[i] is already NULL */
		}
		ftm[i] = malloc(sizeof(struct tm)*2);
		if(!ftm[i])
		{
			perror("Unable to allocate memory for time");
			goto _ttail_search_closest_files_out;
		}
		ret = _ttail_file_minmax(t, i, ftm[i]);
		if(ret < 0)
		{
			goto _ttail_search_closest_files_out;
		}
		else if(ret == 1)
		{
			/* no usable date in this file : drop it */
			fprintf(stderr, "Error : unable to find a valid date in '%s'\n",
				t->logfile_name[i]);
			free(ftm[i]);
			ftm[i] = NULL;
			fclose(t->logfile[i]);
			t->logfile[i] = NULL;
			if(!ttail_permissive(t))
			{
				ttail_strict_msg();
				goto _ttail_search_closest_files_out;
			}
		}
	}

	/* checking that files are sorted well */
	if(_ttail_file_sort(t, ftm) < 0)
	{
		goto _ttail_search_closest_files_out;
	}

	if(t->flag & TTAIL_FLAG_DATE_MIN)
	{
		ret = _ttail_search_files_binary_search(t, &(t->date_min),
			(const struct tm**)ftm, 1);
		if(ret)
		{
			if(t->verbose > 2)
			{
				fprintf(stderr, "Error while looking for date-min\n");
			}
			goto _ttail_search_closest_files_out;
		}
	}

	/* the previous search moved the streams : rewind all of them */
	for(i=0; i<t->logfile_sz; i++)
	{
		if(!t->logfile[i])
		{
			continue;
		}
		if(fseek(t->logfile[i], 0, SEEK_SET) < 0)
		{
			perror("Error setting position in file");
			goto _ttail_search_closest_files_out;
		}
	}

	if(t->flag & TTAIL_FLAG_DATE_MAX)
	{
		ret = _ttail_search_files_binary_search(t, &(t->date_max),
			(const struct tm**)ftm, 0);
		if(ret)
		{
			if(t->verbose > 2)
			{
				fprintf(stderr, "Error while looking for date-max\n");
			}
			goto _ttail_search_closest_files_out;
		}
		t->session->file.off_max.off++;
	}

	status = ret;

_ttail_search_closest_files_out:
	for(i=0; i<t->logfile_sz; i++)
	{
		free(ftm[i]);
	}
	free(ftm);
	return status;
}

/*
 * Write the selected part of the logfiles on the file descriptor out.
 *
 * Output starts in file t->session->file.off_min.id (0 if no date-min
 * was given). When a date-max is set, files are read line by line with
 * ttail_file_getline() and output stops once the position reaches
 * t->session->file.off_max ; otherwise files are copied by chunks with
 * read()/write() directly on the underlying descriptor.
 */
void _ttail_search_print_files(ttail_t* t, int out)
{
	size_t i;
	int fd;
	char buf[8192];
	int r;
	off_t cur_off, start_off;

	/* if no date_min t->session->file.id == 0 */
	for(i=t->session->file.off_min.id; i<t->logfile_sz; i++)
	{
		if(!t->logfile[i])
		{
			continue;
		}
		fd = fileno(t->logfile[i]);

		/* only the first printed file starts in the middle */
		start_off = 0;
		if((t->flag & TTAIL_FLAG_DATE_MIN) &&
			i == t->session->file.off_min.id)
		{
			start_off = t->session->file.off_min.off;
		}
		if(t->flag & TTAIL_FLAG_DATE_MAX)
		{
			fseek(t->logfile[i], start_off, SEEK_SET);
		}
		else
		{
			lseek(fd, start_off, SEEK_SET);
		}

		/* cur_off is compared with off_max.off, which is an offset
		 * from the beginning of the file : it has to start from the
		 * position we just moved to, not from 0. */
		cur_off = start_off;
		while(1)
		{
			if(t->flag & TTAIL_FLAG_DATE_MAX)
			{
				if(i >= t->session->file.off_max.id &&
					cur_off >= t->session->file.off_max.off)
				{
					return;
				}
				if((r = ttail_file_getline(t, i)) < 0)
				{
					break;
				}
				cur_off += r;
				r = write(out, ttail_file_getline_buf(t),
					strlen(ttail_file_getline_buf(t)));
				if(r == -1)
				{
					perror("unable to write");
					return;
				}
			}
			else
			{
				r = read(fd, buf, sizeof(buf));
				if(r == -1)
				{
					perror("unable to read file");
					return;
				}
				else if(r == 0)
				{
					break;
				}
				cur_off += r;
				r = write(out, buf, r);
				if(r == -1 || r == 0)
				{
					perror("Unable to write result");
					return;
				}
			}
		}
	}
}

/*
 * Find the file and the offset matching the date in.
 *
 * ftm holds, for each file, NULL or a struct tm[2] with its first and
 * last date. The result is stored in t->session->file.off_min when min
 * is not zero, in t->session->file.off_max otherwise (both id and off
 * are reset to 0 first).
 *
 * Returns 0 on success (including when there is no file to scan),
 * a non-zero value on error.
 */
int _ttail_search_files_binary_search(ttail_t* t, const struct tm* in,
	const struct tm** ftm, short min)
{
	int cmin, cmax, ret;
	size_t valid;
	int cmpres;
	off_t tmpoff;
	ttail_files_off_t *files_off;
	size_t *id;
	off_t *off;

	files_off = min?&(t->session->file.off_min):&(t->session->file.off_max);
	id = &(files_off->id);
	off = &(files_off->off);
	*id = 0;
	*off = 0;
	valid = 0;

	while(*id < t->logfile_sz)
	{
		if(!ftm[*id])
		{
			(*id)++;
			continue;
		}
		valid++;
		cmin = ttail_tm_cmp(&(ftm[*id][0]), in);
		cmax = ttail_tm_cmp(&(ftm[*id][1]), in);
		if(!cmin)
		{
			/* found at the begining of the file */
			return 0;
		}
		else if(cmax == 0)
		{
			/* found at EOF */
			if(min)
			{
				*off = _ttail_file_search_from_end(t, *id, in);
				if(*off < 0)
				{
					*off = 0;
					return -1;
				}
			}
			else
			{
				*off = t->session->file.file_sz[*id];
			}
			return 0;
		}
		else if(cmin > 0)
		{
			/* the whole file is after the date : start of file */
			*off = 0;
			return 0;
		}
		else if(cmax > 0)
		{
			/* somewhere in current file */
			ret = _ttail_search_file_binary_search(t, in, ftm, min);
			if(ret)
			{
				if(t->verbose > 2)
				{
					fprintf(stderr, "Error while running binary search algorithm in '%s'\n",
						t->logfile_name[*id]);
				}
				*id = 0;
				return ret;
			}
			/* searching for equivalent : walk backward (min) or
			 * forward (max) while lines still match */
			tmpoff = *off;
			while(1)
			{
				if(fseek(t->logfile[*id], tmpoff, SEEK_SET) < 0)
				{
					return -1;
				}
				tmpoff = min?
					_ttail_file_start_line(t, *id):
					_ttail_file_next_line(t, *id);
				if(tmpoff < 0)
				{
					break;
				}
				cmpres=0;
				ret = _ttail_file_cur_cmp(t, *id, in, &cmpres);
				if(ret > 0 && !ttail_permissive(t))
				{
					ttail_strict_msg();
					return -1;
				}
				if((min && cmpres < 0) || (!min && cmpres > 0))
				{
					break;
				}
				*off = tmpoff;
				if(min)
				{
					if(!tmpoff)
					{
						/* nothing before the first line,
						 * and seeking to -1 would fail */
						break;
					}
					tmpoff--;
				}
			}
			return 0;
		}
		else if(*id == t->logfile_sz - 1 && cmax < 0)
		{
			/* date is after the end of the last file */
			*off = t->session->file.file_sz[*id];
			return 0;
		}
		(*id)++;
	}

	/* Here *id == t->logfile_sz : every file ends before the date or
	 * there was no valid file at all. */
	if(!valid)
	{
		fprintf(stderr, "No files to scan\n");
	}
	return 0;
}

/*
 * Binary search of the date in inside a single file.
 *
 * The file is the one designated by t->session->file.off_min.id (min not
 * zero) or t->session->file.off_max.id ; the offset found is stored in
 * the off field of the same structure. ftm is not used here.
 *
 * Returns 0 on success, -1 on error.
 */
inline int _ttail_search_file_binary_search(ttail_t* t,
	const struct tm* in, const struct tm** ftm, short min)
{
	off_t cur, sz, d, prev, tmp;
	int ret, cmpres;
	ttail_files_off_t *files_off;
	size_t id;
	off_t *off;

	files_off = min?&(t->session->file.off_min):&(t->session->file.off_max);
	id = files_off->id;
	off = &(files_off->off);
	sz = t->session->file.file_sz[id];
	d = cur = sz / 2;
	prev = 0;
	cmpres = 0;

	while(1)
	{
		if(fseek(t->logfile[id], cur, SEEK_SET) < 0)
		{
			perror("Unable to move in the file");
			return -1;
		}
		/* align on a line boundary, in the direction we moved */
		if(cur > prev)
		{
			cur = _ttail_file_next_line(t, id);
			if(cur < 0)
			{
				/* not sure errno is really set */
				perror("Error searching previous line");
				return -1;
			}
		}
		else
		{
			cur = _ttail_file_start_line(t, id);
			if(cur < 0)
			{
				/* not sure errno is really set */
				perror("Error searching previous line");
				return -1;
			}
		}
		if(cur == prev)
		{
			/* not moving anymore */
			*off = cur;
			return 0;
		}
		prev = cur;
		ret = _ttail_file_cur_cmp(t, id, in, &cmpres);
		if(ret < 0)
		{
			if(t->verbose > 2)
			{
				fprintf(stderr, "Error comparing a logline to a date directly from a file\n");
			}
			return -1;
		}
		else if(cmpres == 0)
		{
			*off = cur;
			break;
		}
		else if(cmpres < 0)
		{
			/* line is before the date : if the following one is
			 * not, the boundary is found */
			tmp = _ttail_file_next_line(t,id);
			ret = _ttail_file_cur_cmp(t, id, in, &cmpres);
			if(cmpres >=0)
			{
				*off = tmp;
				break;
			}
			d/=2;
			cur += d;
			cur %= sz;
		}
		else
		{
			d/=2;
			cur -= d;
			if(cur < 0)
			{
				cur = 0;
			}
		}
	}
	return 0;
}
int ttail_search_files_init(ttail_t* t) { struct stat stat; FILE *fp; int fd; size_t i; off_t *file_sz; t->session = (ttail_search_t*)malloc(sizeof(ttail_search_file_t)); if(!t->session) { perror("Unable to allocate memory for search session"); goto _ttail_search_closest_files_alloc_session_err; } memset(t->session, 0, sizeof(ttail_search_file_t)); file_sz = malloc(sizeof(off_t)*t->logfile_sz); if(!file_sz) { perror("Unable to allocate memory for file sizes"); goto _ttail_search_closest_files_alloc_err; } t->session->file.file_sz = file_sz; #ifdef HUGEFILE t->session->file.sz_div = 0; #endif for(i=0;ilogfile_sz;i++) { fp = t->logfile[i]; if(!fp) { file_sz[i] = 0; continue; } if((fd = fileno(fp)) < 0) { perror("Unable to get fp"); goto _ttail_search_closest_files_err; } if(fstat(fileno(fp), &stat)) { perror("Unable to get file size"); goto _ttail_search_closest_files_err; } file_sz[i] = stat.st_size; } /* we got all real size, determining if we need a divisor */ /* * not implemented */ if(_ttail_search_closest_files_set_fsizes(t)) { goto _ttail_search_closest_files_err; } t->session->file.buf_sz = 128; t->session->file.buf = malloc(t->session->file.buf_sz); if(!t->session->file.buf) { goto _ttail_search_closest_files_err; } if(_ttail_search_files_fmt_init(t) < 0) { goto _ttail_search_closest_files_err; } return 0; _ttail_search_closest_files_err: free(file_sz); t->session->file.file_sz = NULL; _ttail_search_closest_files_alloc_err: free(t->session); _ttail_search_closest_files_alloc_session_err: return -1; } int _ttail_search_files_fmt_init(ttail_t* t) { int fmt_id; size_t i,j; const char *buff; const char *fmt[] = TTAIL_DEFAULT_FORMATS; if(t->flag & TTAIL_FLAG_FORMAT) { return 1; } fmt_id = -1; for(i=0; ilogfile_sz; i++) { if(!t->logfile[i]) { continue; } for(j=0; j<10; j++) { //try to guess fmt on the 10 first lines if(ttail_file_getline(t, i) < 0) { break; } buff = ttail_logline_subst(t, ttail_file_getline_buf(t)); if(!buff) { if(ttail_permissive(t)) { continue; 
} fprintf(stderr, "Unable to find prefix in '%s' logline", t->logfile_name[i]); if(t->verbose > 0) { fprintf(stderr, " : '%s'", ttail_file_getline_buf(t)); } fprintf(stderr, "\n"); return -1; } //buff contains the lines starting by the date fmt_id = ttail_format_guess(buff, NULL); if(fmt_id >= 0) { break; } if(!ttail_permissive(t)) { ttail_strict_msg(); break; } } rewind(t->logfile[i]); if(fmt_id >= 0) { if(t->verbose > 0) { fprintf(stderr, "Detected format %s in file %s\n", fmt[fmt_id], t->logfile_name[i]); } break; } } if(fmt_id >= 0) { buff = fmt[fmt_id]; t->fmt = malloc(sizeof(char) * (strlen(buff)+1)); if(!t->fmt) { perror("Unable to allocate memory for date format"); return -1; } strcpy(t->fmt, buff); t->flag |= TTAIL_FLAG_FORMAT; return 0; } fprintf(stderr, "Unable to detect date format from logfiles\n"); return -1; } int _ttail_search_closest_files_set_fsizes(ttail_t* t) { size_t i; off_t *vfile, *vsz; vfile = malloc(sizeof(off_t)*t->logfile_sz); if(!vfile) { perror("Unable to allocate memory for file size sum"); return -1; } t->session->file.vfile = vfile; vsz = &(t->session->file.vsz); *vsz = 0; for(i=0; ilogfile_sz;i++) { vfile[i] = *vsz; #ifdef HUGEFILE *vsz += t->session->file.file_sz[i] >> t->session->file.sz_div; #else *vsz += t->session->file.file_sz[i]; #endif } t->session->file.vpos = 0; return 0; } inline int _ttail_file_minmax(ttail_t* t, size_t id, struct tm tm[2]) { FILE *fp; long cur; int ret; memset(tm, 0, sizeof(struct tm)*2); fp = t->logfile[id]; if(!fp) { fprintf(stderr, "File pointer is null !\n"); return 1; } if(fseek(fp, 0, SEEK_SET) < 0) { perror("Unable to manipulate fp"); return -1; } while(1) { if(ttail_file_getline(t, id) < 0) { return 1; } if(!(ret = ttail_logline2date(t, ttail_file_getline_buf(t), tm))) { break; } if(!ttail_permissive(t)) { fprintf(stderr, "Unable to find %s in logline", ret == 1?"prefix":"date"); if(t->verbose > 0) { fprintf(stderr, " : '%s'\n", ttail_file_getline_buf(t)); } else { fprintf(stderr,"\n"); } 
ttail_strict_msg(); return -1; } } if(fseek(fp, -1, SEEK_END) < 0) { perror("Unable to manipulate fp"); return -1; } while(1) { if((cur = _ttail_file_start_line(t, id)) < 0) { fprintf(stderr, "Error will searching line starts in\ %s\n", t->logfile_name[id]); return -1; } if(ttail_file_getline(t, id) < 0) { return 1; } if(!ttail_logline2date(t, ttail_file_getline_buf(t), tm+1)) { break; } if(!cur) { return 1; } else if(!ttail_permissive(t)) { if(t->verbose <= 0) { fprintf(stderr, "Unable to find a date in logline\n"); } else { fprintf(stderr, "Unable to find a date in '%s'\n", ttail_file_getline_buf(t)); } ttail_strict_msg(); return -1; } if(fseek(fp, cur-1, SEEK_SET) < 0) { perror("Unable to manipulate fp"); return -1; } } return 0; } int _ttail_file_reopen(ttail_t* t, size_t id) { if(t->logfile[id]) { fclose(t->logfile[id]); } t->logfile[id] = fopen(t->logfile_name[id], "r"); if(!t->logfile[id] && t->verbose > 2) { fprintf(stderr, "Unable to reopen '%s'\n", t->logfile_name[id]); } return t->logfile[id]?0:-1; } inline int _ttail_file_sort(ttail_t* t, struct tm** ftm) { size_t i, j, count; short sorted, warn; count = t->logfile_sz; i=0; warn = 0; for(i=0;ilogfile_name, i, count, char*); TTAIL_MVF(t->logfile, i, count, FILE*); TTAIL_MVF(t->session->file.file_sz, i, count, off_t); TTAIL_MVF(t->session->file.vfile, i, count, off_t); TTAIL_MVF(ftm, i, count, struct tm*); } if(!i ||i == count|| ttail_tm_cmp(&(ftm[i-1][1]), &(ftm[i][0])) < 0) { continue; } sorted=0; for(j=0; j 0 && ttail_tm_cmp(&(ftm[i][0]), &(ftm[j-1][1])) > 0)) { TTAIL_MVBF(t->logfile_name, i, j, char*); TTAIL_MVBF(t->logfile, i, j, FILE*); TTAIL_MVBF(t->session->file.file_sz, i, j, off_t); TTAIL_MVBF(t->session->file.vfile, i, j, off_t); TTAIL_MVBF(ftm, i, j, struct tm*); sorted=1; if(!warn && t->verbose > 1) { fprintf(stderr, "Warning : Files list \ given as argument has to be sorted.\n"); } break; } if((ttail_tm_cmp(&(ftm[i][0]), &(ftm[j][0])) < 0 && ttail_tm_cmp(&(ftm[i][1]), &(ftm[j][0])) > 0) || 
(j > 0 &&( ttail_tm_cmp(&(ftm[i][1]), &(ftm[j][1])) > 0 && ttail_tm_cmp(&(ftm[i][0]), &ftm[j][1]) < 0))) { fprintf(stderr, "Files list not sortable : \ file %s and %s overlaps.\n", t->logfile_name[i], t->logfile_name[j]); fprintf(stderr,"\ meaning file1 date min < file2 date min < file1 date max \n\ or file1 date min < file2 date max < file1 date max\n"); return -1; } } if(sorted) { continue; } fprintf(stderr,"Files list not sortable : file %s contains \ or is inside another file\n", t->logfile_name[i]); fprintf(stderr,"meaning that filex1 date min < file2 date min \ and file1 date max > file2 date max\n"); return -1; } return 0; } inline long _ttail_file_next_line(ttail_t *t, size_t id) { FILE *f; ssize_t s; size_t r; char *buff; long res; int c; f = t->logfile[id]; r=0; buff = NULL; s = getline(&buff, &r, f); if(s < 0) { goto _ttail_file_next_line_err; } while(1) { c = getc(f); if(c == EOF) { return 0; } else if(c!='\n') { if(fseek(f, -1, SEEK_CUR)<0) { perror("Unable to set position in file"); goto _ttail_file_next_line_err; } break; } } res = ftell(f); free(buff); return res; _ttail_file_next_line_err: free(buff); return -1; } inline long _ttail_file_start_line(ttail_t *ttail, size_t id) { #define _STARTLN_BUFFLEN 32 FILE *f; long res; /* function result */ long read_beg, cur, last, start; int read_sz; int c; f = ttail->logfile[id]; if((start = ftell(f)) < 0) { perror("Unable to get position in file"); return -1; } res = 0; read_beg = start; while(!res && start) { if(fseek(f, read_beg, SEEK_SET) < 0) { perror("Unable to set position in file"); return -1; } start = read_beg; read_sz = start <= _STARTLN_BUFFLEN?start:_STARTLN_BUFFLEN; read_beg = start - read_sz; if(fseek(f, read_beg, SEEK_SET) < 0) { perror("Unable to set position in file"); return -1; } last = -1; /* last pos we saw a '\n' */ cur = read_beg; while(cur <= start) { c = getc(f); if(c == EOF) { if(!res) { return 0; } break; } else if (c =='\n') { last = cur; } else if(last >= 0) { res = cur; last = 
-1; } cur++; } if(!read_beg) { break; } } if(fseek(f, res, SEEK_SET) < 0) { perror("Unable to set position in file"); return -1; } return res; } inline off_t _ttail_file_search_from_end(ttail_t* t , size_t id , const struct tm* tm) { FILE *f; struct tm curtm; off_t last; int ret, cmpret; off_t result; result = -1; f = t->logfile[id]; if(fseek(f, -1, SEEK_END) < 0) { return -1; } while(1) { last = _ttail_file_start_line(t, id); if(last < 0) { break; } if(ttail_file_getline(t, id) < 0) { break; } ret = ttail_logline2date(t, ttail_file_getline_buf(t), &curtm); if(ret < 0) { break; } else if(ret > 0 && !ttail_permissive(t)) { fprintf(stderr, "Unable to find the %s in logline", ret==1?"prefix":"date"); if(t->verbose > 0) { fprintf(stderr, " : '%s'\n", ttail_file_getline_buf(t)); } else { fprintf(stderr, "\n"); } ttail_strict_msg(); break; } cmpret = ttail_tm_cmp(&curtm, tm); if(!ret) { if(cmpret >= 0) { /* found but continue to search the first one*/ result = last; } else { if(cmpret < 0 && result < 0) { result = last; } break; } } if(last == 0) { /* considere the begining of the file as the answer */ return 0; } if(fseek(f, last-1, SEEK_SET)) { return -1; } } return result; } inline int _ttail_file_off_cmp(ttail_t* t, size_t id, off_t off, const struct tm* tm, int *res) { if(fseek(t->logfile[id], off, SEEK_SET)) { perror("Unable to set position in file"); return -1; } if(ttail_file_getline(t, id) < 0) { perror("Unable to read a line from file"); return -1; } return _ttail_file_cur_cmp(t, id, tm, res); } inline int _ttail_file_cur_cmp(ttail_t* t, size_t id, const struct tm* tm , int* res) { int ret; struct tm ctm; if(ttail_file_getline(t, id) < 0) { return -1; } ret = ttail_logline2date(t, ttail_file_getline_buf(t), &ctm); if(ret < 0) { return -1; } else if(ret > 1) { return 1; } *res = ttail_tm_cmp(&ctm, tm); return 0; } void _ttail_search_file_free(ttail_t* t) { if(!t->session) { return; } if(t->session->file.buf) { free(t->session->file.buf); } 
if(t->session->file.file_sz) { free(t->session->file.file_sz); } if(t->session->file.vfile) { free(t->session->file.vfile); } free(t->session); }