You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2167 lines
48 KiB
2167 lines
48 KiB
/** |
|
* KMimeMagic is inspired by the code of the |
|
* Apache Web Server. |
|
* |
|
* Rewritten for KDE by Fritz Elfert |
|
* fritz@kde.org |
|
*/ |
|
|
|
#include <stdio.h> |
|
#include <unistd.h> |
|
#include <stdlib.h> |
|
#include <sys/wait.h> |
|
#include <sys/types.h> |
|
#include <sys/stat.h> |
|
#include <fcntl.h> |
|
#include <errno.h> |
|
#include <ctype.h> |
|
#include <time.h> |
|
#include <utime.h> |
|
#include <stdarg.h> |
|
#include <qregexp.h> |
|
//#include "config-kfm.h" |
|
#include "kmimemagic.h" |
|
|
|
/* |
|
* data structures and related constants |
|
*/ |
|
#define MIME_MAGIC_DEBUG 0 |
|
#if MIME_MAGIC_DEBUG |
|
#define debug(fmt, args...) debugT(fmt "\n" , ## args) |
|
#else |
|
#define debug(fmt, args...) |
|
#endif |
|
|
|
#define DECLINED 999 |
|
#define ERROR 998 |
|
#define OK 0 |
|
|
|
/* |
|
* Buitltin Mime types |
|
*/ |
|
#define MIME_BINARY_UNKNOWN "application/octet-stream" |
|
#define MIME_BINARY_UNREADABLE "application/x-unreadable" |
|
#define MIME_BINARY_ZEROSIZE "application/x-zerosize" |
|
#define MIME_TEXT_UNKNOWN "text/plain" |
|
#define MIME_INODE_DIR "inode/directory" |
|
#define MIME_INODE_CDEV "inode/chardevice" |
|
#define MIME_INODE_BDEV "inode/blockdevice" |
|
#define MIME_INODE_FIFO "inode/fifo" |
|
#define MIME_INODE_LINK "inode/link" |
|
#define MIME_INODE_SOCK "inode/socket" |
|
// Following should go in magic-file - Fritz |
|
#define MIME_APPL_TROFF "application/x-troff" |
|
#define MIME_APPL_TAR "application/x-tar" |
|
#define MIME_TEXT_FORTRAN "text/x-fortran" |
|
|
|
#define MAXMIMESTRING 256 |
|
|
|
#define HOWMANY 1024 /* big enough to recognize most WWW files */ |
|
#define MAXDESC 50 /* max leng of text description */ |
|
#define MAXstring 64 /* max leng of "string" types */ |
|
|
|
typedef union VALUETYPE { |
|
unsigned char b; |
|
unsigned short h; |
|
unsigned long l; |
|
char s[MAXstring]; |
|
unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ |
|
unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ |
|
} VALUETYPE; |
|
|
|
struct magic { |
|
struct magic *next; /* link to next entry */ |
|
int lineno; /* line number from magic file */ |
|
|
|
short flag; |
|
#define INDIR 1 /* if '>(...)' appears, */ |
|
#define UNSIGNED 2 /* comparison is unsigned */ |
|
short cont_level; /* level of ">" */ |
|
struct { |
|
char type; /* byte short long */ |
|
long offset; /* offset from indirection */ |
|
} in; |
|
long offset; /* offset to magic number */ |
|
unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ |
|
char type; /* int, short, long or string. */ |
|
char vallen; /* length of string value, if any */ |
|
#define BYTE 1 |
|
#define SHORT 2 |
|
#define LONG 4 |
|
#define STRING 5 |
|
#define DATE 6 |
|
#define BESHORT 7 |
|
#define BELONG 8 |
|
#define BEDATE 9 |
|
#define LESHORT 10 |
|
#define LELONG 11 |
|
#define LEDATE 12 |
|
VALUETYPE value; /* either number or string */ |
|
unsigned long mask; /* mask before comparison with value */ |
|
char nospflag; /* supress space character */ |
|
|
|
/* NOTE: this string is suspected of overrunning - find it! */ |
|
char desc[MAXDESC]; /* description */ |
|
}; |
|
|
|
/* |
|
* data structures for tar file recognition |
|
* -------------------------------------------------------------------------- |
|
* Header file for public domain tar (tape archive) program. |
|
* |
|
* @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John |
|
* Gilmore, ihnp4!hoptoad!gnu. |
|
* |
|
* Header block on tape. |
|
* |
|
* I'm going to use traditional DP naming conventions here. A "block" is a big |
|
* chunk of stuff that we do I/O on. A "record" is a piece of info that we |
|
* care about. Typically many "record"s fit into a "block". |
|
*/ |
|
#define RECORDSIZE 512 |
|
#define NAMSIZ 100 |
|
#define TUNMLEN 32 |
|
#define TGNMLEN 32 |
|
|
|
union record { |
|
char charptr[RECORDSIZE]; |
|
struct header { |
|
char name[NAMSIZ]; |
|
char mode[8]; |
|
char uid[8]; |
|
char gid[8]; |
|
char size[12]; |
|
char mtime[12]; |
|
char chksum[8]; |
|
char linkflag; |
|
char linkname[NAMSIZ]; |
|
char magic[8]; |
|
char uname[TUNMLEN]; |
|
char gname[TGNMLEN]; |
|
char devmajor[8]; |
|
char devminor[8]; |
|
} header; |
|
}; |
|
|
|
/* The magic field is filled with this if uname and gname are valid. */ |
|
#define TMAGIC "ustar " /* 7 chars and a null */ |
|
|
|
/* |
|
* file-function prototypes |
|
*/ |
|
static int is_tar(unsigned char *, int); |
|
static unsigned long signextend(struct magic *, unsigned long); |
|
static int getvalue(struct magic *, char **); |
|
static int hextoint(int); |
|
static char *getstr(char *, char *, int, int *); |
|
static int parse(char *, int); |
|
static int mget(union VALUETYPE *, unsigned char *, struct magic *, int); |
|
static int mcheck(union VALUETYPE *, struct magic *); |
|
static int mconvert(union VALUETYPE *, struct magic *); |
|
static long from_oct(int, char *); |
|
|
|
/* |
|
* includes for ASCII substring recognition formerly "names.h" in file |
|
* command |
|
* |
|
* Original notes: names and types used by ascmagic in file(1). |
|
* These tokens are |
|
* here because they can appear anywhere in the first HOWMANY bytes, while |
|
* tokens in /etc/magic must appear at fixed offsets into the file. Don't |
|
* make HOWMANY too high unless you have a very fast CPU. |
|
*/ |
|
|
|
/* these types are used calculate index to 'types': keep em in sync! */ |
|
/* HTML inserted in first because this is a web server module now */ |
|
#define L_HTML 0x001 /* HTML */ |
|
#define L_C 0x002 /* first and foremost on UNIX */ |
|
#define L_MAKE 0x004 /* Makefiles */ |
|
#define L_PLI 0x008 /* PL/1 */ |
|
#define L_MACH 0x010 /* some kinda assembler */ |
|
#define L_ENG 0x020 /* English */ |
|
#define L_PAS 0x040 /* Pascal */ |
|
#define L_JAVA 0x080 /* Java source */ |
|
#define L_CPP 0x100 /* C++ */ |
|
#define L_MAIL 0x200 /* Electronic mail */ |
|
#define L_NEWS 0x400 /* Usenet Netnews */ |
|
|
|
#define P_HTML 0 /* HTML */ |
|
#define P_C 1 /* first and foremost on UNIX */ |
|
#define P_MAKE 2 /* Makefiles */ |
|
#define P_PLI 3 /* PL/1 */ |
|
#define P_MACH 4 /* some kinda assembler */ |
|
#define P_ENG 5 /* English */ |
|
#define P_PAS 6 /* Pascal */ |
|
#define P_JAVA 7 /* Java source */ |
|
#define P_CPP 8 /* C++ */ |
|
#define P_MAIL 9 /* Electronic mail */ |
|
#define P_NEWS 10 /* Usenet Netnews */ |
|
|
|
typedef struct asc_type { |
|
char *type; |
|
int kwords; |
|
double weight; |
|
} asc_type; |
|
|
|
static asc_type types[] = { |
|
{ "text/html", 10, 1.2 }, |
|
{ "text/x-c", 9, 1.3 }, |
|
{ "text/x-makefile", 4, 1.9 }, |
|
{ "text/x-pli", 1, 3 }, |
|
{ "text/x-assembler", 6, 2.1 }, |
|
{ "text/english", 2, 0.2 }, |
|
{ "text/x-pascal", 1, 1 }, |
|
{ "text/x-java", 14, 1 }, |
|
{ "text/x-c++", 14, 1 }, |
|
{ "message/rfc822", 4, 1.9 }, |
|
{ "message/news", 3, 2 } |
|
}; |
|
|
|
#define NTYPES (sizeof(types)/sizeof(asc_type)) |
|
|
|
static struct names { |
|
char *name; |
|
short type; |
|
} names[] = { |
|
|
|
/* These must be sorted by eye for optimal hit rate */ |
|
/* Add to this list only after substantial meditation */ |
|
{ |
|
"<html>", L_HTML |
|
}, |
|
{ |
|
"<HTML>", L_HTML |
|
}, |
|
{ |
|
"<head>", L_HTML |
|
}, |
|
{ |
|
"<HEAD>", L_HTML |
|
}, |
|
{ |
|
"<title>", L_HTML |
|
}, |
|
{ |
|
"<TITLE>", L_HTML |
|
}, |
|
{ |
|
"<h1>", L_HTML |
|
}, |
|
{ |
|
"<H1>", L_HTML |
|
}, |
|
{ |
|
"<!--", L_HTML |
|
}, |
|
{ |
|
"<!DOCTYPE HTML", L_HTML |
|
}, |
|
{ |
|
"/*", L_C|L_CPP|L_JAVA |
|
}, /* must precede "The", "the", etc. */ |
|
{ |
|
"//", L_CPP|L_JAVA |
|
}, /* must precede "The", "the", etc. */ |
|
{ |
|
"#include", L_C|L_CPP |
|
}, |
|
{ |
|
"char", L_C|L_CPP|L_JAVA |
|
}, |
|
{ |
|
"The", L_ENG |
|
}, |
|
{ |
|
"the", L_ENG |
|
}, |
|
{ |
|
"double", L_C|L_CPP|L_JAVA |
|
}, |
|
{ |
|
"extern", L_C|L_CPP |
|
}, |
|
{ |
|
"float", L_C|L_CPP|L_JAVA |
|
}, |
|
{ |
|
"real", L_C|L_CPP|L_JAVA |
|
}, |
|
{ |
|
"struct", L_C|L_CPP |
|
}, |
|
{ |
|
"union", L_C|L_CPP |
|
}, |
|
{ |
|
"implements", L_JAVA |
|
}, |
|
{ |
|
"package", L_JAVA |
|
}, |
|
{ |
|
"super", L_JAVA |
|
}, |
|
{ |
|
"import", L_JAVA |
|
}, |
|
{ |
|
"this", L_CPP|L_JAVA |
|
}, |
|
{ |
|
"class", L_CPP|L_JAVA |
|
}, |
|
{ |
|
"public", L_CPP|L_JAVA |
|
}, |
|
{ |
|
"private", L_CPP|L_JAVA |
|
}, |
|
{ |
|
"CFLAGS", L_MAKE |
|
}, |
|
{ |
|
"LDFLAGS", L_MAKE |
|
}, |
|
{ |
|
"all:", L_MAKE |
|
}, |
|
{ |
|
".PRECIOUS", L_MAKE |
|
}, |
|
/* |
|
* Too many files of text have these words in them. Find another way |
|
* to recognize Fortrash. |
|
*/ |
|
{ |
|
".ascii", L_MACH |
|
}, |
|
{ |
|
".asciiz", L_MACH |
|
}, |
|
{ |
|
".byte", L_MACH |
|
}, |
|
{ |
|
".even", L_MACH |
|
}, |
|
{ |
|
".globl", L_MACH |
|
}, |
|
{ |
|
"clr", L_MACH |
|
}, |
|
{ |
|
"(input", L_PAS |
|
}, |
|
{ |
|
"dcl", L_PLI |
|
}, |
|
{ |
|
"Received:", L_MAIL |
|
}, |
|
{ |
|
">From", L_MAIL |
|
}, |
|
{ |
|
"Return-Path:", L_MAIL |
|
}, |
|
{ |
|
"Cc:", L_MAIL |
|
}, |
|
{ |
|
"Newsgroups:", L_NEWS |
|
}, |
|
{ |
|
"Path:", L_NEWS |
|
}, |
|
{ |
|
"Organization:", L_NEWS |
|
}, |
|
{ |
|
NULL, 0 |
|
} |
|
}; |
|
|
|
/* current config */ |
|
typedef struct { |
|
char *magicfile; /* where magic be found */ |
|
struct magic *magic, /* head of magic config list */ |
|
*last; |
|
} config_rec; |
|
|
|
static int accuracy; |
|
|
|
config_rec *conf; |
|
|
|
void error( const char *msg, ... ) |
|
{ |
|
va_list ap; |
|
va_start( ap, msg ); |
|
// use variable arg list |
|
QString tmp = msg; |
|
tmp += "\n"; |
|
vfprintf( stderr, tmp.data() , ap ); |
|
va_end( ap ); |
|
accuracy = 0; |
|
} |
|
|
|
#if (MIME_MAGIC_DEBUG > 1) |
|
static void |
|
test_table() |
|
{ |
|
struct magic *m; |
|
struct magic *prevm = NULL; |
|
|
|
debug("%s: started", __FUNCTION__); |
|
for (m = conf->magic; m; m = m->next) { |
|
if (isprint((((unsigned long) m) >> 24) & 255) && |
|
isprint((((unsigned long) m) >> 16) & 255) && |
|
isprint((((unsigned long) m) >> 8) & 255) && |
|
isprint(((unsigned long) m) & 255)) { |
|
debug("%s: POINTER CLOBBERED! " |
|
"m=\"%c%c%c%c\" line=%d", __FUNCTION__, |
|
(((unsigned long) m) >> 24) & 255, |
|
(((unsigned long) m) >> 16) & 255, |
|
(((unsigned long) m) >> 8) & 255, |
|
((unsigned long) m) & 255, |
|
prevm ? prevm->lineno : -1); |
|
break; |
|
} |
|
prevm = m; |
|
} |
|
} |
|
#endif |
|
|
|
#define EATAB {while (isascii((unsigned char) *l) && \ |
|
isspace((unsigned char) *l)) ++l;} |
|
|
|
static int |
|
parse_line(char *line, int *rule, int lineno) |
|
{ |
|
int ws_offset; |
|
|
|
/* delete newline */ |
|
if (line[0]) { |
|
line[strlen(line) - 1] = '\0'; |
|
} |
|
/* skip leading whitespace */ |
|
ws_offset = 0; |
|
while (line[ws_offset] && isspace(line[ws_offset])) { |
|
ws_offset++; |
|
} |
|
|
|
/* skip blank lines */ |
|
if (line[ws_offset] == 0) { |
|
return 0; |
|
} |
|
/* comment, do not parse */ |
|
if (line[ws_offset] == '#') |
|
return 0; |
|
|
|
/* if we get here, we're going to use it so count it */ |
|
(*rule)++; |
|
|
|
/* parse it */ |
|
return (parse(line + ws_offset, lineno) != 0); |
|
} |
|
|
|
/* |
|
* apprentice - load configuration from the magic file. |
|
*/ |
|
static int |
|
apprentice() |
|
{ |
|
FILE *f; |
|
char line[BUFSIZ + 1]; |
|
int errs = 0; |
|
int lineno; |
|
int rule = 0; |
|
char *fname; |
|
|
|
if (!conf->magicfile) |
|
return -1; |
|
fname = conf->magicfile; |
|
f = fopen(fname, "r"); |
|
if (f == NULL) { |
|
error("kmimelib: can't read magic file %s: %s", |
|
fname, strerror(errno)); |
|
return -1; |
|
} |
|
|
|
/* parse it */ |
|
for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) |
|
if (parse_line(line, &rule, lineno)) |
|
errs++; |
|
|
|
fclose(f); |
|
|
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: conf=%p file=%s m=%s m->next=%s last=%s", |
|
__FUNCTION__, conf, |
|
conf->magicfile ? conf->magicfile : "NULL", |
|
conf->magic ? "set" : "NULL", |
|
(conf->magic && conf->magic->next) ? "set" : "NULL", |
|
conf->last ? "set" : "NULL"); |
|
debug("%s: read %d lines, %d rules, %d errors", |
|
__FUNCTION__, lineno, rule, errs); |
|
|
|
test_table(); |
|
#endif |
|
|
|
return (errs ? -1 : 0); |
|
} |
|
|
|
static int |
|
buff_apprentice(char *buff) |
|
{ |
|
char line[BUFSIZ + 2]; |
|
int errs = 0; |
|
int lineno = 1; |
|
char *start = buff; |
|
char *end; |
|
int count = 0; |
|
int rule = 0; |
|
int len = strlen(buff) + 1; |
|
|
|
/* parse it */ |
|
do { |
|
count = (len > BUFSIZ-1)?BUFSIZ-1:len; |
|
strncpy(line, start, count); |
|
line[count] = '\0'; |
|
if ((end = strchr(line, '\n'))) { |
|
*(++end) = '\0'; |
|
count = strlen(line); |
|
} else |
|
strcat(line, "\n"); |
|
start += count; |
|
len -= count; |
|
if (parse_line(line, &rule, lineno)) |
|
errs++; |
|
lineno++; |
|
} while (len > 0); |
|
|
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: conf=%p m=%s m->next=%s last=%s", |
|
__FUNCTION__, conf, |
|
conf->magic ? "set" : "NULL", |
|
(conf->magic && conf->magic->next) ? "set" : "NULL", |
|
conf->last ? "set" : "NULL"); |
|
debug("%s: read %d lines, %d rules, %d errors", |
|
__FUNCTION__, lineno, rule, errs); |
|
|
|
test_table(); |
|
#endif |
|
|
|
return (errs ? -1 : 0); |
|
} |
|
|
|
/* |
|
* extend the sign bit if the comparison is to be signed |
|
*/ |
|
static unsigned long |
|
signextend(struct magic *m, unsigned long v) |
|
{ |
|
if (!(m->flag & UNSIGNED)) |
|
switch (m->type) { |
|
/* |
|
* Do not remove the casts below. They are vital. |
|
* When later compared with the data, the sign |
|
* extension must have happened. |
|
*/ |
|
case BYTE: |
|
v = (char) v; |
|
break; |
|
case SHORT: |
|
case BESHORT: |
|
case LESHORT: |
|
v = (short) v; |
|
break; |
|
case DATE: |
|
case BEDATE: |
|
case LEDATE: |
|
case LONG: |
|
case BELONG: |
|
case LELONG: |
|
v = (long) v; |
|
break; |
|
case STRING: |
|
break; |
|
default: |
|
error("%s: can't happen: m->type=%d", |
|
__FUNCTION__, m->type); |
|
return ERROR; |
|
} |
|
return v; |
|
} |
|
|
|
/* |
|
* parse one line from magic file, put into magic[index++] if valid |
|
*/ |
|
static int |
|
parse(char *l, int lineno) |
|
{ |
|
int i = 0; |
|
struct magic *m; |
|
char *t, |
|
*s; |
|
/* allocate magic structure entry */ |
|
if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) { |
|
error("%s: Out of memory.", __FUNCTION__); |
|
return -1; |
|
} |
|
/* append to linked list */ |
|
m->next = NULL; |
|
if (!conf->magic || !conf->last) { |
|
conf->magic = conf->last = m; |
|
} else { |
|
conf->last->next = m; |
|
conf->last = m; |
|
} |
|
|
|
/* set values in magic structure */ |
|
m->flag = 0; |
|
m->cont_level = 0; |
|
m->lineno = lineno; |
|
|
|
while (*l == '>') { |
|
++l; /* step over */ |
|
m->cont_level++; |
|
} |
|
|
|
if (m->cont_level != 0 && *l == '(') { |
|
++l; /* step over */ |
|
m->flag |= INDIR; |
|
} |
|
/* get offset, then skip over it */ |
|
m->offset = (int) strtol(l, &t, 0); |
|
if (l == t) { |
|
error("%s: offset %s invalid", __FUNCTION__, l); |
|
} |
|
l = t; |
|
|
|
if (m->flag & INDIR) { |
|
m->in.type = LONG; |
|
m->in.offset = 0; |
|
/* |
|
* read [.lbs][+-]nnnnn) |
|
*/ |
|
if (*l == '.') { |
|
switch (*++l) { |
|
case 'l': |
|
m->in.type = LONG; |
|
break; |
|
case 's': |
|
m->in.type = SHORT; |
|
break; |
|
case 'b': |
|
m->in.type = BYTE; |
|
break; |
|
default: |
|
error("%s: indirect offset type %c invalid", __FUNCTION__, *l); |
|
break; |
|
} |
|
l++; |
|
} |
|
s = l; |
|
if (*l == '+' || *l == '-') |
|
l++; |
|
if (isdigit((unsigned char) *l)) { |
|
m->in.offset = strtol(l, &t, 0); |
|
if (*s == '-') |
|
m->in.offset = -m->in.offset; |
|
} else |
|
t = l; |
|
if (*t++ != ')') { |
|
error("%s: missing ')' in indirect offset", |
|
__FUNCTION__); |
|
} |
|
l = t; |
|
} |
|
while (isascii((unsigned char) *l) && isdigit((unsigned char) *l)) |
|
++l; |
|
EATAB; |
|
|
|
#define NBYTE 4 |
|
#define NSHORT 5 |
|
#define NLONG 4 |
|
#define NSTRING 6 |
|
#define NDATE 4 |
|
#define NBESHORT 7 |
|
#define NBELONG 6 |
|
#define NBEDATE 6 |
|
#define NLESHORT 7 |
|
#define NLELONG 6 |
|
#define NLEDATE 6 |
|
|
|
if (*l == 'u') { |
|
++l; |
|
m->flag |= UNSIGNED; |
|
} |
|
/* get type, skip it */ |
|
if (strncmp(l, "byte", NBYTE) == 0) { |
|
m->type = BYTE; |
|
l += NBYTE; |
|
} else if (strncmp(l, "short", NSHORT) == 0) { |
|
m->type = SHORT; |
|
l += NSHORT; |
|
} else if (strncmp(l, "long", NLONG) == 0) { |
|
m->type = LONG; |
|
l += NLONG; |
|
} else if (strncmp(l, "string", NSTRING) == 0) { |
|
m->type = STRING; |
|
l += NSTRING; |
|
} else if (strncmp(l, "date", NDATE) == 0) { |
|
m->type = DATE; |
|
l += NDATE; |
|
} else if (strncmp(l, "beshort", NBESHORT) == 0) { |
|
m->type = BESHORT; |
|
l += NBESHORT; |
|
} else if (strncmp(l, "belong", NBELONG) == 0) { |
|
m->type = BELONG; |
|
l += NBELONG; |
|
} else if (strncmp(l, "bedate", NBEDATE) == 0) { |
|
m->type = BEDATE; |
|
l += NBEDATE; |
|
} else if (strncmp(l, "leshort", NLESHORT) == 0) { |
|
m->type = LESHORT; |
|
l += NLESHORT; |
|
} else if (strncmp(l, "lelong", NLELONG) == 0) { |
|
m->type = LELONG; |
|
l += NLELONG; |
|
} else if (strncmp(l, "ledate", NLEDATE) == 0) { |
|
m->type = LEDATE; |
|
l += NLEDATE; |
|
} else { |
|
error("%s: type %s invalid", __FUNCTION__, l); |
|
return -1; |
|
} |
|
/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ |
|
if (*l == '&') { |
|
++l; |
|
m->mask = signextend(m, strtol(l, &l, 0)); |
|
} else |
|
m->mask = ~0L; |
|
EATAB; |
|
|
|
switch (*l) { |
|
case '>': |
|
case '<': |
|
/* Old-style anding: "0 byte &0x80 dynamically linked" */ |
|
case '&': |
|
case '^': |
|
case '=': |
|
m->reln = *l; |
|
++l; |
|
break; |
|
case '!': |
|
if (m->type != STRING) { |
|
m->reln = *l; |
|
++l; |
|
break; |
|
} |
|
/* FALL THROUGH */ |
|
default: |
|
if (*l == 'x' && isascii((unsigned char) l[1]) && |
|
isspace((unsigned char) l[1])) { |
|
m->reln = *l; |
|
++l; |
|
goto GetDesc; /* Bill The Cat */ |
|
} |
|
m->reln = '='; |
|
break; |
|
} |
|
EATAB; |
|
|
|
if (getvalue(m, &l)) |
|
return -1; |
|
/* |
|
* now get last part - the description |
|
*/ |
|
GetDesc: |
|
EATAB; |
|
if (l[0] == '\b') { |
|
++l; |
|
m->nospflag = 1; |
|
} else if ((l[0] == '\\') && (l[1] == 'b')) { |
|
++l; |
|
++l; |
|
m->nospflag = 1; |
|
} else |
|
m->nospflag = 0; |
|
while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) |
|
/* NULLBODY */ ; |
|
|
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: line=%d m=%p next=%p cont=%d desc=%s", |
|
__FUNCTION__, lineno, m, m->next, m->cont_level, |
|
m->desc ? m->desc : "NULL"); |
|
#endif /* MIME_MAGIC_DEBUG */ |
|
|
|
return 0; |
|
} |
|
|
|
/* |
|
* Read a numeric value from a pointer, into the value union of a magic |
|
* pointer, according to the magic type. Update the string pointer to point |
|
* just after the number read. Return 0 for success, non-zero for failure. |
|
*/ |
|
static int |
|
getvalue(struct magic *m, char **p) |
|
{ |
|
int slen; |
|
|
|
if (m->type == STRING) { |
|
*p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); |
|
m->vallen = slen; |
|
} else if (m->reln != 'x') |
|
m->value.l = signextend(m, strtol(*p, p, 0)); |
|
return 0; |
|
} |
|
|
|
/* |
|
* Convert a string containing C character escapes. Stop at an unescaped |
|
* space or tab. Copy the converted version to "p", returning its length in |
|
* *slen. Return updated scan pointer as function result. |
|
*/ |
|
static char * |
|
getstr(register char *s, register char *p, int plen, int *slen) |
|
{ |
|
char *origs = s, |
|
*origp = p; |
|
char *pmax = p + plen - 1; |
|
register int c; |
|
register int val; |
|
|
|
while ((c = *s++) != '\0') { |
|
if (isspace((unsigned char) c)) |
|
break; |
|
if (p >= pmax) { |
|
error("String too long: %s", origs); |
|
break; |
|
} |
|
if (c == '\\') { |
|
switch (c = *s++) { |
|
|
|
case '\0': |
|
goto out; |
|
|
|
default: |
|
*p++ = (char) c; |
|
break; |
|
|
|
case 'n': |
|
*p++ = '\n'; |
|
break; |
|
|
|
case 'r': |
|
*p++ = '\r'; |
|
break; |
|
|
|
case 'b': |
|
*p++ = '\b'; |
|
break; |
|
|
|
case 't': |
|
*p++ = '\t'; |
|
break; |
|
|
|
case 'f': |
|
*p++ = '\f'; |
|
break; |
|
|
|
case 'v': |
|
*p++ = '\v'; |
|
break; |
|
|
|
/* \ and up to 3 octal digits */ |
|
case '0': |
|
case '1': |
|
case '2': |
|
case '3': |
|
case '4': |
|
case '5': |
|
case '6': |
|
case '7': |
|
val = c - '0'; |
|
c = *s++; /* try for 2 */ |
|
if (c >= '0' && c <= '7') { |
|
val = (val << 3) | (c - '0'); |
|
c = *s++; /* try for 3 */ |
|
if (c >= '0' && c <= '7') |
|
val = (val << 3) | (c - '0'); |
|
else |
|
--s; |
|
} else |
|
--s; |
|
*p++ = (char) val; |
|
break; |
|
|
|
/* \x and up to 3 hex digits */ |
|
case 'x': |
|
val = 'x'; /* Default if no digits */ |
|
c = hextoint(*s++); /* Get next char */ |
|
if (c >= 0) { |
|
val = c; |
|
c = hextoint(*s++); |
|
if (c >= 0) { |
|
val = (val << 4) + c; |
|
c = hextoint(*s++); |
|
if (c >= 0) { |
|
val = (val << 4) + c; |
|
} else |
|
--s; |
|
} else |
|
--s; |
|
} else |
|
--s; |
|
*p++ = (char) val; |
|
break; |
|
} |
|
} else |
|
*p++ = (char) c; |
|
} |
|
out: |
|
*p = '\0'; |
|
*slen = p - origp; |
|
return s; |
|
} |
|
|
|
|
|
/* Single hex char to int; -1 if not a hex char. */ |
|
static int |
|
hextoint(int c) |
|
{ |
|
if (!isascii((unsigned char) c)) |
|
return -1; |
|
if (isdigit((unsigned char) c)) |
|
return c - '0'; |
|
if ((c >= 'a') && (c <= 'f')) |
|
return c + 10 - 'a'; |
|
if ((c >= 'A') && (c <= 'F')) |
|
return c + 10 - 'A'; |
|
return -1; |
|
} |
|
|
|
/* |
|
* Convert the byte order of the data we are looking at |
|
*/ |
|
static int |
|
mconvert(union VALUETYPE *p, struct magic *m) |
|
{ |
|
char *rt; |
|
|
|
switch (m->type) { |
|
case BYTE: |
|
return 1; |
|
case STRING: |
|
/* Null terminate and eat the return */ |
|
p->s[sizeof(p->s) - 1] = '\0'; |
|
if ((rt = strchr(p->s, '\n')) != NULL) |
|
*rt = '\0'; |
|
return 1; |
|
#ifndef WORDS_BIGENDIAN |
|
case SHORT: |
|
#endif |
|
case BESHORT: |
|
p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); |
|
return 1; |
|
#ifndef WORDS_BIGENDIAN |
|
case LONG: |
|
case DATE: |
|
#endif |
|
case BELONG: |
|
case BEDATE: |
|
p->l = (long) |
|
((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); |
|
return 1; |
|
#ifdef WORDS_BIGENDIAN |
|
case SHORT: |
|
#endif |
|
case LESHORT: |
|
p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); |
|
return 1; |
|
#ifdef WORDS_BIGENDIAN |
|
case LONG: |
|
case DATE: |
|
#endif |
|
case LELONG: |
|
case LEDATE: |
|
p->l = (long) |
|
((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); |
|
return 1; |
|
default: |
|
error("%s: invalid type %d", __FUNCTION__, m->type); |
|
return 0; |
|
} |
|
} |
|
|
|
|
|
static int |
|
mget(union VALUETYPE *p, unsigned char *s, struct magic *m, |
|
int nbytes) |
|
{ |
|
long offset = m->offset; |
|
if (offset + (int)sizeof(union VALUETYPE) > nbytes) |
|
return 0; |
|
|
|
|
|
memcpy(p, s + offset, sizeof(union VALUETYPE)); |
|
|
|
if (!mconvert(p, m)) |
|
return 0; |
|
|
|
if (m->flag & INDIR) { |
|
|
|
switch (m->in.type) { |
|
case BYTE: |
|
offset = p->b + m->in.offset; |
|
break; |
|
case SHORT: |
|
offset = p->h + m->in.offset; |
|
break; |
|
case LONG: |
|
offset = p->l + m->in.offset; |
|
break; |
|
} |
|
|
|
if (offset + (int)sizeof(union VALUETYPE) > nbytes) |
|
return 0; |
|
|
|
memcpy(p, s + offset, sizeof(union VALUETYPE)); |
|
|
|
if (!mconvert(p, m)) |
|
return 0; |
|
} |
|
return 1; |
|
} |
|
|
|
static int |
|
mcheck(union VALUETYPE *p, struct magic *m) |
|
{ |
|
register unsigned long l = m->value.l; |
|
register unsigned long v; |
|
int matched; |
|
|
|
if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { |
|
error("BOINK"); |
|
return 1; |
|
} |
|
switch (m->type) { |
|
case BYTE: |
|
v = p->b; |
|
break; |
|
|
|
case SHORT: |
|
case BESHORT: |
|
case LESHORT: |
|
v = p->h; |
|
break; |
|
|
|
case LONG: |
|
case BELONG: |
|
case LELONG: |
|
case DATE: |
|
case BEDATE: |
|
case LEDATE: |
|
v = p->l; |
|
break; |
|
|
|
case STRING: |
|
l = 0; |
|
/* |
|
* What we want here is: v = strncmp(m->value.s, p->s, |
|
* m->vallen); but ignoring any nulls. bcmp doesn't give |
|
* -/+/0 and isn't universally available anyway. |
|
*/ |
|
v = 0; |
|
{ |
|
register unsigned char *a = (unsigned char *) m->value.s; |
|
register unsigned char *b = (unsigned char *) p->s; |
|
register int len = m->vallen; |
|
|
|
while (--len >= 0) |
|
if ((v = *b++ - *a++) != 0) |
|
break; |
|
} |
|
break; |
|
default: |
|
error("%s: invalid type %d", |
|
__FUNCTION__, m->type); |
|
return 0; /* NOTREACHED */ |
|
} |
|
#if 0 |
|
debug("Before signextend %08x", v); |
|
#endif |
|
v = signextend(m, v) & m->mask; |
|
#if 0 |
|
debug("After signextend %08x", v); |
|
#endif |
|
|
|
switch (m->reln) { |
|
case 'x': |
|
matched = 1; |
|
break; |
|
|
|
case '!': |
|
matched = v != l; |
|
break; |
|
|
|
case '=': |
|
matched = v == l; |
|
break; |
|
|
|
case '>': |
|
if (m->flag & UNSIGNED) |
|
matched = v > l; |
|
else |
|
matched = (long) v > (long) l; |
|
break; |
|
|
|
case '<': |
|
if (m->flag & UNSIGNED) |
|
matched = v < l; |
|
else |
|
matched = (long) v < (long) l; |
|
break; |
|
|
|
case '&': |
|
matched = (v & l) == l; |
|
break; |
|
|
|
case '^': |
|
matched = (v & l) != l; |
|
break; |
|
|
|
default: |
|
matched = 0; |
|
error("%s: can't happen: invalid relation %d.", |
|
__FUNCTION__, m->reln); |
|
break; /* NOTREACHED */ |
|
} |
|
|
|
return matched; |
|
} |
|
|
|
/* hook for printf-type functions */ |
|
void |
|
KMimeMagic::resultBufPrintf(char *str,...) |
|
{ |
|
va_list ap; |
|
|
|
char buf[MAXMIMESTRING]; |
|
|
|
/* assemble the string into the buffer */ |
|
va_start(ap, str); |
|
vsprintf(buf, str, ap); |
|
va_end(ap); |
|
|
|
/* add the buffer to the list */ |
|
resultBuf += QString(buf); |
|
} |
|
|
|
/* states for the state-machine algorithm in finishResult() */ |
|
typedef enum { |
|
rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding |
|
} rsl_states; |
|
|
|
/* process resultBuf and set the MIME info in magicResult */ |
|
int |
|
KMimeMagic::finishResult() |
|
{ |
|
int cur_pos, /* current position within result */ |
|
type_pos, /* content type starting point: position */ |
|
type_len, /* content type length */ |
|
encoding_pos, /* content encoding starting point: position */ |
|
encoding_len; /* content encoding length */ |
|
|
|
int state; |
|
/* start searching for the type and encoding */ |
|
state = rsl_leading_space; |
|
type_pos = type_len = 0; |
|
encoding_pos = encoding_len = 0; |
|
/* loop through the characters in the result */ |
|
for (cur_pos = 0; cur_pos < (int)resultBuf.length(); cur_pos++) { |
|
if (isspace(resultBuf.at(cur_pos))) { |
|
/* process whitespace actions for each state */ |
|
if (state == rsl_leading_space) { |
|
/* eat whitespace in this state */ |
|
continue; |
|
} else if (state == rsl_type) { |
|
/* whitespace: type has no slash! */ |
|
return DECLINED; |
|
} else if (state == rsl_subtype) { |
|
/* whitespace: end of MIME type */ |
|
state++; |
|
continue; |
|
} else if (state == rsl_separator) { |
|
/* eat whitespace in this state */ |
|
continue; |
|
} else if (state == rsl_encoding) { |
|
/* whitespace: end of MIME encoding */ |
|
/* we're done */ |
|
break; |
|
} else { |
|
/* should not be possible */ |
|
/* abandon malfunctioning module */ |
|
error("%s: bad state %d (ws)", __FUNCTION__, state); |
|
return DECLINED; |
|
} |
|
/* NOTREACHED */ |
|
} else if (state == rsl_type && |
|
resultBuf.at(cur_pos) == '/') { |
|
/* copy the char and go to rsl_subtype state */ |
|
type_len++; |
|
state++; |
|
} else { |
|
/* process non-space actions for each state */ |
|
if (state == rsl_leading_space) { |
|
/* non-space: begin MIME type */ |
|
state++; |
|
type_pos = cur_pos; |
|
type_len = 1; |
|
continue; |
|
} else if (state == rsl_type || |
|
state == rsl_subtype) { |
|
/* non-space: adds to type */ |
|
type_len++; |
|
continue; |
|
} else if (state == rsl_separator) { |
|
/* non-space: begin MIME encoding */ |
|
state++; |
|
encoding_pos = cur_pos; |
|
encoding_len = 1; |
|
continue; |
|
} else if (state == rsl_encoding) { |
|
/* non-space: adds to encoding */ |
|
encoding_len++; |
|
continue; |
|
} else { |
|
/* should not be possible */ |
|
/* abandon malfunctioning module */ |
|
error("%s: bad state %d (ns)", __FUNCTION__, state); |
|
return DECLINED; |
|
} |
|
/* NOTREACHED */ |
|
} |
|
/* NOTREACHED */ |
|
} |
|
|
|
/* if we ended prior to state rsl_subtype, we had incomplete info */ |
|
if (state != rsl_subtype && state != rsl_separator && |
|
state != rsl_encoding) { |
|
/* defer to other modules */ |
|
return DECLINED; |
|
} |
|
/* save the info in the request record */ |
|
if (state == rsl_subtype || state == rsl_encoding || |
|
state == rsl_encoding || state == rsl_separator) { |
|
magicResult->setContent(resultBuf.mid(type_pos, type_len)); |
|
} |
|
if (state == rsl_encoding) |
|
magicResult->setEncoding(resultBuf.mid(encoding_pos, |
|
encoding_len)); |
|
/* detect memory allocation errors */ |
|
if (!magicResult->getContent() || |
|
(state == rsl_encoding && !magicResult->getEncoding())) { |
|
return -1; |
|
} |
|
/* success! */ |
|
return OK; |
|
} |
|
|
|
/* |
|
* magic_process - process input file fn. Opens the file and reads a |
|
* fixed-size buffer to begin processing the contents. |
|
*/ |
|
void |
|
KMimeMagic::process(const char * fn) |
|
{ |
|
int fd = 0; |
|
unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ |
|
struct utimbuf utbuf; |
|
struct stat sb; |
|
int nbytes = 0; /* number of bytes read from a datafile */ |
|
|
|
/* |
|
* first try judging the file based on its filesystem status |
|
*/ |
|
if (fsmagic(fn, &sb) != 0) { |
|
resultBuf += "\n"; |
|
return; |
|
} |
|
if ((fd = open(fn, O_RDONLY)) < 0) { |
|
/* We can't open it, but we were able to stat it. */ |
|
/* |
|
* if (sb.st_mode & 0002) addResult("writable, "); |
|
* if (sb.st_mode & 0111) addResult("executable, "); |
|
*/ |
|
error("can't read `%s' (%s).", fn, strerror(errno)); |
|
resultBuf += MIME_BINARY_UNREADABLE; |
|
return; |
|
} |
|
/* |
|
* try looking at the first HOWMANY bytes |
|
*/ |
|
if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) { |
|
error("%s read failed (%s).", fn, strerror(errno)); |
|
resultBuf += MIME_BINARY_UNREADABLE; |
|
return; |
|
/* NOTREACHED */ |
|
} |
|
if (nbytes == 0) { |
|
resultBuf += MIME_BINARY_ZEROSIZE; |
|
} else { |
|
buf[nbytes++] = '\0'; /* null-terminate it */ |
|
tryit(buf, nbytes); |
|
} |
|
|
|
/* |
|
* Try to restore access, modification times if read it. |
|
*/ |
|
utbuf.actime = sb.st_atime; |
|
utbuf.modtime = sb.st_mtime; |
|
(void) utime(fn, &utbuf); /* don't care if loses */ |
|
(void) close(fd); |
|
resultBuf += "\n"; |
|
} |
|
|
|
|
|
void |
|
KMimeMagic::tryit(unsigned char *buf, int nb) |
|
{ |
|
/* |
|
* try tests in /etc/magic (or surrogate magic file) |
|
*/ |
|
if (softmagic(buf, nb) == 1) |
|
return; |
|
|
|
/* |
|
* try known keywords, check for ascii-ness too. |
|
*/ |
|
if (ascmagic(buf, nb) == 1) |
|
return; |
|
|
|
/* |
|
* abandon hope, all ye who remain here |
|
*/ |
|
resultBuf += MIME_BINARY_UNKNOWN; |
|
accuracy = 0; |
|
} |
|
|
|
int |
|
KMimeMagic::fsmagic(const char *fn, struct stat *sb) |
|
{ |
|
int ret = 0; |
|
|
|
/* |
|
* Fstat is cheaper but fails for files you don't have read perms on. |
|
* On 4.2BSD and similar systems, use lstat() to identify symlinks. |
|
*/ |
|
ret = lstat(fn, sb); /* don't merge into if; see "ret =" above */ |
|
|
|
if (ret) { |
|
return 1; |
|
} |
|
/* |
|
* if (sb->st_mode & S_ISUID) resultBuf += "setuid "; |
|
* if (sb->st_mode & S_ISGID) resultBuf += "setgid "; |
|
* if (sb->st_mode & S_ISVTX) resultBuf += "sticky "; |
|
*/ |
|
|
|
switch (sb->st_mode & S_IFMT) { |
|
case S_IFDIR: |
|
resultBuf += MIME_INODE_DIR; |
|
return 1; |
|
case S_IFCHR: |
|
resultBuf += MIME_INODE_CDEV; |
|
return 1; |
|
case S_IFBLK: |
|
resultBuf += MIME_INODE_BDEV; |
|
return 1; |
|
/* TODO add code to handle V7 MUX and Blit MUX files */ |
|
#ifdef S_IFIFO |
|
case S_IFIFO: |
|
resultBuf += MIME_INODE_FIFO;; |
|
return 1; |
|
#endif |
|
#ifdef S_IFLNK |
|
case S_IFLNK: |
|
{ |
|
char buf[BUFSIZ + BUFSIZ + 4]; |
|
register int nch; |
|
struct stat tstatbuf; |
|
|
|
if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) { |
|
resultBuf += MIME_INODE_LINK; |
|
resultBuf += "\nunreadable"; |
|
return 1; |
|
} |
|
buf[nch] = '\0'; /* readlink(2) forgets this */ |
|
/* If broken symlink, say so and quit early. */ |
|
if (*buf == '/') { |
|
if (stat(buf, &tstatbuf) < 0) { |
|
resultBuf += MIME_INODE_LINK; |
|
resultBuf += "\nbroken"; |
|
return 1; |
|
} |
|
} else { |
|
char *tmp; |
|
char buf2[BUFSIZ + BUFSIZ + 4]; |
|
|
|
strcpy(buf2, fn); |
|
if ((tmp = strrchr(buf2, '/')) == NULL) { |
|
tmp = buf; /* in current dir */ |
|
} else { |
|
/* dir part plus (rel.) link */ |
|
*++tmp = '\0'; |
|
strcat(buf2, buf); |
|
tmp = buf2; |
|
} |
|
if (stat(tmp, &tstatbuf) < 0) { |
|
resultBuf += MIME_INODE_LINK; |
|
resultBuf += "\nbroken"; |
|
return 1; |
|
} else |
|
strcpy(buf, tmp); |
|
} |
|
if (followLinks) |
|
process(buf); |
|
else |
|
resultBuf += MIME_INODE_LINK; |
|
return 1; |
|
} |
|
return 1; |
|
#endif |
|
#ifdef S_IFSOCK |
|
#ifndef __COHERENT__ |
|
case S_IFSOCK: |
|
resultBuf += MIME_INODE_SOCK; |
|
return 1; |
|
#endif |
|
#endif |
|
case S_IFREG: |
|
break; |
|
default: |
|
error("%s: invalid mode 0%o.", __FUNCTION__, |
|
sb->st_mode); |
|
/* NOTREACHED */ |
|
} |
|
|
|
/* |
|
* regular file, check next possibility |
|
*/ |
|
if (sb->st_size == 0) { |
|
resultBuf += MIME_BINARY_ZEROSIZE; |
|
return 1; |
|
} |
|
return 0; |
|
} |
|
|
|
/* |
|
* softmagic - lookup one file in database (already read from /etc/magic by |
|
* apprentice.c). Passed the name and FILE * of one file to be typed. |
|
*/ |
|
int |
|
KMimeMagic::softmagic(unsigned char *buf, int nbytes) |
|
{ |
|
if (match(buf, nbytes)) |
|
return 1; |
|
|
|
return 0; |
|
} |
|
|
|
/* |
|
* Go through the whole list, stopping if you find a match. Process all the |
|
* continuations of that match before returning. |
|
* |
|
* We support multi-level continuations: |
|
* |
|
* At any time when processing a successful top-level match, there is a current |
|
* continuation level; it represents the level of the last successfully |
|
* matched continuation. |
|
* |
|
* Continuations above that level are skipped as, if we see one, it means that |
|
* the continuation that controls them - i.e, the lower-level continuation |
|
* preceding them - failed to match. |
|
* |
|
* Continuations below that level are processed as, if we see one, it means |
|
* we've finished processing or skipping higher-level continuations under the |
|
* control of a successful or unsuccessful lower-level continuation, and are |
|
* now seeing the next lower-level continuation and should process it. The |
|
* current continuation level reverts to the level of the one we're seeing. |
|
* |
|
* Continuations at the current level are processed as, if we see one, there's |
|
* no lower-level continuation that may have failed. |
|
* |
|
* If a continuation matches, we bump the current continuation level so that |
|
* higher-level continuations are processed. |
|
*/ |
|
int |
|
KMimeMagic::match(unsigned char *s, int nbytes) |
|
{ |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
int rule_counter = 0; |
|
#endif |
|
int cont_level = 0; |
|
int need_separator = 0; |
|
union VALUETYPE p; |
|
struct magic *m; |
|
|
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: conf=%p file=%s m=%s m->next=%s last=%s", |
|
__FUNCTION__, conf, |
|
conf->magicfile ? conf->magicfile : "NULL", |
|
conf->magic ? "set" : "NULL", |
|
(conf->magic && conf->magic->next) ? "set" : "NULL", |
|
conf->last ? "set" : "NULL"); |
|
for (m = conf->magic; m; m = m->next) { |
|
if (isprint((((unsigned long) m) >> 24) & 255) && |
|
isprint((((unsigned long) m) >> 16) & 255) && |
|
isprint((((unsigned long) m) >> 8) & 255) && |
|
isprint(((unsigned long) m) & 255)) { |
|
debug("%s: POINTER CLOBBERED! " |
|
"m=\"%c%c%c%c\"", __FUNCTION__, |
|
(((unsigned long) m) >> 24) & 255, |
|
(((unsigned long) m) >> 16) & 255, |
|
(((unsigned long) m) >> 8) & 255, |
|
((unsigned long) m) & 255); |
|
break; |
|
} |
|
} |
|
#endif |
|
|
|
for (m = conf->magic; m; m = m->next) { |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
rule_counter++; |
|
debug("%s: line=%d desc=%s", __FUNCTION__, |
|
m->lineno, m->desc); |
|
#endif |
|
|
|
/* check if main entry matches */ |
|
if (!mget(&p, s, m, nbytes) || |
|
!mcheck(&p, m)) { |
|
struct magic *m_cont; |
|
|
|
/* |
|
* main entry didn't match, flush its continuations |
|
*/ |
|
if (!m->next || (m->next->cont_level == 0)) { |
|
continue; |
|
} |
|
m_cont = m->next; |
|
while (m_cont && (m_cont->cont_level != 0)) { |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
rule_counter++; |
|
debug("%s: line=%d mc=%p mc->next=%p " |
|
"cont=%d desc=%s", |
|
__FUNCTION__, m_cont->lineno, m_cont, |
|
m_cont->next, m_cont->cont_level, |
|
m_cont->desc ? m_cont->desc : "NULL"); |
|
#endif |
|
/* |
|
* this trick allows us to keep *m in sync |
|
* when the continue advances the pointer |
|
*/ |
|
m = m_cont; |
|
m_cont = m_cont->next; |
|
} |
|
continue; |
|
} |
|
/* if we get here, the main entry rule was a match */ |
|
/* this will be the last run through the loop */ |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: rule matched, line=%d type=%d %s", |
|
__FUNCTION__, m->lineno, m->type, |
|
(m->type == STRING) ? m->value.s : ""); |
|
#endif |
|
|
|
/* print the match */ |
|
mprint(&p, m); |
|
|
|
/* |
|
* If we printed something, we'll need to print a blank |
|
* before we print something else. |
|
*/ |
|
if (m->desc[0]) |
|
need_separator = 1; |
|
/* and any continuations that match */ |
|
cont_level++; |
|
/* |
|
* while (m && m->next && m->next->cont_level != 0 && ( m = |
|
* m->next )) |
|
*/ |
|
m = m->next; |
|
while (m && (m->cont_level != 0)) { |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: line=%d cont=%d type=%d %s", |
|
__FUNCTION__, m->lineno, m->cont_level, m->type, |
|
(m->type == STRING) ? m->value.s : ""); |
|
#endif |
|
if (cont_level >= m->cont_level) { |
|
if (cont_level > m->cont_level) { |
|
/* |
|
* We're at the end of the level |
|
* "cont_level" continuations. |
|
*/ |
|
cont_level = m->cont_level; |
|
} |
|
if (mget(&p, s, m, nbytes) && |
|
mcheck(&p, m)) { |
|
/* |
|
* This continuation matched. Print |
|
* its message, with a blank before |
|
* it if the previous item printed |
|
* and this item isn't empty. |
|
*/ |
|
/* space if previous printed */ |
|
if (need_separator |
|
&& (m->nospflag == 0) |
|
&& (m->desc[0] != '\0') |
|
) { |
|
resultBuf += " "; |
|
need_separator = 0; |
|
} |
|
mprint(&p, m); |
|
if (m->desc[0]) |
|
need_separator = 1; |
|
|
|
/* |
|
* If we see any continuations at a |
|
* higher level, process them. |
|
*/ |
|
cont_level++; |
|
} |
|
} |
|
/* move to next continuation record */ |
|
m = m->next; |
|
} |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: matched after %d rules", __FUNCTION__, rule_counter); |
|
#endif |
|
return 1; /* all through */ |
|
} |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
debug("%s: failed after %d rules", __FUNCTION__, rule_counter); |
|
#endif |
|
return 0; /* no match at all */ |
|
} |
|
|
|
void |
|
KMimeMagic::mprint(union VALUETYPE *p, struct magic *m) |
|
{ |
|
char *pp, |
|
*rt; |
|
unsigned long v; |
|
|
|
switch (m->type) { |
|
case BYTE: |
|
v = p->b; |
|
break; |
|
|
|
case SHORT: |
|
case BESHORT: |
|
case LESHORT: |
|
v = p->h; |
|
break; |
|
|
|
case LONG: |
|
case BELONG: |
|
case LELONG: |
|
v = p->l; |
|
break; |
|
|
|
case STRING: |
|
if (m->reln == '=') { |
|
resultBufPrintf(m->desc, m->value.s); |
|
} else { |
|
resultBufPrintf(m->desc, p->s); |
|
} |
|
return; |
|
|
|
case DATE: |
|
case BEDATE: |
|
case LEDATE: |
|
pp = ctime((time_t *) & p->l); |
|
if ((rt = strchr(pp, '\n')) != NULL) |
|
*rt = '\0'; |
|
resultBufPrintf(m->desc, pp); |
|
return; |
|
default: |
|
error("%s: invalid m->type (%d)", |
|
__FUNCTION__, m->type); |
|
return; |
|
} |
|
|
|
v = signextend(m, v) & m->mask; |
|
resultBufPrintf(m->desc, (unsigned long) v); |
|
} |
|
|
|
/* an optimization over plain strcmp() */ |
|
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) |
|
|
|
int |
|
KMimeMagic::ascmagic(unsigned char *buf, int nbytes) |
|
{ |
|
int i; |
|
double pct, maxpct, pctsum; |
|
double pcts[NTYPES]; |
|
int mostaccurate, tokencount; |
|
int typeset, jonly, conly, jconly, cppcomm, ccomm; |
|
int has_escapes = 0; |
|
unsigned char *s; |
|
char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ |
|
char *token; |
|
register struct names *p; |
|
int typecount[NTYPES]; |
|
|
|
/* these are easy, do them first */ |
|
accuracy = 70; |
|
|
|
/* |
|
* for troff, look for . + letter + letter or .\"; this must be done |
|
* to disambiguate tar archives' ./file and other trash from real |
|
* troff input. |
|
*/ |
|
if (*buf == '.') { |
|
unsigned char *tp = buf + 1; |
|
|
|
while (isascii(*tp) && isspace(*tp)) |
|
++tp; /* skip leading whitespace */ |
|
if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') && |
|
isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) { |
|
resultBuf += MIME_APPL_TROFF; |
|
return 1; |
|
} |
|
} |
|
if ((*buf == 'c' || *buf == 'C') && |
|
isascii(*(buf + 1)) && isspace(*(buf + 1))) { |
|
/* Fortran */ |
|
resultBuf += MIME_TEXT_FORTRAN; |
|
return 1; |
|
} |
|
/* look for tokens - this is expensive! */ |
|
/* make a copy of the buffer here because strtok() will destroy it */ |
|
s = (unsigned char *) memcpy(nbuf, buf, nbytes); |
|
s[nbytes] = '\0'; |
|
has_escapes = (memchr(s, '\033', nbytes) != NULL); |
|
/* |
|
* Fritz: |
|
* Try a little harder on C/C++/Java. |
|
*/ |
|
memset(&typecount, 0, sizeof(typecount)); |
|
typeset = 0; |
|
jonly = 0; |
|
conly = 0; |
|
jconly = 0; |
|
cppcomm = 0; |
|
ccomm = 0; |
|
tokencount = 0; |
|
// first collect all possible types and count matches |
|
while ((token = strtok((char *) s, " \t\n\r\f,;")) != NULL) { |
|
s = NULL; /* make strtok() keep on tokin' */ |
|
for (p = names; p->name ; p++) { |
|
if (STREQ(p->name, token)) { |
|
tokencount++; |
|
typeset |= p->type; |
|
if (p->type == L_JAVA) |
|
jonly++; |
|
if ((p->type & (L_C|L_CPP|L_JAVA)) |
|
== (L_CPP|L_JAVA)) |
|
jconly++; |
|
if ((p->type & (L_C|L_CPP|L_JAVA)) |
|
== (L_C|L_CPP)) |
|
conly++; |
|
if (STREQ(token, "//")) |
|
cppcomm++; |
|
if (STREQ(token, "/*")) |
|
ccomm++; |
|
for (i = 0; i < (int)NTYPES; i++) |
|
if ((1 << i) & p->type) |
|
typecount[i]++; |
|
} |
|
} |
|
} |
|
|
|
if (typeset & (L_C|L_CPP|L_JAVA)) { |
|
accuracy = 40; |
|
if (!(typeset & ~(L_C|L_CPP|L_JAVA))) { |
|
if (jonly && conly) |
|
error("Oops, jonly && conly?!"); |
|
if (jonly) { |
|
// A java-only token has matched |
|
resultBuf += QString(types[P_JAVA].type); |
|
return 1; |
|
} |
|
if (jconly) { |
|
// A non-C (only C++ or Java) token has matched. |
|
if (typecount[P_JAVA] > typecount[P_CPP]) |
|
resultBuf += QString(types[P_JAVA].type); |
|
else |
|
resultBuf += QString(types[P_CPP].type); |
|
return 1; |
|
} |
|
if (conly) { |
|
// Either C or C++, rely on comments. |
|
if (cppcomm) |
|
resultBuf += QString(types[P_CPP].type); |
|
else |
|
resultBuf += QString(types[P_C].type); |
|
return 1; |
|
} |
|
if (ccomm) { |
|
resultBuf += QString(types[P_C].type); |
|
return 1; |
|
} |
|
} |
|
} |
|
|
|
/* Neither C, C++ or Java (or all of them without able to distinguish): |
|
* Simply take the token-class with the highest |
|
* matchcount > 0 |
|
*/ |
|
mostaccurate = -1; |
|
maxpct = pctsum = 0.0; |
|
for (i = 0; i < (int)NTYPES; i++) { |
|
pct = (double)typecount[i] / (double)types[i].kwords * |
|
(double)types[i].weight; |
|
pcts[i] = pct; |
|
pctsum += pct; |
|
if (pct > maxpct) { |
|
maxpct = pct; |
|
mostaccurate = i; |
|
} |
|
#if MIME_MAGIC_DEBUG |
|
printf("%s has %d hits, %d kw, %f -> max = %f\n", |
|
types[i].type, typecount[i], types[i].kwords, |
|
pct, maxpct); |
|
#endif |
|
} |
|
if (mostaccurate >= 0.0) { |
|
accuracy = (int)(pcts[mostaccurate] / pctsum * 60); |
|
resultBuf += QString(types[mostaccurate].type); |
|
return 1; |
|
} |
|
|
|
switch (is_tar(buf, nbytes)) { |
|
case 1: |
|
/* V7 tar archive */ |
|
resultBuf += MIME_APPL_TAR; |
|
accuracy = 90; |
|
return 1; |
|
case 2: |
|
/* POSIX tar archive */ |
|
resultBuf += MIME_APPL_TAR; |
|
accuracy = 90; |
|
return 1; |
|
} |
|
|
|
for (i = 0; i < nbytes; i++) { |
|
if (!isascii(*(buf + i))) |
|
return 0; /* not all ascii */ |
|
} |
|
|
|
/* all else fails, but it is ascii... */ |
|
accuracy = 90; |
|
if (has_escapes) { |
|
/* text with escape sequences */ |
|
/* we leave this open for further differentiation later */ |
|
resultBuf += MIME_TEXT_UNKNOWN; |
|
} else { |
|
/* plain text */ |
|
resultBuf += MIME_TEXT_UNKNOWN; |
|
} |
|
return 1; |
|
} |
|
|
|
/* |
|
* is_tar() -- figure out whether file is a tar archive. |
|
* |
|
* Stolen (by author of file utility) from the public domain tar program: Public |
|
* Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). |
|
* |
|
* @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 |
|
* 1997/06/24 00:41:02 ikluft Exp ikluft $ |
|
* |
|
* Comments changed and some code/comments reformatted for file command by Ian |
|
* Darwin. |
|
*/ |
|
|
|
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) |
|
|
|
/* |
|
* Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for |
|
* old UNIX tar file, 2 for Unix Std (POSIX) tar file. |
|
*/ |
|
|
|
static int |
|
is_tar(unsigned char *buf, int nbytes) |
|
{ |
|
register union record *header = (union record *) buf; |
|
register int i; |
|
register long sum, |
|
recsum; |
|
register char *p; |
|
|
|
if (nbytes < (int)sizeof(union record)) |
|
return 0; |
|
|
|
recsum = from_oct(8, header->header.chksum); |
|
|
|
sum = 0; |
|
p = header->charptr; |
|
for (i = sizeof(union record); --i >= 0;) { |
|
/* |
|
* We can't use unsigned char here because of old compilers, |
|
* e.g. V7. |
|
*/ |
|
sum += 0xFF & *p++; |
|
} |
|
|
|
/* Adjust checksum to count the "chksum" field as blanks. */ |
|
for (i = sizeof(header->header.chksum); --i >= 0;) |
|
sum -= 0xFF & header->header.chksum[i]; |
|
sum += ' ' * sizeof header->header.chksum; |
|
|
|
if (sum != recsum) |
|
return 0; /* Not a tar archive */ |
|
|
|
if (0 == strcmp(header->header.magic, TMAGIC)) |
|
return 2; /* Unix Standard tar archive */ |
|
|
|
return 1; /* Old fashioned tar archive */ |
|
} |
|
|
|
|
|
/* |
|
* Quick and dirty octal conversion. |
|
* |
|
* Result is -1 if the field is invalid (all blank, or nonoctal). |
|
*/ |
|
static long |
|
from_oct(int digs, char *where) |
|
{ |
|
register long value; |
|
|
|
while (isspace(*where)) { /* Skip spaces */ |
|
where++; |
|
if (--digs <= 0) |
|
return -1; /* All blank field */ |
|
} |
|
value = 0; |
|
while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ |
|
value = (value << 3) | (*where++ - '0'); |
|
--digs; |
|
} |
|
|
|
if (digs > 0 && *where && !isspace(*where)) |
|
return -1; /* Ended on non-space/nul */ |
|
|
|
return value; |
|
} |
|
|
|
/* |
|
* Check for file-revision suffix |
|
* |
|
* This is for an obscure document control system used on an intranet. |
|
* The web representation of each file's revision has an @1, @2, etc |
|
* appended with the revision number. This needs to be stripped off to |
|
* find the file suffix, which can be recognized by sending the name back |
|
* through a sub-request. The base file name (without the @num suffix) |
|
* must exist because its type will be used as the result. |
|
*/ |
|
|
|
/* |
|
* Don't know if we really need this within KDE?! |
|
* ... well i've to look into original file code. |
|
*/ |
|
const KMimeMagicResult * |
|
KMimeMagic::revision_suffix(const char * fn) |
|
{ |
|
int suffix_pos; |
|
QString newfn = QString(fn); |
|
|
|
#if (MIME_MAGIC_DEBUG > 2) |
|
debug("%s: checking %s", __FUNCTION__, fn); |
|
#endif |
|
/* check for recognized revision suffix */ |
|
suffix_pos = newfn.findRev(QRegExp("@[0-9]*$")); |
|
#if (MIME_MAGIC_DEBUG > 2) |
|
debug("%s: suffix_pos=%d", __FUNCTION__, suffix_pos); |
|
#endif |
|
if (suffix_pos == -1) |
|
return NULL; |
|
return findFileType((const char *)newfn.left(suffix_pos)); |
|
} |
|
|
|
/* |
|
* The Constructor |
|
*/ |
|
KMimeMagic::KMimeMagic(const char * _configfile) |
|
{ |
|
int result; |
|
conf = (config_rec *)calloc(1, sizeof(config_rec)); |
|
|
|
/* set up the magic list (empty) */ |
|
conf->magic = conf->last = NULL; |
|
magicResult = NULL; |
|
followLinks = FALSE; |
|
|
|
if (_configfile) |
|
conf->magicfile = strdup(_configfile); |
|
/* on the first time through we read the magic file */ |
|
result = apprentice(); |
|
if (result == -1) |
|
return; |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
test_table(); |
|
#endif |
|
} |
|
|
|
/* |
|
* The destructor. |
|
* Free the magic-table and other resources. |
|
*/ |
|
KMimeMagic::~KMimeMagic() |
|
{ |
|
if (conf) { |
|
struct magic *p = conf->magic; |
|
struct magic *q; |
|
while (p) { |
|
q = p; |
|
p = p->next; |
|
free(q); |
|
} |
|
free(conf); |
|
} |
|
if (magicResult) |
|
delete magicResult; |
|
} |
|
|
|
bool |
|
KMimeMagic::mergeConfig(const char * _configfile) |
|
{ |
|
int result; |
|
|
|
if (conf) { |
|
char * old_magicfile = conf->magicfile; |
|
|
|
if (_configfile) |
|
conf->magicfile = strdup(_configfile); |
|
else |
|
return false; |
|
result = apprentice(); |
|
if (result == -1) { |
|
conf->magicfile = old_magicfile; |
|
return false; |
|
} |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
test_table(); |
|
#endif |
|
return true; |
|
} |
|
return false; |
|
} |
|
|
|
bool |
|
KMimeMagic::mergeBufConfig(char * _configbuf) |
|
{ |
|
int result; |
|
|
|
if (conf) { |
|
result = buff_apprentice(_configbuf); |
|
if (result == -1) |
|
return false; |
|
#if (MIME_MAGIC_DEBUG > 1) |
|
test_table(); |
|
#endif |
|
return true; |
|
} |
|
return false; |
|
} |
|
|
|
void |
|
KMimeMagic::setFollowLinks( bool _enable ) |
|
{ |
|
followLinks = _enable; |
|
} |
|
|
|
const KMimeMagicResult * |
|
KMimeMagic::findBufferType(const char * buffer, int nbytes) |
|
{ |
|
unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ |
|
|
|
resultBuf.resize(0); |
|
if (magicResult) { |
|
magicResult->setContent(QString(0)); |
|
magicResult->setEncoding(QString(0)); |
|
} else |
|
magicResult = new KMimeMagicResult(); |
|
accuracy = 100; |
|
|
|
if (nbytes > HOWMANY) |
|
nbytes = HOWMANY; |
|
memcpy(buf, buffer, nbytes); |
|
if (nbytes == 0) { |
|
resultBuf += MIME_BINARY_ZEROSIZE; |
|
} else { |
|
buf[nbytes++] = '\0'; /* null-terminate it */ |
|
tryit(buf, nbytes); |
|
} |
|
resultBuf += "\n"; |
|
/* if we have any results, put them in the request structure */ |
|
finishResult(); |
|
magicResult->setAccuracy(accuracy); |
|
return magicResult; |
|
} |
|
|
|
static void |
|
refineResult(KMimeMagicResult *r, const char * _filename) |
|
{ |
|
QString tmp = r->getContent(); |
|
if (tmp.isEmpty()) |
|
return; |
|
if ((strcmp(tmp, "text/x-c") == 0) || |
|
(strcmp(tmp, "text/x-c++") == 0) ) |
|
{ |
|
if ( QString(_filename).right(2) == ".h" ) |
|
tmp += "hdr"; |
|
else |
|
tmp += "src"; |
|
r->setContent(tmp); |
|
} |
|
} |
|
|
|
const KMimeMagicResult * |
|
KMimeMagic::findBufferFileType( const char * buffer, int nbytes, |
|
const char * fn) |
|
{ |
|
KMimeMagicResult * r = (KMimeMagicResult *)findBufferType( buffer, nbytes ); |
|
refineResult(r, fn); |
|
return r; |
|
} |
|
|
|
/* |
|
* Find the content-type of the given file. |
|
*/ |
|
const KMimeMagicResult * |
|
KMimeMagic::findFileType(const char *fn) |
|
{ |
|
resultBuf.resize(0); |
|
if (magicResult) { |
|
magicResult->setContent(QString(0)); |
|
magicResult->setEncoding(QString(0)); |
|
} else |
|
magicResult = new KMimeMagicResult(); |
|
accuracy = 100; |
|
|
|
/* try excluding file-revision suffixes */ |
|
if (!revision_suffix(fn)) { |
|
/* process it based on the file contents */ |
|
process(fn); |
|
} |
|
/* if we have any results, put them in the request structure */ |
|
finishResult(); |
|
magicResult->setAccuracy(accuracy); |
|
refineResult(magicResult, fn); |
|
return magicResult; |
|
}
|
|
|