32
32
// Heuristically rates how likely it is that `input` holds a WordPerfect 1 document.
//
// The stream is rewound to offset 0 and scanned one byte at a time until atEOS():
//   - bytes below 0x20, 0x20..0x7F and 0x80..0xBF are classified by the inline
//     comments as line breaks, plain ASCII and single character function codes;
//   - a byte >= 0xFF makes the function return WPD_CONFIDENCE_NONE;
//   - any other byte is treated as the opening code of a multi character function
//     group, whose size comes from WP1_FUNCTION_GROUP_SIZE[readVal-0xC0]
//     (-1 meaning "variable length"). The group must be closed by the same code
//     at the expected position, otherwise WPD_CONFIDENCE_NONE is returned.
//
// input:          stream to inspect; its read position is changed by this call.
// partialContent: when false, a failed seek inside a fixed length group, or a
//                 mismatching closing code once atEOS() is reached, yields
//                 WPD_CONFIDENCE_NONE.
//
// Returns WPD_CONFIDENCE_POOR when no function group was counted (e.g. a plain
// text file), WPD_CONFIDENCE_EXCELLENT when at least one was counted, and
// WPD_CONFIDENCE_NONE on any of the structural mismatches above.
//
// NOTE(review): this text looks like two revisions of the same body interleaved
// with bare line-number lines, and no brace / else lines are visible, so the
// exact nesting cannot be confirmed from here. The copies differ in the type of
// closingFunctionLength (long vs unsigned long) and in the extra
// `functionLength == 0` rejection.
WPDConfidence WP1Heuristics::isWP1FileFormat(WPXInputStream *input, bool partialContent)
34
int functionGroupCount = 0;
36
WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
37
// always start scanning from the beginning of the stream
input->seek(0, WPX_SEEK_SET);
38
WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
40
while (!input->atEOS())
42
uint8_t readVal = readU8(input);
44
WPD_DEBUG_MSG(("WP1Heuristics, Offset 0x%.8x, value 0x%.2x\n", (unsigned int)input->tell() - 1, readVal));
46
// NOTE(review): the bodies of the three "skip" branches below are not visible here
if (readVal < (uint8_t)0x20)
48
// line breaks et al, skip
50
else if (readVal >= (uint8_t)0x20 && readVal <= (uint8_t)0x7F)
52
// normal ASCII characters, skip
54
else if (readVal >= (uint8_t)0x80 && readVal <= (uint8_t)0xBF)
56
// single character function codes, skip
59
else if (readVal >= (uint8_t)0xFF)
61
// special codes that should not be found as separate functions
62
return WPD_CONFIDENCE_NONE;
66
// multi character function group
67
// check that the size constraints are valid, and that every group_member
68
// is properly closed at the right place
70
// a table entry of -1 marks a variable length group; other entries are used as a fixed size
if (WP1_FUNCTION_GROUP_SIZE[readVal-0xC0] == -1)
72
// variable length function group
74
// We are checking following structure:
75
// <function code>{function length}...{function length}<function code>
76
// that we observed in variable length WP1 functions
36
int functionGroupCount = 0;
38
WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
39
// always start scanning from the beginning of the stream
input->seek(0, WPX_SEEK_SET);
40
WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
42
while (!input->atEOS())
44
uint8_t readVal = readU8(input);
46
WPD_DEBUG_MSG(("WP1Heuristics, Offset 0x%.8x, value 0x%.2x\n", (unsigned int)input->tell() - 1, readVal));
48
// NOTE(review): the bodies of the three "skip" branches below are not visible here
if (readVal < (uint8_t)0x20)
50
// line breaks et al, skip
52
else if (readVal >= (uint8_t)0x20 && readVal <= (uint8_t)0x7F)
54
// normal ASCII characters, skip
56
else if (readVal >= (uint8_t)0x80 && readVal <= (uint8_t)0xBF)
58
// single character function codes, skip
61
else if (readVal >= (uint8_t)0xFF)
63
// special codes that should not be found as separate functions
64
return WPD_CONFIDENCE_NONE;
68
// multi character function group
69
// check that the size constraints are valid, and that every group_member
70
// is properly closed at the right place
72
// a table entry of -1 marks a variable length group; other entries are used as a fixed size
if (WP1_FUNCTION_GROUP_SIZE[readVal-0xC0] == -1)
74
// variable length function group
76
// We are checking following structure:
77
// <function code>{function length}...{function length}<function code>
78
// that we observed in variable length WP1 functions
78
// NOTE(review): the second readU32 argument presumably selects the byte order — confirm against its declaration
unsigned long functionLength = readU32(input, true);
79
// reject lengths above half of the uint32_t range before they are used as a seek offset
if (functionLength > ((std::numeric_limits<uint32_t>::max)() / 2))
80
return WPD_CONFIDENCE_NONE;
81
long closingFunctionLength = 0;
82
WPD_DEBUG_MSG(("WP1Heuristics functionLength = 0x%.8x\n", (unsigned int)functionLength));
80
// NOTE(review): the second readU32 argument presumably selects the byte order — confirm against its declaration
unsigned long functionLength = readU32(input, true);
81
// reject lengths above half of the uint32_t range before they are used as a seek offset
if (functionLength > ((std::numeric_limits<uint32_t>::max)() / 2))
82
return WPD_CONFIDENCE_NONE;
83
// this copy additionally rejects an empty variable length group
if (functionLength == 0)
84
return WPD_CONFIDENCE_NONE;
85
unsigned long closingFunctionLength = 0;
86
WPD_DEBUG_MSG(("WP1Heuristics functionLength = 0x%.8x\n", (unsigned int)functionLength));
84
// skip the group payload; the length must be repeated right after it
input->seek(functionLength, WPX_SEEK_CUR);
85
closingFunctionLength = readU32(input, true);
86
WPD_DEBUG_MSG(("WP1Heuristics closingFunctionLength = 0x%.8x\n", (unsigned int)closingFunctionLength));
87
if (functionLength != closingFunctionLength)
88
return WPD_CONFIDENCE_NONE;
88
// skip the group payload; the length must be repeated right after it
input->seek(functionLength, WPX_SEEK_CUR);
89
closingFunctionLength = readU32(input, true);
90
WPD_DEBUG_MSG(("WP1Heuristics closingFunctionLength = 0x%.8x\n", (unsigned int)closingFunctionLength));
91
if (functionLength != closingFunctionLength)
92
return WPD_CONFIDENCE_NONE;
90
uint8_t closingGate = 0;
94
uint8_t closingGate = 0;
97
// the byte following the repeated length must be the same function code that opened the group
closingGate = readU8(input);
98
WPD_DEBUG_MSG(("WP1Heuristics closingGate = 0x%.2x\n", closingGate));
99
if (closingGate != readVal)
100
return WPD_CONFIDENCE_NONE;
103
// when passed the complete file, we don't allow for open groups when we've reached EOF
104
if (!partialContent && input->atEOS() && (closingGate != readVal))
105
return WPD_CONFIDENCE_NONE;
107
functionGroupCount++;
93
// the byte following the repeated length must be the same function code that opened the group
closingGate = readU8(input);
94
WPD_DEBUG_MSG(("WP1Heuristics closingGate = 0x%.2x\n", closingGate));
95
if (closingGate != readVal)
96
return WPD_CONFIDENCE_NONE;
111
// fixed length function group
113
// seek to the position where the closing gate should be
114
// a non-zero `res` is treated below as a failed seek
int res = input->seek(WP1_FUNCTION_GROUP_SIZE[readVal-0xC0]-2, WPX_SEEK_CUR);
115
// when passed the complete file, we should be able to do that
116
if (!partialContent && res)
117
return WPD_CONFIDENCE_NONE;
119
// read the closing gate
120
uint8_t readNextVal = readU8(input);
121
if (readNextVal != readVal)
122
return WPD_CONFIDENCE_NONE;
124
functionGroupCount++;
99
// when passed the complete file, we don't allow for open groups when we've reached EOF
100
if (!partialContent && input->atEOS() && (closingGate != readVal))
101
return WPD_CONFIDENCE_NONE;
103
functionGroupCount++;
107
// fixed length function group
109
// seek to the position where the closing gate should be
110
// a non-zero `res` is treated below as a failed seek
int res = input->seek(WP1_FUNCTION_GROUP_SIZE[readVal-0xC0]-2, WPX_SEEK_CUR);
111
// when passed the complete file, we should be able to do that
112
if (!partialContent && res)
113
return WPD_CONFIDENCE_NONE;
115
// read the closing gate
116
uint8_t readNextVal = readU8(input);
117
if (readNextVal != readVal)
118
return WPD_CONFIDENCE_NONE;
120
functionGroupCount++;
125
/* When we get here, the document is in a format that we *could* import properly.
126
However, if we didn't encounter a single WP1 function group, we need to be more careful:
127
this would be the case when passed a plaintext file for example, which libwpd is not
128
supposed to handle. */
129
if (!functionGroupCount)
130
return WPD_CONFIDENCE_POOR;
129
/* When we get here, the document is in a format that we *could* import properly.
130
However, if we didn't encounter a single WP1 function group, we need to be more careful:
131
this would be the case when passed a plaintext file for example, which libwpd is not
132
supposed to handle. */
133
if (!functionGroupCount)
134
return WPD_CONFIDENCE_POOR;
132
return WPD_CONFIDENCE_EXCELLENT;
136
return WPD_CONFIDENCE_EXCELLENT;
140
// NOTE(review): the construct guarding this final return is not visible here — presumably a failure path; confirm
return WPD_CONFIDENCE_NONE;