~ubuntu-branches/ubuntu/karmic/libwpd/karmic

« back to all changes in this revision

Viewing changes to src/lib/WP1Heuristics.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Rene Engelhard
  • Date: 2007-06-15 13:28:41 UTC
  • mfrom: (1.1.9 upstream)
  • Revision ID: james.westby@ubuntu.com-20070615132841-00nybwftc708w96n
Tags: 0.8.10-1
* New upstream release
* bump shlibs for libwpd-stream8c2a 

Show diffs side-by-side

added added

removed removed

Lines of Context:
31
31
 
32
32
WPDConfidence WP1Heuristics::isWP1FileFormat(WPXInputStream *input, bool partialContent)
33
33
{
34
 
        int functionGroupCount = 0;
35
 
        
36
 
        WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
37
 
        input->seek(0, WPX_SEEK_SET);
38
 
        WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
39
 
        
40
 
        while (!input->atEOS())
 
34
        try
41
35
        {
42
 
                uint8_t readVal = readU8(input);
43
 
 
44
 
                WPD_DEBUG_MSG(("WP1Heuristics, Offset 0x%.8x, value 0x%.2x\n", (unsigned int)input->tell() - 1, readVal));
45
 
                
46
 
                if (readVal < (uint8_t)0x20)
47
 
                {
48
 
                        // line breaks et al, skip
49
 
                }
50
 
                else if (readVal >= (uint8_t)0x20 && readVal <= (uint8_t)0x7F)
51
 
                {
52
 
                        // normal ASCII characters, skip                        
53
 
                }
54
 
                else if (readVal >= (uint8_t)0x80 && readVal <= (uint8_t)0xBF)
55
 
                {
56
 
                        // single character function codes, skip
57
 
                        functionGroupCount++;
58
 
                }
59
 
                else if (readVal >= (uint8_t)0xFF)
60
 
                {
61
 
                        // special codes that should not be found as separate functions
62
 
                        return WPD_CONFIDENCE_NONE;
63
 
                }
64
 
                else 
65
 
                {
66
 
                        // multi character function group
67
 
                        // check that the size constraints are valid, and that every group_member
68
 
                        // is properly closed at the right place
69
 
                
70
 
                        if (WP1_FUNCTION_GROUP_SIZE[readVal-0xC0] == -1)
71
 
                        {
72
 
                                // variable length function group
73
 
 
74
 
                                // We are checking following structure:
75
 
                                //   <function code>{function length}...{function length}<function code>
76
 
                                //   that we observed in variable length WP1 functions 
 
36
                int functionGroupCount = 0;
 
37
        
 
38
                WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
 
39
                input->seek(0, WPX_SEEK_SET);
 
40
                WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));
 
41
        
 
42
                while (!input->atEOS())
 
43
                {
 
44
                        uint8_t readVal = readU8(input);
 
45
 
 
46
                        WPD_DEBUG_MSG(("WP1Heuristics, Offset 0x%.8x, value 0x%.2x\n", (unsigned int)input->tell() - 1, readVal));
 
47
                
 
48
                        if (readVal < (uint8_t)0x20)
 
49
                        {
 
50
                                // line breaks et al, skip
 
51
                        }
 
52
                        else if (readVal >= (uint8_t)0x20 && readVal <= (uint8_t)0x7F)
 
53
                        {
 
54
                                // normal ASCII characters, skip                        
 
55
                        }
 
56
                        else if (readVal >= (uint8_t)0x80 && readVal <= (uint8_t)0xBF)
 
57
                        {
 
58
                                // single character function codes, skip
 
59
                                functionGroupCount++;
 
60
                        }
 
61
                        else if (readVal >= (uint8_t)0xFF)
 
62
                        {
 
63
                                // special codes that should not be found as separate functions
 
64
                                return WPD_CONFIDENCE_NONE;
 
65
                        }
 
66
                        else 
 
67
                        {
 
68
                                // multi character function group
 
69
                                // check that the size constraints are valid, and that every group_member
 
70
                                // is properly closed at the right place
 
71
                
 
72
                                if (WP1_FUNCTION_GROUP_SIZE[readVal-0xC0] == -1)
 
73
                                {
 
74
                                        // variable length function group
 
75
 
 
76
                                        // We are checking following structure:
 
77
                                        //   <function code>{function length}...{function length}<function code>
 
78
                                        //   that we observed in variable length WP1 functions 
77
79
                                
78
 
                                unsigned long functionLength = readU32(input, true);
79
 
                                if (functionLength > ((std::numeric_limits<uint32_t>::max)() / 2))
80
 
                                        return WPD_CONFIDENCE_NONE;
81
 
                                long closingFunctionLength = 0;
82
 
                                WPD_DEBUG_MSG(("WP1Heuristics functionLength = 0x%.8x\n", (unsigned int)functionLength));
 
80
                                        unsigned long functionLength = readU32(input, true);
 
81
                                        if (functionLength > ((std::numeric_limits<uint32_t>::max)() / 2))
 
82
                                                return WPD_CONFIDENCE_NONE;
 
83
                                        if (functionLength == 0)
 
84
                                                return WPD_CONFIDENCE_NONE;
 
85
                                        unsigned long closingFunctionLength = 0;
 
86
                                        WPD_DEBUG_MSG(("WP1Heuristics functionLength = 0x%.8x\n", (unsigned int)functionLength));
83
87
 
84
 
                                input->seek(functionLength, WPX_SEEK_CUR);
85
 
                                closingFunctionLength = readU32(input, true);
86
 
                                WPD_DEBUG_MSG(("WP1Heuristics closingFunctionLength = 0x%.8x\n", (unsigned int)closingFunctionLength));
87
 
                                if (functionLength != closingFunctionLength)
88
 
                                        return WPD_CONFIDENCE_NONE;
 
88
                                        input->seek(functionLength, WPX_SEEK_CUR);
 
89
                                        closingFunctionLength = readU32(input, true);
 
90
                                        WPD_DEBUG_MSG(("WP1Heuristics closingFunctionLength = 0x%.8x\n", (unsigned int)closingFunctionLength));
 
91
                                        if (functionLength != closingFunctionLength)
 
92
                                                return WPD_CONFIDENCE_NONE;
89
93
                                        
90
 
                                uint8_t closingGate = 0;
91
 
                                if (!input->atEOS())
 
94
                                        uint8_t closingGate = 0;
 
95
                                        if (!input->atEOS())
 
96
                                        {
 
97
                                                closingGate = readU8(input);
 
98
                                                WPD_DEBUG_MSG(("WP1Heuristics closingGate = 0x%.2x\n", closingGate));
 
99
                                                if (closingGate != readVal)
 
100
                                                        return WPD_CONFIDENCE_NONE;
 
101
                                        }
 
102
 
 
103
                                        // when passed the complete file, we don't allow for open groups when we've reached EOF
 
104
                                        if (!partialContent && input->atEOS() && (closingGate != readVal))
 
105
                                                return WPD_CONFIDENCE_NONE;
 
106
                                
 
107
                                        functionGroupCount++;
 
108
                                }
 
109
                                else
92
110
                                {
93
 
                                        closingGate = readU8(input);
94
 
                                        WPD_DEBUG_MSG(("WP1Heuristics closingGate = 0x%.2x\n", closingGate));
95
 
                                        if (closingGate != readVal)
96
 
                                                return WPD_CONFIDENCE_NONE;
 
111
                                        // fixed length function group
 
112
                                
 
113
                                        // seek to the position where the closing gate should be
 
114
                                        int res = input->seek(WP1_FUNCTION_GROUP_SIZE[readVal-0xC0]-2, WPX_SEEK_CUR);
 
115
                                        // when passed the complete file, we should be able to do that
 
116
                                        if (!partialContent && res)
 
117
                                                return WPD_CONFIDENCE_NONE;
 
118
                                
 
119
                                        // read the closing gate
 
120
                                        uint8_t readNextVal = readU8(input);
 
121
                                        if (readNextVal != readVal)
 
122
                                                return WPD_CONFIDENCE_NONE;
 
123
                                
 
124
                                        functionGroupCount++;
97
125
                                }
98
 
 
99
 
                                // when passed the complete file, we don't allow for open groups when we've reached EOF
100
 
                                if (!partialContent && input->atEOS() && (closingGate != readVal))
101
 
                                        return WPD_CONFIDENCE_NONE;
102
 
                                
103
 
                                functionGroupCount++;
104
 
                        }
105
 
                        else
106
 
                        {
107
 
                                // fixed length function group
108
 
                                
109
 
                                // seek to the position where the closing gate should be
110
 
                                int res = input->seek(WP1_FUNCTION_GROUP_SIZE[readVal-0xC0]-2, WPX_SEEK_CUR);
111
 
                                // when passed the complete file, we should be able to do that
112
 
                                if (!partialContent && res)
113
 
                                        return WPD_CONFIDENCE_NONE;
114
 
                                
115
 
                                // read the closing gate
116
 
                                uint8_t readNextVal = readU8(input);
117
 
                                if (readNextVal != readVal)
118
 
                                        return WPD_CONFIDENCE_NONE;
119
 
                                
120
 
                                functionGroupCount++;
121
 
                        }
122
 
                }
123
 
        }       
124
 
 
125
 
        /* When we get here, the document is in a format that we *could* import properly.
126
 
        However, if we didn't entcounter a single WP4.2 function group) we need to be more carefull:
127
 
        this would be the case when passed a plaintext file for example, which libwpd is not
128
 
        supposed to handle. */
129
 
        if (!functionGroupCount)
130
 
                return WPD_CONFIDENCE_POOR;
 
126
                        }
 
127
                }       
 
128
 
 
129
                /* When we get here, the document is in a format that we *could* import properly.
 
130
                However, if we didn't encounter a single WP1 function group, we need to be more careful:
 
131
                this would be the case when passed a plaintext file for example, which libwpd is not
 
132
                supposed to handle. */
 
133
                if (!functionGroupCount)
 
134
                        return WPD_CONFIDENCE_POOR;
131
135
        
132
 
        return WPD_CONFIDENCE_EXCELLENT;
 
136
                return WPD_CONFIDENCE_EXCELLENT;
 
137
        }
 
138
        catch (...)
 
139
        {
 
140
                return WPD_CONFIDENCE_NONE;
 
141
        }
133
142
}