2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
* Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
6
* The contents of this file are subject to the terms of either the GNU
7
* General Public License Version 2 only ("GPL") or the Common
8
* Development and Distribution License("CDDL") (collectively, the
9
* "License"). You may not use this file except in compliance with the
10
* License. You can obtain a copy of the License at
11
* http://www.netbeans.org/cddl-gplv2.html
12
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
13
* specific language governing permissions and limitations under the
14
* License. When distributing the software, include this License Header
15
* Notice in each file and include the License file at
16
* nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
17
* particular file as subject to the "Classpath" exception as provided
18
* by Sun in the GPL Version 2 section of the License file that
19
* accompanied this code. If applicable, add the following below the
20
* License Header, with the fields enclosed by brackets [] replaced by
21
* your own identifying information:
22
* "Portions Copyrighted [year] [name of copyright owner]"
26
* The Original Software is NetBeans. The Initial Developer of the Original
27
* Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
28
* Microsystems, Inc. All Rights Reserved.
30
* If you wish your version of this file to be governed by only the CDDL
31
* or only the GPL Version 2, indicate your decision by adding
32
* "[Contributor] elects to include this software in this distribution
33
* under the [CDDL or GPL Version 2] license." If you do not indicate a
34
* single choice of license, a recipient has the option to distribute
35
* your version of this file under either the CDDL, the GPL Version 2 or
36
* to extend the choice of license to its licensees as provided above.
37
* However, if you add GPL Version 2 code and therefore, elected the GPL
38
* Version 2 license, then the option applies only if the new code is
39
* made subject to such option by the copyright holder.
42
package org.netbeans.api.lexer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.netbeans.lib.lexer.LanguageManager;
import org.netbeans.lib.lexer.LanguageOperation;
import org.netbeans.lib.lexer.LexerApiPackageAccessor;
import org.netbeans.lib.lexer.LexerSpiPackageAccessor;
import org.netbeans.lib.lexer.TokenIdSet;
import org.netbeans.lib.lexer.TokenHierarchyOperation;
import org.netbeans.lib.lexer.TokenList;
import org.netbeans.lib.lexer.inc.TokenChangeInfo;
import org.netbeans.lib.lexer.inc.TokenHierarchyEventInfo;
import org.netbeans.lib.lexer.inc.TokenListChange;
import org.netbeans.spi.lexer.LanguageHierarchy;
/**
 * Language describes a set of token ids
 * that comprise the given language.
 *
 * <p>Each language corresponds to a certain mime-type.
 *
 * <p>An input source may be lexed by using an existing language
 * - see {@link TokenHierarchy} which is an entry point into the Lexer API.
 *
 * <p>Language hierarchy is represented by an unmodifiable set of {@link TokenId}s
 * that can be retrieved by {@link #tokenIds()} and token categories
 * {@link #tokenCategories()}.
 *
 * <p>The language cannot be instantiated directly.
 * Instead it should be obtained from {@link LanguageHierarchy#language()}
 * on an existing language hierarchy.
 *
 * @see LanguageHierarchy
 *
 * @author Miloslav Metelka
 */
90
public final class Language<T extends TokenId> {
static {
    // Register an Accessor instance with LexerApiPackageAccessor when this
    // class is initialized, so that the lexer implementation classes can
    // reach the package-private members of this package.
    LexerApiPackageAccessor.register(new Accessor());
}
96
private final LanguageHierarchy<T> languageHierarchy;
98
private final LanguageOperation<T> languageOperation;
100
private String mimeType;
102
private final int maxOrdinal;
104
private final Set<T> ids;
106
/** Lazily inited indexed ids for quick translation of ordinal to token id. */
107
private TokenIdSet<T> indexedIds;
109
private final Map<String,T> idName2id;
112
* Map of category to ids that it contains.
114
private final Map<String,Set<T>> cat2ids;
117
* Lists of token categories for particular id.
119
* It's a list because it is ordered (primary category is first).
121
private List<String>[] id2cats;
124
* Lists of non-primary token categories for particular id.
126
* It's a list because the order might be important
127
* (e.g. for syntax coloring information resolving) although
128
* the present SPI does not utilize that.
130
private List<String>[] id2nonPrimaryCats;
133
* Finds a language by its mime type.
135
* <p>This method uses information from <code>LanguageProvider</code>s registered
136
* in the default lookup to find <code>Language</code> for a given
139
* <div class="nonnormative">
140
* <p>Netbeans provide an implementation of <code>LanguageProvider</code>
141
* that reads data from the <code>Editors</code> folder on the system filesystem.
142
* Therefore Netbeans modules can register their <code>Language</code>s
143
* in MimeLookup as any other mime-type related service.
145
* @param mimeType The mime type of a language that you want to find.
146
* @return The <code>Language</code> registered
147
* for the given <code>mimeType</code>.
149
public static Language<?> find(String mimeType) {
150
return LanguageManager.getInstance().findLanguage(mimeType);
154
* Construct language by providing a collection of token ids
155
* that comprise the language and extra categories into which the token ids belong.
157
* @param languageHierarchy non-null language hierarchy is in one-to-one relationship
158
* with the language and represents it on SPI side.
159
* @throws IndexOutOfBoundsException if any token id's ordinal is < 0.
161
Language(LanguageHierarchy<T> languageHierarchy) {
162
this.languageHierarchy = languageHierarchy;
163
this.languageOperation = new LanguageOperation<T>(languageHierarchy, this);
164
mimeType = LexerSpiPackageAccessor.get().mimeType(languageHierarchy);
165
checkMimeTypeValid(mimeType);
166
// Create ids and find max ordinal
167
Collection<T> createdIds = LexerSpiPackageAccessor.get().createTokenIds(languageHierarchy);
168
if (createdIds == null)
169
throw new IllegalArgumentException("Ids cannot be null"); // NOI18N
170
maxOrdinal = TokenIdSet.findMaxOrdinal(createdIds);
172
// Convert collection of ids to efficient indexed Set<T>
173
if (createdIds instanceof EnumSet) {
174
ids = (Set<T>)createdIds;
175
} else { // not EnumSet
176
ids = new TokenIdSet<T>(createdIds, maxOrdinal, true);
179
// Create TokenIdSet instances for token categories
180
Map<String,Collection<T>> createdCat2ids
181
= LexerSpiPackageAccessor.get().createTokenCategories(languageHierarchy);
182
if (createdCat2ids == null) {
183
createdCat2ids = Collections.emptyMap();
185
cat2ids = new HashMap<String,Set<T>>((int)(createdCat2ids.size() / 0.73f));
186
for (Map.Entry<String,Collection<T>> entry : createdCat2ids.entrySet()) {
187
Collection<T> createdCatIds = entry.getValue();
188
TokenIdSet.checkIdsFromLanguage(createdCatIds, ids);
189
// Do not use the original createdCatIds set because of the following:
190
// 1) Two token categories will have the same sets of contained ids
191
// in the createdCatIds map (the same physical Set instance).
192
// 2) At least one token id will have one of the two categories
193
// as its primary category.
194
// 3) If the original Set instance from the createdCatIds would be used
195
// then both categories would incorrectly contain the extra id(s).
196
Set<T> catIds = new TokenIdSet<T>(createdCatIds, maxOrdinal, false);
197
cat2ids.put(entry.getKey(), catIds);
200
// Walk through all ids and check duplicate names and primary categories
201
idName2id = new HashMap<String,T>((int)(ids.size() / 0.73f));
203
T sameNameId = idName2id.put(id.name(), id);
204
if (sameNameId != null && sameNameId != id) { // token ids with same name
205
throw new IllegalArgumentException(id +
206
" has duplicate name with " + sameNameId);
209
String cat = id.primaryCategory();
211
Set<T> catIds = cat2ids.get(cat);
212
if (catIds == null) {
213
catIds = new TokenIdSet<T>(null, maxOrdinal, false);
214
cat2ids.put(cat, catIds);
222
* Get unmodifiable set of ids contained in this language.
224
* An iterator over the set returns the ids sorted by their ordinals.
226
* @return unmodifiable set of ids contained in this language.
228
public Set<T> tokenIds() {
233
* Get tokenId for the given ordinal. This method
234
* can be used by lexers to quickly translate ordinal
236
* @param ordinal ordinal to be translated to corresponding tokenId.
237
* @return valid tokenId or null if there's no corresponding
238
* tokenId for the given int-id. It's possible because intIds
239
* of the language's token ids do not need to be continuous.
240
* If the ordinal is <0 or higher than the highest
241
* ordinal of all the token ids of this language the method
242
* throws {@link IndexOutOfBoundsException}.
243
* @throws IndexOutOfBoundsException if the ordinal is
244
* <0 or higher than {@link #maxOrdinal()}.
246
public T tokenId(int ordinal) {
247
synchronized (idName2id) {
248
if (indexedIds == null) {
249
if (ids instanceof EnumSet) {
250
indexedIds = new TokenIdSet<T>(ids, maxOrdinal, false);
251
} else { // not EnumSet
252
indexedIds = (TokenIdSet<T>)ids;
255
return indexedIds.indexedIds()[ordinal];
260
* Similar to {@link #tokenId(int)} however it guarantees
261
* that it will always return non-null tokenId. Typically for a lexer
262
* just being developed it's possible that there are some integer
263
* token ids defined in the generated lexer for which there is
264
* no correspondence in the language. The lexer wrapper should
265
* always call this method if it expects to find a valid
266
* counterpart for given integer id.
267
* @param ordinal ordinal to translate to token id.
268
* @return always non-null tokenId that corresponds to the given integer id.
269
* @throws IndexOutOfBoundsException if the ordinal is
270
* <0 or higher than {@link #maxOrdinal()} or when there is no corresponding
273
public T validTokenId(int ordinal) {
274
T id = tokenId(ordinal);
276
throw new IndexOutOfBoundsException("No tokenId for ordinal=" + ordinal
277
+ " in language " + this);
283
* Find the tokenId from its name.
284
* @param name name of the tokenId to find.
285
* @return tokenId with the requested name or null if it does not exist.
287
public T tokenId(String name) {
288
return idName2id.get(name);
292
* Similar to {@link #tokenId(String)} but guarantees a valid tokenId to be returned.
293
* @throws IllegalArgumentException if no token in this language has the given name.
295
public T validTokenId(String name) {
296
T id = tokenId(name);
298
throw new IllegalArgumentException("No tokenId for name=\"" + name
299
+ "\" in language " + this);
305
* Get maximum ordinal of all the token ids that this language contains.
306
* @return maximum integer ordinal of all the token ids that this language contains
307
* or <code>-1</code> if the language contains no token ids.
309
public int maxOrdinal() {
314
* Get names of all token categories of this language.
316
* @return unmodifiable set containing names of all token categories
317
* contained in this language.
319
public Set<String> tokenCategories() {
320
return Collections.unmodifiableSet(cat2ids.keySet());
324
* Get members of the category with given name.
326
* @param tokenCategory non-null name of the category.
327
* @return set of token ids belonging to the given category.
329
public Set<T> tokenCategoryMembers(String tokenCategory) {
330
return Collections.unmodifiableSet(cat2ids.get(tokenCategory));
334
* Get list of all token categories for the particular token id.
336
* @return non-null unmodifiable list of all token categories for the particular token id.
338
* Primary token's category (if defined for the token id) will be contained
339
* as first one in the list.
340
* @throws IllegalArgumentException if the given token id does not belong
343
public List<String> tokenCategories(T tokenId) {
344
checkMemberId(tokenId);
345
synchronized (idName2id) {
346
if (id2cats == null) {
347
buildTokenIdCategories();
349
return id2cats[tokenId.ordinal()];
354
* Get list of non-primary token categories (not containing the primary category)
355
* for the particular token id.
357
* If the token id has no primary category defined then the result
358
* of this method is equal to {@link #tokenCategories(TokenId)}.
360
* @return non-null unmodifiable list of secondary token categories for the particular token id.
361
* Primary token's category (if defined for the token id) will not be contained
363
* @throws IllegalArgumentException if the given token id does not belong
366
public List<String> nonPrimaryTokenCategories(T tokenId) {
367
checkMemberId(tokenId);
368
synchronized (idName2id) {
369
if (id2nonPrimaryCats == null) {
370
buildTokenIdCategories();
372
return id2nonPrimaryCats[tokenId.ordinal()];
377
* Merge two collections of token ids from this language
378
* into an efficient indexed set (the implementation similar
379
* to {@link java.util.EnumSet}).
381
* @param tokenIds1 non-null collection of token ids to be contained in the returned set.
382
* @param tokenIds2 collection of token ids to be contained in the returned set.
383
* @return set of token ids indexed by their ordinal number.
385
public Set<T> merge(Collection<T> tokenIds1, Collection<T> tokenIds2) {
386
TokenIdSet.checkIdsFromLanguage(tokenIds1, ids);
387
// Cannot retain EnumSet as tokenIds will already be wrapped
388
// by unmodifiableSet()
389
Set<T> ret = new TokenIdSet<T>(tokenIds1, maxOrdinal, false);
390
if (tokenIds2 != null) {
391
TokenIdSet.checkIdsFromLanguage(tokenIds2, ids);
392
ret.addAll(tokenIds2);
398
* Gets the mime type of this language.
400
* @return non-null language's mime type.
402
public String mimeType() {
406
/** The languages are equal only if they are the same objects. */
407
public @Override boolean equals(Object obj) {
408
return super.equals(obj);
411
/** The hashCode of the language is the identity hashCode. */
412
public @Override int hashCode() {
413
return super.hashCode();
416
private void buildTokenIdCategories() {
418
// List for decreasing of the number of created maps
419
// for tokenId2category mappings.
420
// List.get(0) is a Map[category, list-of-[category]].
421
// List.get(1) is a Map[category1, Map[category2, list-of-[category1;category2]]].
423
List<Map<String,Object>> catMapsList = new ArrayList<Map<String,Object>>(4);
424
// All categories for a single token id
425
List<String> idCats = new ArrayList<String>(4);
427
// No extra sorting of the categories in which the particular id is contained
428
// - making explicit order of the categories could possibly be accomplished
429
// in the future if necessary by supporting some extra hints
430
// Add all the categories for the particular id into idCats
431
for (Map.Entry<String,Set<T>> e : cat2ids.entrySet()) {
432
if (e.getValue().contains(id)) {
433
idCats.add(e.getKey()); // Add this category to id's categories
436
// Assign both non-primary cats and all cats
437
id2cats[id.ordinal()] = findCatList(catMapsList, idCats, 0);
438
id2nonPrimaryCats[id.ordinal()] = findCatList(catMapsList, idCats, 1);
440
idCats.clear(); // reuse the list (is cloned if added to catMapsList)
445
* Find the cached list of categories from the catMapsList
446
* for the particular temporarily collected list of categories.
448
* @param catMapsList non-null list of cached maps.
450
* List.get(0) is a Map[category, list-containing-[category]].
452
* List.get(1) is a Map[category1, Map[category2, list-containing-[category1;category2]]].
456
* @param idCats non-null temporarily collected list of categories for the particular id.
457
* It may be modified after this method gets finished.
458
* @param startIndex >=0 starting index in idCats - either 0 for returning
459
* of all categories or 1 for returning non-primary categories.
460
* @return non-null cached list of categories with contents equal to idCats.
462
private static List<String> findCatList(List<Map<String,Object>> catMapsList, List<String> idCats, int startIndex) {
463
int size = idCats.size() - startIndex;
465
return Collections.emptyList();
467
while (catMapsList.size() < size) {
468
catMapsList.add(new HashMap<String,Object>());
470
// Find the catList as the last item in the cascaded search through the maps
471
Map<String,Object> m = catMapsList.get(--size);
472
for (int i = startIndex; i < size; i++) {
473
@SuppressWarnings("unchecked")
474
Map<String,Object> catMap = (Map<String,Object>)m.get(idCats.get(i));
475
if (catMap == null) {
476
catMap = new HashMap<String,Object>();
477
// Map<String,Map<String,Object>>
478
m.put(idCats.get(i), catMap);
483
@SuppressWarnings("unchecked")
484
List<String> catList = (List<String>)m.get(idCats.get(size));
485
if (catList == null) {
486
catList = new ArrayList<String>(idCats.size() - startIndex);
487
catList.addAll((startIndex > 0)
488
? idCats.subList(startIndex, idCats.size())
490
m.put(idCats.get(size), catList);
495
@SuppressWarnings("unchecked")
496
private void assignCatArrays() {
497
id2cats = (List<String>[])new List[maxOrdinal + 1];
498
id2nonPrimaryCats = (List<String>[])new List[maxOrdinal + 1];
502
* Dump list of token ids for this language into string.
504
* @return dump of contents of this language.
506
public String dumpInfo() {
507
StringBuilder sb = new StringBuilder();
510
List<String> cats = tokenCategories(id);
511
if (cats.size() > 0) {
513
for (int i = 0; i < cats.size(); i++) {
517
String cat = (String)cats.get(i);
524
return ids.toString();
527
public String toString() {
528
return mimeType + ", LH: " + languageHierarchy;
531
private void checkMemberId(T id) {
532
if (!ids.contains(id)) {
533
throw new IllegalArgumentException(id + " does not belong to language " + this); // NOI18N
537
private static void checkMimeTypeValid(String mimeType) {
538
if (mimeType == null) {
539
throw new IllegalStateException("mimeType cannot be null"); // NOI18N
541
int slashIndex = mimeType.indexOf('/');
542
if (slashIndex == -1) { // no slash
543
throw new IllegalStateException("mimeType=" + mimeType + " does not contain '/'"); // NOI18N
545
if (mimeType.indexOf('/', slashIndex + 1) != -1) {
546
throw new IllegalStateException("mimeType=" + mimeType + " contains more than one '/'"); // NOI18N
551
* Return language hierarchy associated with this language.
553
* This method is for API package accessor only.
555
LanguageHierarchy<T> languageHierarchy() {
556
return languageHierarchy;
559
LanguageOperation<T> languageOperation() {
560
return languageOperation;
564
* Accessor of package-private things in this package
565
* that need to be used by the lexer implementation classes.
567
private static final class Accessor extends LexerApiPackageAccessor {
569
public <T extends TokenId> Language<T> createLanguage(
570
LanguageHierarchy<T> languageHierarchy) {
571
return new Language<T>(languageHierarchy);
574
public <T extends TokenId> LanguageHierarchy<T> languageHierarchy(
575
Language<T> language) {
576
return language.languageHierarchy();
579
public <T extends TokenId> LanguageOperation<T> languageOperation(
580
Language<T> language) {
581
return language.languageOperation();
584
public <I> TokenHierarchy<I> createTokenHierarchy(
585
TokenHierarchyOperation<I,?> tokenHierarchyOperation) {
586
return new TokenHierarchy<I>(tokenHierarchyOperation);
589
public TokenHierarchyEvent createTokenChangeEvent(
590
TokenHierarchyEventInfo info) {
591
return new TokenHierarchyEvent(info);
594
public <T extends TokenId> TokenChange<T> createTokenChange(
595
TokenChangeInfo<T> info) {
596
return new TokenChange<T>(info);
599
public <T extends TokenId> TokenChangeInfo<T> tokenChangeInfo(
600
TokenChange<T> tokenChange) {
601
return tokenChange.info();
604
public <I> TokenHierarchyOperation<I,?> tokenHierarchyOperation(
605
TokenHierarchy<I> tokenHierarchy) {
606
return tokenHierarchy.operation();
609
public <T extends TokenId> TokenSequence<T> createTokenSequence(TokenList<T> tokenList) {
610
return new TokenSequence<T>(tokenList);