privatevoidscanToNextToken() { // If there is a byte order mark (BOM) at the beginning of the stream, // forward past it. if (reader.getIndex() == 0 && reader.peek() == 0xFEFF) {//跳过文件开头可能存在的bom头 reader.forward(); } booleanfound=false; while (!found) { intff=0; // Peek ahead until we find the first non-space character, then // move forward directly to that character. while (reader.peek(ff) == ' ') { ff++; } if (ff > 0) { reader.forward(ff); } // If the character we have skipped forward to is a comment (#), // then peek ahead until we find the next end of line. YAML // comments are from a # to the next new-line. We then forward // past the comment. if (reader.peek() == '#') {//这一行被注释,直接跳过 ff = 0; while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {//跳过一些字符,具体为什么还没弄明白 ff++; } if (ff > 0) { reader.forward(ff); } } // If we scanned a line break, then (depending on flow level), // simple keys may be allowed. if (scanLineBreak().length() != 0) {// found a line-break 处理各种情况的换行符,这里涉及到\u2029之类的特殊字符编码,需要去学习 if (this.flowLevel == 0) { // Simple keys are allowed at flow-level 0 after a line // break this.allowSimpleKey = true; } } else { found = true; } } }
privatevoidfetchMoreTokens() { // Eat whitespaces and comments until we reach the next token. scanToNextToken(); // Remove obsolete possible simple keys. 删除一些不再使用的 simple keys,不能理解这个simple keys是什么,在debug中,这个方法实际没有具体执行内容 stalePossibleSimpleKeys(); // Compare the current indentation and column. It may add some tokens // and decrease the current indentation level. 根据当前数据指针所在列,确定缩进级别;这里有个flowLevel决定要不要处理缩进;这个方法里有一些细节,建议先看下文解析;只处理缩进减小是因为如果缩进会增大,那么在下面的判断第一个字符获取token的时候就会重新定位缩进 unwindIndent(reader.getColumn()); // Peek the next code point, to decide what the next group of tokens // will look like. intc= reader.peek();//此时拿到的应该是一行数据第一个有意义的字符 switch (c) { //接下来首先要判断是不是一些关键字 case'\0' // Is it the end of stream? 可能读取到了一行回车,或者只有空白字符的数据 fetchStreamEnd(); return; case'%': // Is it a directive? 指令 if (checkDirective()) { fetchDirective(); return; } break; case'-': // Is it the document start? if (checkDocumentStart()) { fetchDocumentStart(); return; // Is it the block entry indicator? } elseif (checkBlockEntry()) { fetchBlockEntry(); return; } break; case'.': // Is it the document end? if (checkDocumentEnd()) { fetchDocumentEnd(); return; } break; // TODO support for BOM within a stream. (not implemented in PyYAML) case'[': // Is it the flow sequence start indicator? fetchFlowSequenceStart(); return; case'{': // Is it the flow mapping start indicator? fetchFlowMappingStart(); return; case']': // Is it the flow sequence end indicator? fetchFlowSequenceEnd(); return; case'}': // Is it the flow mapping end indicator? fetchFlowMappingEnd(); return; case',': // Is it the flow entry indicator? fetchFlowEntry(); return; // see block entry indicator above case'?': // Is it the key indicator? if (checkKey()) { fetchKey(); return; } break; case':': // Is it the value indicator? if (checkValue()) { fetchValue(); return; } break; case'*': // Is it an alias? fetchAlias(); return; case'&': // Is it an anchor? fetchAnchor(); return; case'!': // Is it a tag? 
fetchTag(); return; case'|': // Is it a literal scalar? if (this.flowLevel == 0) { fetchLiteral(); return; } break; case'>': // Is it a folded scalar? if (this.flowLevel == 0) { fetchFolded(); return; } break; case'\'': // Is it a single quoted scalar? fetchSingle(); return; case'"': // Is it a double quoted scalar? fetchDouble(); return; } // It must be a plain scalar then. 此时是一个普通字符 if (checkPlain()) { fetchPlain(); return; } // No? It's an error. Let's produce a nice error message.We do this by // converting escaped characters into their escape sequences. This is a // backwards use of the ESCAPE_REPLACEMENTS map. StringchRepresentation= String.valueOf(Character.toChars(c)); for (Character s : ESCAPE_REPLACEMENTS.keySet()) { Stringv= ESCAPE_REPLACEMENTS.get(s); if (v.equals(chRepresentation)) { chRepresentation = "\\" + s;// ' ' -> '\t' break; } } if (c == '\t') chRepresentation += "(TAB)"; Stringtext= String .format("found character '%s' that cannot start any token. (Do not use %s for indentation)", chRepresentation, chRepresentation); thrownewScannerException("while scanning for the next token", null, text, reader.getMark()); }
// Simple keys treatment.
/** * Return the number of the nearest possible simple key. Actually we don't * need to loop through the whole dictionary. */ privateintnextPossibleSimpleKey() { /* * the implementation is not as in PyYAML. Because * this.possibleSimpleKeys is ordered we can simply take the first key */ if (!this.possibleSimpleKeys.isEmpty()) { returnthis.possibleSimpleKeys.values().iterator().next().getTokenNumber(); } return -1; }
下面看一下缩进级别是如何确定的:
privatevoidunwindIndent(int col) { // In the flow context, indentation is ignored. We make the scanner less // restrictive then specification requires. if (this.flowLevel != 0) { return; }
// In block context, we may need to issue the BLOCK-END tokens. while (this.indent > col) {//确定当前有没有必要减小缩进;一个缩进减小代表结束了一个block(代码块) Markmark= reader.getMark();//这里mark相当于对当前数据状态的一个快照 this.indent = this.indents.pop();//这里使用了一个栈来记录经历的缩进级别变化,此处出栈来确定当前应该有的缩进级别 this.tokens.add(newBlockEndToken(mark, mark));//代码块结束,而且可能不止结束一个 } }
fetchMoreTokens执行完成后,此时应该增加了一个Token
转换
此时调用来到了ParserImpl的第195行,此时处在一个内部类中:private class ParseImplicitDocumentStart implements Production