// JsonKit v0.5 - A simple but flexible Json library in a single .cs file.
// 
// Copyright (C) 2014 Topten Software (contact@toptensoftware.com) All rights reserved.
// 
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this product 
// except in compliance with the License. You may obtain a copy of the License at
// 
// http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software distributed under the 
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
// either express or implied. See the License for the specific language governing permissions 
// and limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Globalization;

namespace Topten.JsonKit
{
    // Splits the text supplied by a TextReader into tokens.
    //
    // The constructor reads the first token, so CurrentToken is valid as soon as the
    // tokenizer has been created. Call NextToken() to advance. For literal tokens the
    // raw text is left in String and its classification in LiteralKind; conversion to
    // a CLR value only happens when LiteralValue is read.
    //
    // Bookmarks (CreateBookmark / RewindToBookmark / DiscardBookmark) allow the caller
    // to look ahead and then return to an earlier position in the input.
    class Tokenizer
    {
        // Create a tokenizer over `r` and position it on the first token.
        //  r       - source of the text to be tokenized
        //  options - parser options; only the StrictParser flag is consulted here
        // Throws InvalidDataException if the first token is malformed.
        public Tokenizer(TextReader r, JsonOptions options)
        {
            _underlying = r;
            _options = options;
            FillBuffer();
            NextChar();
            NextToken();
        }

        private JsonOptions _options;
        private StringBuilder _sb = new StringBuilder();        // Scratch buffer used while building a token's text
        private TextReader _underlying;
        private char[] _buf = new char[4096];                   // Characters read from _underlying
        private int _pos;                                       // Index of the next unread character in _buf
        private int _bufUsed;                                   // Number of valid characters in _buf (0 once input is exhausted)
        private StringBuilder _rewindBuffer;                    // Characters read since bookmarking started (null when not recording)
        private int _rewindBufferPos;                           // Index of the next character to replay from _rewindBuffer
        private LineOffset _currentCharPos;
        private char _currentChar;                              // '\0' signals end of input
        private Stack<ReaderState> _bookmarks = new Stack<ReaderState>();

        public LineOffset CurrentTokenPosition;
        public Token CurrentToken;
        public LiteralKind LiteralKind;
        public string String;

        // The value of the current literal token, converted according to LiteralKind:
        // null, bool, string, long, ulong or double.
        // Throws InvalidOperationException if the current token is not a literal. The
        // numeric conversions throw their usual exceptions (eg: FormatException,
        // OverflowException) if the token text can't be converted.
        public object LiteralValue
        {
            get
            {
                if (CurrentToken != Token.Literal)
                    throw new InvalidOperationException("token is not a literal");

                switch (LiteralKind)
                {
                    case LiteralKind.Null: return null;
                    case LiteralKind.False: return false;
                    case LiteralKind.True: return true;
                    case LiteralKind.String: return String;
                    case LiteralKind.SignedInteger: return long.Parse(String, CultureInfo.InvariantCulture);
                    case LiteralKind.UnsignedInteger:
                        // Ordinal comparison - the prefix is syntax, not linguistic text
                        if (String.StartsWith("0x", StringComparison.Ordinal) || String.StartsWith("0X", StringComparison.Ordinal))
                            return Convert.ToUInt64(String.Substring(2), 16);
                        else
                            return ulong.Parse(String, CultureInfo.InvariantCulture);
                    case LiteralKind.FloatingPoint: return double.Parse(String, CultureInfo.InvariantCulture);
                }
                return null;
            }
        }

        // The CLR type that LiteralValue produces for the current literal token
        // (typeof(Object) is reported for a null literal).
        // Throws InvalidOperationException if the current token is not a literal.
        public Type LiteralType
        {
            get
            {
                if (CurrentToken != Token.Literal)
                    throw new InvalidOperationException("token is not a literal");

                switch (LiteralKind)
                {
                    case LiteralKind.Null: return typeof(Object);
                    case LiteralKind.False: return typeof(Boolean);
                    case LiteralKind.True: return typeof(Boolean);
                    case LiteralKind.String: return typeof(string);
                    case LiteralKind.SignedInteger: return typeof(long);
                    case LiteralKind.UnsignedInteger: return typeof(ulong);
                    case LiteralKind.FloatingPoint: return typeof(double);
                }

                return null;
            }
        }

        // This object represents the entire state of the reader and is used for rewind
        struct ReaderState
        {
            // Capture the tokenizer's current position and token
            public ReaderState(Tokenizer tokenizer)
            {
                _currentCharPos = tokenizer._currentCharPos;
                _currentChar = tokenizer._currentChar;
                _string = tokenizer.String;
                _literalKind = tokenizer.LiteralKind;
                _rewindBufferPos = tokenizer._rewindBufferPos;
                _currentTokenPos = tokenizer.CurrentTokenPosition;
                _currentToken = tokenizer.CurrentToken;
            }

            // Restore the captured position and token onto the tokenizer
            public void Apply(Tokenizer tokenizer)
            {
                tokenizer._currentCharPos = _currentCharPos;
                tokenizer._currentChar = _currentChar;
                tokenizer._rewindBufferPos = _rewindBufferPos;
                tokenizer.CurrentToken = _currentToken;
                tokenizer.CurrentTokenPosition = _currentTokenPos;
                tokenizer.String = _string;
                tokenizer.LiteralKind = _literalKind;
            }

            private LineOffset _currentCharPos;
            private LineOffset _currentTokenPos;
            private char _currentChar;
            private Token _currentToken;
            private LiteralKind _literalKind;
            private string _string;
            private int _rewindBufferPos;
        }

        // Create a rewind bookmark
        // (from here on every character read is also recorded so that it can be replayed)
        public void CreateBookmark()
        {
            _bookmarks.Push(new ReaderState(this));
            if (_rewindBuffer == null)
            {
                _rewindBuffer = new StringBuilder();
                _rewindBufferPos = 0;
            }
        }

        // Discard bookmark
        // Throws InvalidOperationException (from Stack.Pop) if there is no bookmark.
        public void DiscardBookmark()
        {
            _bookmarks.Pop();

            // Recording can stop once no bookmarks remain, but only if everything that was
            // recorded has been consumed. After an earlier RewindToBookmark the buffer may
            // still hold characters waiting to be replayed and dropping it would lose them.
            if (_bookmarks.Count == 0 && (_rewindBuffer == null || _rewindBufferPos >= _rewindBuffer.Length))
            {
                _rewindBuffer = null;
                _rewindBufferPos = 0;
            }
        }

        // Rewind to a bookmark
        // The bookmark is consumed. The rewind buffer is deliberately kept as it now holds
        // the characters that need to be read again.
        // Throws InvalidOperationException (from Stack.Pop) if there is no bookmark.
        public void RewindToBookmark()
        {
            _bookmarks.Pop().Apply(this);
        }

        // Fill buffer by reading from underlying TextReader
        void FillBuffer()
        {
            _bufUsed = _underlying.Read(_buf, 0, _buf.Length);
            _pos = 0;
        }

        // Get the next character from the input stream
        // (this function could be extracted into a few different methods, but is mostly inlined
        //  for performance - yes it makes a difference)
        // Returns the new current character, or '\0' once the input is exhausted.
        public char NextChar()
        {
            // Fast path - not recording for rewind
            if (_rewindBuffer == null)
            {
                if (_pos >= _bufUsed)
                {
                    if (_bufUsed > 0)
                    {
                        FillBuffer();
                    }
                    if (_bufUsed == 0)
                    {
                        return _currentChar = '\0';
                    }
                }

                // Next
                _currentCharPos.Offset++;
                return _currentChar = _buf[_pos++];
            }

            if (_rewindBufferPos < _rewindBuffer.Length)
            {
                // Replaying previously recorded characters
                _currentCharPos.Offset++;
                return _currentChar = _rewindBuffer[_rewindBufferPos++];
            }
            else
            {
                // Reading fresh characters, recording them as we go
                if (_pos >= _bufUsed && _bufUsed > 0)
                    FillBuffer();
                _currentChar = _bufUsed == 0 ? '\0' : _buf[_pos++];
                _rewindBuffer.Append(_currentChar);
                _rewindBufferPos++;
                _currentCharPos.Offset++;
                return _currentChar;
            }
        }

        // Read the next token from the input stream
        // (Mostly inline for performance)
        // On return CurrentToken and CurrentTokenPosition describe the token and, for
        // literals and identifiers, String holds its text.
        // Throws InvalidDataException on a syntax error.
        public void NextToken()
        {
            while (true)
            {
                // Skip whitespace and handle line numbers
                while (true)
                {
                    if (_currentChar == '\r')
                    {
                        if (NextChar() == '\n')
                        {
                            NextChar();
                        }
                        _currentCharPos.Line++;
                        _currentCharPos.Offset = 0;
                    }
                    else if (_currentChar == '\n')
                    {
                        if (NextChar() == '\r')
                        {
                            NextChar();
                        }
                        _currentCharPos.Line++;
                        _currentCharPos.Offset = 0;
                    }
                    else if (_currentChar == ' ')
                    {
                        NextChar();
                    }
                    else if (_currentChar == '\t')
                    {
                        NextChar();
                    }
                    else
                        break;
                }

                // Remember position of token
                CurrentTokenPosition = _currentCharPos;

                // Handle common characters first
                switch (_currentChar)
                {
                    case '/':
                        // Comments not support in strict mode
                        if ((_options & JsonOptions.StrictParser) != 0)
                        {
                            throw new InvalidDataException(string.Format("syntax error, unexpected character '{0}'", _currentChar));
                        }

                        // Process comment
                        NextChar();
                        switch (_currentChar)
                        {
                            case '/':
                                // Single line comment - runs to the end of the line
                                NextChar();
                                while (_currentChar != '\0' && _currentChar != '\r' && _currentChar != '\n')
                                {
                                    NextChar();
                                }
                                break;

                            case '*':
                                // Step over the '*' that opened the comment so that it can't
                                // also be taken as the first half of the closing "*/"
                                NextChar();

                                // An unterminated comment simply runs to the end of the input
                                bool endFound = false;
                                while (!endFound && _currentChar != '\0')
                                {
                                    if (_currentChar == '*')
                                    {
                                        NextChar();
                                        if (_currentChar == '/')
                                        {
                                            endFound = true;
                                            NextChar();
                                        }

                                        // Otherwise don't advance: the character now current may
                                        // itself be the '*' that starts the terminator (eg: "**/")
                                    }
                                    else
                                    {
                                        NextChar();
                                    }
                                }
                                break;

                            default:
                                throw new InvalidDataException("syntax error, unexpected character after slash");
                        }

                        // Comment skipped, go around again for the real token
                        continue;

                    case '\"':
                    case '\'':
                        {
                            _sb.Length = 0;
                            var quoteKind = _currentChar;       // The string must be closed by the same kind of quote
                            NextChar();
                            while (_currentChar != '\0')
                            {
                                if (_currentChar == '\\')
                                {
                                    NextChar();
                                    var escape = _currentChar;
                                    switch (escape)
                                    {
                                        case '\"': _sb.Append('\"'); break;
                                        case '\\': _sb.Append('\\'); break;
                                        case '/': _sb.Append('/'); break;
                                        case 'b': _sb.Append('\b'); break;
                                        case 'f': _sb.Append('\f'); break;
                                        case 'n': _sb.Append('\n'); break;
                                        case 'r': _sb.Append('\r'); break;
                                        case 't': _sb.Append('\t'); break;
                                        case 'u':
                                            // Exactly four hex digits follow
                                            var sbHex = new StringBuilder();
                                            for (int i = 0; i < 4; i++)
                                            {
                                                NextChar();
                                                sbHex.Append(_currentChar);
                                            }
                                            _sb.Append((char)Convert.ToUInt16(sbHex.ToString(), 16));
                                            break;

                                        default:
                                            throw new InvalidDataException(string.Format("Invalid escape sequence in string literal: '\\{0}'", _currentChar));
                                    }
                                }
                                else if (_currentChar == quoteKind)
                                {
                                    String = _sb.ToString();
                                    CurrentToken = Token.Literal;
                                    LiteralKind = LiteralKind.String;
                                    NextChar();
                                    return;
                                }
                                else
                                {
                                    _sb.Append(_currentChar);
                                }

                                NextChar();
                            }
                            throw new InvalidDataException("syntax error, unterminated string literal");
                        }

                    case '{': CurrentToken = Token.OpenBrace; NextChar(); return;
                    case '}': CurrentToken = Token.CloseBrace; NextChar(); return;
                    case '[': CurrentToken = Token.OpenSquare; NextChar(); return;
                    case ']': CurrentToken = Token.CloseSquare; NextChar(); return;
                    case '=': CurrentToken = Token.Equal; NextChar(); return;
                    case ':': CurrentToken = Token.Colon; NextChar(); return;
                    case ';': CurrentToken = Token.SemiColon; NextChar(); return;
                    case ',': CurrentToken = Token.Comma; NextChar(); return;
                    case '\0': CurrentToken = Token.EOF; return;
                }

                // Number?
                // (Explicit ASCII range rather than char.IsDigit, which also accepts other
                //  Unicode decimal digits that TokenizeNumber wouldn't consume)
                if ((_currentChar >= '0' && _currentChar <= '9') || _currentChar == '-')
                {
                    TokenizeNumber();
                    return;
                }

                // Identifier?  (checked for after everything else as identifiers are actually quite rare in valid json)
                if (Char.IsLetter(_currentChar) || _currentChar == '_' || _currentChar == '$')
                {
                    // Find end of identifier
                    _sb.Length = 0;
                    while (Char.IsLetterOrDigit(_currentChar) || _currentChar == '_' || _currentChar == '$')
                    {
                        _sb.Append(_currentChar);
                        NextChar();
                    }
                    String = _sb.ToString();

                    // Handle special identifiers
                    switch (String)
                    {
                        case "true":
                            LiteralKind = LiteralKind.True;
                            CurrentToken = Token.Literal;
                            return;

                        case "false":
                            LiteralKind = LiteralKind.False;
                            CurrentToken = Token.Literal;
                            return;

                        case "null":
                            LiteralKind = LiteralKind.Null;
                            CurrentToken = Token.Literal;
                            return;
                    }

                    CurrentToken = Token.Identifier;
                    return;
                }

                // What the?
                throw new InvalidDataException(string.Format("syntax error, unexpected character '{0}'", _currentChar));
            }
        }

        // Parse a sequence of characters that could make up a valid number
        // For performance, we don't actually parse it into a number yet.  When using Topten.JsonKitEmit we parse
        // later, directly into a value type to avoid boxing
        // Throws InvalidDataException if the number is immediately followed by a letter.
        private void TokenizeNumber()
        {
            _sb.Length = 0;

            // Leading negative sign
            bool signed = false;
            if (_currentChar == '-')
            {
                signed = true;
                _sb.Append(_currentChar);
                NextChar();
            }

            // Hex prefix?  (not recognized in strict mode)
            bool hex = false;
            if (_currentChar == '0' && (_options & JsonOptions.StrictParser) == 0)
            {
                _sb.Append(_currentChar);
                NextChar();
                if (_currentChar == 'x' || _currentChar == 'X')
                {
                    _sb.Append(_currentChar);
                    NextChar();
                    hex = true;
                }
            }

            // Process characters, but vaguely figure out what type it is
            bool cont = true;
            bool fp = false;
            while (cont)
            {
                switch (_currentChar)
                {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        _sb.Append(_currentChar);
                        NextChar();
                        break;

                    case 'A':
                    case 'a':
                    case 'B':
                    case 'b':
                    case 'C':
                    case 'c':
                    case 'D':
                    case 'd':
                    case 'F':
                    case 'f':
                        if (!hex)
                            cont = false;
                        else
                        {
                            _sb.Append(_currentChar);
                            NextChar();
                        }
                        break;

                    case '.':
                        if (hex)
                        {
                            cont = false;
                        }
                        else
                        {
                            fp = true;
                            _sb.Append(_currentChar);
                            NextChar();
                        }
                        break;

                    case 'E':
                    case 'e':
                        if (hex)
                        {
                            // In a hex number 'E' is just another digit. It has to be consumed
                            // here, otherwise this loop would never move past it.
                            _sb.Append(_currentChar);
                            NextChar();
                        }
                        else
                        {
                            // Exponent, with optional sign
                            fp = true;
                            _sb.Append(_currentChar);
                            NextChar();
                            if (_currentChar == '+' || _currentChar == '-')
                            {
                                _sb.Append(_currentChar);
                                NextChar();
                            }
                        }
                        break;

                    default:
                        cont = false;
                        break;
                }
            }

            if (char.IsLetter(_currentChar))
                throw new InvalidDataException(string.Format("syntax error, invalid character following number '{0}'", _sb.ToString()));

            // Setup token
            String = _sb.ToString();
            CurrentToken = Token.Literal;

            // Setup literal kind
            if (fp)
            {
                LiteralKind = LiteralKind.FloatingPoint;
            }
            else if (signed)
            {
                LiteralKind = LiteralKind.SignedInteger;
            }
            else
            {
                LiteralKind = LiteralKind.UnsignedInteger;
            }
        }

        // Check the current token, throw exception if mismatch
        public void Check(Token tokenRequired)
        {
            if (tokenRequired != CurrentToken)
            {
                throw new InvalidDataException(string.Format("syntax error, expected {0} found {1}", tokenRequired, CurrentToken));
            }
        }

        // Skip token which must match
        public void Skip(Token tokenRequired)
        {
            Check(tokenRequired);
            NextToken();
        }

        // Skip token if it matches
        // Returns true if the token matched and was skipped, false if nothing was consumed
        public bool SkipIf(Token tokenRequired)
        {
            if (tokenRequired == CurrentToken)
            {
                NextToken();
                return true;
            }
            return false;
        }
    }
}