// JsonKit v0.5 - A simple but flexible Json library in a single .cs file.
// 
// Copyright (C) 2014 Topten Software (contact@toptensoftware.com) All rights reserved.
// 
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this product 
// except in compliance with the License. You may obtain a copy of the License at
// 
// http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software distributed under the 
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
// either express or implied. See the License for the specific language governing permissions 
// and limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Globalization;

namespace Topten.JsonKit
{
    // Splits text read from a TextReader into Json tokens.  One character and one
    // token of lookahead are always held (_currentChar / CurrentToken).  Bookmarks
    // allow a caller to read ahead and later rewind to an earlier position.
    class Tokenizer
    {
        // Primes the tokenizer: fills the read buffer, loads the first character
        // and scans the first token so that CurrentToken is valid on return.
        public Tokenizer(TextReader r, JsonOptions options)
        {
            _underlying = r;
            _options = options;
            FillBuffer();
            NextChar();
            NextToken();
        }

        private JsonOptions _options;
        private StringBuilder _sb = new StringBuilder();    // scratch buffer for the token being built
        private TextReader _underlying;
        private char[] _buf = new char[4096];               // block read from _underlying
        private int _pos;                                   // next unread index in _buf
        private int _bufUsed;                               // number of valid chars in _buf (0 = end of input)
        private StringBuilder _rewindBuffer;                // chars recorded since the first bookmark (null when not recording/replaying)
        private int _rewindBufferPos;                       // next index to replay from _rewindBuffer
        private LineOffset _currentCharPos;
        private char _currentChar;
        private Stack<ReaderState> _bookmarks = new Stack<ReaderState>();

        public LineOffset CurrentTokenPosition;
        public Token CurrentToken;
        public LiteralKind LiteralKind;
        public string String;

        // The current literal token converted to a boxed .NET value
        // (null, bool, string, long, ulong or double depending on LiteralKind).
        // Throws InvalidOperationException if the current token is not a literal.
        public object LiteralValue
        {
            get
            {
                if (CurrentToken != Token.Literal)
                    throw new InvalidOperationException("token is not a literal");

                switch (LiteralKind)
                {
                    case LiteralKind.Null: return null;
                    case LiteralKind.False: return false;
                    case LiteralKind.True: return true;
                    case LiteralKind.String: return String;
                    case LiteralKind.SignedInteger: return long.Parse(String, CultureInfo.InvariantCulture);
                    case LiteralKind.UnsignedInteger:
                        // Ordinal comparison: the prefix is syntax, not linguistic text
                        if (String.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
                            return Convert.ToUInt64(String.Substring(2), 16);
                        else
                            return ulong.Parse(String, CultureInfo.InvariantCulture);
                    case LiteralKind.FloatingPoint: return double.Parse(String, CultureInfo.InvariantCulture);
                }
                return null;
            }
        }

        // The .NET type that LiteralValue yields for the current literal token.
        // Throws InvalidOperationException if the current token is not a literal.
        public Type LiteralType
        {
            get
            {
                if (CurrentToken != Token.Literal)
                    throw new InvalidOperationException("token is not a literal");

                switch (LiteralKind)
                {
                    case LiteralKind.Null: return typeof(Object);
                    case LiteralKind.False: return typeof(Boolean);
                    case LiteralKind.True: return typeof(Boolean);
                    case LiteralKind.String: return typeof(string);
                    case LiteralKind.SignedInteger: return typeof(long);
                    case LiteralKind.UnsignedInteger: return typeof(ulong);
                    case LiteralKind.FloatingPoint: return typeof(double);
                }
                return null;
            }
        }

        // This object represents the entire state of the reader and is used for rewind
        struct ReaderState
        {
            // Capture the tokenizer's current character, token and rewind position
            public ReaderState(Tokenizer tokenizer)
            {
                _currentCharPos = tokenizer._currentCharPos;
                _currentChar = tokenizer._currentChar;
                _string = tokenizer.String;
                _literalKind = tokenizer.LiteralKind;
                _rewindBufferPos = tokenizer._rewindBufferPos;
                _currentTokenPos = tokenizer.CurrentTokenPosition;
                _currentToken = tokenizer.CurrentToken;
            }

            // Restore the captured state onto the tokenizer
            public void Apply(Tokenizer tokenizer)
            {
                tokenizer._currentCharPos = _currentCharPos;
                tokenizer._currentChar = _currentChar;
                tokenizer._rewindBufferPos = _rewindBufferPos;
                tokenizer.CurrentToken = _currentToken;
                tokenizer.CurrentTokenPosition = _currentTokenPos;
                tokenizer.String = _string;
                tokenizer.LiteralKind = _literalKind;
            }

            private LineOffset _currentCharPos;
            private LineOffset _currentTokenPos;
            private char _currentChar;
            private Token _currentToken;
            private LiteralKind _literalKind;
            private string _string;
            private int _rewindBufferPos;
        }

        // Create a rewind bookmark
        // (starts recording consumed characters if not already doing so)
        public void CreateBookmark()
        {
            _bookmarks.Push(new ReaderState(this));
            if (_rewindBuffer == null)
            {
                _rewindBuffer = new StringBuilder();
                _rewindBufferPos = 0;
            }
        }

        // Discard bookmark
        public void DiscardBookmark()
        {
            _bookmarks.Pop();

            // Only release the rewind buffer once everything in it has been replayed.
            // After an earlier RewindToBookmark it may still hold characters that
            // haven't been re-read yet; dropping it then would lose input.  NextChar
            // releases it later when the replay completes.
            if (_bookmarks.Count == 0 && _rewindBuffer != null && _rewindBufferPos >= _rewindBuffer.Length)
            {
                _rewindBuffer = null;
                _rewindBufferPos = 0;
            }
        }

        // Rewind to a bookmark
        // (the bookmark is consumed; recorded characters are replayed by NextChar)
        public void RewindToBookmark()
        {
            _bookmarks.Pop().Apply(this);
        }

        // Fill buffer by reading from underlying TextReader
        void FillBuffer()
        {
            _bufUsed = _underlying.Read(_buf, 0, _buf.Length);
            _pos = 0;
        }

        // Get the next character from the input stream
        // (this function could be extracted into a few different methods, but is mostly inlined
        //  for performance - yes it makes a difference)
        // Returns '\0' once the end of the input has been reached.
        public char NextChar()
        {
            if (_rewindBuffer != null)
            {
                // Replaying characters that were recorded before a rewind?
                if (_rewindBufferPos < _rewindBuffer.Length)
                {
                    _currentCharPos.Offset++;
                    return _currentChar = _rewindBuffer[_rewindBufferPos++];
                }

                // Bookmark outstanding: read a fresh character and record it
                if (_bookmarks.Count > 0)
                {
                    if (_pos >= _bufUsed && _bufUsed > 0)
                        FillBuffer();
                    _currentChar = _bufUsed == 0 ? '\0' : _buf[_pos++];
                    _rewindBuffer.Append(_currentChar);
                    _rewindBufferPos++;
                    _currentCharPos.Offset++;
                    return _currentChar;
                }

                // Replay finished and nothing left to rewind to - stop recording
                // and fall through to the direct path
                _rewindBuffer = null;
                _rewindBufferPos = 0;
            }

            if (_pos >= _bufUsed)
            {
                if (_bufUsed > 0)
                {
                    FillBuffer();
                }
                if (_bufUsed == 0)
                {
                    return _currentChar = '\0';
                }
            }

            // Next
            _currentCharPos.Offset++;
            return _currentChar = _buf[_pos++];
        }

        // Read the next token from the input stream
        // (Mostly inline for performance)
        // On return CurrentToken, CurrentTokenPosition and (for literals and
        // identifiers) String/LiteralKind describe the token.  Throws
        // InvalidDataException on malformed input.
        public void NextToken()
        {
            while (true)
            {
                // Skip whitespace and handle line numbers
                while (true)
                {
                    if (_currentChar == '\r')
                    {
                        if (NextChar() == '\n')
                        {
                            NextChar();
                        }
                        _currentCharPos.Line++;
                        _currentCharPos.Offset = 0;
                    }
                    else if (_currentChar == '\n')
                    {
                        if (NextChar() == '\r')
                        {
                            NextChar();
                        }
                        _currentCharPos.Line++;
                        _currentCharPos.Offset = 0;
                    }
                    else if (_currentChar == ' ')
                    {
                        NextChar();
                    }
                    else if (_currentChar == '\t')
                    {
                        NextChar();
                    }
                    else
                        break;
                }

                // Remember position of token
                CurrentTokenPosition = _currentCharPos;

                // Handle common characters first
                switch (_currentChar)
                {
                    case '/':
                        // Comments not supported in strict mode
                        if ((_options & JsonOptions.StrictParser) != 0)
                        {
                            throw new InvalidDataException(string.Format("syntax error, unexpected character '{0}'", _currentChar));
                        }

                        // Process comment
                        NextChar();
                        switch (_currentChar)
                        {
                            case '/':
                                // Line comment: runs to end of line (or end of input)
                                NextChar();
                                while (_currentChar != '\0' && _currentChar != '\r' && _currentChar != '\n')
                                {
                                    NextChar();
                                }
                                break;

                            case '*':
                                // Step past the opening '*' so it can't double as the
                                // first half of the terminator ("/*/" doesn't close)
                                NextChar();

                                bool endFound = false;
                                while (!endFound && _currentChar != '\0')
                                {
                                    if (_currentChar == '*')
                                    {
                                        NextChar();
                                        if (_currentChar == '/')
                                        {
                                            endFound = true;
                                            NextChar();
                                        }
                                        // Otherwise loop without consuming: this character
                                        // may itself be the '*' of the terminator ("**/")
                                    }
                                    else
                                    {
                                        NextChar();
                                    }
                                }
                                break;

                            default:
                                throw new InvalidDataException("syntax error, unexpected character after slash");
                        }

                        // Comment skipped, go around again for the real token
                        continue;

                    case '\"':
                    case '\'':
                    {
                        _sb.Length = 0;
                        var quoteKind = _currentChar;
                        NextChar();
                        while (_currentChar != '\0')
                        {
                            if (_currentChar == '\\')
                            {
                                NextChar();
                                var escape = _currentChar;
                                switch (escape)
                                {
                                    case '\"': _sb.Append('\"'); break;
                                    case '\\': _sb.Append('\\'); break;
                                    case '/': _sb.Append('/'); break;
                                    case 'b': _sb.Append('\b'); break;
                                    case 'f': _sb.Append('\f'); break;
                                    case 'n': _sb.Append('\n'); break;
                                    case 'r': _sb.Append('\r'); break;
                                    case 't': _sb.Append('\t'); break;
                                    case 'u':
                                        // Four hex digits giving a UTF-16 code unit
                                        var sbHex = new StringBuilder();
                                        for (int i = 0; i < 4; i++)
                                        {
                                            NextChar();
                                            sbHex.Append(_currentChar);
                                        }
                                        _sb.Append((char)Convert.ToUInt16(sbHex.ToString(), 16));
                                        break;

                                    default:
                                        throw new InvalidDataException(string.Format("Invalid escape sequence in string literal: '\\{0}'", _currentChar));
                                }
                            }
                            else if (_currentChar == quoteKind)
                            {
                                // Closing quote (must match the opening quote character)
                                String = _sb.ToString();
                                CurrentToken = Token.Literal;
                                LiteralKind = LiteralKind.String;
                                NextChar();
                                return;
                            }
                            else
                            {
                                _sb.Append(_currentChar);
                            }

                            NextChar();
                        }
                        throw new InvalidDataException("syntax error, unterminated string literal");
                    }

                    case '{': CurrentToken = Token.OpenBrace; NextChar(); return;
                    case '}': CurrentToken = Token.CloseBrace; NextChar(); return;
                    case '[': CurrentToken = Token.OpenSquare; NextChar(); return;
                    case ']': CurrentToken = Token.CloseSquare; NextChar(); return;
                    case '=': CurrentToken = Token.Equal; NextChar(); return;
                    case ':': CurrentToken = Token.Colon; NextChar(); return;
                    case ';': CurrentToken = Token.SemiColon; NextChar(); return;
                    case ',': CurrentToken = Token.Comma; NextChar(); return;
                    case '\0': CurrentToken = Token.EOF; return;
                }

                // Number?
                // (ASCII digits only - char.IsDigit also accepts other Unicode digits, which
                //  TokenizeNumber can't consume and which would leave the tokenizer stuck)
                if ((_currentChar >= '0' && _currentChar <= '9') || _currentChar == '-')
                {
                    TokenizeNumber();
                    return;
                }

                // Identifier?  (checked for after everything else as identifiers are actually quite rare in valid json)
                if (Char.IsLetter(_currentChar) || _currentChar == '_' || _currentChar == '$')
                {
                    // Find end of identifier
                    _sb.Length = 0;
                    while (Char.IsLetterOrDigit(_currentChar) || _currentChar == '_' || _currentChar == '$')
                    {
                        _sb.Append(_currentChar);
                        NextChar();
                    }
                    String = _sb.ToString();

                    // Handle special identifiers
                    switch (String)
                    {
                        case "true":
                            LiteralKind = LiteralKind.True;
                            CurrentToken = Token.Literal;
                            return;

                        case "false":
                            LiteralKind = LiteralKind.False;
                            CurrentToken = Token.Literal;
                            return;

                        case "null":
                            LiteralKind = LiteralKind.Null;
                            CurrentToken = Token.Literal;
                            return;
                    }

                    CurrentToken = Token.Identifier;
                    return;
                }

                // What the?
                throw new InvalidDataException(string.Format("syntax error, unexpected character '{0}'", _currentChar));
            }
        }

        // Parse a sequence of characters that could make up a valid number
        // For performance, we don't actually parse it into a number yet.  When using Topten.JsonKitEmit we parse
        // later, directly into a value type to avoid boxing
        // The raw text is left in String and LiteralKind is set to FloatingPoint,
        // SignedInteger or UnsignedInteger.
        private void TokenizeNumber()
        {
            _sb.Length = 0;

            // Leading negative sign
            bool signed = false;
            if (_currentChar == '-')
            {
                signed = true;
                _sb.Append(_currentChar);
                NextChar();
            }

            // Hex prefix? (not recognised in strict mode)
            bool hex = false;
            if (_currentChar == '0' && (_options & JsonOptions.StrictParser) == 0)
            {
                _sb.Append(_currentChar);
                NextChar();
                if (_currentChar == 'x' || _currentChar == 'X')
                {
                    _sb.Append(_currentChar);
                    NextChar();
                    hex = true;
                }
            }

            // Process characters, but vaguely figure out what type it is
            bool cont = true;
            bool fp = false;
            while (cont)
            {
                switch (_currentChar)
                {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        _sb.Append(_currentChar);
                        NextChar();
                        break;

                    case 'A':
                    case 'a':
                    case 'B':
                    case 'b':
                    case 'C':
                    case 'c':
                    case 'D':
                    case 'd':
                    case 'F':
                    case 'f':
                        if (!hex)
                            cont = false;
                        else
                        {
                            _sb.Append(_currentChar);
                            NextChar();
                        }
                        break;

                    case '.':
                        if (hex)
                        {
                            cont = false;
                        }
                        else
                        {
                            fp = true;
                            _sb.Append(_currentChar);
                            NextChar();
                        }
                        break;

                    case 'E':
                    case 'e':
                        if (hex)
                        {
                            // In a hex literal 'e' is just another digit.  It must be
                            // consumed here, otherwise this loop would never advance.
                            _sb.Append(_currentChar);
                            NextChar();
                        }
                        else
                        {
                            // Exponent, with optional sign
                            fp = true;
                            _sb.Append(_currentChar);
                            NextChar();
                            if (_currentChar == '+' || _currentChar == '-')
                            {
                                _sb.Append(_currentChar);
                                NextChar();
                            }
                        }
                        break;

                    default:
                        cont = false;
                        break;
                }
            }

            // A letter directly after the number (eg: "12abc") is an error
            if (char.IsLetter(_currentChar))
                throw new InvalidDataException(string.Format("syntax error, invalid character following number '{0}'", _sb.ToString()));

            // Setup token
            String = _sb.ToString();
            CurrentToken = Token.Literal;

            // Setup literal kind
            if (fp)
            {
                LiteralKind = LiteralKind.FloatingPoint;
            }
            else if (signed)
            {
                LiteralKind = LiteralKind.SignedInteger;
            }
            else
            {
                LiteralKind = LiteralKind.UnsignedInteger;
            }
        }

        // Check the current token, throw exception if mismatch
        public void Check(Token tokenRequired)
        {
            if (tokenRequired != CurrentToken)
            {
                throw new InvalidDataException(string.Format("syntax error, expected {0} found {1}", tokenRequired, CurrentToken));
            }
        }

        // Skip token which must match
        public void Skip(Token tokenRequired)
        {
            Check(tokenRequired);
            NextToken();
        }

        // Skip token if it matches
        // Returns true if the token matched and was skipped
        public bool SkipIf(Token tokenRequired)
        {
            if (tokenRequired == CurrentToken)
            {
                NextToken();
                return true;
            }
            return false;
        }
    }
}