#################################################################
#
#
#
####################################################################
import sys
import unicodedata
from collections import defaultdict

from umychart_complier_util import xrange, unicode, uchr, uord


class Messages:
    BadGetterArity = 'Getter must not have any formal parameters'
    BadSetterArity= 'Setter must have exactly one formal parameter'
    BadSetterRestParameter= 'Setter function argument must not be a rest parameter'
    ConstructorIsAsync= 'Class constructor may not be an async method'
    ConstructorSpecialMethod= 'Class constructor may not be an accessor'
    DeclarationMissingInitializer= 'Missing initializer in %0 declaration'
    DefaultRestParameter= 'Unexpected token ='
    DuplicateBinding= 'Duplicate binding %0'
    DuplicateConstructor= 'A class may only have one constructor'
    DuplicateProtoProperty= 'Duplicate __proto__ fields are not allowed in object literals'
    ForInOfLoopInitializer= '%0 loop variable declaration may not have an initializer'
    GeneratorInLegacyContext= 'Generator declarations are not allowed in legacy contexts'
    IllegalBreak= 'Illegal break statement'
    IllegalContinue= 'Illegal continue statement'
    IllegalExportDeclaration= 'Unexpected token'
    IllegalImportDeclaration= 'Unexpected token'
    IllegalLanguageModeDirective= 'Illegal \'use strict\' directive in function with non-simple parameter list'
    IllegalReturn= 'Illegal return statement'
    InvalidEscapedReservedWord= 'Keyword must not contain escaped characters'
    InvalidHexEscapeSequence= 'Invalid hexadecimal escape sequence'
    InvalidLHSInAssignment= 'Invalid left-hand side in assignment'
    InvalidLHSInForIn= 'Invalid left-hand side in for-in'
    InvalidLHSInForLoop= 'Invalid left-hand side in for-loop'
    InvalidModuleSpecifier= 'Unexpected token'
    InvalidRegExp= 'Invalid regular expression'
    LetInLexicalBinding= 'let is disallowed as a lexically bound name'
    MissingFromClause= 'Unexpected token'
    MultipleDefaultsInSwitch= 'More than one default clause in switch statement'
    NewlineAfterThrow= 'Illegal newline after throw'
    NoAsAfterImportNamespace= 'Unexpected token'
    NoCatchOrFinally= 'Missing catch or finally after try'
    ParameterAfterRestParameter= 'Rest parameter must be last formal parameter'
    Redeclaration= '%0 \'%1\' has already been declared'
    StaticPrototype= 'Classes may not have static property named prototype'
    StrictCatchVariable= 'Catch variable may not be eval or arguments in strict mode'
    StrictDelete= 'Delete of an unqualified identifier in strict mode.'
    StrictFunction= 'In strict mode code, functions can only be declared at top level or inside a block'
    StrictFunctionName= 'Function name may not be eval or arguments in strict mode'
    StrictLHSAssignment= 'Assignment to eval or arguments is not allowed in strict mode'
    StrictLHSPostfix= 'Postfix increment/decrement may not have eval or arguments operand in strict mode'
    StrictLHSPrefix= 'Prefix increment/decrement may not have eval or arguments operand in strict mode'
    StrictModeWith= 'Strict mode code may not include a with statement'
    StrictOctalLiteral= 'Octal literals are not allowed in strict mode.'
    StrictParamDupe= 'Strict mode function may not have duplicate parameter names'
    StrictParamName= 'Parameter name eval or arguments is not allowed in strict mode'
    StrictReservedWord= 'Use of future reserved word in strict mode'
    StrictVarName= 'Variable name may not be eval or arguments in strict mode'
    TemplateOctalLiteral= 'Octal literals are not allowed in template strings.'
    UnexpectedEOS= 'Unexpected end of input'
    UnexpectedIdentifier= 'Unexpected identifier'
    UnexpectedNumber= 'Unexpected number'
    UnexpectedReserved= 'Unexpected reserved word'
    UnexpectedString= 'Unexpected string'
    UnexpectedTemplate= 'Unexpected quasi %0'
    UnexpectedToken= 'Unexpected token %0'
    UnexpectedTokenIllegal= 'Unexpected token ILLEGAL'
    UnknownLabel= 'Undefined label \'%0\''
    UnterminatedRegExp= 'Invalid regular expression: missing /'


# http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
U_CATEGORIES = defaultdict(list)
for c in map(chr, range(sys.maxunicode + 1)): 
    U_CATEGORIES[unicodedata.category(c)].append(c)

UNICODE_LETTER = set(U_CATEGORIES['Lu'] + U_CATEGORIES['Ll'] +U_CATEGORIES['Lt'] + U_CATEGORIES['Lm'] +U_CATEGORIES['Lo'] + U_CATEGORIES['Nl'])

UNICODE_OTHER_ID_START = set((
    # Other_ID_Start
    '\u1885', '\u1886', '\u2118', '\u212E', '\u309B', '\u309C',
    # New in Unicode 8.0
    '\u08B3', '\u0AF9', '\u13F8', '\u9FCD', '\uAB60', '\U00010CC0', '\U000108E0', '\U0002B820',
    # New in Unicode 9.0
    '\u1C80', '\U000104DB', '\U0001E922',
    '\U0001EE00', '\U0001EE06', '\U0001EE0A',
))

UNICODE_OTHER_ID_CONTINUE = set((
    # Other_ID_Continue
    '\xB7', '\u0387', '\u1369', '\u136A', '\u136B', '\u136C',
    '\u136D', '\u136E', '\u136F', '\u1370', '\u1371', '\u19DA',
    # New in Unicode 8.0
    '\u08E3', '\uA69E', '\U00011730',
    # New in Unicode 9.0
    '\u08D4', '\u1DFB', '\uA8C5', '\U00011450',
    '\U0001EE03', '\U0001EE0B',
))

UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn'] + U_CATEGORIES['Mc'])
IDENTIFIER_START = UNICODE_LETTER.union(UNICODE_OTHER_ID_START).union(set(('$', '_', '\\')))
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])

DECIMAL_CONV = dict((c, n) for n, c in enumerate('0123456789'))
OCTAL_CONV = dict((c, n) for n, c in enumerate('01234567'))
HEX_CONV = dict((c, n) for n, c in enumerate('0123456789abcdef'))
for n, c in enumerate('ABCDEF', 10): 
    HEX_CONV[c] = n

#空格
WHITE_SPACE = set(('\x09', '\x0B', '\x0C', '\x20', '\xA0','\u1680', '\u180E', '\u2000', '\u2001', '\u2002','\u2003', '\u2004', '\u2005', '\u2006', '\u2007','\u2008', '\u2009', '\u200A', '\u202F', '\u205F','\u3000', '\uFEFF',))
#换行
LINE_TERMINATOR = set(('\x0A', '\x0D', '\u2028', '\u2029'))
#数字
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
DECIMAL_DIGIT = set(DECIMAL_CONV.keys())
OCTAL_DIGIT = set(OCTAL_CONV.keys())
HEX_DIGIT = set(HEX_CONV.keys())
#合法的起始字符
IDENTIFIER_START = UNICODE_LETTER.union(UNICODE_OTHER_ID_START).union(set(('$', '_', '\\')))
#合法的中间字符
IDENTIFIER_PART = IDENTIFIER_START.union(UNICODE_COMBINING_MARK).union(UNICODE_DIGIT).union(UNICODE_CONNECTOR_PUNCTUATION).union(set(('\u200D', '\u200C'))).union(UNICODE_OTHER_ID_CONTINUE)

class Character:
    @staticmethod   # 返回ASCII字符
    def FromCodePoint(cp): return uchr(cp) 

    @staticmethod   # 是否是空格 https://tc39.github.io/ecma262/#sec-white-space
    def IsWhiteSpace(cp): return cp in WHITE_SPACE
    
    @staticmethod   # 是否换行 https://tc39.github.io/ecma262/#sec-line-terminators
    def IsLineTerminator(cp): return cp in LINE_TERMINATOR

    @staticmethod   # https://tc39.github.io/ecma262/#sec-names-and-keywords
    def IsIdentifierStart(cp): return cp in IDENTIFIER_START

    @staticmethod 
    def IsIdentifierPart(cp): return cp in IDENTIFIER_PART

    @staticmethod   # 0..9 https://tc39.github.io/ecma262/#sec-literals-numeric-literals
    def IsDecimalDigit(cp): return cp in DECIMAL_DIGIT

    @staticmethod
    def IsHexDigit(cp):  return cp in HEX_DIGIT

    @staticmethod   #0..7
    def IsOctalDigit(cp): return cp in OCTAL_DIGIT



######################################################################
#
# 错误信息类
#
######################################################################
class Error(Exception):
    def __init__(self, message, name=None, index=None, lineNumber=None, column=None, description=None):
        super(Error, self).__init__(message)
        self.Message = message
        self.Name = name
        self.Index = index
        self.LineNumber = lineNumber
        self.Column = column
        # self.description = description

    def ToString(self):
        return '%s: %s' % (self.__class__.__name__, self)

    def ToDict(self):
        d = dict((unicode(k), v) for k, v in self.__dict__.items() if v is not None)
        d['message'] = self.ToString()
        return d

######################################################################
#
# 编译异常, 错误类
#
######################################################################
class ErrorHandler:
    def __init__(self):
        self.Error=[]

    def RecordError(self, error):
        self.Error.append(error.ToDict())

    def CreateError(self, index, line, col, description):
        # msg='Line ' + line + ': ' + description
        msg = 'Line %s: %s' % (line, description)
        return Error(msg,index=index,lineNumber=line, column=col, description=description)

    def ThrowError(self, index, line, col, description):
        error=self.CreateError(index,line,col,description)
        raise  error

######################################################################
#
# 扫描状态信息类
#
######################################################################
class ScannerState:
    def __init__(self, index=None, lineNumber=None, lineStart=None):
        self.Index = index
        self.LineNumber = lineNumber
        self.LineStart = lineStart

class RawToken():
    def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None):
        self.Type = type
        self.Value = value
        #self.Pattern = pattern
        #self.Flags = flags
        #self.regex = regex
        #self.octal = octal
        #self.cooked = cooked
        #self.head = head
        #self.tail = tail
        self.LineNumber = lineNumber
        self.LineStart = lineStart
        self.Start = start
        self.End = end


#######################################################################################
#
#   扫描类
#
#######################################################################################
class Scanner:
    def __init__(self, code, ErrorHandler):
        self.Source=unicode(code) + '\x00'
        self.ErrorHandler=ErrorHandler
        self.Length=len(code)
        self.Index=0
        self.LineNumber=1 if (self.Length>0) else 0
        self.LineStart=0
        self.CurlyStack=[]

    def SaveState(self):   # 保存当前扫描状态
        return ScannerState(index=self.Index, lineNumber=self.LineNumber, lineStart=self.LineStart)

    def RestoreState(self, state):   #还原扫描状态
        self.Index=state.Index
        self.LineNumber=state.LineNumber
        self.LineStart=state.LineStart

    def IsEOF(self):     #否是已经结束
        return self.Index>=self.Length

    def IsKeyword(self,id):
        return False

    def CodePointAt (self, i):
        return uord(self.Source[i:i + 2])

    def Lex(self):
        if (self.IsEOF()):
            return RawToken( type=2, value='', lineNumber=self.LineNumber, lineStart=self.LineStart, start=self.Index, end=self.Index )  # 2=EOF
        cp=self.Source[self.Index]

        # 变量名 或 关键字
        if (Character.IsIdentifierStart(cp)):
            return self.ScanIdentifier()

        # ( ) ; 开头 操作符扫描
        if cp in ('(', ')', ';'): 
            return self.ScanPunctuator()

        # ' " 开头 字符串扫描
        if cp in ('\'', '"'):
            return self.ScanStringLiteral()

        # . 开头 浮点型
        if cp == '.':
            if Character.IsDecimalDigit(self.Source[self.Index + 1]):
                return self.ScanNumericLiteral()
            return self.ScanPunctuator()

        # 数字
        if Character.IsDecimalDigit(cp):
            return self.ScanNumericLiteral()
        
        cp1=ord(cp)
        if  cp1 >= 0xD800 and cp1 < 0xDFFF :
            cp1 = self.CodePointAt(self.Index)
            cp = Character.FromCodePoint(cp1)
            if Character.IsIdentifierStart(cp):
                return self.ScanIdentifier()

        return self.ScanPunctuator()

    # 关键字 变量名 https://tc39.github.io/ecma262/#sec-names-and-keywords
    def ScanIdentifier(self):
        type=0
        start=self.Index
        # \\ 反斜杠
        id=self.GetComplexIdentifier() if self.Source[start] == '\\' else self.GetIdentifier()

        if len(id) == 1:
            type=3                      # Identifier
        elif self.IsKeyword(id):
            type=4                       # Keyword
        elif id=='null':
            type=5                       # NullLiteral
        elif id=='true' or id=='false':
            type=1                       # BooleanLiteral
        else:
            type=3                       # Identifier

        if type!=3  and start+len(id)!=self.Index :
            # restore=self.Index
            self.Index=start
            raise Messages.InvalidEscapedReservedWord
            # self.Index=restore

        if id=='AND' or id=='OR' :
            type=7                      #Punctuator*/

        return RawToken( type=type, value=id, lineNumber=self.LineNumber, lineStart=self.LineStart, start=start, end=self.Index )

    def GetIdentifier(self):
        start=self.Index        # start 保存进来的位置
        self.Index+=1
        while not self.IsEOF():
            ch=self.Source[self.Index]
            if ch=='\\':
                self.Index=start
                return self.GetComplexIdentifier()
            else:
                cp = ord(ch)
                if cp >= 0xD800 and cp < 0xDFFF :
                    self.Index=start
                    return self.GetComplexIdentifier()

            if Character.IsIdentifierPart(ch):
                self.Index+=1
            else:
                break

        return self.Source[start:self.Index]

    # 操作符 https://tc39.github.io/ecma262/#sec-punctuators
    def ScanPunctuator(self):
        start=self.Index
        str=self.Source[self.Index]
        if str=='(':
            self.Index+=1
        elif str in (')',';',','): 
            self.Index+=1
        else :
            str=self.Source[self.Index:self.Index+3]
            if str=='AND' :
                self.Index+=3
            else :
                str = self.Source[self.Index:self.Index+2]
                if str in ( '&&', '||', '==' , '!=', '<>', '<=', '>=', '=>', ':=', 'OR') :
                    self.Index += 2
                else  :
                    str=self.Source[self.Index]
                    if str in '<>=!+-*%&|^/:' :
                        self.Index += 1

        if self.Index==start :
           self.ThrowUnexpectedToken()

        return RawToken( type=7, value=str, lineNumber=self.LineNumber, lineStart=self.LineStart, start=start, end=self.Index )    #7=Punctuator

    # 字符串 https://tc39.github.io/ecma262/#sec-literals-string-literals
    def ScanStringLiteral(self):
        start=self.Index
        quote=self.Source[self.Index]

        self.Index+=1
        # octal=False
        str=''
        while not self.IsEOF():
            ch=self.Source[self.Index]
            self.Index+=1
            if ch==quote: 
                quote=''
                break
            elif ch=='\\':  #字符串转义
                raise "not complete"
            elif Character.IsLineTerminator(ch) :
                break
            else :
                str+=ch

        if quote!='' :
            self.Index=start
            self.ThrowUnexpectedToken()

        return RawToken( type=8, value=str, lineNumber=self.LineNumber, lineStart=self.LineStart, start=start, end=self.Index) # 8=StringLiteral

    def ScanNumericLiteral(self) :
        start=self.Index
        ch=self.Source[self.Index]
        num=''
        if ch!='.' :
            num=self.Source[self.Index]
            self.Index+=1
            ch=self.Source[self.Index]
            # Hex number starts with '0x'. 16进制
            if num=='0' :
                if ch in ('x', 'X') :
                    self.Index+=1
                    return self.ScanHexLiteral(start)

            while Character.IsDecimalDigit(self.Source[self.Index]):
                num+=self.Source[self.Index]
                self.Index+=1
            
            ch=self.Source[self.Index]

        if ch=='.' :
            num+=self.Source[self.Index]
            self.Index+=1
            while Character.IsDecimalDigit(self.Source[self.Index]) :
                num+=self.Source[self.Index]
                self.Index+=1

            ch=self.Source[self.Index]

        # 科学计数法
        if ch in ('e', 'E'):
            num+=self.Source[self.Index]
            self.Index+=1
            ch=self.Source[self.Index]
            if ch in ('+' ,'-'):
                num+=self.Source[self.Index]
                self.Index+=1
            if Character.IsDecimalDigit(self.Source[self.Index]) :
                while Character.IsDecimalDigit(self.Source[self.Index]) :
                    num+=self.Source[self.Index]
                    self.Index+=1
            else :
                self.ThrowUnexpectedToken()

        if Character.IsIdentifierStart(self.Source[self.Index]) :
            self.ThrowUnexpectedToken()

        value = float(num)
        return RawToken( type=6, value=value, lineNumber=self.LineNumber, lineStart=self.LineStart, start=start, end=self.Index )  #6=NumericLiteral

    # 16进制数值
    def ScanHexLiteral(self, start):
        num = ''
        while not self.IsEOF() :
            if not Character.IsHexDigit(self.Source[self.Index]):
                break

            num += self.Source[self.Index]
            self.Index += 1

        if len(num) == 0:
            self.ThrowUnexpectedToken()

        if Character.IsIdentifierStart(self.Source[self.Index]):
            self.ThrowUnexpectedToken()

        return RawToken(type=6, value=int(num, 16), lineNumber=self.LineNumber, lineStart=self.LineStart, start=start, end=self.Index)   # 6=NumericLiteral,

    # 不支持
    def GetComplexIdentifier(self):
        self.ThrowUnexpectedToken()
        return id

    # 空格 或 注释
    def ScanComments(self) :
        comments=[]
        start= self.Index==0 
        while not self.IsEOF() :
            ch=self.Source[self.Index]
            if Character.IsWhiteSpace(ch) : # 过滤掉空格
                self.Index+=1

            elif Character.IsLineTerminator(ch):
                self.Index+=1
                if ch=='\r' and self.Source[self.Index]=='\n' :
                     self.Index+=1     #回车+换行

                self.LineNumber+=1
                self.LineStart=self.Index
                start=True

            elif ch=='/' :          # //注释
                ch=self.Source[self.Index+1]
                if ch=='/' :
                    self.Index+=2
                    comment=self.SkipSingleLineComment(2)
                    start=True
                else :
                    break

            elif ch== '{' :      #{ }  注释
                self.Index += 1
                comment = self.SkipMultiLineComment()
            else :
                break

        return comments
    
    # 多行注释
    def SkipMultiLineComment(self) :
        comments = []
        while not self.IsEOF() :
            ch = self.Source[self.Index]
            if Character.IsLineTerminator(ch) : 
                self.LineNumber+=1
                self.Index+=1
                self.LineStart = self.Index
            elif ch == '*' :
                if self.Source[self.Index + 1] == '/':
                    self.Index += 2
                    return comments

                self.Index+=1
            else :
                self.Index+=1

        return comments

    
    # 单行注释 https://tc39.github.io/ecma262/#sec-comments
    def SkipSingleLineComment(self, offset) :
        comments=[]
        while not self.IsEOF() :
            ch=self.Source[self.Index]
            self.Index+=1
            if Character.IsLineTerminator(ch) :
                if ch=='\r' and self.Source[self.Index] =='\n' :
                    self.Index+=1

                self.LineNumber+=1
                self.LineStart=self.Index
                return comments

        return comments

    # 抛异常
    def ThrowUnexpectedToken(self, message= Messages.UnexpectedTokenIllegal) :
	    return self.ErrorHandler.ThrowError(self.Index, self.LineNumber, self.Index - self.LineStart + 1, message)