html-parser.js 2.2 KB
Newer Older
fxy060608's avatar
fxy060608 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
import HTMLParser from '../../helpers/html-parser'

/**
 * Strips the leading document prolog from an HTML string: the first XML
 * declaration (`<?xml ... ?>`) and the first doctype declaration
 * (`<!DOCTYPE ...>`, matched case-insensitively), each together with the
 * line break that immediately follows it, if any.
 *
 * @param {string} html - Raw markup.
 * @returns {string} The markup without the XML/doctype declarations.
 */
function removeDOCTYPE(html) {
  return html
    // Lazy match so only the declaration itself is consumed, never any
    // markup that happens to share the same line.
    .replace(/<\?xml[\s\S]*?\?>\r?\n?/, '')
    // `[^>]*` stops at the first `>`; the trailing line break is optional
    // so a doctype at end of input or before `\r\n` is handled too.
    .replace(/<!doctype[^>]*>\r?\n?/i, '')
}

/**
 * Collapses a list of `{ name, value }` attribute records into a plain
 * object keyed by attribute name.
 *
 * - A value containing a space is split on spaces into an array, except
 *   for `style`, which is kept as a single string.
 * - When the same attribute name occurs more than once, the values are
 *   collected into an array in order of appearance.
 *
 * @param {Array<{name: string, value: string}>} attrs - Attribute records.
 * @returns {Object} Map of attribute name to string or array value.
 */
function parseAttrs(attrs) {
  const hasOwn = Object.prototype.hasOwnProperty

  return attrs.reduce(function(pre, attr) {
    const name = attr.name
    let value = attr.value

    if (name !== 'style' && value.includes(' ')) {
      value = value.split(' ')
    }

    // Own-property check: a plain `pre[name]` lookup would also see
    // inherited members (`constructor`, `toString`, ...) and would treat
    // an earlier empty-string value as "not set".
    if (!hasOwn.call(pre, name)) {
      pre[name] = value
    } else if (Array.isArray(pre[name])) {
      pre[name].push(value)
    } else {
      pre[name] = [pre[name], value]
    }
    return pre
  }, {})
}

/**
 * Parses an HTML string into an array of plain node objects.
 *
 * Node shapes produced here:
 * - element: `{ name, attrs?, children? }` (`attrs` only when the tag has
 *   attributes, `children` only once a child has been attached)
 * - text:    `{ type: 'text', text }`
 * - comment: `{ node: 'comment', text }`
 *
 * XML/doctype declarations are stripped before parsing.
 *
 * @param {string} html - Markup to parse.
 * @returns {Array<Object>} The top-level nodes of the document.
 */
export default function parseHtml(html) {
  const source = removeDOCTYPE(html)

  // Currently open elements, innermost first: stacks[0] is the parent
  // that new nodes are attached to.
  const stacks = []

  const results = {
    node: 'root',
    children: []
  }

  // Attaches `node` to the innermost open element, or to the root when no
  // element is open.
  function appendChild(node) {
    const parent = stacks[0] || results
    if (!parent.children) {
      parent.children = []
    }
    parent.children.push(node)
  }

  HTMLParser(source, {
    start: function(tag, attrs, unary) {
      const node = {
        name: tag
      }
      if (attrs.length !== 0) {
        node.attrs = parseAttrs(attrs)
      }
      if (unary) {
        // Self-contained tag: it never receives an end callback, so attach
        // it immediately.
        appendChild(node)
      } else {
        stacks.unshift(node)
      }
    },
    end: function(tag) {
      const node = stacks.shift()
      if (!node) {
        // End tag with nothing open: report it instead of crashing on
        // `node.name`.
        console.error('invalid state: mismatch end tag')
        return
      }
      if (node.name !== tag) console.error('invalid state: mismatch end tag')

      // The element is complete; hand it to its parent (or the root).
      appendChild(node)
    },
    chars: function(text) {
      appendChild({
        type: 'text',
        text: text
      })
    },
    comment: function(text) {
      // Falls back to the root like every other node kind, so a top-level
      // comment no longer throws.
      appendChild({
        node: 'comment',
        text: text
      })
    }
  })
  return results.children
}