// parser.js
  1. 'use strict'
  2. /**
  3. * @fileoverview html 解析器
  4. */
  5. // 配置
  6. const config = {
  7. // 信任的标签(保持标签名不变)
  8. trustTags: makeMap('a,abbr,ad,audio,b,blockquote,br,code,col,colgroup,dd,del,dl,dt,div,em,fieldset,h1,h2,h3,h4,h5,h6,hr,i,img,ins,label,legend,li,ol,p,q,ruby,rt,source,span,strong,sub,sup,table,tbody,td,tfoot,th,thead,tr,title,ul,video'),
  9. // 块级标签(转为 div,其他的非信任标签转为 span)
  10. blockTags: makeMap('address,article,aside,body,caption,center,cite,footer,header,html,nav,pre,section'),
  11. // 要移除的标签
  12. ignoreTags: makeMap('area,base,canvas,embed,frame,head,iframe,input,link,map,meta,param,rp,script,source,style,textarea,title,track,wbr'),
  13. // 自闭合的标签
  14. voidTags: makeMap('area,base,br,col,circle,ellipse,embed,frame,hr,img,input,line,link,meta,param,path,polygon,rect,source,track,use,wbr'),
  15. // html 实体
  16. entities: {
  17. lt: '<',
  18. gt: '>',
  19. quot: '"',
  20. apos: "'",
  21. ensp: '\u2002',
  22. emsp: '\u2003',
  23. nbsp: '\xA0',
  24. semi: ';',
  25. ndash: '–',
  26. mdash: '—',
  27. middot: '·',
  28. lsquo: '‘',
  29. rsquo: '’',
  30. ldquo: '“',
  31. rdquo: '”',
  32. bull: '•',
  33. hellip: '…'
  34. },
  35. // 默认的标签样式
  36. tagStyle: {
  37. // #ifndef APP-PLUS-NVUE
  38. address: 'font-style:italic',
  39. big: 'display:inline;font-size:1.2em',
  40. caption: 'display:table-caption;text-align:center',
  41. center: 'text-align:center',
  42. cite: 'font-style:italic',
  43. dd: 'margin-left:40px',
  44. mark: 'background-color:yellow',
  45. pre: 'font-family:monospace;white-space:pre',
  46. s: 'text-decoration:line-through',
  47. small: 'display:inline;font-size:0.8em',
  48. u: 'text-decoration:underline' // #endif
  49. }
  50. }
  51. const { windowWidth } = uni.getSystemInfoSync()
  52. const blankChar = makeMap(' ,\r,\n,\t,\f')
  53. let idIndex = 0 // #ifdef H5 || APP-PLUS
  54. config.ignoreTags.iframe = void 0
  55. config.trustTags.iframe = true
  56. config.ignoreTags.embed = void 0
  57. config.trustTags.embed = true // #endif
  58. // #ifdef APP-PLUS-NVUE
  59. config.ignoreTags.source = void 0
  60. config.ignoreTags.style = void 0 // #endif
  61. /**
  62. * @description 创建 map
  63. * @param {String} str 逗号分隔
  64. */
  65. function makeMap(str) {
  66. const map = Object.create(null)
  67. const list = str.split(',')
  68. for (let i = list.length; i--;) {
  69. map[list[i]] = true
  70. }
  71. return map
  72. }
  73. /**
  74. * @description 解码 html 实体
  75. * @param {String} str 要解码的字符串
  76. * @param {Boolean} amp 要不要解码 &amp;
  77. * @returns {String} 解码后的字符串
  78. */
  79. function decodeEntity(str, amp) {
  80. let i = str.indexOf('&')
  81. while (i != -1) {
  82. const j = str.indexOf(';', i + 3)
  83. let code = void 0
  84. if (j == -1) break
  85. if (str[i + 1] == '#') {
  86. // &#123; 形式的实体
  87. code = parseInt((str[i + 2] == 'x' ? '0' : '') + str.substring(i + 2, j))
  88. if (!isNaN(code)) str = str.substr(0, i) + String.fromCharCode(code) + str.substr(j + 1)
  89. } else {
  90. // &nbsp; 形式的实体
  91. code = str.substring(i + 1, j)
  92. if (config.entities[code] || code == 'amp' && amp) str = str.substr(0, i) + (config.entities[code] || '&') + str.substr(j + 1)
  93. }
  94. i = str.indexOf('&', i + 1)
  95. }
  96. return str
  97. }
  98. /**
  99. * @description html 解析器
  100. * @param {Object} vm 组件实例
  101. */
  102. function parser(vm) {
  103. this.options = vm || {}
  104. this.tagStyle = Object.assign(config.tagStyle, this.options.tagStyle)
  105. this.imgList = vm.imgList || []
  106. this.plugins = vm.plugins || []
  107. this.attrs = Object.create(null)
  108. this.stack = []
  109. this.nodes = []
  110. }
  111. /**
  112. * @description 执行解析
  113. * @param {String} content 要解析的文本
  114. */
  115. parser.prototype.parse = function (content) {
  116. // 插件处理
  117. for (let i = this.plugins.length; i--;) {
  118. if (this.plugins[i].onUpdate) content = this.plugins[i].onUpdate(content, config) || content
  119. }
  120. new lexer(this).parse(content) // 出栈未闭合的标签
  121. while (this.stack.length) {
  122. this.popNode()
  123. }
  124. return this.nodes
  125. }
  126. /**
  127. * @description 将标签暴露出来(不被 rich-text 包含)
  128. */
  129. parser.prototype.expose = function () {
  130. // #ifndef APP-PLUS-NVUE
  131. for (let i = this.stack.length; i--;) {
  132. const item = this.stack[i]
  133. if (item.name == 'a' || item.c) return
  134. item.c = 1
  135. } // #endif
  136. }
  137. /**
  138. * @description 处理插件
  139. * @param {Object} node 要处理的标签
  140. * @returns {Boolean} 是否要移除此标签
  141. */
  142. parser.prototype.hook = function (node) {
  143. for (let i = this.plugins.length; i--;) {
  144. if (this.plugins[i].onParse && this.plugins[i].onParse(node, this) == false) return false
  145. }
  146. return true
  147. }
  148. /**
  149. * @description 将链接拼接上主域名
  150. * @param {String} url 需要拼接的链接
  151. * @returns {String} 拼接后的链接
  152. */
  153. parser.prototype.getUrl = function (url) {
  154. const { domain } = this.options
  155. if (url[0] == '/') {
  156. // // 开头的补充协议名
  157. if (url[1] == '/') url = `${domain ? domain.split('://')[0] : 'http'}:${url}` // 否则补充整个域名
  158. else if (domain) url = domain + url
  159. } else if (domain && !url.includes('data:') && !url.includes('://')) url = `${domain}/${url}`
  160. return url
  161. }
  162. /**
  163. * @description 解析样式表
  164. * @param {Object} node 标签
  165. * @returns {Object}
  166. */
  167. parser.prototype.parseStyle = function (node) {
  168. const { attrs } = node
  169. const list = (this.tagStyle[node.name] || '').split(';').concat((attrs.style || '').split(';'))
  170. const styleObj = {}
  171. let tmp = ''
  172. if (attrs.id) {
  173. // 暴露锚点
  174. if (this.options.useAnchor) this.expose(); else if (node.name != 'img' && node.name != 'a' && node.name != 'video' && node.name != 'audio') attrs.id = void 0
  175. } // 转换 width 和 height 属性
  176. if (attrs.width) {
  177. styleObj.width = parseFloat(attrs.width) + (attrs.width.includes('%') ? '%' : 'px')
  178. attrs.width = void 0
  179. }
  180. if (attrs.height) {
  181. styleObj.height = parseFloat(attrs.height) + (attrs.height.includes('%') ? '%' : 'px')
  182. attrs.height = void 0
  183. }
  184. for (let i = 0, len = list.length; i < len; i++) {
  185. const info = list[i].split(':')
  186. if (info.length < 2) continue
  187. const key = info.shift().trim().toLowerCase()
  188. let value = info.join(':').trim() // 兼容性的 css 不压缩
  189. if (value[0] == '-' && value.lastIndexOf('-') > 0 || value.includes('safe')) tmp += ';'.concat(key, ':').concat(value) // 重复的样式进行覆盖
  190. else if (!styleObj[key] || value.includes('import') || !styleObj[key].includes('import')) {
  191. // 填充链接
  192. if (value.includes('url')) {
  193. let j = value.indexOf('(') + 1
  194. if (j) {
  195. while (value[j] == '"' || value[j] == "'" || blankChar[value[j]]) {
  196. j++
  197. }
  198. value = value.substr(0, j) + this.getUrl(value.substr(j))
  199. }
  200. } // 转换 rpx(rich-text 内部不支持 rpx)
  201. else if (value.includes('rpx')) {
  202. value = value.replace(/[0-9.]+\s*rpx/g, ($) => `${parseFloat($) * windowWidth / 750}px`)
  203. }
  204. styleObj[key] = value
  205. }
  206. }
  207. node.attrs.style = tmp
  208. return styleObj
  209. }
  210. /**
  211. * @description 解析到标签名
  212. * @param {String} name 标签名
  213. * @private
  214. */
  215. parser.prototype.onTagName = function (name) {
  216. this.tagName = this.xml ? name : name.toLowerCase()
  217. if (this.tagName == 'svg') this.xml = true // svg 标签内大小写敏感
  218. }
  219. /**
  220. * @description 解析到属性名
  221. * @param {String} name 属性名
  222. * @private
  223. */
  224. parser.prototype.onAttrName = function (name) {
  225. name = this.xml ? name : name.toLowerCase()
  226. if (name.substr(0, 5) == 'data-') {
  227. // data-src 自动转为 src
  228. if (name == 'data-src' && !this.attrs.src) this.attrName = 'src' // a 和 img 标签保留 data- 的属性,可以在 imgtap 和 linktap 事件中使用
  229. else if (this.tagName == 'img' || this.tagName == 'a') this.attrName = name // 剩余的移除以减小大小
  230. else this.attrName = void 0
  231. } else {
  232. this.attrName = name
  233. this.attrs[name] = 'T' // boolean 型属性缺省设置
  234. }
  235. }
  236. /**
  237. * @description 解析到属性值
  238. * @param {String} val 属性值
  239. * @private
  240. */
  241. parser.prototype.onAttrVal = function (val) {
  242. const name = this.attrName || '' // 部分属性进行实体解码
  243. if (name == 'style' || name == 'href') this.attrs[name] = decodeEntity(val, true) // 拼接主域名
  244. else if (name.includes('src')) this.attrs[name] = this.getUrl(decodeEntity(val, true)); else if (name) this.attrs[name] = val
  245. }
  246. /**
  247. * @description 解析到标签开始
  248. * @param {Boolean} selfClose 是否有自闭合标识 />
  249. * @private
  250. */
  251. parser.prototype.onOpenTag = function (selfClose) {
  252. // 拼装 node
  253. const node = Object.create(null)
  254. node.name = this.tagName
  255. node.attrs = this.attrs
  256. this.attrs = Object.create(null)
  257. const { attrs } = node
  258. const parent = this.stack[this.stack.length - 1]
  259. const siblings = parent ? parent.children : this.nodes
  260. const close = this.xml ? selfClose : config.voidTags[node.name] // 转换 embed 标签
  261. if (node.name == 'embed') {
  262. // #ifndef H5 || APP-PLUS
  263. const src = attrs.src || '' // 按照后缀名和 type 将 embed 转为 video 或 audio
  264. if (src.includes('.mp4') || src.includes('.3gp') || src.includes('.m3u8') || (attrs.type || '').includes('video')) node.name = 'video'; else if (src.includes('.mp3') || src.includes('.wav') || src.includes('.aac') || src.includes('.m4a') || (attrs.type || '').includes('audio')) node.name = 'audio'
  265. if (attrs.autostart) attrs.autoplay = 'T'
  266. attrs.controls = 'T' // #endif
  267. // #ifdef H5 || APP-PLUS
  268. this.expose() // #endif
  269. } // #ifndef APP-PLUS-NVUE
  270. // 处理音视频
  271. if (node.name == 'video' || node.name == 'audio') {
  272. // 设置 id 以便获取 context
  273. if (node.name == 'video' && !attrs.id) attrs.id = `v${idIndex++}` // 没有设置 controls 也没有设置 autoplay 的自动设置 controls
  274. if (!attrs.controls && !attrs.autoplay) attrs.controls = 'T' // 用数组存储所有可用的 source
  275. node.src = []
  276. if (attrs.src) {
  277. node.src.push(attrs.src)
  278. attrs.src = void 0
  279. }
  280. this.expose()
  281. } // #endif
  282. // 处理自闭合标签
  283. if (close) {
  284. if (!this.hook(node) || config.ignoreTags[node.name]) {
  285. // 通过 base 标签设置主域名
  286. if (node.name == 'base' && !this.options.domain) this.options.domain = attrs.href // #ifndef APP-PLUS-NVUE
  287. // 设置 source 标签(仅父节点为 video 或 audio 时有效)
  288. else if (node.name == 'source' && parent && (parent.name == 'video' || parent.name == 'audio') && attrs.src) parent.src.push(attrs.src) // #endif
  289. return
  290. } // 解析 style
  291. const styleObj = this.parseStyle(node) // 处理图片
  292. if (node.name == 'img') {
  293. if (attrs.src) {
  294. // 标记 webp
  295. if (attrs.src.includes('webp')) node.webp = 'T' // data url 图片如果没有设置 original-src 默认为不可预览的小图片
  296. if (attrs.src.includes('data:') && !attrs['original-src']) attrs.ignore = 'T'
  297. if (!attrs.ignore || node.webp || attrs.src.includes('cloud://')) {
  298. for (let i = this.stack.length; i--;) {
  299. const item = this.stack[i]
  300. if (item.name == 'a') {
  301. node.a = item.attrs
  302. break
  303. } // #ifndef H5 || APP-PLUS
  304. const style = item.attrs.style || ''
  305. if (style.includes('flex:') && !style.includes('flex:0') && !style.includes('flex: 0') && (!styleObj.width || !styleObj.width.includes('%'))) {
  306. styleObj.width = '100% !important'
  307. styleObj.height = ''
  308. for (let j = i + 1; j < this.stack.length; j++) {
  309. this.stack[j].attrs.style = (this.stack[j].attrs.style || '').replace('inline-', '')
  310. }
  311. } else if (style.includes('flex') && styleObj.width == '100%') {
  312. for (let _j = i + 1; _j < this.stack.length; _j++) {
  313. const _style = this.stack[_j].attrs.style || ''
  314. if (!_style.includes(';width') && !_style.includes(' width') && _style.indexOf('width') != 0) {
  315. styleObj.width = ''
  316. break
  317. }
  318. }
  319. } else if (style.includes('inline-block')) {
  320. if (styleObj.width && styleObj.width[styleObj.width.length - 1] == '%') {
  321. item.attrs.style += `;max-width:${styleObj.width}`
  322. styleObj.width = ''
  323. } else item.attrs.style += ';max-width:100%'
  324. } // #endif
  325. item.c = 1
  326. }
  327. attrs.i = this.imgList.length.toString()
  328. let _src = attrs['original-src'] || attrs.src // #ifndef H5 || MP-ALIPAY || APP-PLUS || MP-360
  329. if (this.imgList.includes(_src)) {
  330. // 如果有重复的链接则对域名进行随机大小写变换避免预览时错位
  331. let _i = _src.indexOf('://')
  332. if (_i != -1) {
  333. _i += 3
  334. let newSrc = _src.substr(0, _i)
  335. for (; _i < _src.length; _i++) {
  336. if (_src[_i] == '/') break
  337. newSrc += Math.random() > 0.5 ? _src[_i].toUpperCase() : _src[_i]
  338. }
  339. newSrc += _src.substr(_i)
  340. _src = newSrc
  341. }
  342. } // #endif
  343. this.imgList.push(_src) // #ifdef H5 || APP-PLUS
  344. if (this.options.lazyLoad) {
  345. attrs['data-src'] = attrs.src
  346. attrs.src = void 0
  347. } // #endif
  348. }
  349. }
  350. if (styleObj.display == 'inline') styleObj.display = '' // #ifndef APP-PLUS-NVUE
  351. if (attrs.ignore) {
  352. styleObj['max-width'] = styleObj['max-width'] || '100%'
  353. attrs.style += ';-webkit-touch-callout:none'
  354. } // #endif
  355. // 设置的宽度超出屏幕,为避免变形,高度转为自动
  356. if (parseInt(styleObj.width) > windowWidth) styleObj.height = void 0 // 记录是否设置了宽高
  357. if (styleObj.width) {
  358. if (styleObj.width.includes('auto')) styleObj.width = ''; else {
  359. node.w = 'T'
  360. if (styleObj.height && !styleObj.height.includes('auto')) node.h = 'T'
  361. }
  362. }
  363. } else if (node.name == 'svg') {
  364. siblings.push(node)
  365. this.stack.push(node)
  366. this.popNode()
  367. return
  368. }
  369. for (const key in styleObj) {
  370. if (styleObj[key]) attrs.style += ';'.concat(key, ':').concat(styleObj[key].replace(' !important', ''))
  371. }
  372. attrs.style = attrs.style.substr(1) || void 0
  373. } else {
  374. if (node.name == 'pre' || (attrs.style || '').includes('white-space') && attrs.style.includes('pre')) this.pre = node.pre = true
  375. node.children = []
  376. this.stack.push(node)
  377. } // 加入节点树
  378. siblings.push(node)
  379. }
  380. /**
  381. * @description 解析到标签结束
  382. * @param {String} name 标签名
  383. * @private
  384. */
  385. parser.prototype.onCloseTag = function (name) {
  386. // 依次出栈到匹配为止
  387. name = this.xml ? name : name.toLowerCase()
  388. let i
  389. for (i = this.stack.length; i--;) {
  390. if (this.stack[i].name == name) break
  391. }
  392. if (i != -1) {
  393. while (this.stack.length > i) {
  394. this.popNode()
  395. }
  396. } else if (name == 'p' || name == 'br') {
  397. const siblings = this.stack.length ? this.stack[this.stack.length - 1].children : this.nodes
  398. siblings.push({
  399. name,
  400. attrs: {}
  401. })
  402. }
  403. }
  404. /**
  405. * @description 处理标签出栈
  406. * @private
  407. */
  408. parser.prototype.popNode = function () {
  409. const node = this.stack.pop()
  410. let { attrs } = node
  411. const { children } = node
  412. const parent = this.stack[this.stack.length - 1]
  413. const siblings = parent ? parent.children : this.nodes
  414. if (!this.hook(node) || config.ignoreTags[node.name]) {
  415. // 获取标题
  416. if (node.name == 'title' && children.length && children[0].type == 'text' && this.options.setTitle) {
  417. uni.setNavigationBarTitle({
  418. title: children[0].text
  419. })
  420. }
  421. siblings.pop()
  422. return
  423. }
  424. if (node.pre) {
  425. // 是否合并空白符标识
  426. node.pre = this.pre = void 0
  427. for (let i = this.stack.length; i--;) {
  428. if (this.stack[i].pre) this.pre = true
  429. }
  430. }
  431. const styleObj = {} // 转换 svg
  432. if (node.name == 'svg') {
  433. // #ifndef APP-PLUS-NVUE
  434. let src = ''
  435. const { style } = attrs
  436. attrs.style = ''
  437. attrs.xmlns = 'http://www.w3.org/2000/svg';
  438. (function traversal(node) {
  439. src += `<${node.name}`
  440. for (let item in node.attrs) {
  441. const val = node.attrs[item]
  442. if (val) {
  443. if (item == 'viewbox') item = 'viewBox'
  444. src += ' '.concat(item, '="').concat(val, '"')
  445. }
  446. }
  447. if (!node.children) src += '/>'; else {
  448. src += '>'
  449. for (let _i2 = 0; _i2 < node.children.length; _i2++) {
  450. traversal(node.children[_i2])
  451. }
  452. src += `</${node.name}>`
  453. }
  454. }(node))
  455. node.name = 'img'
  456. node.attrs = {
  457. src: `data:image/svg+xml;utf8,${src.replace(/#/g, '%23')}`,
  458. style,
  459. ignore: 'T'
  460. }
  461. node.children = void 0 // #endif
  462. this.xml = false
  463. return
  464. } // #ifndef APP-PLUS-NVUE
  465. // 转换 align 属性
  466. if (attrs.align) {
  467. if (node.name == 'table') {
  468. if (attrs.align == 'center') styleObj['margin-inline-start'] = styleObj['margin-inline-end'] = 'auto'; else styleObj.float = attrs.align
  469. } else styleObj['text-align'] = attrs.align
  470. attrs.align = void 0
  471. } // 转换 font 标签的属性
  472. if (node.name == 'font') {
  473. if (attrs.color) {
  474. styleObj.color = attrs.color
  475. attrs.color = void 0
  476. }
  477. if (attrs.face) {
  478. styleObj['font-family'] = attrs.face
  479. attrs.face = void 0
  480. }
  481. if (attrs.size) {
  482. let size = parseInt(attrs.size)
  483. if (!isNaN(size)) {
  484. if (size < 1) size = 1; else if (size > 7) size = 7
  485. styleObj['font-size'] = ['xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'][size - 1]
  486. }
  487. attrs.size = void 0
  488. }
  489. } // #endif
  490. // 一些编辑器的自带 class
  491. if ((attrs.class || '').includes('align-center')) styleObj['text-align'] = 'center'
  492. Object.assign(styleObj, this.parseStyle(node))
  493. if (parseInt(styleObj.width) > windowWidth) {
  494. styleObj['max-width'] = '100%'
  495. styleObj['box-sizing'] = 'border-box'
  496. } // #ifndef APP-PLUS-NVUE
  497. if (config.blockTags[node.name]) node.name = 'div' // 未知标签转为 span,避免无法显示
  498. else if (!config.trustTags[node.name] && !this.xml) node.name = 'span'
  499. if (node.name == 'a' || node.name == 'ad' // #ifdef H5 || APP-PLUS
  500. || node.name == 'iframe' // #endif
  501. ) this.expose() // #ifdef APP-PLUS
  502. else if (node.name == 'video') {
  503. let str = '<video style="width:100%;height:100%"' // 空白图占位
  504. if (!attrs.poster && !attrs.autoplay) attrs.poster = "data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg'/>"
  505. for (const item in attrs) {
  506. if (attrs[item]) str += ` ${item}="${attrs[item]}"`
  507. }
  508. if (this.options.pauseVideo) str += ' onplay="for(var e=document.getElementsByTagName(\'video\'),t=0;t<e.length;t++)e[t]!=this&&e[t].pause()"'
  509. str += '>'
  510. for (let _i3 = 0; _i3 < node.src.length; _i3++) {
  511. str += `<source src="${node.src[_i3]}">`
  512. }
  513. str += '</video>'
  514. node.html = str
  515. } // #endif
  516. // 列表处理
  517. else if ((node.name == 'ul' || node.name == 'ol') && node.c) {
  518. const types = {
  519. a: 'lower-alpha',
  520. A: 'upper-alpha',
  521. i: 'lower-roman',
  522. I: 'upper-roman'
  523. }
  524. if (types[attrs.type]) {
  525. attrs.style += `;list-style-type:${types[attrs.type]}`
  526. attrs.type = void 0
  527. }
  528. for (let _i4 = children.length; _i4--;) {
  529. if (children[_i4].name == 'li') children[_i4].c = 1
  530. }
  531. } // 表格处理
  532. else if (node.name == 'table') {
  533. // cellpadding、cellspacing、border 这几个常用表格属性需要通过转换实现
  534. let padding = parseFloat(attrs.cellpadding)
  535. let spacing = parseFloat(attrs.cellspacing)
  536. const border = parseFloat(attrs.border)
  537. if (node.c) {
  538. // padding 和 spacing 默认 2
  539. if (isNaN(padding)) padding = 2
  540. if (isNaN(spacing)) spacing = 2
  541. }
  542. if (border) attrs.style += `;border:${border}px solid gray`
  543. if (node.flag && node.c) {
  544. // 有 colspan 或 rowspan 且含有链接的表格通过 grid 布局实现
  545. styleObj.display = 'grid'
  546. if (spacing) {
  547. styleObj['grid-gap'] = `${spacing}px`
  548. styleObj.padding = `${spacing}px`
  549. } // 无间隔的情况下避免边框重叠
  550. else if (border) attrs.style += ';border-left:0;border-top:0'
  551. const width = []
  552. // 表格的列宽
  553. const trList = []
  554. // tr 列表
  555. const cells = []
  556. // 保存新的单元格
  557. const map = {}; // 被合并单元格占用的格子
  558. (function traversal(nodes) {
  559. for (let _i5 = 0; _i5 < nodes.length; _i5++) {
  560. if (nodes[_i5].name == 'tr') trList.push(nodes[_i5]); else traversal(nodes[_i5].children || [])
  561. }
  562. }(children))
  563. for (let row = 1; row <= trList.length; row++) {
  564. let col = 1
  565. for (let j = 0; j < trList[row - 1].children.length; j++, col++) {
  566. const td = trList[row - 1].children[j]
  567. if (td.name == 'td' || td.name == 'th') {
  568. // 这个格子被上面的单元格占用,则列号++
  569. while (map[`${row}.${col}`]) {
  570. col++
  571. }
  572. let _style2 = td.attrs.style || ''
  573. const start = _style2.indexOf('width') ? _style2.indexOf(';width') : 0 // 提取出 td 的宽度
  574. if (start != -1) {
  575. let end = _style2.indexOf(';', start + 6)
  576. if (end == -1) end = _style2.length
  577. if (!td.attrs.colspan) width[col] = _style2.substring(start ? start + 7 : 6, end)
  578. _style2 = _style2.substr(0, start) + _style2.substr(end)
  579. }
  580. _style2 += (border ? ';border:'.concat(border, 'px solid gray') + (spacing ? '' : ';border-right:0;border-bottom:0') : '') + (padding ? ';padding:'.concat(padding, 'px') : '') // 处理列合并
  581. if (td.attrs.colspan) {
  582. _style2 += ';grid-column-start:'.concat(col, ';grid-column-end:').concat(col + parseInt(td.attrs.colspan))
  583. if (!td.attrs.rowspan) _style2 += ';grid-row-start:'.concat(row, ';grid-row-end:').concat(row + 1)
  584. col += parseInt(td.attrs.colspan) - 1
  585. } // 处理行合并
  586. if (td.attrs.rowspan) {
  587. _style2 += ';grid-row-start:'.concat(row, ';grid-row-end:').concat(row + parseInt(td.attrs.rowspan))
  588. if (!td.attrs.colspan) _style2 += ';grid-column-start:'.concat(col, ';grid-column-end:').concat(col + 1) // 记录下方单元格被占用
  589. for (let k = 1; k < td.attrs.rowspan; k++) {
  590. map[`${row + k}.${col}`] = 1
  591. }
  592. }
  593. if (_style2) td.attrs.style = _style2
  594. cells.push(td)
  595. }
  596. }
  597. if (row == 1) {
  598. let temp = ''
  599. for (let _i6 = 1; _i6 < col; _i6++) {
  600. temp += `${width[_i6] ? width[_i6] : 'auto'} `
  601. }
  602. styleObj['grid-template-columns'] = temp
  603. }
  604. }
  605. node.children = cells
  606. } else {
  607. // 没有使用合并单元格的表格通过 table 布局实现
  608. if (node.c) styleObj.display = 'table'
  609. if (!isNaN(spacing)) styleObj['border-spacing'] = `${spacing}px`
  610. if (border || padding) {
  611. // 遍历
  612. (function traversal(nodes) {
  613. for (let _i7 = 0; _i7 < nodes.length; _i7++) {
  614. const _td = nodes[_i7]
  615. if (_td.name == 'th' || _td.name == 'td') {
  616. if (border) _td.attrs.style = 'border:'.concat(border, 'px solid gray;').concat(_td.attrs.style || '')
  617. if (padding) _td.attrs.style = 'padding:'.concat(padding, 'px;').concat(_td.attrs.style || '')
  618. } else if (_td.children) traversal(_td.children)
  619. }
  620. }(children))
  621. }
  622. } // 给表格添加一个单独的横向滚动层
  623. if (this.options.scrollTable && !(attrs.style || '').includes('inline')) {
  624. const table = { ...node }
  625. node.name = 'div'
  626. node.attrs = {
  627. style: 'overflow:auto'
  628. }
  629. node.children = [table]
  630. attrs = table.attrs
  631. }
  632. } else if ((node.name == 'td' || node.name == 'th') && (attrs.colspan || attrs.rowspan)) {
  633. for (let _i8 = this.stack.length; _i8--;) {
  634. if (this.stack[_i8].name == 'table') {
  635. this.stack[_i8].flag = 1 // 指示含有合并单元格
  636. break
  637. }
  638. }
  639. } // 转换 ruby
  640. else if (node.name == 'ruby') {
  641. node.name = 'span'
  642. for (let _i9 = 0; _i9 < children.length - 1; _i9++) {
  643. if (children[_i9].type == 'text' && children[_i9 + 1].name == 'rt') {
  644. children[_i9] = {
  645. name: 'div',
  646. attrs: {
  647. style: 'display:inline-block'
  648. },
  649. children: [{
  650. name: 'div',
  651. attrs: {
  652. style: 'font-size:50%;text-align:start'
  653. },
  654. children: children[_i9 + 1].children
  655. }, children[_i9]]
  656. }
  657. children.splice(_i9 + 1, 1)
  658. }
  659. }
  660. } else if (node.c) {
  661. node.c = 2
  662. for (let _i10 = node.children.length; _i10--;) {
  663. if (!node.children[_i10].c || node.children[_i10].name == 'table') node.c = 1
  664. }
  665. }
  666. if ((styleObj.display || '').includes('flex') && !node.c) {
  667. for (let _i11 = children.length; _i11--;) {
  668. const _item = children[_i11]
  669. if (_item.f) {
  670. _item.attrs.style = (_item.attrs.style || '') + _item.f
  671. _item.f = void 0
  672. }
  673. }
  674. } // flex 布局时部分样式需要提取到 rich-text 外层
  675. const flex = parent && (parent.attrs.style || '').includes('flex') // #ifdef MP-WEIXIN
  676. // 检查基础库版本 virtualHost 是否可用
  677. && !(node.c && wx.getNFCAdapter) // #endif
  678. // #ifndef MP-WEIXIN || MP-QQ || MP-BAIDU || MP-TOUTIAO
  679. && !node.c // #endif
  680. if (flex) node.f = ';max-width:100%' // #endif
  681. for (const key in styleObj) {
  682. if (styleObj[key]) {
  683. const val = ';'.concat(key, ':').concat(styleObj[key].replace(' !important', '')) // #ifndef APP-PLUS-NVUE
  684. if (flex && (key.includes('flex') && key != 'flex-direction' || key == 'align-self' || styleObj[key][0] == '-' || key == 'width' && val.includes('%'))) {
  685. node.f += val
  686. if (key == 'width') attrs.style += ';width:100%'
  687. } else // #endif
  688. { attrs.style += val }
  689. }
  690. }
  691. attrs.style = attrs.style.substr(1) || void 0
  692. }
  693. /**
  694. * @description 解析到文本
  695. * @param {String} text 文本内容
  696. */
  697. parser.prototype.onText = function (text) {
  698. if (!this.pre) {
  699. // 合并空白符
  700. let trim = ''
  701. let flag
  702. for (let i = 0, len = text.length; i < len; i++) {
  703. if (!blankChar[text[i]]) trim += text[i]; else {
  704. if (trim[trim.length - 1] != ' ') trim += ' '
  705. if (text[i] == '\n' && !flag) flag = true
  706. }
  707. } // 去除含有换行符的空串
  708. if (trim == ' ' && flag) return
  709. text = trim
  710. }
  711. const node = Object.create(null)
  712. node.type = 'text'
  713. node.text = decodeEntity(text)
  714. if (this.hook(node)) {
  715. const siblings = this.stack.length ? this.stack[this.stack.length - 1].children : this.nodes
  716. siblings.push(node)
  717. }
  718. }
  719. /**
  720. * @description html 词法分析器
  721. * @param {Object} handler 高层处理器
  722. */
  723. function lexer(handler) {
  724. this.handler = handler
  725. }
  726. /**
  727. * @description 执行解析
  728. * @param {String} content 要解析的文本
  729. */
  730. lexer.prototype.parse = function (content) {
  731. this.content = content || ''
  732. this.i = 0 // 标记解析位置
  733. this.start = 0 // 标记一个单词的开始位置
  734. this.state = this.text // 当前状态
  735. for (let len = this.content.length; this.i != -1 && this.i < len;) {
  736. this.state()
  737. }
  738. }
  739. /**
  740. * @description 检查标签是否闭合
  741. * @param {String} method 如果闭合要进行的操作
  742. * @returns {Boolean} 是否闭合
  743. * @private
  744. */
  745. lexer.prototype.checkClose = function (method) {
  746. const selfClose = this.content[this.i] == '/'
  747. if (this.content[this.i] == '>' || selfClose && this.content[this.i + 1] == '>') {
  748. if (method) this.handler[method](this.content.substring(this.start, this.i))
  749. this.i += selfClose ? 2 : 1
  750. this.start = this.i
  751. this.handler.onOpenTag(selfClose)
  752. if (this.handler.tagName == 'script') {
  753. this.i = this.content.indexOf('</', this.i)
  754. if (this.i != -1) {
  755. this.i += 2
  756. this.start = this.i
  757. }
  758. this.state = this.endTag
  759. } else this.state = this.text
  760. return true
  761. }
  762. return false
  763. }
  764. /**
  765. * @description 文本状态
  766. * @private
  767. */
  768. lexer.prototype.text = function () {
  769. this.i = this.content.indexOf('<', this.i) // 查找最近的标签
  770. if (this.i == -1) {
  771. // 没有标签了
  772. if (this.start < this.content.length) this.handler.onText(this.content.substring(this.start, this.content.length))
  773. return
  774. }
  775. const c = this.content[this.i + 1]
  776. if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') {
  777. // 标签开头
  778. if (this.start != this.i) this.handler.onText(this.content.substring(this.start, this.i))
  779. this.start = ++this.i
  780. this.state = this.tagName
  781. } else if (c == '/' || c == '!' || c == '?') {
  782. if (this.start != this.i) this.handler.onText(this.content.substring(this.start, this.i))
  783. const next = this.content[this.i + 2]
  784. if (c == '/' && (next >= 'a' && next <= 'z' || next >= 'A' && next <= 'Z')) {
  785. // 标签结尾
  786. this.i += 2
  787. this.start = this.i
  788. return this.state = this.endTag
  789. } // 处理注释
  790. let end = '-->'
  791. if (c != '!' || this.content[this.i + 2] != '-' || this.content[this.i + 3] != '-') end = '>'
  792. this.i = this.content.indexOf(end, this.i)
  793. if (this.i != -1) {
  794. this.i += end.length
  795. this.start = this.i
  796. }
  797. } else this.i++
  798. }
  799. /**
  800. * @description 标签名状态
  801. * @private
  802. */
  803. lexer.prototype.tagName = function () {
  804. if (blankChar[this.content[this.i]]) {
  805. // 解析到标签名
  806. this.handler.onTagName(this.content.substring(this.start, this.i))
  807. while (blankChar[this.content[++this.i]]) {
  808. }
  809. if (this.i < this.content.length && !this.checkClose()) {
  810. this.start = this.i
  811. this.state = this.attrName
  812. }
  813. } else if (!this.checkClose('onTagName')) this.i++
  814. }
  815. /**
  816. * @description 属性名状态
  817. * @private
  818. */
  819. lexer.prototype.attrName = function () {
  820. let c = this.content[this.i]
  821. if (blankChar[c] || c == '=') {
  822. // 解析到属性名
  823. this.handler.onAttrName(this.content.substring(this.start, this.i))
  824. let needVal = c == '='
  825. const len = this.content.length
  826. while (++this.i < len) {
  827. c = this.content[this.i]
  828. if (!blankChar[c]) {
  829. if (this.checkClose()) return
  830. if (needVal) {
  831. // 等号后遇到第一个非空字符
  832. this.start = this.i
  833. return this.state = this.attrVal
  834. }
  835. if (this.content[this.i] == '=') needVal = true; else {
  836. this.start = this.i
  837. return this.state = this.attrName
  838. }
  839. }
  840. }
  841. } else if (!this.checkClose('onAttrName')) this.i++
  842. }
  843. /**
  844. * @description 属性值状态
  845. * @private
  846. */
  847. lexer.prototype.attrVal = function () {
  848. const c = this.content[this.i]
  849. const len = this.content.length // 有冒号的属性
  850. if (c == '"' || c == "'") {
  851. this.start = ++this.i
  852. this.i = this.content.indexOf(c, this.i)
  853. if (this.i == -1) return
  854. this.handler.onAttrVal(this.content.substring(this.start, this.i))
  855. } // 没有冒号的属性
  856. else {
  857. for (; this.i < len; this.i++) {
  858. if (blankChar[this.content[this.i]]) {
  859. this.handler.onAttrVal(this.content.substring(this.start, this.i))
  860. break
  861. } else if (this.checkClose('onAttrVal')) return
  862. }
  863. }
  864. while (blankChar[this.content[++this.i]]) {
  865. }
  866. if (this.i < len && !this.checkClose()) {
  867. this.start = this.i
  868. this.state = this.attrName
  869. }
  870. }
  871. /**
  872. * @description 结束标签状态
  873. * @returns {String} 结束的标签名
  874. * @private
  875. */
  876. lexer.prototype.endTag = function () {
  877. const c = this.content[this.i]
  878. if (blankChar[c] || c == '>' || c == '/') {
  879. this.handler.onCloseTag(this.content.substring(this.start, this.i))
  880. if (c != '>') {
  881. this.i = this.content.indexOf('>', this.i)
  882. if (this.i == -1) return
  883. }
  884. this.start = ++this.i
  885. this.state = this.text
  886. } else this.i++
  887. }
  888. module.exports = parser