由於json文件不支持註釋,所以如果在json文件中標記了註釋,則使用python中的json.load()/json.loads()無法加載該json文件。
本文旨在解決當定義“//”爲json註釋時,如何正確解析有註釋的json文件。
程序實現
# encoding: utf-8
import json
import re
import sys
# Python 2-only idiom: reload ``sys`` so that ``setdefaultencoding`` can be
# called, then make UTF-8 the interpreter-wide default for implicit
# str <-> unicode conversions.  ``reload`` is not a builtin on Python 3.
reload(sys)
sys.setdefaultencoding('utf8')
# Prefix prepended to the diagnostic messages printed by loadJson.
CAUTION_PRINT_HEAD = 'caution: '
# 創建一個xstr類,用於處理從文件中讀出的字符串
class xstr:
    """Wrap one line of text read from a JSON file that may end in a ``//`` comment.

    A ``//`` only starts a comment when it appears outside a double-quoted
    JSON string; a ``//`` inside quotes (for example in a URL) is kept.
    """

    def __init__(self, instr):
        """Store the raw line.

        Args:
            instr: the line as read from the file (it may include the
                trailing newline).
        """
        self.instr = instr

    def rmCmt(self):
        """Return the line with a trailing ``//`` comment removed.

        Each ``//`` is examined from left to right.  The number of unescaped
        double quotes seen so far decides whether it is inside a string
        (odd count) or is a real comment marker (even count).

        Returns:
            Everything before the first ``//`` that lies outside a string
            (the comment and the line's newline are dropped), or the line
            unchanged when no such ``//`` exists.
        """
        line = self.instr
        quote_count = 0
        scan_from = 0  # start of the text not yet searched for quotes
        slash_pos = line.find('//')
        while slash_pos >= 0:
            # Count the unescaped quotes between the previous marker and
            # this '//'.
            quote_pos = line.find('"', scan_from, slash_pos)
            while quote_pos >= 0:
                if not self.isEscapeOpr(line[scan_from:quote_pos]):
                    quote_count += 1
                scan_from = quote_pos + 1
                quote_pos = line.find('"', scan_from, slash_pos)
            if quote_count % 2 == 0:
                # Every string opened so far is closed: this is a comment.
                return line[:slash_pos]
            # The '//' is inside a string; keep looking after it.
            scan_from = slash_pos + 2
            slash_pos = line.find('//', scan_from)
        return line

    def isEscapeOpr(self, instr):
        """Tell whether the character that follows ``instr`` is escaped.

        Args:
            instr: the text immediately preceding the character of interest.

        Returns:
            True when ``instr`` ends with an odd number of backslashes,
            False otherwise (including for an empty string).
        """
        # Counting via rstrip is safe when ``instr`` consists only of
        # backslashes; indexing instr[-1] in a stripping loop would raise
        # IndexError once the string became empty.
        trailing_backslashes = len(instr) - len(instr.rstrip('\\'))
        return trailing_backslashes % 2 == 1
# 從json文件的路徑JsonPath讀取該文件,返回json對象
def loadJson(JsonPath):
    """Load a JSON file that may contain ``//`` comments and return the parsed object.

    Lines that are blank or consist only of a ``//`` comment are skipped.
    Trailing ``//`` comments on other lines are stripped with
    ``xstr.rmCmt`` before the remaining text is parsed.

    Args:
        JsonPath: path of the file to read.

    Returns:
        The object produced by ``json.loads`` for the comment-free text.

    Raises:
        SystemExit: after printing a diagnostic, when the file cannot be
            opened or the cleaned text is not valid JSON.  The exit status
            is 1 so that calling scripts can detect the failure.
    """
    try:
        srcJson = open(JsonPath, 'r')
    except IOError:
        print(CAUTION_PRINT_HEAD + 'cannot open ' + JsonPath)
        sys.exit(1)

    keptParts = []
    # The with-block closes the file even if reading raises.
    with srcJson:
        for line in srcJson:
            # Skip whole-line comments and blank lines.
            if re.match(r'\s*//', line) or re.match(r'\s*\n', line):
                continue
            keptParts.append(xstr(line).rmCmt())

    try:
        return json.loads(''.join(keptParts))
    except ValueError:
        # json reports malformed input as ValueError (or a subclass of it).
        print(CAUTION_PRINT_HEAD + JsonPath + ' is not a valid json file')
        sys.exit(1)
# 帶縮進地在屏幕輸出json字符串
def printRes(resStr):
    """Print a compact JSON string on screen with two-space indentation.

    Line breaks are inserted purely textually around ``,``, ``{``, ``}``,
    ``[`` and ``]``, so these characters are also split when they occur
    inside string values.

    Args:
        resStr: the JSON text to display.

    Returns:
        None.  The formatted text is written to standard output.
    """
    # Order matters: ':{' is rewritten after '{' has already gained its
    # trailing newline.
    for old, new in (
        (',', ',\n'),
        ('{', '{\n'),
        (':{', ':\n{'),
        ('}', '\n}'),
        ('[', '\n[\n'),
        (']', '\n]'),
    ):
        resStr = resStr.replace(old, new)

    preBlank = ''
    for line in resStr.split('\n'):
        if not line:
            continue
        opensScope = line.endswith(('{', '['))
        # A closing bracket (optionally followed by a comma) is printed one
        # level shallower than the content it closes.
        if line.endswith(('}', ']', '},', '],')):
            preBlank = preBlank[:-2]
        try:
            # Only byte strings need decoding; text strings print as-is.
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            print(preBlank + line)
        except UnicodeError:
            # Best effort: keep going when a line cannot be decoded/encoded.
            print(preBlank + '[%This line cannot be decoded%]')
        if opensScope:
            preBlank += ' ' * 2