<?xml version="1.0" encoding="utf-8" standalone="yes"?>import re
message = '密码修改成功'
regex = '((\\d{5});)'
entities = re.findall(regex, message)
for entity in entities:
message = message.replace(entity[0], unichr(int(entity[1])))
#print message #在pythonwin中执行没有问题,在pydev中执行报错</span>
print message.encode('gbk') #正常
下面是对'密码'两个字的一些测试,在pythonwin交互窗口中执行.虽然知道了要得到正确的中文要怎么在程序中处理,并且使得程序在pydev和pythonwin
两个环境中执行都不出错,但是却还是搞不清为什么会这个样子.先记下罢.有能解释底层机制的朋友,也请留言帮个忙吧.
>>> s = '密码'
>>> s
'\xc3\xdc\xc2\xeb'
>>> su = u'密码'
>>> su
u'\xc3\xdc\xc2\xeb'
#u前缀表示是unicode编码,但是里面实际存储的是密码两个字的gbk编码
>>> print s
密码
>>> print su #在pydev和SciTE中报错,不能打印
?ü??
>>> char1 = unichr(23494)
>>> char2 = unichr(30721)
>>> char1
u'\u5bc6'
>>> char2
u'\u7801'
>>> print char1
密<br />
>>> print char2
码<br />
>>> char = char1 + char2
>>> char
u'\u5bc6\u7801'
>>> print char #在pydev和SciTE中会报错为什么</span>
密码
>>> s.decode('gbk') #变成了unicode编码16进制
u'\u5bc6\u7801'
>>> print s.decode('gbk') #在pydev和SciTE中不能正常打印输出</span>
密码
>>> char.encode('gbk')
'\xc3\xdc\xc2\xeb'
>>> s
'\xc3\xdc\xc2\xeb'
>>> su
u'\xc3\xdc\xc2\xeb'
>>> char
u'\u5bc6\u7801'
>>> char.encode('gbk')
'\xc3\xdc\xc2\xeb'
>>> print s
密码
>>> print char.encode('gbk')
密码
Update 1:
sys.setdefaultencoding()可以设置默认编码供转换时使用,但是Python启动之后这个方法会被site.py删除,因此必须reload(sys)然后调用sys.setdefaultencoding
下面是我的测试方法,根据这里的结果,估计是pythonwin内启动的交互环境对参数应该有了设置,不过sys.getdefaultencoding()的返回还是默认的ascii:
'修改密码成功的html entity是: "密码修改成功",在下面的python代码中在这个网页上显示不对,把&amp;amp;要换成&amp;符号
#python 2.4 windows, execute in SciTe or Eclipse pydev
#Note: if you execute this script in pythonwin, the result may be different!
import sys
import re
reload(sys)
#should do this to setdefault encoding of sys,
#this method is deleted in site.py [/PYTHON_HOME/Lib] by default
#try to set default encoding other than default encoding
#default ascii # Error: out 1, out 3, out 6
#sys.setdefaultencoding('gbk') #all right, but a warning raised
#sys.setdefaultencoding('utf-8') #out 1, out 3, out 6: unreadable in outputwindow, may be right if change the console encoding
message = '密码修改成功'
regex = '((\\d{5});)'
entities = re.findall(regex, message)
for entity in entities:
message = message.replace(entity[0], unichr(int(entity[1])))
#message : u'\u5bc6\u7801\u4fee\u6539\u6210\u529f'
print message #out 1
#message.encode('gbk') : '\xc3\xdc\xc2\xeb\xd0\xde\xb8\xc4\xb3\xc9\xb9\xa6'
print message.encode('gbk') #out 2
char1 = unichr(23494)
char2 = unichr(30721)
char = char1 + char2
#char : u'\u5bc6\u7801'
print char #out 3
#char.encode('gbk') : '\xc3\xdc\xc2\xeb'
print char.encode('gbk') #out 4
s='密码'
#s : '\xc3\xdc\xc2\xeb'
print s #out 5
#s.decode('gbk') : u'\u5bc6\u7801'
print s.decode('gbk') #out 6
]]>