初步完成png图片识别
parent
d1e0061c14
commit
6ebd0c35ef
|
@ -0,0 +1,417 @@
|
|||
import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class PNG:
    """Detection and repair helpers for a PNG image held in a ``bytearray``.

    All helpers are static; ``main`` passes the class itself around as the
    format handler and calls ``get_info``, ``check_tail``, ``repair_sig`` and
    ``repair_size`` on it.
    """

    def __init__(self):
        pass

    # Detect the file format; returns a confidence percentage.
    @staticmethod
    def test(data: bytearray):
        """Return the confidence (0-100) that *data* is a PNG.

        Currently a stub that always answers 100 without looking at *data*.
        """
        return 100

    # Extract image information.
    @staticmethod
    def get_info(data: bytearray):
        """Print and return the payload of the first ``tEXt`` chunk.

        Returns the chunk payload (a slice of *data*), or ``None`` when no
        ``tEXt`` marker is found past offset 32.
        """
        index = data.find(b'tEXt')
        # A hit at offset <= 32 is ignored, presumably because the signature
        # and the IHDR chunk occupy that area. Returning None here avoids the
        # UnboundLocalError the previous code raised on files without tEXt.
        if index <= 32:
            return None

        # The 4 bytes in front of the chunk type hold the payload length.
        length = int.from_bytes(data[index - 4:index], byteorder='big')
        chunk_data = data[index + 4:index + 4 + length]
        print('[*] 图像信息:')
        print(bytes(chunk_data).split(b'\x00'))
        return chunk_data

    @staticmethod
    def check_tail(data: bytearray):
        """Detect and strip bytes that follow the IEND marker.

        Returns ``(changed, data, tail)``. When trailing bytes exist,
        ``changed`` is True, ``tail`` holds them and the returned ``data``
        ends right where ``tail`` began. Otherwise ``data`` is returned
        untouched and ``tail`` is ``None`` (marker missing or misplaced) or
        empty.
        """
        changed = False
        tail = None
        # b'IEND' followed by the 4 CRC bytes expected after it.
        iend = b'\x49\x45\x4E\x44\xAE\x42\x60\x82'

        index = data.find(iend)
        if index != -1:
            index += 8
        else:
            # Fall back to the bare chunk type when the CRC bytes differ.
            index = data.find(iend[0:4])
            if index != -1:
                index += 4

        if index == -1:
            print('[x] 缺少png文件格式结束标识符IEND')
        elif index < 32:
            print('[x] 缺少png文件格式结束标识符IEND位置貌似不太对')
        else:
            tail = data[index:]
            if len(tail) > 0:
                changed = True
                # Actually drop the garbage; previously the message claimed a
                # repair but the data was returned unmodified.
                data = data[:index]
                print('[x] 检测到文件尾部有垃圾数据,已尝试修复')

        return changed, data, tail

    # Repair the file signature.
    @staticmethod
    def repair_sig(data: bytearray):
        """Make *data* start with the 8-byte PNG signature.

        Returns ``(changed, data, head)``. Bytes found in front of the
        signature are removed and returned as ``head`` (``bytes``). If the
        signature is not found at all, the first 8 bytes are overwritten
        with it in place.
        """
        changed = False
        head = None
        print('[*] 检测文件头')
        sig = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
        index = data.find(sig)
        if index > 0:
            print('[x] 检测到文件头前有垃圾数据,已尝试修复')
            head = bytes(data[:index])
            data = data[index:]
            changed = True
        elif data[0:8] != sig:
            print('[x] 检测到文件头异常,已尝试修复')
            data[0:8] = sig
            changed = True
        return changed, data, head

    # Repair the image dimensions.
    @staticmethod
    def repair_size(data: bytearray):
        """Check the first chunk's CRC and try to recover width/height.

        The chunk at offset 8 is read as length, type, payload and CRC, with
        width and height taken from the first 8 payload bytes. On a CRC
        mismatch the width, then the height, is brute-forced over 1..2047
        until the stored CRC matches. If neither works the user is asked for
        a height, which is written without recomputing the CRC.

        Returns ``(changed, data, width, height)``; *data* is patched in
        place.
        """
        changed = False
        print('[*] 检测文件尺寸')
        chunk_begin = 8
        data_begin = chunk_begin + 8
        width_begin = data_begin
        height_begin = data_begin + 4

        length = int.from_bytes(data[chunk_begin:chunk_begin + 4], byteorder='big')
        ctype = data[chunk_begin + 4:chunk_begin + 8]  # expected to be b'IHDR'
        chunk_data = data[data_begin:data_begin + length]
        width = int.from_bytes(chunk_data[0:4], byteorder='big')
        height = int.from_bytes(chunk_data[4:8], byteorder='big')
        crc = int.from_bytes(data[data_begin + length:data_begin + length + 4], byteorder='big')

        # The CRC covers the chunk type plus its payload.
        crc_input = bytes(ctype + chunk_data)
        if PNG.test_ihdr_chunk(crc_input, crc):
            print(' - 图片尺寸设置正确')
            return changed, data, width, height

        print('[x] 尝试修复图片宽度...')
        found = PNG._brute_force_field(crc_input, 4, crc)
        if found is not None:
            width = found
            print(f'[*] 图片宽度修复成功,宽度值为{width}')
            data[width_begin:width_begin + 4] = width.to_bytes(4, byteorder='big')
            return True, data, width, height

        print('[x] 尝试修复图片高度...')
        found = PNG._brute_force_field(crc_input, 8, crc)
        if found is not None:
            height = found
            print(f'[*] 图片高度修复成功,高度值为{height}')
            # Write the recovered *height* (the old code wrote width here).
            data[height_begin:height_begin + 4] = height.to_bytes(4, byteorder='big')
            return True, data, width, height

        temp = input(f'当前宽度值为{width},高度值为{height},是否尝试将高度值设置为更高?可输入高度值或直接回车:')
        if temp.isdigit():
            height = int(temp)
            data[height_begin:height_begin + 4] = height.to_bytes(4, byteorder='big')
            print(f'[*] 已将图片高度设置为{height}')
            changed = True
        return changed, data, width, height

    @staticmethod
    def _brute_force_field(crc_input, offset, crc, limit=2048):
        """Find the value in 1..limit-1 that makes the CRC match.

        *crc_input* is chunk type + payload; the 4-byte big-endian field at
        *offset* is replaced by each candidate. Returns the matching value
        or ``None``.
        """
        candidate = bytearray(crc_input)
        for value in range(1, limit):
            candidate[offset:offset + 4] = value.to_bytes(4, byteorder='big')
            if PNG.test_ihdr_chunk(candidate, crc):
                return value
        return None

    @staticmethod
    def test_ihdr_chunk(chunk_data, crc):
        """Return True when the CRC-32 of *chunk_data* equals the int *crc*."""
        crc32 = binascii.crc32(chunk_data) & 0xffffffff
        return crc32 == crc
|
||||
|
||||
class JPG:
    """Placeholder handler for JPEG images.

    NOTE(review): the signature and size logic below still use the PNG
    signature bytes and an IHDR-style chunk layout, not JPEG structures, so
    this class needs a real JPEG implementation. It is kept interface-
    compatible with the PNG handler so that ``main`` can call it safely.
    """

    def __init__(self):
        pass

    # Detect the file format; returns a confidence percentage.
    @staticmethod
    def test(data: bytearray):
        """Return the confidence (0-100) for this format; stub, always 100."""
        return 100

    # Extract image information.
    @staticmethod
    def get_info(data: bytearray):
        """Return image metadata; stub that always returns an empty dict."""
        info = {}
        return info

    @staticmethod
    def check_tail(data: bytearray):
        """Stub tail check: returns ``(False, data, None)`` unchanged."""
        changed = False
        tail = None
        return changed, data, tail

    # Repair the file signature.
    @staticmethod
    def repair_sig(data: bytearray):
        """Make *data* start with the expected 8-byte signature.

        Returns ``(changed, data, head)``. Bytes in front of the signature
        are stripped and returned as ``head``; a signature already at offset
        0 is left alone (it used to be reported as leading garbage).
        """
        changed = False
        head = None
        print('[*] 检测文件头')
        sig = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
        index = data.find(sig)
        if index > 0:
            print('[x] 检测到文件头前有垃圾数据,已尝试修复')
            head = data[:index]
            data = data[index:]
            changed = True
        elif index == -1 and data[0:8] != sig:
            print('[x] 检测到文件头异常,已尝试修复')
            data[0:8] = sig
            changed = True
        return changed, data, head

    # Repair the image dimensions.
    @staticmethod
    def repair_size(data: bytearray):
        """Check the first chunk's CRC and try to recover the dimensions.

        Returns ``(changed, data, width, height)``, the 4-tuple that ``main``
        unpacks (the old 2-tuple raised ValueError there). When the CRC does
        not match and the width is 0, the width is brute-forced over
        1..2047. If that does not repair the chunk the user is asked for a
        height, which is written without recomputing the CRC.
        """
        changed = False
        print('[*] 检测文件尺寸')
        chunk_begin = 8
        width_begin = chunk_begin + 8
        height_begin = chunk_begin + 12

        length = int.from_bytes(data[chunk_begin:chunk_begin + 4], byteorder='big')
        # Chunk type (expected b'IHDR') followed by `length` payload bytes;
        # this is the span the CRC is computed over.
        chunk_data = data[chunk_begin + 4:chunk_begin + 8 + length]
        width = int.from_bytes(chunk_data[4:8], byteorder='big')
        height = int.from_bytes(chunk_data[8:12], byteorder='big')
        # Stored CRC as an int, so it can be compared with binascii.crc32.
        crc = int.from_bytes(data[chunk_begin + 8 + length:chunk_begin + 12 + length], byteorder='big')

        if JPG.test_ihdr_chunk(chunk_data, crc):
            print(' - 图片尺寸设置正确')
            return changed, data, width, height

        if width == 0:
            print('[x] 检测到图片宽度设置异常,尝试修复...')
            candidate = bytearray(chunk_data)
            for i in range(1, 2048):
                candidate[4:8] = i.to_bytes(4, byteorder='big')
                if JPG.test_ihdr_chunk(candidate, crc):
                    width = i
                    print(f'[*] 图片宽度修复成功,宽度值为{i}')
                    data[width_begin:width_begin + 4] = width.to_bytes(4, byteorder='big')
                    return True, data, width, height
            print('[x] 图片宽度修复失败')

        temp = input(f'当前宽度值为{width},高度值为{height},是否尝试将高度值设置为更高?可输入高度值或直接回车:')
        if temp.isdigit():
            height = int(temp)
            data[height_begin:height_begin + 4] = height.to_bytes(4, byteorder='big')
            print(f'[*] 已将图片高度设置为{height}')
            changed = True
        return changed, data, width, height

    @staticmethod
    def test_ihdr_chunk(chunk_data, crc):
        """Return True when the CRC-32 of *chunk_data* equals the int *crc*."""
        crc32 = binascii.crc32(chunk_data) & 0xffffffff
        return crc32 == crc
|
||||
|
||||
class GIF:
    """Placeholder handler for GIF images.

    NOTE(review): the signature and size logic below still use the PNG
    signature bytes and an IHDR-style chunk layout, not GIF structures, so
    this class needs a real GIF implementation. It is kept interface-
    compatible with the PNG handler so that ``main`` can call it safely.
    """

    def __init__(self):
        pass

    # Detect the file format; returns a confidence percentage.
    @staticmethod
    def test(data: bytearray):
        """Return the confidence (0-100) for this format; stub, always 100."""
        return 100

    # Extract image information.
    @staticmethod
    def get_info(data: bytearray):
        """Return image metadata; stub that always returns an empty dict."""
        info = {}
        return info

    @staticmethod
    def check_tail(data: bytearray):
        """Stub tail check: returns ``(False, data, None)`` unchanged."""
        changed = False
        tail = None
        return changed, data, tail

    # Repair the file signature.
    @staticmethod
    def repair_sig(data: bytearray):
        """Make *data* start with the expected 8-byte signature.

        Returns ``(changed, data, head)``. Bytes in front of the signature
        are stripped and returned as ``head``; a signature already at offset
        0 is left alone (it used to be reported as leading garbage).
        """
        changed = False
        head = None
        print('[*] 检测文件头')
        sig = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
        index = data.find(sig)
        if index > 0:
            print('[x] 检测到文件头前有垃圾数据,已尝试修复')
            head = data[:index]
            data = data[index:]
            changed = True
        elif index == -1 and data[0:8] != sig:
            print('[x] 检测到文件头异常,已尝试修复')
            data[0:8] = sig
            changed = True
        return changed, data, head

    # Repair the image dimensions.
    @staticmethod
    def repair_size(data: bytearray):
        """Check the first chunk's CRC and try to recover the dimensions.

        Returns ``(changed, data, width, height)``, the 4-tuple that ``main``
        unpacks (the old 2-tuple raised ValueError there). When the CRC does
        not match and the width is 0, the width is brute-forced over
        1..2047. If that does not repair the chunk the user is asked for a
        height, which is written without recomputing the CRC.
        """
        changed = False
        print('[*] 检测文件尺寸')
        chunk_begin = 8
        width_begin = chunk_begin + 8
        height_begin = chunk_begin + 12

        length = int.from_bytes(data[chunk_begin:chunk_begin + 4], byteorder='big')
        # Chunk type (expected b'IHDR') followed by `length` payload bytes;
        # this is the span the CRC is computed over.
        chunk_data = data[chunk_begin + 4:chunk_begin + 8 + length]
        width = int.from_bytes(chunk_data[4:8], byteorder='big')
        height = int.from_bytes(chunk_data[8:12], byteorder='big')
        # Stored CRC as an int, so it can be compared with binascii.crc32.
        crc = int.from_bytes(data[chunk_begin + 8 + length:chunk_begin + 12 + length], byteorder='big')

        if GIF.test_ihdr_chunk(chunk_data, crc):
            print(' - 图片尺寸设置正确')
            return changed, data, width, height

        if width == 0:
            print('[x] 检测到图片宽度设置异常,尝试修复...')
            candidate = bytearray(chunk_data)
            for i in range(1, 2048):
                candidate[4:8] = i.to_bytes(4, byteorder='big')
                if GIF.test_ihdr_chunk(candidate, crc):
                    width = i
                    print(f'[*] 图片宽度修复成功,宽度值为{i}')
                    data[width_begin:width_begin + 4] = width.to_bytes(4, byteorder='big')
                    return True, data, width, height
            print('[x] 图片宽度修复失败')

        temp = input(f'当前宽度值为{width},高度值为{height},是否尝试将高度值设置为更高?可输入高度值或直接回车:')
        if temp.isdigit():
            height = int(temp)
            data[height_begin:height_begin + 4] = height.to_bytes(4, byteorder='big')
            print(f'[*] 已将图片高度设置为{height}')
            changed = True
        return changed, data, width, height

    @staticmethod
    def test_ihdr_chunk(chunk_data, crc):
        """Return True when the CRC-32 of *chunk_data* equals the int *crc*."""
        crc32 = binascii.crc32(chunk_data) & 0xffffffff
        return crc32 == crc
|
||||
|
||||
|
||||
def check_image_type(filename: str, data: bytearray):
    """Decide which image format handler to use for a file.

    The format is guessed twice: from the file name extension and from the
    handlers' ``test`` scores on *data*. The result is reported, and the
    user may then override it interactively.

    Returns ``'png'``, ``'jpg'``, ``'gif'`` or ``None``.
    """
    # Guess from the extension. Plain suffix checks replace the former
    # non-raw regex patterns, whose '\.' was an invalid string escape.
    lowered = filename.lower()
    file_type1 = None
    for suffix, kind in (('.png', 'png'), ('.jpg', 'jpg'), ('.jpeg', 'jpg'), ('.gif', 'gif')):
        if lowered.endswith(suffix):
            file_type1 = kind
            break

    # Guess from the content: the first handler with the strictly highest
    # score wins.
    file_type2 = None
    weight = 0
    for kind, model in (('png', PNG), ('jpg', JPG), ('gif', GIF)):
        score = model.test(data)
        if score > weight:
            file_type2 = kind
            weight = score

    if file_type1 is not None:
        if file_type1 == file_type2:
            print(f'看起来文件格式是{file_type1}')
        else:
            print(f'文件名中格式是{file_type1},但是解析文件头格式发现{weight}%是{file_type2}')
    elif file_type2 is not None:
        file_type1 = file_type2
        print(f'检测文件头认为文件格式{weight}%是{file_type2}')
    else:
        print('未检测到任何图片格式特征,该文件可能不是图片文件')

    # Optional manual override; anything other than 1/2/3 keeps the guess.
    index = input('要指定文件格式吗?或者直接回车。(1、png 2、jpg 3、gif):')
    # isdecimal() only accepts characters int() can parse, unlike isdigit().
    if index.isdecimal():
        file_type1 = {1: 'png', 2: 'jpg', 3: 'gif'}.get(int(index), file_type1)

    return file_type1
|
||||
|
||||
|
||||
|
||||
# Main routine
def main(filename: str):
    """Analyse one image file, repair what can be repaired and save it.

    Reads *filename*, picks a format handler via ``check_image_type`` and
    runs its tail, signature and size checks. If anything changed, the
    repaired image (plus any stripped ``head`` / ``tail`` bytes) is written
    to a new directory next to the input file, and the image is opened with
    PIL for a visual check.
    """
    # Read the image data.
    with open(filename, 'rb') as f:
        data = bytearray(f.read())

    # Determine the file format.
    file_type = check_image_type(filename, data)
    model = {'png': PNG, 'jpg': JPG, 'gif': GIF}.get(file_type)

    # Detect and repair.
    changed = changed1 = changed2 = False
    tail = head = None
    if model is not None:
        model.get_info(data)
        changed1, data, tail = model.check_tail(data)
        changed2, data, head = model.repair_sig(data)
        changed3, data, width, height = model.repair_size(data)
        changed = changed1 or changed2 or changed3

    if not changed:
        print('[*] 未进行任何处理')
        print('[*] 分析完成!')
        return

    # Create the result directory next to the input file. The name is built
    # from the base name only: using the full path broke os.path.join for
    # absolute paths and for relative paths that contain a directory.
    basename = os.path.basename(filename)
    dirname = basename.replace('.', '_') + str(int(time.time()))
    dirpath = os.path.join(os.path.dirname(os.path.abspath(filename)), dirname)
    os.mkdir(dirpath)

    # Save the analysis results.
    imagepath = os.path.join(dirpath, basename)
    with open(imagepath, 'wb') as f:
        f.write(data)
    if changed1 and tail:
        with open(os.path.join(dirpath, 'tail'), 'wb') as f:
            f.write(tail)
    if changed2 and head:
        with open(os.path.join(dirpath, 'head'), 'wb') as f:
            f.write(head)
    print(f'[*] 分析结果已保存到 {dirpath}')

    # Try to open the image for a visual check; failing here is not fatal.
    try:
        im = Image.open(imagepath)
        im.show()
    except Exception as e:
        print('[x] 尝试打开图片失败,失败原因:' + str(e))

    print('[*] 分析完成!')
|
||||
|
||||
# Script entry point: only runs when executed directly, so importing this
# module no longer triggers an analysis.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Exit with a usage hint when no file was given (previously this
        # raised IndexError).
        print(f'[x] 用法: python {sys.argv[0]} <图片文件路径>')
        sys.exit(1)
    main(sys.argv[1])
|
Loading…
Reference in New Issue