ocr-braille-preisliste/main.py
2024-06-09 17:29:18 +02:00

47 lines
1.3 KiB
Python

from PIL import Image
import pytesseract
from pybraille import convertText
import matplotlib.pyplot as plt
import re
# Font-size keyword arguments for matplotlib text calls.
# NOTE(review): not referenced by any function visible in this file.
text_kwargs = {"fontsize": 8}
def imgToText(path):
    """Run OCR on an image and return the recognised text, cleaned up.

    The raw OCR output is post-processed in this order:

    1. the euro sign is spelled out as ``Euro``;
    2. every character other than ASCII letters, digits, ``,``, ``.`` and
       newline is replaced by a space;
    3. rows of three characters or fewer are dropped;
    4. leading/trailing whitespace is stripped and inner whitespace runs are
       collapsed to a single space;
    5. rows that are empty after step 4 are dropped.

    Args:
        path: Image to read; passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        The remaining rows joined with newline characters (an empty string
        if no row survives the clean-up).
    """
    raw = pytesseract.image_to_string(path)
    # Spell out the euro sign (U+20AC) before the filter below blanks it out.
    # The search string must not be empty: str.replace('', 'Euro') would
    # insert 'Euro' between every pair of characters.
    raw = raw.replace('\u20ac', 'Euro')
    # Blank out special characters that do not fit into braille notation.
    raw = re.sub(r"[^,.\na-zA-Z0-9]", ' ', raw)
    rows = []
    for row in raw.split('\n'):
        # Skip rows of 3 characters or fewer; the length is measured before
        # the whitespace clean-up.
        if len(row) <= 3:
            continue
        # Remove prefix/suffix spaces and collapse double spaces.
        row = ' '.join(row.split())
        # A row made only of blanked-out characters is empty by now; keeping
        # it would put a blank line into the result.
        if row:
            rows.append(row)
    return '\n'.join(rows)
def textToBraille(text):
    """Convert *text* to braille by delegating to ``pybraille.convertText``.

    Args:
        text: The text to convert; passed unchanged to ``convertText``.

    Returns:
        Whatever ``convertText`` returns for *text*.
    """
    braille = convertText(text)
    return braille
def brailleToImg(text, braille):
    """Display the plain text and its braille form side by side.

    Both strings are drawn in one matplotlib figure with the axes hidden:
    *text* starts at the left edge (in ``dodgerblue``), *braille* starts at
    the horizontal middle, and both are vertically centred.

    Args:
        text: The plain text to draw in the left half.
        braille: The braille string to draw in the right half.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    _, ax = plt.subplots()
    ax.axis('off')

    # Placement options shared by both text elements. Positions are given in
    # axes coordinates (0..1) because of the transAxes transform.
    placement = dict(
        horizontalalignment='left',
        verticalalignment='center',
        transform=ax.transAxes,
    )
    ax.text(0, 0.5, text, c="dodgerblue", **placement)
    ax.text(0.5, 0.5, braille, **placement)

    plt.show()
if __name__ == '__main__':
    # Script entry point: OCR the sample image, convert the result to
    # braille, print the braille and show both versions in a figure.
    ocr_text = imgToText('sample.jpg')
    braille_text = textToBraille(ocr_text)
    print(braille_text)
    brailleToImg(ocr_text, braille_text)