import argparse
import numpy as np


# Program identity: used for the --help description and the start-up banner.
app_name = "txt2c64"
app_description = " is a program that converts .txt documents to C64 Petscii files. Karstein Djupdal 2024."

# 1: command line - two positional file names followed by optional switches

parser = argparse.ArgumentParser(description=app_name + app_description)

parser.add_argument("input_file", help="Path to the input file")
parser.add_argument("output_file", help="Output filename (seq or prg)")
parser.add_argument(
    "-a",
    metavar="aaaa",
    help="if prg file enter start address aaaa (in hex)",
)

# -C and -c pick the character set and exclude each other.
groupcharset = parser.add_mutually_exclusive_group()
groupcharset.add_argument(
    "-C",
    action="store_true",
    help="C64 character set: specify upper case/graphics",
)
groupcharset.add_argument(
    "-c",
    action="store_true",
    help="C64 character set: specify lower/upper case",
)

# -t may be repeated; argparse counts the repetitions (None when absent).
parser.add_argument(
    "-t",
    action="count",
    help="tabulator: -t use tab code (09), -ttt (2 or more) use a number of spaces instead",
)

# Plain on/off switches, registered in the order they appear in --help.
for _short, _long, _help in (
    ("-s", "--symbol", "translate certain symbols and smileys."),
    ("-d", "--digraph", "use digraphs for letters øåöäüñ. æßÞ will be translated to digraphs anyway."),
    ("-r", "--replace", "replace unknown characters with ? (if not they will be removed)."),
):
    parser.add_argument(_short, _long, action="store_true", help=_help)

args = parser.parse_args()


        #2: transfer arguments to variables

# Turn the namespace parsed in step 1 into a dict; parsing the command line a
# second time is not needed.
args = vars(args)
inputfilename = args['input_file']
output_file = args['output_file']
symbol = args['symbol']
digraph = args['digraph']
replace = args['replace']

# prg or seq: a name ending in ".xxx" carries its own three-letter type.
# The length check keeps names shorter than four characters from raising
# IndexError; they simply get the default type.
if len(output_file) >= 4 and output_file[-4] == '.':
    outputfilename = output_file[:-4]
    outputfile_type = output_file[-3:]
else:
    outputfilename = output_file
    outputfile_type = 'seq'     # default if not specified

# prg start address, given in hex; stored as two little-endian bytes.
address_string = args['a']
if address_string:
    try:
        address = int(address_string, base=16)
    except ValueError:
        # parser.error() prints the usage text plus this message and exits.
        parser.error("start address must be a hexadecimal number, got %r" % address_string)
    if not 0 <= address <= 0xFFFF:
        parser.error("start address must be between 0000 and ffff, got %r" % address_string)
else:
    address = 0x0801    # basic start default address
byteaddress = address.to_bytes(length=2, byteorder='little')

# Character set: -c selects "shifted"; -C or no switch at all means "normal".
charset = "shifted" if args['c'] else "normal"

# Tabulator handling, driven by how many times -t was given:
#   0 -> tabs become `tabspaces` spaces (default 3)
#   1 -> the tab code is kept (tab is True)
#   n -> tabs become n spaces
tabspaces = 3  # default
tab_count = args['t'] or 0      # argparse leaves the count as None without -t
tab = tab_count == 1            # True means keep tab code
if tab_count > 1:
    tabspaces = tab_count


        #3: open text file and create numpy array 'txtfile'

print(" --- ",app_name," --- ")

# Read the whole document as text. The encoding is stated explicitly so the
# result does not depend on the platform default; "utf-8-sig" decodes UTF-8
# and skips a leading byte-order mark if the file has one. The with-block
# closes the file as soon as it has been read.
with open(inputfilename, 'r', encoding='utf-8-sig') as file:
    txtfile = list(file.read())     # one list element per character

# One array element per character; the later steps search and edit this array.
txtfile = np.array(txtfile, dtype=str)


        #4: replace characters with digraphs


# Letters that may optionally be written as two plain letters. Row n of
# `digraphs` is the replacement for entry n of `diacritics2digraph`.
diacritics2digraph = np.array(['ø','å','ö','ä','ü','ñ',
                               'Ø','Å','Ö','Ä','Ü','Ñ'])

digraphs = np.array(
           [['o','e'],
            ['a','a'],
            ['o','e'],
            ['a','e'],
            ['u','e'],
            ['n','n'],

            ['O','e'],
            ['A','a'],
            ['O','e'],
            ['A','e'],
            ['U','e'],
            ['N','n']]
           )

# Ligatures are always expanded. Row n of `ligatures_digraph` is the
# replacement for entry n of `ligatures`.
ligatures = np.array(['æ','œ','ß','þ','Æ','Œ','ẞ','Þ'])

ligatures_digraph = np.array(
                    [['a','e'],
                     ['o','e'],     # œ is the o-e ligature (was wrongly 'a','e')
                     ['s','s'],
                     ['t','h'],
                     ['A','e'],
                     ['O','e'],     # Œ likewise (was wrongly 'A','e')
                     ['S','S'],
                     ['T','h']]
                    )


number_replaced_letters = 0

# replace ligatures with digraphs (obligatory)

for ligature, pair in zip(ligatures, ligatures_digraph):
    positions, = np.where(txtfile == ligature)     # indices of this ligature in the text

    # Walk the hits back to front: every insertion shifts the text behind it,
    # so the indices in front of it stay valid.
    for x in positions[::-1]:
        txtfile = np.insert(txtfile, x + 1, pair[1])    # second letter goes right after
        txtfile[x] = pair[0]                            # first letter overwrites the ligature
        number_replaced_letters += 1


# replace diacritics letters with digraphs (optional)

# False until the user has been asked; afterwards it holds the typed answer,
# so the question is put at most once.
ask_digraph = False

for letter, pair in zip(diacritics2digraph, digraphs):
    positions, = np.where(txtfile == letter)       # indices of this letter in the text

    if positions.size == 0:
        continue

    # Ask only when -d was not given and nothing has been asked yet.
    if not digraph and ask_digraph is False:
        ask_digraph = input("Use digraphs for certain diacritics letters (e.g. ö -> oe, å -> aa) ? y/n ")

    if digraph or ask_digraph == "y":
        digraph = True

        for x in positions[::-1]:                   # back to front, see above
            txtfile = np.insert(txtfile, x + 1, pair[1])
            txtfile[x] = pair[0]
            number_replaced_letters += 1


        #5: replace symbols with multi character codes

# Search tables: single characters that are spelled out with several plain
# characters. Entry n of a search table belongs to row n of its replacement
# table further down.
symbols_3 = np.array(['©','€','½','😴'])         #three characters, sleep emoji
symbols_4 = np.array(['™'])              #(TM)

smileys_2 = np.array(['😀','😊','😃','🙂','☺',   #regular smileys
                      '😆','😂','😄','😁',          # laughing
                      '😕','😕','🙁','☹','😥','😢',    #sad
                      '😍','😗','😙','😚','😘','🥰','❤',      # kiss and heart
                      '😮','😯','😲',                          # surprise
                      '😉','😜','🙃','😎','🫤','😐','😑','😛']) #various

# Replacement tables.
symbols_three = np.array(
           [['(','C',')'],  # (C)
            ['e','u','r'],  # eur
            ['1','/','2'],  # 1/2
            ['z','z','Z']]  # zzZ
           )

symbols_four = np.array([['(','T','M',')']])

smileys_two = np.array(
            [[':',')'],[':',')'],[':',')'],[':',')'],[':',')'],    #smile
             ['x','D'],['x','D'],[':','D'],[':','D'],           # laughing
             [':','('],[':','('],[':','('],[':','('],[':','('],[':','('],     #sad
             [':','D'],[':','x'],[':','x'],[':','x'],[':','x'],['<','3'],['<','3'], # kiss and heart
             [':','o'],[':','o'],[':','o'],      # surprise
             [';',')'],[';',')'],['(',':'],['8','D'],[':',chr(0x5c)],[':','|'],[':','|'],[':','P']]  #various
            )

number_replaced_symbols = 0
number_smileys = 0

# False until the user has been asked; afterwards it holds the typed answer.
ask_symbol = False

ask_symbol_text = "Convert smileys and special symbols? y/n "

# The three table pairs are processed in this order; the last field says
# which counter a replacement is booked on.
for glyphs, expansions, counts_as_smiley in (
    (smileys_2, smileys_two, True),
    (symbols_3, symbols_three, False),
    (symbols_4, symbols_four, False),
):
    for glyph, expansion in zip(glyphs, expansions):
        hits, = np.where(txtfile == glyph)      # indices of this character in the text

        if hits.size == 0:
            continue

        # Ask only when -s was not given and nothing has been asked yet.
        if not symbol and ask_symbol is False:
            ask_symbol = input(ask_symbol_text)

        if not (symbol or ask_symbol == "y"):
            continue
        symbol = True

        # Back to front, so indices ahead of an insertion stay valid.
        for x in hits[::-1]:
            txtfile = np.insert(txtfile, x + 1, expansion[1:])   # remaining characters go right after
            txtfile[x] = expansion[0]                            # first character overwrites the symbol
            if counts_as_smiley:
                number_smileys += 1
            else:
                number_replaced_symbols += 1


        #6: Simplify UTF-8 txtfile

            #hyphen equivalents

# hyphen equivalents -> hyphen-minus

tilde = False       # set when a tilde was among the replaced characters

hyphen_equiv = np.array([chr(0x2013),   #en dash
                         chr(0x2014),   #em dash
                         chr(0x2015),   #horizontal bar
                         chr(0x7E)])    #tilde

for dash in hyphen_equiv:
    found = txtfile == dash             # boolean mask of the matching positions
    if found.any():
        txtfile[found] = chr(0x2D)      #replace with hyphen-minus
        if dash == chr(0x7E):
            tilde = True

# quotation marks

quotation = False   # set when any non-standard quotation mark was replaced

# Double forms become the plain quotation mark.
quotation_equiv = np.array([chr(0xab),chr(0xbb),        #double angle
                            chr(0x201c),chr(0x201d)])   #double

for mark in quotation_equiv:
    found = txtfile == mark
    if found.any():
        txtfile[found] = chr(0x22)      #replace with quotation mark
        quotation = True

# Single curly quotes become the apostrophe, not the double quotation mark:
# U+2019 is commonly used as the apostrophe, and mapping it to '"' would turn
# "don’t" into 'don"t'. They are still reported through the `quotation` flag.
single_quotation_equiv = np.array([chr(0x2018),chr(0x2019)])   #single

for mark in single_quotation_equiv:
    found = txtfile == mark
    if found.any():
        txtfile[found] = chr(0x27)      #replace with apostrophe
        quotation = True

# apostrophe

apostrophe = False  # set when a grave or acute accent was replaced

apostrophe_equiv = np.array([chr(0x60),   #grave accent
                             chr(0xb4)])   #acute accent

for accent in apostrophe_equiv:
    found = txtfile == accent
    if found.any():
        txtfile[found] = chr(0x27)      #replace with apostrophe
        apostrophe = True

# line feed -> return code

txtfile[txtfile == chr(0x0a)] = chr(0x0d)


        #7: letters with diacritics

diacritics = 0      # number of letters whose diacritic was stripped

# Accented letters and, at the same position in `replace_diacritics`, the plain
# letter each one is reduced to. The first six entries of each case are the
# letters that step 4 may already have turned into digraphs.
letters_diacritics = np.array(['Ø','Å','Ö','Ä','Ü','Ñ',     #letters that can optionally be converted to digraphs
                               'À','Á','Â','Ã','Ç','È','É','Ê','Ë','Ì','Í','Î','Ï','Ð','Ò','Ó','Ô','Õ','Ù','Ú','Û','Ý','Ā','Ă','Ą','Ć','Ĉ','Ċ','Č','Ď','Ē','Ĕ','Ė','Ę','Ě','Ĝ','Ğ','Ĥ','Ī','Ĭ','Į','Ĵ','Ĺ','Ľ','Ł','Ń','Ň','Ō','Ŏ','Ő','Ŕ','Ř','Ś','Ŝ','Š','Ť','Ū','Ŭ','Ů','Ű','Ų','Ÿ','Ź','Ż','Ž',
                               'ø','å','ö','ä','ü','ñ',
                               'à','á','â','ã','ç','è','é','ê','ë','ì','í','î','ï','ð','ò','ó','ô','õ','ù','ú','û','ý','ā','ă','ą','ć','ĉ','ċ','č','ď','ē','ĕ','ė','ę','ě','ĝ','ğ','ĥ','ī','ĭ','į','ĵ','ĺ','ľ','ł','ń','ň','ō','ŏ','ő','ŕ','ř','ś','ŝ','š','ť','ū','ŭ','ů','ű','ų','ÿ','ź','ż','ž'])

replace_diacritics = np.array(['O','A','O','A','U','N',
                               'A','A','A','A','C','E','E','E','E','I','I','I','I','D','O','O','O','O','U','U','U','Y','A','A','A','C','C','C','C','D','E','E','E','E','E','G','G','H','I','I','I','J','L','L','L','N','N','O','O','O','R','R','S','S','S','T','U','U','U','U','U','Y','Z','Z','Z',
                               'o','a','o','a','u','n',
                               'a','a','a','a','c','e','e','e','e','i','i','i','i','d','o','o','o','o','u','u','u','y','a','a','a','c','c','c','c','d','e','e','e','e','e','g','g','h','i','i','i','j','l','l','l','n','n','o','o','o','r','r','s','s','s','t','u','u','u','u','u','y','z','z','z'])


for accented, plain in zip(letters_diacritics, replace_diacritics):
    matches = txtfile == accented                   # boolean mask of this letter's positions
    diacritics += int(np.count_nonzero(matches))    # one count per replaced letter
    txtfile[matches] = plain                        # remove diacritics


        #8: Check for remaining non-ascii characters (code above 127)

# Steps 8-11 are handled together on a plain list of characters.
#
# Why they belong together: the original step 11 compared a bytes ('S1') array
# with str values. NumPy never matches bytes against str, so the replacements
# for { } \ and _ silently did nothing, and the non-ASCII symbols in
# `char2petscii` had already been thrown away by step 8. Here those symbols
# are let through step 8 and every substitution is done on str characters
# before the final conversion to bytes.

# Characters with a PETSCII code of their own (vertical line, underscore,
# pound, pi, left arrow); entry n of `char2petscii` gets code n of `petscii_char`.
char2petscii = np.array(['｜','_','£','π','˂'])
petscii_char = np.array([chr(0x7D),chr(0xa4),chr(0x5c),chr(0x7e),chr(0x5f)])
special_codes = {ch: ord(code)
                 for ch, code in zip(char2petscii.tolist(), petscii_char.tolist())}

# 8: find characters that cannot be represented (not ASCII, no own PETSCII code)

number_lines = 1            # line currently being scanned, 1-based
previous_line = 0           # last line number already recorded as containing invalid characters
invalid_characters = 0
invalid_lines = []
kept = []                   # the text after this step, one str per character

for ch in txtfile.tolist():
    # An empty element is how the array stores a NUL character; it has no
    # code point to test, so it is treated as invalid as well.
    if ch != '' and (ord(ch) < 128 or ch in special_codes):
        kept.append(ch)
        if ch == chr(0x0d):         # return code ends a line
            number_lines += 1
    else:
        invalid_characters += 1
        if replace:
            kept.append('?')        # -r: keep a visible placeholder, otherwise drop it
        if previous_line != number_lines:
            invalid_lines.append(number_lines)   # each affected line is recorded once
            previous_line = number_lines

lines_invalid_char = np.array(invalid_lines, dtype=int)

# 9: Tabulator

number_tabcodes = kept.count(chr(0x09))

if number_tabcodes > 0 and not tab:     # tab True means: keep the tab code
    spaces = [chr(0x20)] * tabspaces    # each tab becomes `tabspaces` spaces
    expanded = []
    for ch in kept:
        if ch == chr(0x09):
            expanded.extend(spaces)
        else:
            expanded.append(ch)
    kept = expanded

txtfile = np.array(kept, dtype=str)     # keep the array form of the cleaned text up to date

# 10: length of longest line and count lines

# Splitting on the return code yields one piece per line, without the code itself.
lines = "".join(kept).split(chr(0x0d))
number_lines = len(lines)
longest_line = max(len(line) for line in lines)

# 11: Convert to PETSCII

curly_brackets = False      # set when { or } was replaced by ( or )
reverse_solidus = False     # set when a backslash was replaced by a slash

replace_brackets = {'{': '(', '}': ')'}

petscii = []                # one integer code per output byte
for ch in kept:
    if ch in special_codes:
        petscii.append(special_codes[ch])
        continue

    if ch in replace_brackets:
        ch = replace_brackets[ch]
        curly_brackets = True
    elif ch == chr(0x5c):
        ch = chr(0x2f)              # backslash -> normal slash
        reverse_solidus = True

    code = ord(ch)
    if 0x61 <= code <= 0x7a:
        # small letters move down by 0x20 in both character sets
        if charset in ('normal', 'shifted'):
            code -= 0x20
    elif 0x41 <= code <= 0x5a:
        # capital letters move up by 0x20 in the shifted set only
        if charset == 'shifted':
            code += 0x20
    petscii.append(code)

# Reinterpret the byte codes as an array of one-byte strings, the form the
# save step reads element by element.
txtpetscii = np.array(petscii, dtype=np.uint8).view('S1')


        #12: save petscii file to disk

filename = outputfilename+"."+outputfile_type

# Open the output once, replacing any existing file; the with-block closes it
# even if a write fails. (The earlier second open in append mode left the
# first handle unclosed.)
with open(filename, "wb") as writefile:
    if outputfile_type == 'prg':
        writefile.write(byteaddress)    # prg files start with the two-byte load address

    # Join the one-byte elements and write them in a single call.
    writefile.write(b"".join(txtpetscii.tolist()))


        #Output messages

# Summary of what was written and which conversions took place.
print("C64 file",filename,"created")
print("- text has",number_lines,"lines.")
print("- longest line is",longest_line,"characters long (not including return code).")

if not tab and number_tabcodes > 0:
    print("-",number_tabcodes,"tabs replaced with spaces.")
# Shown when digraphs were requested, the question was answered, or any letter was replaced.
if digraph or ask_digraph or number_replaced_letters > 0:
    print("- replaced",number_replaced_letters, "letters with digraphs.")
if diacritics > 0:
    print("- removed diacritics from",diacritics,"letters.")
if symbol:
    print("- replaced",number_smileys, "smileys and",number_replaced_symbols, "special symbols.")

# One line for every simple substitution whose flag was raised.
for happened, message in (
    (tilde, "- replaced tilde with hyphen."),
    (quotation, "- replaced non-standard quotation marks."),
    (apostrophe, "- replaced grave/acute accents with apostrophe."),
    (curly_brackets, "- replaced curly brackets with normal parentheses."),
    (reverse_solidus, "- replaced backslash with normal slash."),
):
    if happened:
        print(message)

# Invalid characters: the wording depends on whether -r kept them as '?'.
if invalid_characters > 0:
    if replace:
        print("-",invalid_characters,"unknown characters replaced with ? in line",lines_invalid_char)
    else:
        print("-",invalid_characters,"unknown characters were removed from line",lines_invalid_char)
