304 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			304 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import string
 | |
| 
 | |
| # returns tuple, [success,updated_string] where the updated string has
 | |
| # has one less (the first) occurance of match string
 | |
| def removefirstoccurance( remove_string, match_string ):
 | |
|     lowercase_string = remove_string.lower()
 | |
|     lowercase_match_string = match_string.lower()
 | |
|     lowest_index = lowercase_string.find(lowercase_match_string)
 | |
|     if(lowest_index == -1):
 | |
|         return [False,remove_string]
 | |
|     past_match_index = lowest_index + len(lowercase_match_string)
 | |
|     highest_index = len(remove_string)
 | |
|     remove_string = remove_string[0:lowest_index] + remove_string[past_match_index: highest_index]
 | |
|     return [True,remove_string]
 | |
| 
 | |
| # returns a string with all occurances of match_string removed
 | |
| def removealloccurances( remove_string, match_string ):
 | |
|     return_value = [True, remove_string]
 | |
|     while(return_value[0]):
 | |
|         return_value = removefirstoccurance(return_value[1],match_string)
 | |
|     return return_value[1]
 | |
| 
 | |
| # removes an occurance of match_string only if it's first in the string
 | |
| # returns tuple [succes, new_string]
 | |
| def removeprefix( remove_string, match_string ):
 | |
|     lowercase_string = remove_string.lower()
 | |
|     lowercase_match_string = match_string.lower()
 | |
|     lowest_index = lowercase_string.find(lowercase_match_string)
 | |
|     if(lowest_index == -1):
 | |
|         return [False,remove_string]
 | |
|     if(lowest_index != 0):
 | |
|         return [False,remove_string]
 | |
|     past_match_index = lowest_index + len(lowercase_match_string)
 | |
|     highest_index = len(remove_string)
 | |
|     remove_string = remove_string[0:lowest_index] + remove_string[past_match_index: highest_index]
 | |
| #    print lowest_index
 | |
| #    print past_match_index
 | |
|     return [True,remove_string]
 | |
| 
 | |
| # removes multiple occurances of match string as long as they are first in
 | |
| # the string
 | |
| def removeallprefix( remove_string, match_string ):
 | |
|     return_value = [True, remove_string]
 | |
|     while(return_value[0]):
 | |
|         return_value = removeprefix(return_value[1],match_string)
 | |
|     return return_value[1]
 | |
| 
 | |
| # returns true if extensionstring is a correct extension
 | |
| def isextension( extensionstring ):
 | |
|     if(len(extensionstring) < 2):
 | |
|         return False
 | |
|     if(extensionstring[0] != '.'):
 | |
|         return False
 | |
|     if(extensionstring[1:len(extensionstring)-1].find('.') != -1):
 | |
|         return False
 | |
|     return True
 | |
| 
 | |
| # returns the index of start of the last occurance of match_string
 | |
| def findlastoccurance( original_string, match_string ):
 | |
|     search_index = original_string.find(match_string)
 | |
|     found_index = search_index
 | |
|     last_index = len(original_string) - 1
 | |
|     while((search_index != -1) and (search_index < last_index)):
 | |
|         search_index = original_string[search_index+1:last_index].find(match_string)
 | |
|         if(search_index != -1):
 | |
|             found_index = search_index
 | |
|     return found_index
 | |
| 
 | |
| # changes extension from original_extension to new_extension
 | |
| def changeextension( original_string, original_extension, new_extension):
 | |
|     if(not isextension(original_extension)):
 | |
|         return original_string
 | |
|     if(not isextension(new_extension)):
 | |
|         return original_string
 | |
|     index = findlastoccurance(original_string, original_extension)
 | |
|     if(index == -1):
 | |
|         return original_string
 | |
|     return_value = original_string[0:index] + new_extension
 | |
|     return return_value
 | |
| 
 | |
| # wanted to do this with str.find however didnt seem to work so do it manually
 | |
| # returns the index of the first capital letter
 | |
| def findfirstcapitalletter( original_string ):
 | |
|     for index in range(len(original_string)):
 | |
|         if(original_string[index].lower() != original_string[index]):
 | |
|             return index
 | |
|     return -1
 | |
| 
 | |
| 
 | |
| # replaces capital letters with underscore and lower case letter (except very
 | |
| # first
 | |
| def lowercasewithunderscore( original_string ):
 | |
| # ignore the first letter since there should be no underscore in front of it
 | |
|     if(len(original_string) < 2):
 | |
|         return original_string
 | |
|     return_value = original_string[1:len(original_string)]
 | |
|     index = findfirstcapitalletter(return_value)
 | |
|     while(index != -1):
 | |
|         return_value = return_value[0:index] + \
 | |
|                        '_' + \
 | |
|                        return_value[index].lower() + \
 | |
|                        return_value[index+1:len(return_value)]
 | |
|         index = findfirstcapitalletter(return_value)
 | |
|     return_value = original_string[0].lower() + return_value
 | |
|     return return_value
 | |
| 
 | |
| # my table is a duplicate of strings
 | |
| def removeduplicates( my_table ):
 | |
|     new_table = []
 | |
|     for old_string1, new_string1 in my_table:
 | |
|         found = 0
 | |
|         for old_string2, new_string2 in new_table:
 | |
|             if(old_string1 == old_string2):
 | |
|                 found += 1
 | |
|             if(new_string1 == new_string2):
 | |
|                 if(new_string1 == ''):
 | |
|                     found += found
 | |
|                 else:
 | |
|                     found += 1
 | |
|             if(found == 1):
 | |
|                 print 'missmatching set, terminating program'
 | |
|                 print old_string1
 | |
|                 print new_string1
 | |
|                 print old_string2
 | |
|                 print new_string2
 | |
|                 quit()
 | |
|             if(found == 2):
 | |
|                 break
 | |
|         if(found == 0):
 | |
|             new_table.append([old_string1,new_string1])
 | |
|     return new_table
 | |
| 
 | |
| def removenochange( my_table ):
 | |
|     new_table = []
 | |
|     for old_string, new_string in my_table:
 | |
|         if(old_string != new_string):
 | |
|             new_table.append([old_string,new_string])
 | |
|     return new_table
 | |
| 
 | |
| # order table after size of the string (can be used to replace bigger strings
 | |
| # first which is useful since smaller strings can be inside the bigger string)
 | |
| # E.g. GIPS is a sub string of GIPSVE if we remove GIPS first GIPSVE will never
 | |
| # be removed. N is small so no need for fancy sort algorithm. Use selection sort
 | |
| def ordertablesizefirst( my_table ):
 | |
|     for current_index in range(len(my_table)):
 | |
|         biggest_string = 0
 | |
|         biggest_string_index = -1
 | |
|         for search_index in range(len(my_table)):
 | |
|             if(search_index < current_index):
 | |
|                 continue
 | |
|             length_of_string = len(my_table[search_index][0])
 | |
|             if(length_of_string > biggest_string):
 | |
|                 biggest_string = length_of_string
 | |
|                 biggest_string_index = search_index
 | |
|         if(biggest_string_index == -1):
 | |
|             print 'sorting algorithm failed, program exit'
 | |
|             quit()
 | |
|         old_value = my_table[current_index]
 | |
|         my_table[current_index] = my_table[biggest_string_index]
 | |
|         my_table[biggest_string_index] = old_value
 | |
|     return my_table
 | |
| 
 | |
| # returns true if string 1 or 2 is a substring of the other, assuming neither
 | |
| # has whitespaces
 | |
| def issubstring( string1, string2 ):
 | |
|     if(len(string1) == 0):
 | |
|         return -1
 | |
|     if(len(string2) == 0):
 | |
|         return -1
 | |
|     large_string = string1
 | |
|     small_string = string2
 | |
|     if(len(string1) < len(string2)):
 | |
|         large_string = string2
 | |
|         small_string = string1
 | |
| 
 | |
|     for index in range(len(large_string)):
 | |
|         large_sub_string = large_string[index:index+len(small_string)].lower()
 | |
|         if(large_sub_string ==\
 | |
|            small_string.lower()):
 | |
|               return index
 | |
|     return -1
 | |
| 
 | |
| #not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n']
 | |
| #def ispartofword( char ):
 | |
| #    for item in not_part_of_word_table:
 | |
| #        if(char == item):
 | |
| #            return False
 | |
| #    return True
 | |
| 
 | |
| # must be numerical,_ or charachter
 | |
| def ispartofword( char ):
 | |
|     if(char.isalpha()):
 | |
|         return True
 | |
|     if(char.isalnum()):
 | |
|         return True
 | |
|     if(char == '_'):
 | |
|         return True
 | |
|     return False
 | |
| 
 | |
| # returns the index of the first letter in the word that the current_index
 | |
| # is pointing to and the size of the word
 | |
| def getword( line, current_index):
 | |
|     if(current_index < 0):
 | |
|         return []
 | |
|     line = line.rstrip()
 | |
|     if(len(line) <= current_index):
 | |
|         return []
 | |
|     if(line[current_index] == ' '):
 | |
|         return []
 | |
|     start_pos = current_index
 | |
|     while start_pos >= 0:
 | |
|         if(not ispartofword(line[start_pos])):
 | |
|             start_pos += 1
 | |
|             break
 | |
|         start_pos -= 1
 | |
|     if(start_pos == -1):
 | |
|         start_pos = 0
 | |
|     end_pos = current_index
 | |
|     while end_pos < len(line):
 | |
|         if(not ispartofword(line[end_pos])):
 | |
|             break
 | |
|         end_pos += 1
 | |
|     return [start_pos,end_pos - start_pos]
 | |
| 
 | |
| # my table is a tuple [string1,string2] complement_to_table is just a list
 | |
| # of strings to compare to string1
 | |
| def complement( my_table, complement_to_table ):
 | |
|     new_table = []
 | |
|     for index in range(len(my_table)):
 | |
|         found = False;
 | |
|         for compare_string in complement_to_table:
 | |
|             if(my_table[index][0].lower() == compare_string.lower()):
 | |
|                 found = True
 | |
|         if(not found):
 | |
|             new_table.append(my_table[index])
 | |
|     return new_table
 | |
| 
 | |
| def removestringfromhead( line, remove_string):
 | |
|     for index in range(len(line)):
 | |
|         if(line[index:index+len(remove_string)] != remove_string):
 | |
|             return line[index:index+len(line)]
 | |
|     return ''
 | |
| 
 | |
| def removeccomment( line ):
 | |
|     comment_string = '//'
 | |
|     for index in range(len(line)):
 | |
|         if(line[index:index+len(comment_string)] == comment_string):
 | |
|             return line[0:index]
 | |
|     return line
 | |
| 
 | |
| def whitespacestoonespace( line ):
 | |
|     return ' '.join(line.split())
 | |
| 
 | |
| def fixabbreviations( original_string ):
 | |
|     previouswascapital = (original_string[0].upper() == original_string[0])
 | |
|     new_string = ''
 | |
|     for index in range(len(original_string)):
 | |
|         if(index == 0):
 | |
|             new_string += original_string[index]
 | |
|             continue
 | |
|         if(original_string[index] == '_'):
 | |
|             new_string += original_string[index]
 | |
|             previouswascapital = False
 | |
|             continue
 | |
|         if(original_string[index].isdigit()):
 | |
|             new_string += original_string[index]
 | |
|             previouswascapital = False
 | |
|             continue
 | |
|         currentiscapital = (original_string[index].upper() == original_string[index])
 | |
|         letter_to_add = original_string[index]
 | |
|         if(previouswascapital and currentiscapital):
 | |
|             letter_to_add = letter_to_add.lower()
 | |
|         if(previouswascapital and (not currentiscapital)):
 | |
|             old_letter = new_string[len(new_string)-1]
 | |
|             new_string = new_string[0:len(new_string)-1]
 | |
|             new_string += old_letter.upper()
 | |
|         previouswascapital = currentiscapital
 | |
|         new_string += letter_to_add
 | |
|     return new_string
 | |
| 
 | |
| def replaceoccurances(old_string, replace_string, replace_with_string):
 | |
|     if (len(replace_string) == 0):
 | |
|         return old_string
 | |
|     if (len(old_string) < len(replace_string)):
 | |
|         return old_string
 | |
|     # Simple implementation, could proably be done smarter
 | |
|     new_string = ''
 | |
|     for index in range(len(old_string)):
 | |
|         #print new_string
 | |
|         if(len(replace_string) > (len(old_string) - index)):
 | |
|             new_string += old_string[index:index + len(old_string)]
 | |
|             break
 | |
|         match = (len(replace_string) > 0)
 | |
|         for replace_index in range(len(replace_string)):
 | |
|             if (replace_string[replace_index] != old_string[index + replace_index]):
 | |
|                 match = False
 | |
|                 break
 | |
|         if (match):
 | |
|             new_string += replace_with_string
 | |
|             index =+ len(replace_string)
 | |
|         else:
 | |
|             new_string += old_string[index]
 | |
|     return new_string
 | 
