OLD | NEW |
| (Empty) |
1 import string | |
2 | |
3 # returns a two-element list [success, updated_string] where updated_string | |
4 # has one less (the first) occurrence of match_string | |
def removefirstoccurance(remove_string, match_string):
    """Remove the first case-insensitive occurrence of match_string.

    Returns a two-element list ``[success, updated_string]``. When
    match_string does not occur, success is False and remove_string is
    returned unchanged.
    """
    needle = match_string.lower()
    start = remove_string.lower().find(needle)
    if start < 0:
        return [False, remove_string]
    # Splice out the matched span, keeping the original casing elsewhere.
    end = start + len(needle)
    return [True, remove_string[:start] + remove_string[end:]]
15 | |
16 # returns a string with all occurances of match_string removed | |
def removealloccurances(remove_string, match_string):
    """Return remove_string with every occurrence of match_string removed.

    Matching is case-insensitive. Removal is repeated until no occurrence is
    left, so occurrences that only appear after an earlier removal are
    removed as well. An empty match_string leaves the input unchanged
    (an empty needle matches everywhere and would otherwise never terminate).
    """
    if not match_string:
        return remove_string
    needle = match_string.lower()
    width = len(needle)
    result = remove_string
    while True:
        start = result.lower().find(needle)
        if start == -1:
            return result
        result = result[:start] + result[start + width:]
22 | |
23 # removes an occurrence of match_string only if it's first in the string | |
24 # returns a two-element list [success, new_string] | |
def removeprefix(remove_string, match_string):
    """Strip match_string from the start of remove_string, ignoring case.

    Returns a two-element list ``[success, new_string]``. success is False
    (and the string is returned unchanged) unless remove_string begins with
    match_string.
    """
    needle = match_string.lower()
    if not remove_string.lower().startswith(needle):
        return [False, remove_string]
    return [True, remove_string[len(needle):]]
39 | |
40 # removes multiple occurances of match string as long as they are first in | |
41 # the string | |
def removeallprefix(remove_string, match_string):
    """Strip every leading repetition of match_string, ignoring case.

    Returns the remaining string. An empty match_string leaves the input
    unchanged (an empty prefix always matches and would otherwise never
    terminate).
    """
    if not match_string:
        return remove_string
    needle = match_string.lower()
    width = len(needle)
    result = remove_string
    while result.lower().startswith(needle):
        result = result[width:]
    return result
47 | |
48 # returns true if extensionstring is a correct extension | |
def isextension(extensionstring):
    """Return True if extensionstring looks like a file extension.

    A valid extension is at least two characters long, starts with a dot and
    contains no further dot anywhere after the first character (the last
    character is checked too).
    """
    if len(extensionstring) < 2:
        return False
    if extensionstring[0] != '.':
        return False
    return '.' not in extensionstring[1:]
57 | |
58 # returns the index of start of the last occurance of match_string | |
def findlastoccurance(original_string, match_string):
    """Return the start index of the last occurrence of match_string.

    The comparison is case-sensitive. Returns -1 when match_string does not
    occur in original_string. For an empty match_string this returns
    len(original_string), following str.rfind.
    """
    return original_string.rfind(match_string)
68 | |
69 # changes extension from original_extension to new_extension | |
def changeextension(original_string, original_extension, new_extension):
    """Swap original_extension for new_extension in original_string.

    Both extensions must pass isextension(); otherwise, or when
    findlastoccurance() reports that original_extension is absent (-1),
    original_string is returned unchanged. On success everything from the
    index reported by findlastoccurance() onwards is replaced by
    new_extension.
    """
    extensions_valid = (isextension(original_extension) and
                        isextension(new_extension))
    if not extensions_valid:
        return original_string
    cut = findlastoccurance(original_string, original_extension)
    if cut == -1:
        return original_string
    return original_string[:cut] + new_extension
80 | |
81 # wanted to do this with str.find however didnt seem to work so do it manually | |
82 # returns the index of the first capital letter | |
def findfirstcapitalletter(original_string):
    """Return the index of the first character that changes when lowercased.

    Returns -1 if no such character exists (including for an empty string).
    """
    for position, character in enumerate(original_string):
        if character != character.lower():
            return position
    return -1
88 | |
89 | |
90 # replaces capital letters with underscore and lower case letter (except the | |
91 # very first letter, which is only lowercased) | |
def lowercasewithunderscore(original_string):
    """Convert CamelCase to lower_case_with_underscores.

    Every capital letter after the first character becomes '_' followed by
    its lowercase form. The first character is only lowercased, never
    prefixed. Strings shorter than two characters are returned unchanged.
    """
    if len(original_string) < 2:
        return original_string
    head, tail = original_string[0], original_string[1:]
    converted = []
    for character in tail:
        lowered = character.lower()
        if lowered != character:
            converted.append('_' + lowered)
        else:
            converted.append(character)
    return head.lower() + ''.join(converted)
106 | |
107 # my_table is a list of [old_string, new_string] pairs | |
def removeduplicates(my_table):
    """Return my_table without repeated [old_string, new_string] pairs.

    my_table is iterated as two-element pairs. A pair is dropped when an
    already kept pair has the same old and new string. If a pair matches a
    kept pair in exactly one of the two fields the table is inconsistent:
    the offending strings are printed and the program is terminated via
    quit(). An empty new string shared by two pairs with different old
    strings is not treated as a mismatch.
    """
    unique_pairs = []
    for candidate_old, candidate_new in my_table:
        matches = 0
        for kept_old, kept_new in unique_pairs:
            if candidate_old == kept_old:
                matches += 1
            if candidate_new == kept_new:
                # An empty replacement only counts when the old strings
                # matched as well: doubling turns 1 into 2 but leaves 0 at 0.
                matches += matches if candidate_new == '' else 1
            if matches == 1:
                print('missmatching set, terminating program')
                print(candidate_old)
                print(candidate_new)
                print(kept_old)
                print(kept_new)
                quit()
            if matches == 2:
                break
        if matches == 0:
            unique_pairs.append([candidate_old, candidate_new])
    return unique_pairs
132 | |
def removenochange(my_table):
    """Return the [old_string, new_string] pairs whose two strings differ.

    Each kept pair is returned as a fresh two-element list.
    """
    return [[before, after] for before, after in my_table if before != after]
139 | |
140 # order table after size of the string (can be used to replace bigger strings | |
141 # first which is useful since smaller strings can be inside the bigger string) | |
142 # E.g. GIPS is a sub string of GIPSVE if we remove GIPS first GIPSVE will never | |
143 # be removed. N is small so no need for fancy sort algorithm. Use selection sort | |
def ordertablesizefirst(my_table):
    """Sort my_table in place so entries with the longest first string come first.

    Ordering by descending length lets callers replace longer strings before
    the shorter strings they contain (e.g. GIPSVE before GIPS). Entries whose
    first strings have equal length keep their relative input order. Entries
    with an empty first string are sorted to the end instead of aborting the
    program.

    Returns the same list object that was passed in.
    """
    my_table.sort(key=lambda entry: len(entry[0]), reverse=True)
    return my_table
162 | |
163 # returns the index where the shorter string occurs in the longer one, else -1 | |
164 # (case-insensitive; also -1 if either is empty), assuming neither has whitespaces | |
def issubstring(string1, string2):
    """Locate the shorter argument inside the longer one, ignoring case.

    Returns the index in the longer string at which the shorter string
    first occurs, or -1 if it does not occur or either string is empty.
    When both have equal length, string1 is searched for string2.
    """
    if not string1 or not string2:
        return -1
    if len(string1) < len(string2):
        haystack, needle = string2, string1
    else:
        haystack, needle = string1, string2
    needle_lower = needle.lower()
    width = len(needle)
    for start in range(len(haystack)):
        if haystack[start:start + width].lower() == needle_lower:
            return start
    return -1
182 | |
183 #not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',
',','\n'] | |
184 #def ispartofword( char ): | |
185 # for item in not_part_of_word_table: | |
186 # if(char == item): | |
187 # return False | |
188 # return True | |
189 | |
190 # must be numerical,_ or charachter | |
def ispartofword(char):
    """Return True if char is alphanumeric or an underscore."""
    # Alphabetic characters are already covered by isalnum().
    return char.isalnum() or char == '_'
199 | |
200 # returns the index of the first letter in the word that the current_index | |
201 # is pointing to and the size of the word | |
def getword(line, current_index):
    """Locate the word that current_index points into.

    Trailing whitespace is stripped from line first. Returns a two-element
    list ``[start_index, length]`` for the run of ispartofword() characters
    around current_index, or an empty list when current_index is negative,
    lies beyond the stripped line, or points at a space.

    NOTE(review): when current_index points at a non-space character that is
    not part of a word, the result is [current_index + 1, -1]; confirm that
    callers expect this.
    """
    if current_index < 0:
        return []
    stripped = line.rstrip()
    if current_index >= len(stripped) or stripped[current_index] == ' ':
        return []
    # Walk left until the character before the word (or the line start).
    first = current_index
    while first >= 0 and ispartofword(stripped[first]):
        first -= 1
    first += 1
    # Walk right until the first character past the word (or the line end).
    past_end = current_index
    while past_end < len(stripped) and ispartofword(stripped[past_end]):
        past_end += 1
    return [first, past_end - first]
224 | |
225 # my table is a tuple [string1,string2] complement_to_table is just a list | |
226 # of strings to compare to string1 | |
def complement(my_table, complement_to_table):
    """Return the entries of my_table not listed in complement_to_table.

    An entry is dropped when its first element equals, ignoring case, any
    string in complement_to_table. Kept entries are the same objects as in
    my_table, in their original order.
    """
    remaining = []
    for entry in my_table:
        key = entry[0].lower()
        excluded = [name for name in complement_to_table
                    if name.lower() == key]
        if not excluded:
            remaining.append(entry)
    return remaining
237 | |
def removestringfromhead(line, remove_string):
    """Strip every leading repetition of remove_string from line.

    The comparison is case-sensitive. The scan advances by the full length
    of remove_string, so multi-character prefixes are removed whole. An
    empty remove_string leaves line unchanged. Returns '' when line consists
    only of repetitions of remove_string.
    """
    if not remove_string:
        return line
    step = len(remove_string)
    start = 0
    while line.startswith(remove_string, start):
        start += step
    return line[start:]
243 | |
def removeccomment(line):
    """Return line truncated just before the first '//' it contains.

    Lines without '//' are returned unchanged. No attempt is made to
    recognise '//' inside string literals.
    """
    marker_position = line.find('//')
    if marker_position == -1:
        return line
    return line[:marker_position]
250 | |
def whitespacestoonespace(line):
    """Collapse every run of whitespace in line to a single space.

    Leading and trailing whitespace is dropped.
    """
    words = line.split()
    return ' '.join(words)
253 | |
def fixabbreviations(original_string):
    """Turn runs of capitals into capitalised words (e.g. HTTPServer -> HttpServer).

    Within a run of consecutive capitals, every capital after the first is
    lowercased. When such a run is followed by a lowercase letter, the last
    capital of the run is restored, because it starts the next word.
    Underscores and digits are copied through and end the current run.
    A character counts as capital when uppercasing leaves it unchanged.

    An empty string is returned unchanged.
    """
    if not original_string:
        return original_string
    pieces = [original_string[0]]
    previous_was_capital = original_string[0].upper() == original_string[0]
    for character in original_string[1:]:
        if character == '_' or character.isdigit():
            pieces.append(character)
            previous_was_capital = False
            continue
        current_is_capital = character.upper() == character
        if previous_was_capital:
            if current_is_capital:
                # Still inside an abbreviation: flatten it to lowercase.
                character = character.lower()
            else:
                # The previous letter began a new word: restore its capital.
                pieces[-1] = pieces[-1].upper()
        pieces.append(character)
        previous_was_capital = current_is_capital
    return ''.join(pieces)
280 | |
def replaceoccurances(old_string, replace_string, replace_with_string):
    """Replace every occurrence of replace_string with replace_with_string.

    Matching is case-sensitive and proceeds left to right over
    non-overlapping occurrences; the matched text is consumed, so none of it
    is copied to the result. An empty replace_string, or one longer than
    old_string, leaves old_string unchanged.
    """
    # str.replace would insert between every character for an empty needle.
    if not replace_string:
        return old_string
    return old_string.replace(replace_string, replace_with_string)
OLD | NEW |