@@ -290,16 +290,16 @@ def get_cond(key, cutoff, max_cutoff):
290
290
"stopwords_ratio"
291
291
]
292
292
for i in range (len (self .docs ["stopwords_ratio" ])):
293
- self .docs ["stopwords_ratio" ].iloc [
294
- i
295
- ] = Filtering . compute_stopwords_ratio (
296
- self .docs [ "text" ]. iloc [ i ] ,
297
- self .sentencepiece_model_tok ,
298
- self .param ["strip_characters " ],
299
- self .param ["cond_words_augmentation " ],
300
- self .param ["words_augmentation_group_sizes " ],
301
- self . param [ "words_augmentation_join_char" ] ,
302
- new_stopwords ,
293
+ self .docs ["stopwords_ratio" ].iloc [i ] = (
294
+ Filtering . compute_stopwords_ratio (
295
+ self . docs [ "text" ]. iloc [ i ],
296
+ self .sentencepiece_model_tok ,
297
+ self .param [ "strip_characters" ] ,
298
+ self .param ["cond_words_augmentation " ],
299
+ self .param ["words_augmentation_group_sizes " ],
300
+ self .param ["words_augmentation_join_char " ],
301
+ new_stopwords ,
302
+ )
303
303
)
304
304
cutoff_def = "If the stop words ratio of a document is lower than this number, the document is removed."
305
305
cutoff_stopwords_ratio = st .slider (
@@ -326,16 +326,16 @@ def get_cond(key, cutoff, max_cutoff):
326
326
"flagged_words_ratio"
327
327
]
328
328
for i in range (len (self .docs ["flagged_words_ratio" ])):
329
- self .docs ["flagged_words_ratio" ].iloc [
330
- i
331
- ] = Filtering . compute_flagged_words_ratio (
332
- self .docs [ "text" ]. iloc [ i ] ,
333
- self .sentencepiece_model_tok ,
334
- self .param ["strip_characters " ],
335
- self .param ["cond_words_augmentation " ],
336
- self .param ["words_augmentation_group_sizes " ],
337
- self . param [ "words_augmentation_join_char" ] ,
338
- new_flagged_words ,
329
+ self .docs ["flagged_words_ratio" ].iloc [i ] = (
330
+ Filtering . compute_flagged_words_ratio (
331
+ self . docs [ "text" ]. iloc [ i ],
332
+ self .sentencepiece_model_tok ,
333
+ self .param [ "strip_characters" ] ,
334
+ self .param ["cond_words_augmentation " ],
335
+ self .param ["words_augmentation_group_sizes " ],
336
+ self .param ["words_augmentation_join_char " ],
337
+ new_flagged_words ,
338
+ )
339
339
)
340
340
cutoff_def = "If the flagged words ratio of a document is higher than this number, the document is removed."
341
341
max_fwr = np .max (self .docs ["flagged_words_ratio" ])
0 commit comments