Skip to content

Commit 440b4d1

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 410a23a commit 440b4d1

File tree

14 files changed: 17 additions (+17) and 26 deletions (-26).

14 files changed: 17 additions (+17) and 26 deletions (-26).

ac_dc/deduplicate/self_deduplicate.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
32
# @Date : 2022-01-08 22:39:29
43
# @Author : Chenghao Mou ([email protected])
54
# @Description: Self-deduplication with `datasets`
@@ -28,7 +27,7 @@
2827

2928
def main(conf: str) -> None:
3029

31-
with open(conf, "r") as f:
30+
with open(conf) as f:
3231
conf = yaml.safe_load(f.read())
3332

3433
if conf["load_from_disk"]["path"]:

bertin/evaluation/run_glue.py

-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# coding=utf-8
32
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
43
#
54
# Licensed under the Apache License, Version 2.0 (the "License");

bertin/evaluation/run_ner.py

-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# coding=utf-8
32
# Copyright 2020 The HuggingFace Team All rights reserved.
43
#
54
# Licensed under the Apache License, Version 2.0 (the "License");

bertin/mc4/mc4.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -404,7 +404,7 @@ def _generate_examples(self, filepaths):
404404
for filepath in filepaths:
405405
logger.info("generating examples from = %s", filepath)
406406
if filepath.endswith("jsonl"):
407-
with open(filepath, "r", encoding="utf-8") as f:
407+
with open(filepath, encoding="utf-8") as f:
408408
for line in f:
409409
if line:
410410
example = json.loads(line)

bertin/run_mlm_flax.py

-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# coding=utf-8
32
# Copyright 2021 The HuggingFace Team All rights reserved.
43
#
54
# Licensed under the Apache License, Version 2.0 (the "License");

bertin/run_mlm_flax_stream.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# coding=utf-8
32
# Copyright 2021 The HuggingFace Team All rights reserved.
43
#
54
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -446,7 +445,7 @@ def restore_checkpoint(save_dir, state):
446445
args = joblib.load(os.path.join(save_dir, "training_args.joblib"))
447446
data_collator = joblib.load(os.path.join(save_dir, "data_collator.joblib"))
448447

449-
with open(os.path.join(save_dir, "training_state.json"), "r") as f:
448+
with open(os.path.join(save_dir, "training_state.json")) as f:
450449
training_state = json.load(f)
451450
step = training_state["step"]
452451

bertin/utils/dataset_perplexity.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ def get_perplexity(doc):
1717

1818

1919
with open("mc4-es-train-50M-stats.csv", "w") as csv:
20-
with open("mc4-es-train-50M-steps.jsonl", "r") as data:
20+
with open("mc4-es-train-50M-steps.jsonl") as data:
2121
for line in tqdm(data):
2222
text = json.loads(line)["text"]
2323
csv.write(f"{len(text.split())},{get_perplexity(text)}\n")

cc_pseudo_crawl/python_scripts/download_warc.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -143,9 +143,9 @@ def get_warcs(batch):
143143
existing_compressed_warcs,
144144
)
145145

146-
batch["compressed_warc"], batch["download_exception"] = [
146+
batch["compressed_warc"], batch["download_exception"] = (
147147
list(l) for l in zip(*warcs_or_exceptions)
148-
]
148+
)
149149
return batch
150150

151151

cc_pseudo_crawl/python_scripts/load_all_seed_ids.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def main():
2121

2222
seed_ids = []
2323
for seed_path in args.seed_paths:
24-
with open(seed_path, "r") as fi:
24+
with open(seed_path) as fi:
2525
data = csv.reader(fi)
2626
# First line is all the headers that we remove.
2727
seed_ids += [row[0] for row_id, row in enumerate(data) if row_id > 0]

kenlm_training/cc_net/jsonql.py

+3-4
Original file line number | Diff line number | Diff line change
@@ -880,8 +880,7 @@ def describe(source, columns=None, weights=None, **kwargs):
880880
continue
881881
if "." in k or k == ALL_DOCUMENTS:
882882
continue
883-
for line in display_stats(stats, k, weights=weights, **kwargs):
884-
yield line
883+
yield from display_stats(stats, k, weights=weights, **kwargs)
885884

886885

887886
def shard(lines):
@@ -961,7 +960,7 @@ def open_read(filename: ReadableFileLike) -> Iterable[str]:
961960
if filename.suffix == ".gz":
962961
file: TextIO = gzip.open(filename, "rt") # type: ignore
963962
else:
964-
file = open(filename, "rt")
963+
file = open(filename)
965964

966965
return _close_when_exhausted(file)
967966

@@ -1015,7 +1014,7 @@ def open_write(
10151014
if filename.suffix == ".gz":
10161015
return BlockedGzipWriter(Path(filename), mode, block_size="64M")
10171016

1018-
return open(filename, "wt")
1017+
return open(filename, "w")
10191018

10201019

10211020
def parse_size(size):

pii-manager/setup.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -27,15 +27,15 @@
2727

2828
def requirements(filename="requirements.txt"):
2929
"""Read the requirements file"""
30-
with io.open(filename, "r") as f:
30+
with open(filename) as f:
3131
return [line.strip() for line in f if line and line[0] != "#"]
3232

3333

3434
def long_description():
3535
"""
3636
Take the README and remove markdown hyperlinks
3737
"""
38-
with open("README.md", "rt", encoding="utf-8") as f:
38+
with open("README.md", encoding="utf-8") as f:
3939
desc = f.read()
4040
desc = re.sub(r"^\[ ([^\]]+) \]: \s+ \S.*\n", r"", desc, flags=re.X | re.M)
4141
return re.sub(r"\[ ([^\]]+) \]", r"\1", desc, flags=re.X)

pii-manager/src/pii_manager/api/manager.py

+3-6
Original file line number | Diff line number | Diff line change
@@ -31,22 +31,19 @@ def fetch_all_tasks(
3131
"""
3232
taskdict = get_taskdict(debug=debug)
3333
# Language-independent
34-
for task in taskdict[LANG_ANY].values():
35-
yield task
34+
yield from taskdict[LANG_ANY].values()
3635

3736
langdict = taskdict.get(lang, {})
3837
# Country-independent
39-
for task in langdict.get(COUNTRY_ANY, {}).values():
40-
yield task
38+
yield from langdict.get(COUNTRY_ANY, {}).values()
4139
# Country-specific
4240
if country:
4341
if country[0] in (COUNTRY_ANY, "all"):
4442
country = country_list(lang)
4543
for c in country:
4644
if c == COUNTRY_ANY: # already included above
4745
continue
48-
for task in langdict.get(c, {}).values():
49-
yield task
46+
yield from langdict.get(c, {}).values()
5047

5148

5249
def fetch_task(

pii-manager/test/unit/api/test_file.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ def datafile(name: str) -> str:
1212

1313

1414
def readfile(name: str) -> str:
15-
with open(name, "rt", encoding="utf-8") as f:
15+
with open(name, encoding="utf-8") as f:
1616
return f.read().strip()
1717

1818

pii-manager/test/unit/api/test_file_taskfile.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def datafile(name: str) -> str:
1414

1515

1616
def readfile(name: str) -> str:
17-
with open(name, "rt", encoding="utf-8") as f:
17+
with open(name, encoding="utf-8") as f:
1818
return f.read().strip()
1919

2020

0 commit comments

Comments
 (0)