Quillot Mathias / volia

1

import argparse

1

import argparse

2

from utils import SubCommandRunner

2

from utils import SubCommandRunner

3

4

def utt2spk(features: str, outfile: str):

4

def utt2spk(features: str, outfile: str):

5

"""Generate a utt2spk file from a feature file of voxceleb.

5

"""Generate a utt2spk file from a feature file of voxceleb.

6

(it also works with list files instead of features)

6

(it also works with list files instead of features)

7

8

Args:

8

Args:

9

features (str): features file (or list)

9

features (str): features file (or list)

10

outfile (str): output file to store the utt2spk

10

outfile (str): output file to store the utt2spk

11

"""

11

"""

12

with open(features, "r") as f, open(outfile, "w") as out:

12

with open(features, "r") as f, open(outfile, "w") as out:

13

for line in f:

13

for line in f:

14

splited = line.replace("\n", "").split(" ")

14

splited = line.replace("\n", "").split(" ")

15

id_ = splited[0]

15

id_ = splited[0]

16

id_splited = id_.split("-")

16

id_splited = id_.split("-")

17

spk = id_splited[0]

17

spk = id_splited[0]

18

out.write(id_ + " " + spk + "\n")

18

out.write(id_ + " " + spk + "\n")

19

20

21

def spk2utt(features: str, outfile: str):

21

def spk2utt(features: str, outfile: str):

22

"""Generate a spk2utt file from a feature file of voxceleb.

22

"""Generate a spk2utt file from a feature file of voxceleb.

23

(it also works with list files instead of features)

23

(it also works with list files instead of features)

24

25

Args:

25

Args:

26

features (str): features file (or list)

26

features (str): features file (or list)

27

outfile (str): output file to store the spk2utt

27

outfile (str): output file to store the spk2utt

28

"""

28

"""

29

with open(features, "r") as f, open(outfile, "w") as out:

29

with open(features, "r") as f, open(outfile, "w") as out:

30

spk2utt_dict = {}

30

spk2utt_dict = {}

31

for line in f:

31

for line in f:

32

splited = line.replace("\n", "").split(" ")

32

splited = line.replace("\n", "").split(" ")

33

id_ = splited[0]

33

id_ = splited[0]

34

id_splited = id_.split("-")

34

id_splited = id_.split("-")

35

spk = id_splited[0]

35

spk = id_splited[0]

36

if spk not in spk2utt_dict:

36

if spk not in spk2utt_dict:

37

spk2utt_dict[spk] = []

37

spk2utt_dict[spk] = []

38

spk2utt_dict[spk].append(id_)

38

spk2utt_dict[spk].append(id_)

39

40

for spk, ids in spk2utt_dict.items():

40

for spk, ids in spk2utt_dict.items():

41

out.write(spk + " " + " ".join(ids) + "\n")

41

out.write(spk + " " + " ".join(ids) + "\n")

42

43

44

def wavscp(datadir: str, outfile: str):

45

raise Exception("Under construction")

46

pass

47

44

if __name__ == "__main__":

48

if __name__ == "__main__":

45

# Main parser

49

# Main parser

46

parser = argparse.ArgumentParser(description="Voxceleb data management")

50

parser = argparse.ArgumentParser(description="Voxceleb data management")

47

subparsers = parser.add_subparsers(title="action")

51

subparsers = parser.add_subparsers(title="action")

48

52

49

# utt2spk

53

# utt2spk

50

parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")

54

parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")

51

parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")

55

parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")

52

parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")

56

parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")

53

parser_utt2spk.set_defaults(which="utt2spk")

57

parser_utt2spk.set_defaults(which="utt2spk")

54

58

55

# spk2utt

59

# spk2utt

56

parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")

60

parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")

57

parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")

61

parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")

58

parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")

62

parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")

59

parser_spk2utt.set_defaults(which="spk2utt")

63

parser_spk2utt.set_defaults(which="spk2utt")

60

64

65

# wavscp

66

parser_wavscp = subparser.add_parser("wavscp", help="generate wav scp file")

67

parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")

68

parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")

69

parser_wavscp.set_defaults(which="wavscp")

70

61

# Parse

71

# Parse

62

args = parser.parse_args()

72

args = parser.parse_args()

63

73

64

# Run commands

74

# Run commands

65

runner = SubCommandRunner({

75

runner = SubCommandRunner({

66

"utt2spk" : utt2spk,

76

"utt2spk" : utt2spk,

67

"spk2utt": spk2utt,

77

"spk2utt": spk2utt,

78

"wavscp": wavscp

68

})

79

})

69

80

70

runner.run(args.which, args.__dict__, remove="which")

81

runner.run(args.which, args.__dict__, remove="which")

71

82

GITLAB

Quillot Mathias / volia

add wav scp method to voxceleb module but it is under construction

 import argparse
 from utils import SubCommandRunner
 def utt2spk(features: str, outfile: str):
     """Generate a utt2spk file from a feature file of voxceleb.

     (it also works with list files instead of features)

     The first whitespace-separated field of each input line is taken as the
     utterance id, and the speaker id is the part of that utterance id that
     precedes the first "-". One ``<utt-id> <spk-id>`` line is written per
     non-blank input line, in input order.

     Args:
         features (str): features file (or list)
         outfile (str): output file to store the utt2spk
     """
     with open(features, "r") as f, open(outfile, "w") as out:
         for line in f:
             # split() with no argument accepts spaces or tabs and ignores the
             # trailing newline.
             fields = line.split()
             if not fields:
                 # Blank line: there is no utterance id to emit.
                 continue
             utt_id = fields[0]
             spk = utt_id.split("-", 1)[0]
             out.write(utt_id + " " + spk + "\n")
 def spk2utt(features: str, outfile: str):
     """Generate a spk2utt file from a feature file of voxceleb.

     (it also works with list files instead of features)

     The first whitespace-separated field of each input line is taken as the
     utterance id, and the speaker id is the part of that utterance id that
     precedes the first "-". One ``<spk-id> <utt-id> <utt-id> ...`` line is
     written per speaker, following the iteration order of the grouping dict
     (first appearance on Python 3.7+). Blank input lines are skipped.

     Args:
         features (str): features file (or list)
         outfile (str): output file to store the spk2utt
     """
     with open(features, "r") as f, open(outfile, "w") as out:
         spk2utt_dict = {}
         for line in f:
             # split() with no argument accepts spaces or tabs and ignores the
             # trailing newline.
             fields = line.split()
             if not fields:
                 # Blank line: do not register an empty speaker/utterance.
                 continue
             utt_id = fields[0]
             spk = utt_id.split("-", 1)[0]
             spk2utt_dict.setdefault(spk, []).append(utt_id)

         # Everything has been grouped; emit one line per speaker.
         for spk, ids in spk2utt_dict.items():
             out.write(spk + " " + " ".join(ids) + "\n")
def wavscp(datadir: str, outfile: str):
    """Placeholder for the wav.scp generator; not implemented yet.

    Args:
        datadir (str): data directory (currently unused)
        outfile (str): wav.scp output file (currently unused)

    Raises:
        NotImplementedError: always, since the function is under construction.
    """
    # NotImplementedError is a subclass of Exception, so callers that catch
    # Exception keep working while the intent is stated explicitly.
    raise NotImplementedError("Under construction")
 if __name__ == "__main__":
     # Command-line entry point. Every sub-parser stores its own name under
     # ``which`` so the matching function can be selected after parsing.

     # Main parser
     parser = argparse.ArgumentParser(description="Voxceleb data management")
     subparsers = parser.add_subparsers(title="action")

     # utt2spk
     parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
     parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
     parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
     parser_utt2spk.set_defaults(which="utt2spk")

     # spk2utt
     parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
     parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
     parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
     parser_spk2utt.set_defaults(which="spk2utt")

     # wavscp
     # Must be registered on ``subparsers`` (the object created above); the
     # singular name ``subparser`` is not defined anywhere in this script.
     parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
     # NOTE(review): the help text mentions "masseffect" although this tool
     # manages voxceleb data; looks like a copy-paste leftover, confirm.
     parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
     parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
     parser_wavscp.set_defaults(which="wavscp")

     # Parse
     args = parser.parse_args()
     # Sub-commands are optional for argparse, so ``which`` is missing when
     # none was given; report a usage error instead of an AttributeError.
     if not hasattr(args, "which"):
         parser.error("an action is required (utt2spk, spk2utt or wavscp)")

     # Run commands
     runner = SubCommandRunner({
         "utt2spk": utt2spk,
         "spk2utt": spk2utt,
         "wavscp": wavscp,
     })
     # Presumably ``remove="which"`` drops the selector key before the
     # remaining arguments reach the function; defined in utils, TODO confirm.
     runner.run(args.which, args.__dict__, remove="which")