// src/scripts/Preprocessor10.scala
//
// Preprocessing script. For every line of /source/nt/Universities400.nt it
//   1. substitutes the first five replacement pairs from ./3rd-dep/prefix-list.csv,
//   2. turns the space in front of '<', '_' and '"' into a tab,
//   3. strips a trailing " ." from the end of the line,
// and saves the result as text under /source/input/instance/400.
//
// NOTE(review): `sc` is not defined in this file; presumably it is the
// SparkContext supplied by spark-shell -- confirm how the script is launched.

import scala.io.Source
import java.io._

// Not read anywhere else in this script.
val timestamp = System.currentTimeMillis()

val input500 = sc.textFile("/source/nt/Universities400.nt")

// Read the prefix list eagerly and always release the file handle
// (the original left the Source open).
val prefixSource = Source.fromFile("./3rd-dep/prefix-list.csv")
val prefix = try {
  prefixSource.getLines().toList
} finally {
  prefixSource.close()
}
val prefixPairList = prefix.map(line => line.split(",").toList)

// Only rows 0-4 of the prefix list were ever applied; that limit is kept so the
// output stays identical even if the CSV holds more rows.
val prefixRowsUsed = 5
require(
  prefixPairList.length >= prefixRowsUsed,
  s"prefix-list.csv must contain at least $prefixRowsUsed rows, found ${prefixPairList.length}"
)

// (replacement, target) pairs: column 1 of each row is searched for and
// replaced by column 0. Presumably column 0 is the short prefix and column 1
// the full namespace -- verify against the CSV.
// Validating here fails fast with a clear message instead of an
// IndexOutOfBoundsException inside a Spark task.
val prefixReplacements = prefixPairList.take(prefixRowsUsed).zipWithIndex.map { case (row, index) =>
  require(row.length >= 2, s"prefix-list.csv row $index needs two comma-separated columns: $row")
  (row(0), row(1))
}

// Apply the replacement pairs in file order, then convert the separating
// spaces to tabs. The former `.replace(" \\.$", "")` step was dropped:
// String.replace matches literally, so it never removed the trailing " .".
val parse500 = input500.map { line =>
  val shortened = prefixReplacements.foldLeft(line) { case (text, (replacement, target)) =>
    text.replace(target, replacement)
  }
  shortened.replace(" <", "\t<").replace(" _", "\t_").replace(" \"", "\t\"")
}

// replaceAll treats its first argument as a regex, so this is the step that
// actually removes the " ." at the end of each line.
val union500 = parse500.map(line => line.replaceAll(" \\.$", ""))

union500.saveAsTextFile("/source/input/instance/400")