From 030b580131c82e4d06785b2798fce1b8c3ad1a52 Mon Sep 17 00:00:00 2001
From: dogfooter <dogfooter219@gmail.com>
Date: Fri, 6 Jul 2018 16:59:57 +0900
Subject: [PATCH] add: preprocessing 400

---
 src/scripts/Preprocessor10.scala | 55 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 src/scripts/Preprocessor10.scala

diff --git a/src/scripts/Preprocessor10.scala b/src/scripts/Preprocessor10.scala
new file mode 100644
index 0000000..2704763
--- /dev/null
+++ b/src/scripts/Preprocessor10.scala
@@ -0,0 +1,55 @@
+import scala.io.Source
+import java.io._
+
+// Spark-shell style script: rewrites each line of Universities400.nt into a
+// tab-separated form and saves the result under /source/input/instance/400.
+// `sc` is not defined in this file; it must be supplied by the environment
+// that runs the script (presumably the SparkContext of spark-shell).
+//
+// Number of leading rows of the prefix list that are applied, matching the
+// five hard-coded replacements this script performed before.
+val appliedPrefixCount = 5
+
+// Each CSV row is "replacement,target": column 1 is the text searched for in
+// every input line and column 0 is what replaces it (presumably an
+// abbreviated prefix and the full IRI it stands for -- confirm against the
+// CSV). The file is read eagerly and closed before any Spark job runs.
+val prefixPairs: List[(String, String)] = {
+  val prefixSource = Source.fromFile("./3rd-dep/prefix-list.csv")
+  try {
+    prefixSource.getLines().take(appliedPrefixCount).map { row =>
+      row.split(",") match {
+        case Array(replacement, target, _*) => (replacement, target)
+        case _ =>
+          throw new IllegalArgumentException(
+            s"Malformed prefix row, expected 'replacement,target': $row")
+      }
+    }.toList
+  } finally {
+    prefixSource.close()
+  }
+}
+
+// Fail on the driver with a clear message rather than with an
+// IndexOutOfBoundsException raised while the lines are being processed.
+require(
+  prefixPairs.length == appliedPrefixCount,
+  s"prefix-list.csv must have at least $appliedPrefixCount rows, found ${prefixPairs.length}")
+
+val rawTriples = sc.textFile("/source/nt/Universities400.nt")
+
+val cleanedTriples = rawTriples.map { line =>
+  // Apply the prefix substitutions in file order, each on the previous result.
+  val shortened = prefixPairs.foldLeft(line) {
+    case (text, (replacement, target)) => text.replace(target, replacement)
+  }
+  shortened
+    .replace(" <", "\t<")
+    .replace(" _", "\t_")
+    .replace(" \"", "\t\"")
+    // Stripping the trailing " ." needs the regex-based replaceAll;
+    // String.replace would only match the pattern text literally.
+    .replaceAll(" \\.$", "")
+}
+
+cleanedTriples.saveAsTextFile("/source/input/instance/400")
-- 
GitLab