public class RDDConverterUtils extends Object
| Modifier and Type | Class and Description | 
|---|---|
| static class  | RDDConverterUtils.BinaryCellToBinaryBlockFunction | 
| static class  | RDDConverterUtils.DataFrameExtractIDFunction | 
| Modifier and Type | Field and Description | 
|---|---|
| static String | DF_ID_COLUMN | 
| Constructor and Description | 
|---|
| RDDConverterUtils() | 
| Modifier and Type | Method and Description | 
|---|---|
| static org.apache.spark.api.java.JavaRDD<String> | binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in,
                DataCharacteristics mcIn,
                FileFormatPropertiesCSV props,
                boolean strict) | 
| static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
                      org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in,
                      DataCharacteristics mc,
                      boolean toVector) | 
| static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext,
                      org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in,
                      DataCharacteristics mc,
                      boolean toVector) Deprecated.  | 
| static org.apache.spark.api.java.JavaRDD<org.apache.spark.ml.feature.LabeledPoint> | binaryBlockToLabeledPoints(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in) Converter from binary block RDD to RDD of labeled points. | 
| static org.apache.spark.api.java.JavaRDD<String> | binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in,
                     DataCharacteristics mc) | 
| static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | binaryCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                       org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixCell> input,
                       DataCharacteristics mcOut,
                       boolean outputEmptyBlocks) | 
| static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input,
                DataCharacteristics mc,
                boolean hasHeader,
                String delim,
                boolean fill,
                double fillValue,
                Set<String> naStrings) | 
| static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                org.apache.spark.api.java.JavaRDD<String> input,
                DataCharacteristics mcOut,
                boolean hasHeader,
                String delim,
                boolean fill,
                double fillValue,
                Set<String> naStrings) | 
| static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                      org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
                      DataCharacteristics mc,
                      boolean containsID,
                      boolean isVector) | 
| static void | libsvmToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                   String pathIn,
                   String pathX,
                   String pathY,
                   DataCharacteristics mcOutX) Converts a libsvm text input file into two binary block matrices for features 
 and labels, and saves these to the specified output files. | 
| static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> | stringToSerializableText(org.apache.spark.api.java.JavaPairRDD<Long,String> in) | 
| static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                     org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input,
                     DataCharacteristics mcOut,
                     boolean outputEmptyBlocks,
                     FileFormatPropertiesMM mmProps) | 
public static final String DF_ID_COLUMN
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input, DataCharacteristics mcOut, boolean outputEmptyBlocks, FileFormatPropertiesMM mmProps)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixCell> input, DataCharacteristics mcOut, boolean outputEmptyBlocks)
public static org.apache.spark.api.java.JavaRDD<org.apache.spark.ml.feature.LabeledPoint> binaryBlockToLabeledPoints(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in)
in - matrix as JavaPairRDD<MatrixIndexes, MatrixBlock>
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in, DataCharacteristics mc)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in, DataCharacteristics mcIn, FileFormatPropertiesCSV props, boolean strict)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input, DataCharacteristics mc, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaRDD<String> input, DataCharacteristics mcOut, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID, boolean isVector)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
                                                                                            org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in,
                                                                                            DataCharacteristics mc,
                                                                                            boolean toVector)
@Deprecated public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> in, DataCharacteristics mc, boolean toVector)
public static void libsvmToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                                       String pathIn,
                                       String pathX,
                                       String pathY,
                                       DataCharacteristics mcOutX)
 Note: We use org.apache.spark.mllib.util.MLUtils.loadLibSVMFile for parsing 
 the libsvm input files in order to ensure consistency with Spark.
sc - java spark context
pathIn - path to libsvm input file
pathX - path to binary block output file of features
pathY - path to binary block output file of labels
mcOutX - matrix characteristics of output matrix X
Copyright © 2020 The Apache Software Foundation. All rights reserved.