com.nicta.scoobi.application

InputsOutputs

Related Docs: object InputsOutputs | package application

trait InputsOutputs extends TextInput with TextOutput with AvroInput with AvroOutput with SequenceInput with SequenceOutput

This trait provides a way to create DLists from files and to add sinks to DLists so that the results of computations can be saved to files.

Linear Supertypes
Known Subclasses
Ordering
  1. Alphabetic
  2. By inheritance
Inherited
  1. InputsOutputs
  2. SequenceOutput
  3. SequenceInput
  4. AvroOutput
  5. AvroInput
  6. TextOutput
  7. TextInput
  8. AnyRef
  9. Any
  1. Hide All
  2. Show all
Learn more about member selection
Visibility
  1. Public
  2. All

Type Members

  1. type AvroFixed[A] = io.avro.AvroFixed[A]

  2. type AvroSchema[A] = io.avro.AvroSchema[A]

  3. implicit class ConvertKeyListToSequenceFile[K] extends AnyRef

  4. implicit class ConvertKeyListToSequenceFile1[K, V] extends AnyRef

  5. implicit class ConvertKeyObjectToSequenceFile[K] extends AnyRef

  6. implicit class ConvertListToSequenceFile[T] extends AnyRef

  7. implicit class ConvertObjectToSequenceFile[T] extends AnyRef

  8. implicit class ConvertValueListToSequenceFile[V] extends AnyRef

  9. implicit class ConvertValueListToSequenceFile1[K, V] extends AnyRef

  10. implicit class ConvertValueObjectToSequenceFile[V] extends AnyRef

  11. implicit class ListToAvroFile[A] extends AnyRef

  12. implicit class ListToCheckpointFile[A] extends AnyRef

    checkpoints

  13. implicit class ListToDelimitedTextFile[A <: Product] extends AnyRef

  14. implicit class ListToPartitionedSequenceFile[T] extends AnyRef

  15. implicit class ListToPartitionedTextFile[K, V] extends AnyRef

  16. implicit class ListToTextFile[A] extends AnyRef

  17. implicit class ObjectToAvroFile[A] extends AnyRef

  18. implicit class ObjectToDelimitedTextFile[A <: Product] extends AnyRef

  19. implicit class ObjectToTextFile[A] extends AnyRef

  20. type SeqSchema[A] = io.sequence.SeqSchema[A]

    Sequence File I/O

Value Members

  1. final def !=(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  4. object ADouble

    Extract a Double from a String.

    Extract a Double from a String.

    Definition Classes
    TextInput
  5. object AFloat

    Extract a Float from a String.

    Extract a Float from a String.

    Definition Classes
    TextInput
  6. object ALong

    Extract a Long from a String.

    Extract a Long from a String.

    Definition Classes
    TextInput
  7. object AnInt

    Extract an Int from a String.

    Extract an Int from a String.

    Definition Classes
    TextInput
  8. val AvroSchema: io.avro.AvroSchema.type

    Avro I/O

  9. def anyToString(any: Any, sep: String): String

    Definition Classes
    TextOutput
  10. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  11. def avroSink[B](path: String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck, checkpoint: Boolean = false, expiryPolicy: ExpiryPolicy = ExpiryPolicy.default)(implicit sc: ScoobiConfiguration, schema: io.avro.AvroSchema[B]): AvroSink[AvroType, B]

    Definition Classes
    AvroOutput
  12. def checkedSource[K, V](paths: Seq[String], checkKeyValueTypes: Boolean = true)(implicit arg0: Manifest[K], arg1: Manifest[V]): CheckedSeqSource[K, V, (K, V)]

    Definition Classes
    SequenceInput
  13. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  14. def defaultSequenceInputFormat[K, V]: Class[SequenceFileInputFormat[K, V]]

    Definition Classes
    SequenceInput
  15. def defaultTextConverter: InputConverter[LongWritable, Text, String]

    INPUT CONVERTERS

    INPUT CONVERTERS

    Definition Classes
    TextInput
  16. def defaultTextConverterToString: InputConverter[Text, Text, (String, String)]

    Definition Classes
    TextInput
  17. def defaultTextConverterWithPath: InputConverter[LongWritable, Text, (String, String)]

    Definition Classes
    TextInput
  18. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  19. def equals(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  20. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  21. def fromAvroFile[A](paths: Seq[String], checkSchemas: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[A], arg1: io.avro.AvroSchema[A]): DList[A]

    Create a new DList from the contents of a list of one or more Avro files.

    Create a new DList from the contents of a list of one or more Avro files. The type of the DList must conform to the schema types allowed by Avro, as constrained by the 'AvroSchema' type class. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    AvroInput
  22. def fromAvroFile[A](paths: String*)(implicit arg0: WireFormat[A], arg1: io.avro.AvroSchema[A]): DList[A]

    Create a new DList from the contents of one or more Avro files.

    Create a new DList from the contents of one or more Avro files. The type of the DList must conform to the schema types allowed by Avro, as constrained by the 'AvroSchema' type class. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    AvroInput
  23. def fromDelimitedTextFile[A](path: String, sep: String = "\t", check: InputCheck = Source.defaultInputCheck)(extractFn: PartialFunction[Seq[String], A])(implicit arg0: WireFormat[A]): DList[A]

    Create a distributed list from a text file that is a number of fields delimited by some separator.

    Create a distributed list from a text file that is a number of fields delimited by some separator. Use an extractor function to pull out the required fields to create the distributed list.

    Definition Classes
    TextInput
  24. def fromDelimitedTextFiles[A](paths: Seq[String], sep: String = "\t", check: InputCheck = Source.defaultInputCheck)(extractFn: PartialFunction[Seq[String], A])(implicit arg0: WireFormat[A]): DList[A]

    Create a distributed list from a list of one or more files or directories (in the case of a directory, the input forms all files in that directory).

    Create a distributed list from a list of one or more files or directories (in the case of a directory, the input forms all files in that directory). The file(s) contain a number of fields delimited by a separator. Use an extractor function to pull out the required fields to create the distributed list.

    Definition Classes
    TextInput
  25. def fromLazySeq[A](seq: ⇒ Seq[A], seqSize: Int = 1000)(implicit arg0: WireFormat[A]): DList[A]

    Create a DList from a stream of elements which will only be evaluated on the cluster.

  26. def fromSequenceFile[K, V](paths: Seq[String], checkKeyValueTypes: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K], arg2: WireFormat[V], arg3: io.sequence.SeqSchema[V]): DList[(K, V)]

    Create a new DList from the contents of a list of one or more Sequence Files.

    Create a new DList from the contents of a list of one or more Sequence Files. Note that the type parameters K and V are the "converted" Scala types for the Writable key-value types that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    SequenceInput
  27. def fromSequenceFile[K, V](paths: String*)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K], arg2: WireFormat[V], arg3: io.sequence.SeqSchema[V]): DList[(K, V)]

    Definition Classes
    SequenceInput
  28. def fromSequenceFileWithPath[K, V](paths: String*)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K], arg2: WireFormat[V], arg3: io.sequence.SeqSchema[V]): DList[(String, (K, V))]

    Create a new DList from the contents of a list of one or more Sequence Files.

    Create a new DList from the contents of a list of one or more Sequence Files. Note that the type parameters K and V are the "converted" Scala types for the Writable key-value types that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    SequenceInput
  29. def fromSequenceFileWithPaths[K, V](paths: Seq[String], checkKeyValueTypes: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K], arg2: WireFormat[V], arg3: io.sequence.SeqSchema[V]): DList[(String, (K, V))]

    Definition Classes
    SequenceInput
  30. def fromSequenceSource[K, V, A](source: SeqSource[K, V, A])(implicit arg0: WireFormat[A]): DListImpl[A]

    Definition Classes
    SequenceInput
  31. def fromSource[A](source: DataSource[_, _, A])(implicit arg0: WireFormat[A]): DListImpl[A]

    SOURCES

  32. def fromTextFile(paths: String*): DList[String]

    Create a DList from one or more files or directories (in the case of a directory, the input forms all files in that directory).

    Create a DList from one or more files or directories (in the case of a directory, the input forms all files in that directory).

    Definition Classes
    TextInput
  33. def fromTextFileWithPath(path: String, check: InputCheck = Source.defaultInputCheck): DList[(String, String)]

    Create a distributed list from one or more files or directories (in the case of a directory, the input forms all files in that directory).

    Create a distributed list from one or more files or directories (in the case of a directory, the input forms all files in that directory). The distributed list is a tuple where the first part is the path of the originating file and the second part is a line of text.

    Definition Classes
    TextInput
  34. def fromTextFileWithPaths(paths: Seq[String], check: InputCheck = Source.defaultInputCheck): DList[(String, String)]

    Create a distributed list from a list of one or more files or directories (in the case of a directory, the input forms all files in that directory).

    Create a distributed list from a list of one or more files or directories (in the case of a directory, the input forms all files in that directory). The distributed list is a tuple where the first part is the path of the originating file and the second part is a line of text.

    Definition Classes
    TextInput
  35. def fromTextFiles(paths: Seq[String], check: InputCheck = Source.defaultInputCheck): DList[String]

    Definition Classes
    TextInput
  36. def fromTextSource[A](source: DataSource[_, _, A])(implicit arg0: WireFormat[A]): DListImpl[A]

    TEXT SOURCES

    TEXT SOURCES

    Definition Classes
    TextInput
  37. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  38. def hashCode(): Int

    Definition Classes
    AnyRef → Any
  39. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  40. def keyFromSequenceFile[K](paths: Seq[String], checkKeyType: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K]): DList[K]

    Create a new DList from the "key" contents of a list of one or more Sequence Files.

    Create a new DList from the "key" contents of a list of one or more Sequence Files. Note that the type parameter K is the "converted" Scala type for the Writable key type that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    SequenceInput
  41. def keyFromSequenceFile[K](paths: String*)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K]): DList[K]

    Create a new DList from the "key" contents of one or more Sequence Files.

    Create a new DList from the "key" contents of one or more Sequence Files. Note that the type parameter K is the "converted" Scala type for the Writable key type that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    SequenceInput
  42. def keyFromSequenceFileWithPath[K](path: String, checkKeyType: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K]): DList[(String, K)]

    Create a new DList from the "key" contents of a list of one or more Sequence Files.

    Create a new DList from the "key" contents of a list of one or more Sequence Files. Note that the type parameter K is the "converted" Scala type for the Writable key type that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory. The distributed list is a tuple where the first part is the path of the originating file and the second part is the "key".

    Definition Classes
    SequenceInput
  43. def keyFromSequenceFileWithPaths[K](paths: Seq[String], checkKeyType: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K]): DList[(String, K)]

    Definition Classes
    SequenceInput
  44. def keySchemaPartitionedSequenceSink[P, K](path: String, partition: (P) ⇒ String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit convK: io.sequence.SeqSchema[K], sc: ScoobiConfiguration): PartitionedSink[P, SeqType, NullWritable, K]

    Partitioned sinks

    Partitioned sinks

    Definition Classes
    SequenceOutput
  45. def keySchemaSequenceFile[K](path: String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit convK: io.sequence.SeqSchema[K]): SeqSink[SeqType, NullWritable, K]

    Definition Classes
    SequenceOutput
  46. def lazyObject[A](o: ⇒ A)(implicit arg0: WireFormat[A]): DObject[A]

    Create a DObject which will only be evaluated on the cluster.

  47. def listToDelimitedTextFile[A <: Product](dl: DList[A], path: String, sep: String = "\t", overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit arg0: Manifest[A]): DList[String]

    Persist a distributed list of 'Products' (e.g. Tuples) as a delimited text file.

    Persist a distributed list of 'Products' (e.g. Tuples) as a delimited text file.

    Definition Classes
    TextOutput
  48. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  49. final def notify(): Unit

    Definition Classes
    AnyRef
  50. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  51. def objectFromAvroFile[A](paths: Seq[String], checkSchemas: Boolean = true)(implicit arg0: WireFormat[A], arg1: AvroSchema[A]): DObject[A]

  52. def objectFromAvroFile[A](paths: String*)(implicit arg0: WireFormat[A], arg1: AvroSchema[A]): DObject[A]

  53. def objectFromDelimitedTextFile[A](path: String, sep: String = "\t", check: InputCheck = Source.defaultInputCheck)(extractFn: PartialFunction[Seq[String], A])(implicit arg0: WireFormat[A]): DObject[A]

  54. def objectFromSequenceFile[K, V](paths: Seq[String], checkKeyValueTypes: Boolean = true)(implicit arg0: WireFormat[K], arg1: SeqSchema[K], arg2: WireFormat[V], arg3: SeqSchema[V]): DObject[(K, V)]

  55. def objectFromSequenceFile[K, V](paths: String*)(implicit arg0: WireFormat[K], arg1: SeqSchema[K], arg2: WireFormat[V], arg3: SeqSchema[V]): DObject[(K, V)]

  56. def objectFromTextFile(paths: String*): DObject[String]

    Text file I/O

  57. def objectKeyFromSequenceFile[K](paths: Seq[String], checkKeyType: Boolean = true)(implicit arg0: WireFormat[K], arg1: SeqSchema[K]): DObject[K]

  58. def objectKeyFromSequenceFile[K](paths: String*)(implicit arg0: WireFormat[K], arg1: SeqSchema[K]): DObject[K]

  59. def objectToDelimitedTextFile[A <: Product](o: DObject[A], path: String, sep: String = "\t", overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit arg0: Manifest[A]): DObject[String]

    Persist a distributed object of 'Products' (e.g. Tuples) as a delimited text file.

    Persist a distributed object of 'Products' (e.g. Tuples) as a delimited text file.

    Definition Classes
    TextOutput
  60. def objectValueFromSequenceFile[V](paths: Seq[String], checkValueType: Boolean = true)(implicit arg0: WireFormat[V], arg1: SeqSchema[V]): DObject[V]

  61. def objectValueFromSequenceFile[V](paths: String*)(implicit arg0: WireFormat[V], arg1: SeqSchema[V]): DObject[V]

  62. def partitionedSequenceSink[P, K <: Writable, V <: Writable](path: String, partition: (P) ⇒ String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit mk: Manifest[K], mv: Manifest[V], sc: ScoobiConfiguration): PartitionedSink[P, K, V, (K, V)]

    Definition Classes
    SequenceOutput
  63. def schemaPartitionedSequenceSink[P, K, V](path: String, partition: (P) ⇒ String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit convK: io.sequence.SeqSchema[K], convV: io.sequence.SeqSchema[V], sc: ScoobiConfiguration): PartitionedSink[P, SeqType, SeqType, (K, V)]

    Definition Classes
    SequenceOutput
  64. def schemaSequenceSink[K, V](path: String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck, checkpoint: Boolean = false, expiryPolicy: ExpiryPolicy = ExpiryPolicy.default)(implicit convK: io.sequence.SeqSchema[K], convV: io.sequence.SeqSchema[V], sc: ScoobiConfiguration): SeqSink[SeqType, SeqType, (K, V)]

    Definition Classes
    SequenceOutput
  65. def sequenceSink[K <: Writable, V <: Writable](path: String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck, checkpoint: Boolean = false, expiryPolicy: ExpiryPolicy = ExpiryPolicy.default)(implicit mk: Manifest[K], mv: Manifest[V], sc: ScoobiConfiguration): SeqSink[K, V, (K, V)]

    Definition Classes
    SequenceOutput
  66. def source[K, V](paths: Seq[String])(implicit arg0: WireFormat[K], arg1: io.sequence.SeqSchema[K], arg2: WireFormat[V], arg3: io.sequence.SeqSchema[V]): SeqSource[K, V, (K, V)]

    Definition Classes
    SequenceInput
  67. def source[A](paths: Seq[String], checkSchemas: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: io.avro.AvroSchema[A]): AvroDataSource[io.avro.AvroInput.source.schema.type.AvroType, A] forSome {val schema: io.avro.AvroSchema[A]}

    Definition Classes
    AvroInput
  68. def splitpath[K, V](context: MapContext[K, V, _, _]): String

    Attributes
    protected
    Definition Classes
    SequenceInput
  69. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  70. def textFilePartitionedSink[P, V](path: String, partition: (P) ⇒ String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit arg0: Manifest[P], arg1: Manifest[V]): PartitionedSink[P, NullWritable, V, V]

    Definition Classes
    TextOutput
  71. def textFileSink[A](path: String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit arg0: Manifest[A]): TextFileSink[A]

    SINKS

    SINKS

    Definition Classes
    TextOutput
  72. def textSource(paths: Seq[String], check: InputCheck = Source.defaultInputCheck): TextSource[String]

    create a text source

    create a text source

    Definition Classes
    TextInput
  73. def toString(): String

    Definition Classes
    AnyRef → Any
  74. def valueFromSequenceFile[V](paths: Seq[String], checkValueType: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[V], arg1: io.sequence.SeqSchema[V]): DList[V]

    Create a new DList from the "value" contents of a list of one or more Sequence Files.

    Create a new DList from the "value" contents of a list of one or more Sequence Files. Note that the type parameter V is the "converted" Scala type for the Writable value type that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    SequenceInput
  75. def valueFromSequenceFile[V](paths: String*)(implicit arg0: WireFormat[V], arg1: io.sequence.SeqSchema[V]): DList[V]

    Create a new DList from the "value" contents of one or more Sequence Files.

    Create a new DList from the "value" contents of one or more Sequence Files. Note that the type parameter V is the "converted" Scala type for the Writable value type that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory.

    Definition Classes
    SequenceInput
  76. def valueFromSequenceFileWithPath[V](path: String, checkValueType: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[V], arg1: io.sequence.SeqSchema[V]): DList[(String, V)]

    Create a new DList from the "value" contents of a list of one or more Sequence Files.

    Create a new DList from the "value" contents of a list of one or more Sequence Files. Note that the type parameter V is the "converted" Scala type for the Writable value type that must be contained in the Sequence Files. In the case of a directory being specified, the input forms all the files in that directory. The distributed list is a tuple where the first part is the path of the originating file and the second part is the "value".

    Definition Classes
    SequenceInput
  77. def valueFromSequenceFileWithPaths[V](paths: Seq[String], checkValueType: Boolean = true, check: InputCheck = Source.defaultInputCheck)(implicit arg0: WireFormat[V], arg1: io.sequence.SeqSchema[V]): DList[(String, V)]

    Definition Classes
    SequenceInput
  78. def valueSchemaPartitionedSequenceSink[P, V](path: String, partition: (P) ⇒ String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck)(implicit convV: io.sequence.SeqSchema[V], sc: ScoobiConfiguration): PartitionedSink[P, NullWritable, SeqType, V]

    Definition Classes
    SequenceOutput
  79. def valueSchemaSequenceFile[V](path: String, overwrite: Boolean = false, check: OutputCheck = Sink.defaultOutputCheck, checkpoint: Boolean = false, expiryPolicy: ExpiryPolicy = ExpiryPolicy.default)(implicit convV: io.sequence.SeqSchema[V], sc: ScoobiConfiguration): SeqSink[NullWritable, SeqType, V]

    Definition Classes
    SequenceOutput
  80. def valueSource[V](paths: Seq[String])(implicit arg0: io.sequence.SeqSchema[V]): SeqSource[Writable, io.sequence.SequenceInput.valueSource.convV.type.SeqType, V] forSome {val convV: io.sequence.SeqSchema[V]}

    Definition Classes
    SequenceInput
  81. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  82. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  83. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from SequenceOutput

Inherited from SequenceInput

Inherited from AvroOutput

Inherited from AvroInput

Inherited from TextOutput

Inherited from TextInput

Inherited from AnyRef

Inherited from Any

Ungrouped