You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
While using a csv parser to process large char arrays i ran into the problem that the toString method of ArrayCharSequence was very slow on large buffers (and, as it appears, also on small buffers)
The problematic method:
override def toString = xs drop start take length mkString ""
I created a faster version which is already almost 50% faster on an empty charsequence and only becomes relatively faster when the array gets bigger (already 20x as fast in the test below). It appears the implementation is used in 2.10 (and maybe earlier) until the latest version.
Code (it's all about the toString method):
final class FastArrayCharSequence(val xs: Array[Char], start: Int, end: Int) extends CharSequence {
// yikes
// java.lang.VerifyError: (class: scala/runtime/ArrayCharSequence, method: <init> signature: ([C)V)
// Constructor must call super() or this()
//
// def this(xs: Array[Char]) = this(xs, 0, xs.length)
def length: Int = math.max(0, end - start)
def charAt(index: Int): Char = {
if (0 <= index && index < length)
xs(start + index)
else throw new ArrayIndexOutOfBoundsException(index)
}
def subSequence(start0: Int, end0: Int): CharSequence = {
if (start0 < 0) throw new ArrayIndexOutOfBoundsException(start0)
else if (end0 > length) throw new ArrayIndexOutOfBoundsException(end0)
else if (end0 <= start0) new FastArrayCharSequence(xs, 0, 0)
else {
val newlen = end0 - start0
val start1 = start + start0
new FastArrayCharSequence(xs, start1, start1 + newlen)
}
}
override def toString = {
if (start >= end)
""
else
new String(xs, start, length)
}
}
object FastArrayCharSequence {
implicit class FastArrayCharSequenceArrayWrapper(val xs: Array[Char]) extends AnyVal {
def fastSubSequence(start: Int, end: Int) : CharSequence = {
new FastArrayCharSequence(xs, start, Math.min(end, xs.length))
}
}
}
object Test extends App {
import FastArrayCharSequence._
val chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".toCharArray
val tests = for {
i <- 0 to chars.length
k <- 0 to chars.length
} yield {
chars.subSequence(i, k).toString == chars.fastSubSequence(i, k).toString
}
println("All equal: " + tests.forall(eq => eq))
var index = 0
while(index < chars.length ) {
val someChars = new String(chars).substring(0, index).toCharArray
var start = System.currentTimeMillis()
println(s"Size: $index")
for {
c <- 0 to 1000
i <- 0 to someChars.length
k <- 0 to someChars.length
} yield {
someChars.subSequence(i, k).toString.length
}
val oldMillis = System.currentTimeMillis() - start
start = System.currentTimeMillis()
for {
c <- 0 to 1000
i <- 0 to someChars.length
k <- 0 to someChars.length
} yield {
someChars.fastSubSequence(i, k).toString.length
}
val newMillis = System.currentTimeMillis() - start
println(s"String size: $index, current: $oldMillis, new: $newMillis, improvment: ${((oldMillis.toDouble / newMillis.toDouble)).toInt}")
index = index + 1
}
}
The text was updated successfully, but these errors were encountered:
While using a csv parser to process large char arrays i ran into the problem that the toString method of ArrayCharSequence was very slow on large buffers (and, as it appears, also on small buffers)
The problematic method:
I created a faster version which is already almost 50% faster on an empty charsequence and only becomes relatively faster when the array gets bigger (already 20x as fast in the test below). It appears the implementation is used in 2.10 (and maybe earlier) until the latest version.
Code (it's all about the toString method):
The text was updated successfully, but these errors were encountered: