rlselect2
Rewrite of rlselect with a new approach to performance.
Home
Source code
git clone https://projects.rickardlindberg.me/scm/rlselect2.git
Recent events
2025-10-15 00:01 Rickard pushed to rlselect2
commit 7c24cefd8cfd13587f6b96fc5af4f3d3b1d0ec12
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 23:55:47 2025 +0200
Prevent flickering of header
diff --git a/rlselect2.py b/rlselect2.py
index 2a571b5..b33de63 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -386,10 +386,14 @@ class CursesUi:
def render_search_results(self, screen):
max_y, max_x = screen.getmaxyx()
- screen.erase()
y = 2
t1 = time.perf_counter()
self.selection = None
+ for clear_y in range(y, max_y):
+ try:
+ screen.addstr(clear_y, 0, " "*max_x)
+ except:
+ pass
for line_index, match in enumerate(self.data.yield_lines_matching(self.pattern)):
if self.selection_index is None:
self.selection_index = 0
2025-10-14 23:48 Rickard pushed to rlselect2
commit a802c4f1f06750d574ad2e41a6700ac42cefb128
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 23:48:17 2025 +0200
Attempt to not render control characters in curses
diff --git a/rlselect2.py b/rlselect2.py
index 6c2f099..2a571b5 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -12,6 +12,7 @@ unnecessary work.
"""
from collections import defaultdict
+from itertools import chain
import contextlib
import io
import os
@@ -201,7 +202,10 @@ class Chunk:
return self.match_count > 1
def get_text(self, encoding):
- return self.get_bytes().decode(encoding, "replace")
+ return self.get_bytes().decode(encoding, "replace").translate({
+ char: "�"
+ for char in chain(range(0x00, 0x20), range(0x7f, 0xa0))
+ })
def get_bytes(self):
return self.data.get_bytes(self.start, self.end)
commit b0ed743f1587a3502530ee27fc4476acbe7168d7
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 23:34:09 2025 +0200
Nicer looking header
diff --git a/rlselect2.py b/rlselect2.py
index 42b1abc..6c2f099 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -369,14 +369,14 @@ class CursesUi:
def render_header(self, screen, loading):
max_y, max_x = screen.getmaxyx()
- attrs = curses.A_REVERSE | curses.A_BOLD
+ loading_attrs = curses.A_BOLD
if curses.has_colors():
- attrs |= curses.color_pair(1)
+ loading_attrs |= curses.color_pair(1)
screen.addstr(1, 0, f"completed search in {self.elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
screen.addstr(0, 0, "".ljust(max_x))
if loading:
- screen.addstr(0, max_x-1, "-")
- screen.addstr(0, 0, self.pattern.get_text(), attrs)
+ screen.addstr(0, max_x-1, "-", loading_attrs)
+ screen.addstr(0, 0, f"> {self.pattern.get_text()[-max_x+4:]}")
self.header_yx = screen.getyx()
screen.refresh()
commit 436b04188fdaba545162f440f7baa0b8430d16c6
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 23:27:45 2025 +0200
Indicate loading in header
diff --git a/rlselect2.py b/rlselect2.py
index ad42d2e..42b1abc 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -330,9 +330,11 @@ class CursesUi:
curses.init_pair(2, curses.COLOR_WHITE, curses.COLOR_GREEN)
self.pattern = Pattern.empty(encoding=screen.encoding)
self.selection_index = None
- self.loading_render(screen)
+ self.elapsed = "?"
while True:
- self.render(screen)
+ self.render_header(screen, loading=True)
+ self.render_search_results(screen)
+ self.render_header(screen, loading=False)
c = screen.get_wch()
if c == CTRL_W:
self.pattern.remove_last_word()
@@ -365,17 +367,20 @@ class CursesUi:
else:
return self.selection.get_bytes()
- def loading_render(self, screen):
+ def render_header(self, screen, loading):
max_y, max_x = screen.getmaxyx()
- screen.addstr(1, 0, f"".rjust(max_x)[:max_x], curses.A_REVERSE)
attrs = curses.A_REVERSE | curses.A_BOLD
if curses.has_colors():
attrs |= curses.color_pair(1)
- screen.addstr(2, 0, "loading...", attrs)
- screen.move(0, 0)
+ screen.addstr(1, 0, f"completed search in {self.elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
+ screen.addstr(0, 0, "".ljust(max_x))
+ if loading:
+ screen.addstr(0, max_x-1, "-")
+ screen.addstr(0, 0, self.pattern.get_text(), attrs)
+ self.header_yx = screen.getyx()
screen.refresh()
- def render(self, screen):
+ def render_search_results(self, screen):
max_y, max_x = screen.getmaxyx()
screen.erase()
y = 2
@@ -428,12 +433,8 @@ class CursesUi:
pass
y += 1
t2 = time.perf_counter()
- elapsed = int((t2-t1)*1000)
- screen.addstr(1, 0, f"completed search in {elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
- attrs = curses.A_REVERSE | curses.A_BOLD
- if curses.has_colors():
- attrs |= curses.color_pair(1)
- screen.addstr(0, 0, self.pattern.get_text(), attrs)
+ self.elapsed = int((t2-t1)*1000)
+ screen.move(*self.header_yx)
screen.refresh()
class FilterCommand:
commit 8b3af64cab0e26ddff5a130b9f43ef81576a7c48
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 22:51:50 2025 +0200
Read input incrementally
diff --git a/rlselect2.py b/rlselect2.py
index 3d7e39b..ad42d2e 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -34,7 +34,8 @@ class Data:
def __init__(self, stream):
self.stream = stream
- self.data = None
+ self.data = bytearray()
+ self.data_end = 0
def get_bytes(self, start, end):
return self.data[start:end]
@@ -43,7 +44,7 @@ class Data:
def test_matches(data, pattern):
data = Data.from_stream(io.BytesIO(data))
for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
- print([chunk.get_bytes() for chunk in match.yield_chunks()])
+ print([bytes(chunk.get_bytes()) for chunk in match.yield_chunks()])
def yield_lines_matching(self, pattern):
"""
@@ -90,23 +91,20 @@ class Data:
>>> Data.test_matches(b"hello there", b"RL")
"""
- if self.data is None:
- self.data = self.stream.read()
res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
if res:
first_re = res[0]
else:
first_re = re.compile(b"")
line_start = 0
- data_end = len(self.data)
- while line_start < data_end:
- match = first_re.search(self.data, pos=line_start, endpos=data_end)
+ while line_start < self.data_end or self.read_chunk():
+ match = first_re.search(self.data, pos=line_start, endpos=self.data_end)
if match:
initial_match = match.start()
line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
- line_end = self.data.find(NEWLINE, initial_match, data_end)
+ line_end = self.data.find(NEWLINE, initial_match, self.data_end)
if line_end == -1:
- line_end = data_end
+ line_end = self.data_end
regions = []
for compiled_re in res:
matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
@@ -118,7 +116,22 @@ class Data:
yield Match(self, Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
else:
- line_start = data_end
+ line_start = self.data_end
+
+ def read_chunk(self):
+ chunk = self.stream.read(2**12)
+ if chunk:
+ self.data += chunk
+ newline_index = chunk.rfind(NEWLINE)
+ if newline_index != -1:
+ self.data_end = len(self.data) - len(chunk) + newline_index + 1
+ return True
+ else:
+ if self.data_end == len(self.data):
+ return False
+ else:
+ self.data_end = len(self.data)
+ return True
class Match:
commit 4b4c1286fdadffc9ac4a721767e1417a431d1719
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 22:37:10 2025 +0200
Add end position to searches
diff --git a/rlselect2.py b/rlselect2.py
index 9310777..3d7e39b 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -100,11 +100,11 @@ class Data:
line_start = 0
data_end = len(self.data)
while line_start < data_end:
- match = first_re.search(self.data, pos=line_start)
+ match = first_re.search(self.data, pos=line_start, endpos=data_end)
if match:
initial_match = match.start()
line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
- line_end = self.data.find(NEWLINE, initial_match)
+ line_end = self.data.find(NEWLINE, initial_match, data_end)
if line_end == -1:
line_end = data_end
regions = []
commit dc78c4752c3ac40a0ada1615e6be6a831af9c113
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 22:32:49 2025 +0200
Match and Chunk know the data they come from
diff --git a/rlselect2.py b/rlselect2.py
index 23b5ec2..9310777 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -43,7 +43,7 @@ class Data:
def test_matches(data, pattern):
data = Data.from_stream(io.BytesIO(data))
for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
- print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
+ print([chunk.get_bytes() for chunk in match.yield_chunks()])
def yield_lines_matching(self, pattern):
"""
@@ -115,42 +115,43 @@ class Data:
else:
regions.extend([Region(*x.span()) for x in matches])
else:
- yield Match(Region(line_start, line_end), regions)
+ yield Match(self, Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
else:
line_start = data_end
class Match:
- def __init__(self, whole, regions):
+ def __init__(self, data, whole, regions):
+ self.data = data
self.whole = whole
self.regions = regions
- def get_bytes(self, data):
- return data.get_bytes(self.whole.start, self.whole.end)
+ def get_bytes(self):
+ return self.data.get_bytes(self.whole.start, self.whole.end)
def yield_chunks(self):
"""
- >>> for chunk in Match(Region(2, 10), [Region(3, 4)]).yield_chunks():
+ >>> for chunk in Match(None, Region(2, 10), [Region(3, 4)]).yield_chunks():
... print(chunk)
Chunk(2, 3, 0)
Chunk(3, 4, 1)
Chunk(4, 10, 0)
- >>> for chunk in Match(Region(5, 10), [Region(5, 10)]).yield_chunks():
+ >>> for chunk in Match(None, Region(5, 10), [Region(5, 10)]).yield_chunks():
... print(chunk)
Chunk(5, 10, 1)
- >>> for chunk in Match(Region(5, 10), [Region(5, 9)]).yield_chunks():
+ >>> for chunk in Match(None, Region(5, 10), [Region(5, 9)]).yield_chunks():
... print(chunk)
Chunk(5, 9, 1)
Chunk(9, 10, 0)
- >>> for chunk in Match(Region(5, 10), []).yield_chunks():
+ >>> for chunk in Match(None, Region(5, 10), []).yield_chunks():
... print(chunk)
Chunk(5, 10, 0)
- >>> for chunk in Match(Region(5, 5), []).yield_chunks():
+ >>> for chunk in Match(None, Region(5, 5), []).yield_chunks():
... print(chunk)
"""
match_counts = defaultdict(lambda: 0)
@@ -167,14 +168,15 @@ class Match:
match_count = next_match_count
else:
if next_match_count != match_count:
- yield Chunk(start, index, match_count)
+ yield Chunk(self.data, start, index, match_count)
start = index
match_count = next_match_count
- yield Chunk(start, index+1, match_count)
+ yield Chunk(self.data, start, index+1, match_count)
class Chunk:
- def __init__(self, start, end, match_count):
+ def __init__(self, data, start, end, match_count):
+ self.data = data
self.start = start
self.end = end
self.match_count = match_count
@@ -185,11 +187,11 @@ class Chunk:
def is_multi_match(self):
return self.match_count > 1
- def get_text(self, data, encoding):
- return self.get_bytes(data).decode(encoding, "replace")
+ def get_text(self, encoding):
+ return self.get_bytes().decode(encoding, "replace")
- def get_bytes(self, data):
- return data.get_bytes(self.start, self.end)
+ def get_bytes(self):
+ return self.data.get_bytes(self.start, self.end)
def __repr__(self):
return f"Chunk({self.start!r}, {self.end!r}, {self.match_count!r})"
@@ -348,7 +350,7 @@ class CursesUi:
if self.selection is None:
return self.pattern.get_bytes()
else:
- return self.selection.get_bytes(self.data)
+ return self.selection.get_bytes()
def loading_render(self, screen):
max_y, max_x = screen.getmaxyx()
@@ -380,7 +382,7 @@ class CursesUi:
for chunk in match.yield_chunks():
if x == max_x:
break
- text = chunk.get_text(self.data, screen.encoding)[:max_x-x]
+ text = chunk.get_text(screen.encoding)[:max_x-x]
attrs = 0
if is_selected:
if curses.has_colors():
@@ -527,7 +529,7 @@ if __name__ == "__main__":
sys.stdout.buffer.write(ESCAPE_MULTI_MATCH)
else:
sys.stdout.buffer.write(ESCAPE_MATCH)
- sys.stdout.buffer.write(chunk.get_bytes(data))
+ sys.stdout.buffer.write(chunk.get_bytes())
if chunk.is_match():
sys.stdout.buffer.write(ESCAPE_RESET)
sys.stdout.buffer.write(NEWLINE)
commit e078a8e240ef094e4d9e5bdd3061642f25553c96
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Tue Oct 14 22:26:22 2025 +0200
Remove todo
diff --git a/rlselect2.py b/rlselect2.py
index 172226d..23b5ec2 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -9,10 +9,6 @@ Read the input only once. Don't mangle it. Etc.
Don't do anything that the user has not asked for. As a design principle. Skip
unnecessary work.
-
-* How fast to create a bytearray which is split+unique?
- * In one go?
- * In multiple passes?
"""
from collections import defaultdict
commit b1d9e9cef8a1c7336768837d09d3b459348dee44
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 20:52:12 2025 +0200
Better drawing of loading screen
diff --git a/rlselect2.py b/rlselect2.py
index 7a789a9..172226d 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -319,6 +319,7 @@ class CursesUi:
curses.init_pair(2, curses.COLOR_WHITE, curses.COLOR_GREEN)
self.pattern = Pattern.empty(encoding=screen.encoding)
self.selection_index = None
+ self.loading_render(screen)
while True:
self.render(screen)
c = screen.get_wch()
@@ -353,6 +354,16 @@ class CursesUi:
else:
return self.selection.get_bytes(self.data)
+ def loading_render(self, screen):
+ max_y, max_x = screen.getmaxyx()
+ screen.addstr(1, 0, f"".rjust(max_x)[:max_x], curses.A_REVERSE)
+ attrs = curses.A_REVERSE | curses.A_BOLD
+ if curses.has_colors():
+ attrs |= curses.color_pair(1)
+ screen.addstr(2, 0, "loading...", attrs)
+ screen.move(0, 0)
+ screen.refresh()
+
def render(self, screen):
max_y, max_x = screen.getmaxyx()
screen.erase()
commit 59df602be6545b6c9dde65cf256b1be9773ab52a
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 20:47:42 2025 +0200
Refactor towards incremental loading of data
diff --git a/rlselect2.py b/rlselect2.py
index 77769b6..7a789a9 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -38,12 +38,7 @@ class Data:
def __init__(self, stream):
self.stream = stream
- self.data = b"loading..."
- self.is_read = False
-
- def read(self):
- self.data = self.stream.read()
- self.is_read = True
+ self.data = None
def get_bytes(self, start, end):
return self.data[start:end]
@@ -51,7 +46,6 @@ class Data:
@staticmethod
def test_matches(data, pattern):
data = Data.from_stream(io.BytesIO(data))
- data.read()
for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
@@ -100,6 +94,8 @@ class Data:
>>> Data.test_matches(b"hello there", b"RL")
"""
+ if self.data is None:
+ self.data = self.stream.read()
res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
if res:
first_re = res[0]
@@ -325,9 +321,6 @@ class CursesUi:
self.selection_index = None
while True:
self.render(screen)
- if not self.data.is_read:
- self.data.read()
- continue
c = screen.get_wch()
if c == CTRL_W:
self.pattern.remove_last_word()
@@ -520,7 +513,6 @@ if __name__ == "__main__":
ESCAPE_MULTI_MATCH = b"\033[43m"
ESCAPE_RESET = b"\033[0m"
data = Data.from_stdin()
- data.read()
for match in data.yield_lines_matching(Pattern.from_argv()):
for chunk in match.yield_chunks():
if chunk.is_match():
2025-10-13 20:10 Rickard pushed to rlselect2
commit 7b9ee66243dfbffd1eeeed0bf81599aa9a4d8df3
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 19:40:20 2025 +0200
Rely on variable instead of len
diff --git a/rlselect2.py b/rlselect2.py
index 0f4e920..77769b6 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -106,14 +106,15 @@ class Data:
else:
first_re = re.compile(b"")
line_start = 0
- while line_start < len(self.data):
+ data_end = len(self.data)
+ while line_start < data_end:
match = first_re.search(self.data, pos=line_start)
if match:
initial_match = match.start()
line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
line_end = self.data.find(NEWLINE, initial_match)
if line_end == -1:
- line_end = len(self.data)
+ line_end = data_end
regions = []
for compiled_re in res:
matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
@@ -125,7 +126,7 @@ class Data:
yield Match(Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
else:
- line_start = len(self.data)
+ line_start = data_end
class Match:
commit d25f0aef2e1d66ce72e4e488a8be64c003f729e7
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 19:37:51 2025 +0200
Replace break with more intuitive code
diff --git a/rlselect2.py b/rlselect2.py
index 081075f..0f4e920 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -125,7 +125,7 @@ class Data:
yield Match(Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
else:
- break
+ line_start = len(self.data)
class Match:
commit 66672de43ba5b4d2dd059f2f310c7e8ebc3ed6f2
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 19:29:00 2025 +0200
Simplify search by always having a first_re
diff --git a/rlselect2.py b/rlselect2.py
index 2002402..081075f 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -95,21 +95,21 @@ class Data:
>>> Data.test_matches(b"x TimelineCanvas", b"time")
[b'x ', b'Time', b'lineCanvas']
+
+ No match:
+
+ >>> Data.test_matches(b"hello there", b"RL")
"""
res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
+ if res:
+ first_re = res[0]
+ else:
+ first_re = re.compile(b"")
line_start = 0
while line_start < len(self.data):
- if res:
- match = res[0].search(self.data, pos=line_start)
- if match:
- initial_match = match.start()
- else:
- initial_match = None
- else:
- initial_match = line_start
- if initial_match is None:
- break
- else:
+ match = first_re.search(self.data, pos=line_start)
+ if match:
+ initial_match = match.start()
line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
line_end = self.data.find(NEWLINE, initial_match)
if line_end == -1:
@@ -124,6 +124,8 @@ class Data:
else:
yield Match(Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
+ else:
+ break
class Match:
commit 789a704e9ac4754804d8f000614d472d5807d8a1
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 19:21:36 2025 +0200
More idiomatic get of start
diff --git a/rlselect2.py b/rlselect2.py
index 039c585..2002402 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -102,7 +102,7 @@ class Data:
if res:
match = res[0].search(self.data, pos=line_start)
if match:
- initial_match = match.span()[0]
+ initial_match = match.start()
else:
initial_match = None
else:
commit 55e467f24da08bd70974a877641b562fc0e76ee0
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 19:10:47 2025 +0200
Get rid of incorrect percent
diff --git a/rlselect2.py b/rlselect2.py
index f0a4741..039c585 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -98,7 +98,6 @@ class Data:
"""
res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
line_start = 0
- self.percent = 0
while line_start < len(self.data):
if res:
match = res[0].search(self.data, pos=line_start)
@@ -125,7 +124,6 @@ class Data:
else:
yield Match(Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
- self.percent = int(100*(line_start/len(self.data)))
class Match:
@@ -413,7 +411,7 @@ class CursesUi:
y += 1
t2 = time.perf_counter()
elapsed = int((t2-t1)*1000)
- screen.addstr(1, 0, f"searched {self.data.percent}% of input in {elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
+ screen.addstr(1, 0, f"completed search in {elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
attrs = curses.A_REVERSE | curses.A_BOLD
if curses.has_colors():
attrs |= curses.color_pair(1)
commit 059fe2ce13e793353eb6a634c07501122d6e486c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Mon Oct 13 07:35:30 2025 +0200
Start UI before reading stdin for faster perceived startup time
diff --git a/rlselect2.py b/rlselect2.py
index 082d5da..f0a4741 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -30,17 +30,28 @@ class Data:
@classmethod
def from_stdin(cls):
- return Data(sys.stdin.buffer)
+ return cls.from_stream(sys.stdin.buffer)
+
+ @classmethod
+ def from_stream(cls, stream):
+ return cls(stream)
def __init__(self, stream):
- self.data = stream.read()
+ self.stream = stream
+ self.data = b"loading..."
+ self.is_read = False
+
+ def read(self):
+ self.data = self.stream.read()
+ self.is_read = True
def get_bytes(self, start, end):
return self.data[start:end]
@staticmethod
def test_matches(data, pattern):
- data = Data(io.BytesIO(data))
+ data = Data.from_stream(io.BytesIO(data))
+ data.read()
for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
@@ -277,17 +288,22 @@ class Pattern:
class CursesUi:
def run(self):
- self.data = Data.from_stdin()
- with self.redirect_terminal():
- action, fail, selection = curses.wrapper(self.run_wrapper)
+ original_stdin = os.fdopen(os.dup(sys.stdin.fileno()), "rb")
+ original_stdout = os.fdopen(os.dup(sys.stdout.fileno()), "wb")
+ self.data = Data.from_stream(original_stdin)
+ terminal_stdin = open("/dev/tty", "rb")
+ terminal_stdout = open("/dev/tty", "wb")
+ os.dup2(terminal_stdin.fileno(), sys.stdin.fileno())
+ os.dup2(terminal_stdout.fileno(), sys.stdout.fileno())
+ action, fail, selection = curses.wrapper(self.run_wrapper)
if "--action" in sys.argv[1:]:
- sys.stdout.buffer.write(action)
- sys.stdout.buffer.write(NEWLINE)
+ original_stdout.write(action)
+ original_stdout.write(NEWLINE)
if fail:
sys.exit(1)
else:
- sys.stdout.buffer.write(selection)
- sys.stdout.buffer.write(NEWLINE)
+ original_stdout.write(selection)
+ original_stdout.write(NEWLINE)
def run_wrapper(self, screen):
CTRL_N = u"\u000E"
@@ -308,6 +324,9 @@ class CursesUi:
self.selection_index = None
while True:
self.render(screen)
+ if not self.data.is_read:
+ self.data.read()
+ continue
c = screen.get_wch()
if c == CTRL_W:
self.pattern.remove_last_word()
@@ -401,22 +420,6 @@ class CursesUi:
screen.addstr(0, 0, self.pattern.get_text(), attrs)
screen.refresh()
- @contextlib.contextmanager
- def redirect_terminal(self):
- stdin_fileno = sys.stdin.fileno()
- stdout_fileno = sys.stdout.fileno()
- process_stdin = os.dup(sys.stdin.fileno())
- process_stdout = os.dup(sys.stdout.fileno())
- try:
- terminal_stdin = open("/dev/tty", "rb")
- terminal_stdout = open("/dev/tty", "wb")
- os.dup2(terminal_stdin.fileno(), stdin_fileno)
- os.dup2(terminal_stdout.fileno(), stdout_fileno)
- yield
- finally:
- os.dup2(process_stdin, stdin_fileno)
- os.dup2(process_stdout, stdout_fileno)
-
class FilterCommand:
"""
@@ -516,6 +519,7 @@ if __name__ == "__main__":
ESCAPE_MULTI_MATCH = b"\033[43m"
ESCAPE_RESET = b"\033[0m"
data = Data.from_stdin()
+ data.read()
for match in data.yield_lines_matching(Pattern.from_argv()):
for chunk in match.yield_chunks():
if chunk.is_match():
2025-10-12 09:11 Rickard pushed to rlselect2
commit 467bac5c9f78b8c8149b4d5035838c251b9fecd6
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Sun Oct 12 09:11:08 2025 +0200
Do complete processing experiment
diff --git a/experiments/pyperftest.py b/experiments/pyperftest.py
index 2f545f3..8b23b92 100755
--- a/experiments/pyperftest.py
+++ b/experiments/pyperftest.py
@@ -26,6 +26,12 @@ with timeit("load stdin"):
buffer = sys.stdin.buffer.read()
print(f"buffer size = {len(buffer):,}")
+with timeit("complete processing"):
+ lines = buffer.split(b"\n")
+ unique_dict = dict.fromkeys(lines)
+ unique = b"\n".join(unique_dict)
+ print(f"unique size = {len(unique):,}")
+
#chunk_size = 2**12
#with timeit(f"load incrementally {chunk_size}"):
# buffer = bytearray()
commit 1bf7caa4de156d2fda3ec7119db534da98584d71
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Sun Oct 12 08:50:18 2025 +0200
Do another load timing experiment
diff --git a/experiments/pyperftest.py b/experiments/pyperftest.py
index 2cb41f4..2f545f3 100755
--- a/experiments/pyperftest.py
+++ b/experiments/pyperftest.py
@@ -26,6 +26,18 @@ with timeit("load stdin"):
buffer = sys.stdin.buffer.read()
print(f"buffer size = {len(buffer):,}")
+#chunk_size = 2**12
+#with timeit(f"load incrementally {chunk_size}"):
+# buffer = bytearray()
+# while True:
+# chunk = sys.stdin.buffer.read(chunk_size)
+# if chunk:
+# buffer += chunk
+# pass
+# else:
+# break
+# print(f"buffer size = {len(buffer):,}")
+
header("Find")
with timeit("find"):
commit 8edf5e1b100f336046e33d4ebb880575f58e3381
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Sat Oct 11 22:51:44 2025 +0200
Change structure of search code
diff --git a/rlselect2.py b/rlselect2.py
index daebded..082d5da 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -91,26 +91,30 @@ class Data:
while line_start < len(self.data):
if res:
match = res[0].search(self.data, pos=line_start)
- if not match:
- break
- initial_match = match.span()[0]
+ if match:
+ initial_match = match.span()[0]
+ else:
+ initial_match = None
else:
initial_match = line_start
- line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
- line_end = self.data.find(NEWLINE, initial_match)
- if line_end == -1:
- line_end = len(self.data)
- regions = []
- for compiled_re in res:
- matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
- if not matches:
- break
- else:
- regions.extend([Region(*x.span()) for x in matches])
+ if initial_match is None:
+ break
else:
- yield Match(Region(line_start, line_end), regions)
- line_start = line_end + len(NEWLINE)
- self.percent = int(100*(line_start/len(self.data)))
+ line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
+ line_end = self.data.find(NEWLINE, initial_match)
+ if line_end == -1:
+ line_end = len(self.data)
+ regions = []
+ for compiled_re in res:
+ matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
+ if not matches:
+ break
+ else:
+ regions.extend([Region(*x.span()) for x in matches])
+ else:
+ yield Match(Region(line_start, line_end), regions)
+ line_start = line_end + len(NEWLINE)
+ self.percent = int(100*(line_start/len(self.data)))
class Match:
commit eb2953ac7aad79221c349fffddf8de4595c9f105
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Sat Oct 11 21:22:52 2025 +0200
Data takes a stream
diff --git a/rlselect2.py b/rlselect2.py
index db4fd30..daebded 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -17,6 +17,7 @@ unnecessary work.
from collections import defaultdict
import contextlib
+import io
import os
import re
import subprocess
@@ -29,17 +30,17 @@ class Data:
@classmethod
def from_stdin(cls):
- return Data(sys.stdin.buffer.read())
+ return Data(sys.stdin.buffer)
- def __init__(self, data):
- self.data = data
+ def __init__(self, stream):
+ self.data = stream.read()
def get_bytes(self, start, end):
return self.data[start:end]
@staticmethod
def test_matches(data, pattern):
- data = Data(data)
+ data = Data(io.BytesIO(data))
for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
@@ -415,14 +416,12 @@ class CursesUi:
class FilterCommand:
"""
- >>> import io
>>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
>>> o = io.BytesIO()
>>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
>>> o.getvalue()
b'one\\ntwo\\nthree\\n'
- >>> import io
>>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree\\n")
>>> o = io.BytesIO()
>>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
@@ -461,7 +460,6 @@ class FilterCommand:
class ReverseFilterCommand:
"""
- >>> import io
>>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
>>> o = io.BytesIO()
>>> ReverseFilterCommand(in_stream=i, out_stream=o).run()
2025-10-11 08:48 Rickard pushed to rlselect2
commit af9d2fafe670d6712a390cd15228d53ba45156fd
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Fri Oct 10 18:22:42 2025 +0200
Extract Filter and ReverseFilter
diff --git a/rlselect2.py b/rlselect2.py
index 5968018..db4fd30 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -412,6 +412,77 @@ class CursesUi:
os.dup2(process_stdin, stdin_fileno)
os.dup2(process_stdout, stdout_fileno)
+class FilterCommand:
+
+ """
+ >>> import io
+ >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
+ >>> o = io.BytesIO()
+ >>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
+ >>> o.getvalue()
+ b'one\\ntwo\\nthree\\n'
+
+ >>> import io
+ >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree\\n")
+ >>> o = io.BytesIO()
+ >>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
+ >>> o.getvalue()
+ b'one\\ntwo\\nthree\\n'
+ """
+
+ def __init__(self, in_stream, out_stream, buffer_size):
+ self.in_stream = in_stream
+ self.out_stream = out_stream
+ self.buffer_size = buffer_size
+
+ def run(self):
+ buffer = bytearray(self.buffer_size)
+ tail = b""
+ seen = set()
+ while True:
+ bytes_read = self.in_stream.readinto(buffer)
+ if bytes_read > 0:
+ splits = (tail+buffer[0:bytes_read]).split(NEWLINE)
+ tail = splits.pop(-1)
+ unique = []
+ for line in splits:
+ if line not in seen:
+ seen.add(line)
+ unique.append(line)
+ if unique:
+ self.out_stream.write(NEWLINE.join(unique))
+ self.out_stream.write(NEWLINE)
+ else:
+ if tail:
+ self.out_stream.write(tail)
+ self.out_stream.write(NEWLINE)
+ break
+
+class ReverseFilterCommand:
+
+ """
+ >>> import io
+ >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
+ >>> o = io.BytesIO()
+ >>> ReverseFilterCommand(in_stream=i, out_stream=o).run()
+ >>> o.getvalue()
+ b'three\\ntwo\\none\\n'
+ """
+
+ def __init__(self, in_stream, out_stream):
+ self.in_stream = in_stream
+ self.out_stream = out_stream
+
+ def run(self):
+ buffer = self.in_stream.read()
+ lines = buffer.split(NEWLINE)
+ seen = set()
+ for line in lines[::-1]:
+ if line not in seen:
+ seen.add(line)
+ self.out_stream.write(line)
+ self.out_stream.write(NEWLINE)
+
if __name__ == "__main__":
if sys.argv[1:] == ["--selftest"]:
import doctest
@@ -425,11 +496,16 @@ if __name__ == "__main__":
if not unittest.TextTestRunner().run(suite).wasSuccessful():
sys.exit(1)
elif sys.argv[1:] == ["--filter"]:
- buffer = sys.stdin.buffer.read()
- lines = buffer.split(NEWLINE)
- unique_lines = list(dict.fromkeys(lines))
- output = NEWLINE.join(unique_lines)
- sys.stdout.buffer.write(output)
+ FilterCommand(
+ in_stream=sys.stdin.buffer,
+ out_stream=sys.stdout.buffer,
+ buffer_size=2**12,
+ ).run()
+ elif sys.argv[1:] == ["--reverse-filter"]:
+ ReverseFilterCommand(
+ in_stream=sys.stdin.buffer,
+ out_stream=sys.stdout.buffer,
+ ).run()
elif sys.argv[1:] == ["--curses"]:
import curses
CursesUi().run()
commit f0d7e71f60f34a303ce683fe8f85230e9ff0c19e
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Thu Oct 9 21:53:53 2025 +0200
Add experiments/consumeincrementally.py
diff --git a/experiments/consumeincrementally.py b/experiments/consumeincrementally.py
new file mode 100755
index 0000000..67035e2
--- /dev/null
+++ b/experiments/consumeincrementally.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import sys
+
+buffer = bytearray(10)
+print(buffer)
+
+while True:
+ bytes_read = sys.stdin.buffer.readinto(buffer)
+ if bytes_read > 0:
+ print("")
+ print("read", bytes_read)
+ print(buffer)
+ print(buffer[0:bytes_read])
+ else:
+ break
2025-10-08 22:45 Rickard pushed to rlselect2
commit feea9d09d121398636dd13e820984f9cd2d5a6e0
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 22:45:51 2025 +0200
Fix --grep parameters
diff --git a/rlselect2.py b/rlselect2.py
index c2782c7..5968018 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -205,7 +205,7 @@ class Pattern:
@classmethod
def from_argv(cls):
return cls.from_text(
- " ".join(sys.argv[1:]),
+ " ".join(sys.argv[2:]),
encoding=sys.getfilesystemencoding()
)
@@ -433,7 +433,7 @@ if __name__ == "__main__":
elif sys.argv[1:] == ["--curses"]:
import curses
CursesUi().run()
- elif sys.argv[1:] == ["--grep"]:
+ elif sys.argv[1:2] == ["--grep"]:
ESCAPE_MATCH = b"\033[30;43m"
ESCAPE_MULTI_MATCH = b"\033[43m"
ESCAPE_RESET = b"\033[0m"
2025-10-08 21:58 Rickard pushed to rlselect2
commit 37ed8adb7016e4125f2daf94cfe68911c23fbfc6
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:52:57 2025 +0200
Fix tests after behavior change and fix one test failure which was a bug
diff --git a/rlselect2.py b/rlselect2.py
index ad48cc0..c2782c7 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -67,11 +67,16 @@ class Data:
>>> Data.test_matches(b"foo\\n"*5, b"")
[b'foo']
+ [b'foo']
+ [b'foo']
+ [b'foo']
+ [b'foo']
Empty lines:
>>> Data.test_matches(b"foo\\n\\nbar", b"")
[b'foo']
+ []
[b'bar']
Mixed search test:
@@ -100,7 +105,7 @@ class Data:
if not matches:
break
else:
- regions.extend([Region(*(y+line_start for y in x.span())) for x in matches])
+ regions.extend([Region(*x.span()) for x in matches])
else:
yield Match(Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
commit 4e7357861e71286ef494f8e5b6a8886b9d19f755
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:49:45 2025 +0200
Skip duplicate handling in ui (it is done in --filter pre-processor)
diff --git a/rlselect2.py b/rlselect2.py
index d9ff350..ad48cc0 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -33,10 +33,6 @@ class Data:
def __init__(self, data):
self.data = data
- self.duplicates = set()
-
- def find(self, term, start, end):
- return self.data.find(term, start, end)
def get_bytes(self, start, end):
return self.data[start:end]
@@ -84,7 +80,6 @@ class Data:
[b'x ', b'Time', b'lineCanvas']
"""
res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
- seen = set()
line_start = 0
self.percent = 0
while line_start < len(self.data):
@@ -99,21 +94,15 @@ class Data:
line_end = self.data.find(NEWLINE, initial_match)
if line_end == -1:
line_end = len(self.data)
- if line_end > line_start and line_start not in self.duplicates:
- line = self.data[line_start:line_end]
- if line in seen:
- self.duplicates.add(line_start)
+ regions = []
+ for compiled_re in res:
+ matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
+ if not matches:
+ break
else:
- seen.add(line)
- regions = []
- for compiled_re in res:
- matches = list(compiled_re.finditer(line))
- if not matches:
- break
- else:
- regions.extend([Region(*(y+line_start for y in x.span())) for x in matches])
- else:
- yield Match(Region(line_start, line_end), regions)
+ regions.extend([Region(*(y+line_start for y in x.span())) for x in matches])
+ else:
+ yield Match(Region(line_start, line_end), regions)
line_start = line_end + len(NEWLINE)
self.percent = int(100*(line_start/len(self.data)))
commit 8ac9bbc3f26553bb7fe959d9cdf7f0666b31276c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:39:09 2025 +0200
Use pipe mechanism to handle filtering
diff --git a/rlselect2.py b/rlselect2.py
index 7276353..d9ff350 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -19,6 +19,7 @@ from collections import defaultdict
import contextlib
import os
import re
+import subprocess
import sys
import time
@@ -438,7 +439,7 @@ if __name__ == "__main__":
elif sys.argv[1:] == ["--curses"]:
import curses
CursesUi().run()
- else:
+ elif sys.argv[1:] == ["--grep"]:
ESCAPE_MATCH = b"\033[30;43m"
ESCAPE_MULTI_MATCH = b"\033[43m"
ESCAPE_RESET = b"\033[0m"
@@ -454,3 +455,14 @@ if __name__ == "__main__":
if chunk.is_match():
sys.stdout.buffer.write(ESCAPE_RESET)
sys.stdout.buffer.write(NEWLINE)
+ else:
+ filter_process = subprocess.Popen(
+ [sys.executable, sys.argv[0], "--filter"],
+ stdout=subprocess.PIPE
+ )
+ ui_process = subprocess.Popen(
+ [sys.executable, sys.argv[0], "--curses"],
+ stdin=filter_process.stdout
+ )
+ filter_process.stdout.close()
+ ui_process.communicate()
2025-10-08 21:34 Rickard pushed to rlselect2
commit a339eaed74da7f090dc75eb566a3560d7d12f615
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:34:32 2025 +0200
Use constant
diff --git a/rlselect2.py b/rlselect2.py
index 58bf111..7276353 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -431,9 +431,9 @@ if __name__ == "__main__":
sys.exit(1)
elif sys.argv[1:] == ["--filter"]:
buffer = sys.stdin.buffer.read()
- lines = buffer.split(b"\n")
+ lines = buffer.split(NEWLINE)
unique_lines = list(dict.fromkeys(lines))
- output = b"\n".join(unique_lines)
+ output = NEWLINE.join(unique_lines)
sys.stdout.buffer.write(output)
elif sys.argv[1:] == ["--curses"]:
import curses
commit 75cc7903763369fbe717a14bb5e721b672760656
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:33:27 2025 +0200
Add filter program
diff --git a/rlselect2.py b/rlselect2.py
index da1f27f..58bf111 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -429,6 +429,12 @@ if __name__ == "__main__":
))
if not unittest.TextTestRunner().run(suite).wasSuccessful():
sys.exit(1)
+ elif sys.argv[1:] == ["--filter"]:
+ buffer = sys.stdin.buffer.read()
+ lines = buffer.split(b"\n")
+ unique_lines = list(dict.fromkeys(lines))
+ output = b"\n".join(unique_lines)
+ sys.stdout.buffer.write(output)
elif sys.argv[1:] == ["--curses"]:
import curses
CursesUi().run()
2025-10-08 21:26 Rickard pushed to rlselect2
commit f1d0cf2814f0ef71347113776fa0dc2167e1bbc5
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:20:22 2025 +0200
Rename
diff --git a/resources/consumetest.py b/experiments/consumetest.py
similarity index 100%
rename from resources/consumetest.py
rename to experiments/consumetest.py
diff --git a/resources/producetest.py b/experiments/producetest.py
similarity index 100%
rename from resources/producetest.py
rename to experiments/producetest.py
diff --git a/resources/pyperftest.py b/experiments/pyperftest.py
similarity index 100%
rename from resources/pyperftest.py
rename to experiments/pyperftest.py
commit bdc8948e4712bb4a63d1b6d5e79e788416b91636
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:19:39 2025 +0200
Make executable
diff --git a/rlselect2.py b/rlselect2.py
old mode 100644
new mode 100755
index 677b6ea..da1f27f
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
"""
Inspired by Casey Muratori.
commit 99bfe0bc57b2193975a93d42d92fa9c37ad5fc19
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:18:44 2025 +0200
Remove no longer used perf script
diff --git a/perf-summary.sh b/perf-summary.sh
deleted file mode 100755
index d5c8ec7..0000000
--- a/perf-summary.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-
-./perf.sh "$@" 2>&1 | grep ^real
commit 4a150a95b26d78a6597e75d66e7557c01360af98
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:17:03 2025 +0200
Add example inputs
diff --git a/.gitignore b/.gitignore
index bee8a64..5a78843 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
__pycache__
+/inputs/1gb.bin
diff --git a/inputs/latin1.txt b/inputs/latin1.txt
new file mode 100644
index 0000000..c4fb1f6
--- /dev/null
+++ b/inputs/latin1.txt
@@ -0,0 +1 @@
+Jag är latin1!
diff --git a/inputs/utf8.txt b/inputs/utf8.txt
new file mode 100644
index 0000000..f72d74b
--- /dev/null
+++ b/inputs/utf8.txt
@@ -0,0 +1,4 @@
+Jag är ÄR utf-8!
+
+emojis: 😃 🤣 😈 end
+emojis:-cc-xx-xx- end
commit d444374d1a4e35cc6d88db3ff6d10ada3433194c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:15:12 2025 +0200
Add tool
diff --git a/tools/print1gb.py b/tools/print1gb.py
new file mode 100755
index 0000000..5ff7f37
--- /dev/null
+++ b/tools/print1gb.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import random
+import sys
+
+left = 1024*1024*1024
+
+while left:
+ buffer = b""
+ for x in range(random.randrange(min(left, 256))):
+ while True:
+ y = random.randrange(13, 256).to_bytes()
+ if y != b"\n":
+ break
+ buffer += y
+ left -= len(buffer)
+ sys.stdout.buffer.write(buffer)
+ left -= 1
+ sys.stdout.buffer.write(b"\n")
commit 04c3d179d9011a7a99dfbe44bef7c3ac83008441
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:14:06 2025 +0200
Move pyperftest.py
diff --git a/pyperftest.py b/resources/pyperftest.py
similarity index 100%
rename from pyperftest.py
rename to resources/pyperftest.py
commit 8da2aac385630c366b1f0108144ef1ffec4d7264
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:09:35 2025 +0200
Add produce test
diff --git a/consumetest.py b/resources/consumetest.py
similarity index 83%
rename from consumetest.py
rename to resources/consumetest.py
index 081f078..49311e0 100755
--- a/consumetest.py
+++ b/resources/consumetest.py
@@ -11,8 +11,11 @@ https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigaby
import sys
import time
+lines = []
for index, line in enumerate(sys.stdin.buffer):
print(line)
+ lines.append(line)
if index > 2:
- time.sleep(60)
+ time.sleep(5)
+ print(lines)
sys.exit(0)
diff --git a/resources/producetest.py b/resources/producetest.py
new file mode 100755
index 0000000..f67882d
--- /dev/null
+++ b/resources/producetest.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+"""
+`foo | bar`
+ * What happens if bar doesn't consume all as quickly as possible?
+ * What happens if bar exits before consuming all?
+
+https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigabytes-of-data-to-a-pipe
+
+https://docs.python.org/3/library/signal.html#note-on-sigpipe
+"""
+
+import sys
+import os
+
+try:
+ for n in range(1000):
+ line = f"line {n}\n"
+ sys.stderr.write("log: "+line)
+ sys.stdout.write(line)
+ n += 1
+ sys.stdout.flush()
+except BrokenPipeError:
+ devnull = os.open(os.devnull, os.O_WRONLY)
+ os.dup2(devnull, sys.stdout.fileno())
+ sys.stderr.write("broken pipe\n")
2025-10-08 21:20 Rickard pushed to rlselect2
commit f1d0cf2814f0ef71347113776fa0dc2167e1bbc5
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:20:22 2025 +0200
Rename
diff --git a/resources/consumetest.py b/experiments/consumetest.py
similarity index 100%
rename from resources/consumetest.py
rename to experiments/consumetest.py
diff --git a/resources/producetest.py b/experiments/producetest.py
similarity index 100%
rename from resources/producetest.py
rename to experiments/producetest.py
diff --git a/resources/pyperftest.py b/experiments/pyperftest.py
similarity index 100%
rename from resources/pyperftest.py
rename to experiments/pyperftest.py
commit bdc8948e4712bb4a63d1b6d5e79e788416b91636
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:19:39 2025 +0200
Make executable
diff --git a/rlselect2.py b/rlselect2.py
old mode 100644
new mode 100755
index 677b6ea..da1f27f
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
"""
Inspired by Casey Muratori.
commit 99bfe0bc57b2193975a93d42d92fa9c37ad5fc19
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:18:44 2025 +0200
Remove no longer used perf script
diff --git a/perf-summary.sh b/perf-summary.sh
deleted file mode 100755
index d5c8ec7..0000000
--- a/perf-summary.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-
-./perf.sh "$@" 2>&1 | grep ^real
commit 4a150a95b26d78a6597e75d66e7557c01360af98
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:17:03 2025 +0200
Add example inputs
diff --git a/.gitignore b/.gitignore
index bee8a64..5a78843 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
__pycache__
+/inputs/1gb.bin
diff --git a/inputs/latin1.txt b/inputs/latin1.txt
new file mode 100644
index 0000000..c4fb1f6
--- /dev/null
+++ b/inputs/latin1.txt
@@ -0,0 +1 @@
+Jag är latin1!
diff --git a/inputs/utf8.txt b/inputs/utf8.txt
new file mode 100644
index 0000000..f72d74b
--- /dev/null
+++ b/inputs/utf8.txt
@@ -0,0 +1,4 @@
+Jag är ÄR utf-8!
+
+emojis: 😃 🤣 😈 end
+emojis:-cc-xx-xx- end
commit d444374d1a4e35cc6d88db3ff6d10ada3433194c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:15:12 2025 +0200
Add tool
diff --git a/tools/print1gb.py b/tools/print1gb.py
new file mode 100755
index 0000000..5ff7f37
--- /dev/null
+++ b/tools/print1gb.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import random
+import sys
+
+left = 1024*1024*1024
+
+while left:
+ buffer = b""
+ for x in range(random.randrange(min(left, 256))):
+ while True:
+ y = random.randrange(13, 256).to_bytes()
+ if y != b"\n":
+ break
+ buffer += y
+ left -= len(buffer)
+ sys.stdout.buffer.write(buffer)
+ left -= 1
+ sys.stdout.buffer.write(b"\n")
commit 04c3d179d9011a7a99dfbe44bef7c3ac83008441
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:14:06 2025 +0200
Move pyperftest.py
diff --git a/pyperftest.py b/resources/pyperftest.py
similarity index 100%
rename from pyperftest.py
rename to resources/pyperftest.py
commit 8da2aac385630c366b1f0108144ef1ffec4d7264
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date: Wed Oct 8 21:09:35 2025 +0200
Add produce test
diff --git a/consumetest.py b/resources/consumetest.py
similarity index 83%
rename from consumetest.py
rename to resources/consumetest.py
index 081f078..49311e0 100755
--- a/consumetest.py
+++ b/resources/consumetest.py
@@ -11,8 +11,11 @@ https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigaby
import sys
import time
+lines = []
for index, line in enumerate(sys.stdin.buffer):
print(line)
+ lines.append(line)
if index > 2:
- time.sleep(60)
+ time.sleep(5)
+ print(lines)
sys.exit(0)
diff --git a/resources/producetest.py b/resources/producetest.py
new file mode 100755
index 0000000..f67882d
--- /dev/null
+++ b/resources/producetest.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+"""
+`foo | bar`
+ * What happens if bar doesn't consume all as quickly as possible?
+ * What happens if bar exits before consuming all?
+
+https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigabytes-of-data-to-a-pipe
+
+https://docs.python.org/3/library/signal.html#note-on-sigpipe
+"""
+
+import sys
+import os
+
+try:
+ for n in range(1000):
+ line = f"line {n}\n"
+ sys.stderr.write("log: "+line)
+ sys.stdout.write(line)
+ n += 1
+ sys.stdout.flush()
+except BrokenPipeError:
+ devnull = os.open(os.devnull, os.O_WRONLY)
+ os.dup2(devnull, sys.stdout.fileno())
+ sys.stderr.write("broken pipe\n")