rlselect2

Rewrite of rlselect with a new approach to performance.

Home

Source code

git clone https://projects.rickardlindberg.me/scm/rlselect2.git

Recent events

2025-10-15 00:01 Rickard pushed to rlselect2

commit 7c24cefd8cfd13587f6b96fc5af4f3d3b1d0ec12
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 23:55:47 2025 +0200

    Prevent flickering of header

diff --git a/rlselect2.py b/rlselect2.py
index 2a571b5..b33de63 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -386,10 +386,14 @@ class CursesUi:
 
     def render_search_results(self, screen):
         max_y, max_x = screen.getmaxyx()
-        screen.erase()
         y = 2
         t1 = time.perf_counter()
         self.selection = None
+        for clear_y in range(y, max_y):
+            try:
+                screen.addstr(clear_y, 0, " "*max_x)
+            except:
+                pass
         for line_index, match in enumerate(self.data.yield_lines_matching(self.pattern)):
             if self.selection_index is None:
                 self.selection_index = 0

2025-10-14 23:48 Rickard pushed to rlselect2

commit a802c4f1f06750d574ad2e41a6700ac42cefb128
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 23:48:17 2025 +0200

    Attempt to not render control characters in curses

diff --git a/rlselect2.py b/rlselect2.py
index 6c2f099..2a571b5 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -12,6 +12,7 @@ unnecessary work.
 """
 
 from collections import defaultdict
+from itertools import chain
 import contextlib
 import io
 import os
@@ -201,7 +202,10 @@ class Chunk:
         return self.match_count > 1
 
     def get_text(self, encoding):
-        return self.get_bytes().decode(encoding, "replace")
+        return self.get_bytes().decode(encoding, "replace").translate({
+            char: "�"
+            for char in chain(range(0x00, 0x20), range(0x7f, 0xa0))
+        })
 
     def get_bytes(self):
         return self.data.get_bytes(self.start, self.end)

commit b0ed743f1587a3502530ee27fc4476acbe7168d7
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 23:34:09 2025 +0200

    Nicer looking header

diff --git a/rlselect2.py b/rlselect2.py
index 42b1abc..6c2f099 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -369,14 +369,14 @@ class CursesUi:
 
     def render_header(self, screen, loading):
         max_y, max_x = screen.getmaxyx()
-        attrs = curses.A_REVERSE | curses.A_BOLD
+        loading_attrs = curses.A_BOLD
         if curses.has_colors():
-            attrs |= curses.color_pair(1)
+            loading_attrs |= curses.color_pair(1)
         screen.addstr(1, 0, f"completed search in {self.elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
         screen.addstr(0, 0, "".ljust(max_x))
         if loading:
-            screen.addstr(0, max_x-1, "-")
-        screen.addstr(0, 0, self.pattern.get_text(), attrs)
+            screen.addstr(0, max_x-1, "-", loading_attrs)
+        screen.addstr(0, 0, f"> {self.pattern.get_text()[-max_x+4:]}")
         self.header_yx = screen.getyx()
         screen.refresh()
 

commit 436b04188fdaba545162f440f7baa0b8430d16c6
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 23:27:45 2025 +0200

    Indicate loading in header

diff --git a/rlselect2.py b/rlselect2.py
index ad42d2e..42b1abc 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -330,9 +330,11 @@ class CursesUi:
             curses.init_pair(2, curses.COLOR_WHITE, curses.COLOR_GREEN)
         self.pattern = Pattern.empty(encoding=screen.encoding)
         self.selection_index = None
-        self.loading_render(screen)
+        self.elapsed = "?"
         while True:
-            self.render(screen)
+            self.render_header(screen, loading=True)
+            self.render_search_results(screen)
+            self.render_header(screen, loading=False)
             c = screen.get_wch()
             if c == CTRL_W:
                 self.pattern.remove_last_word()
@@ -365,17 +367,20 @@ class CursesUi:
         else:
             return self.selection.get_bytes()
 
-    def loading_render(self, screen):
+    def render_header(self, screen, loading):
         max_y, max_x = screen.getmaxyx()
-        screen.addstr(1, 0, f"".rjust(max_x)[:max_x], curses.A_REVERSE)
         attrs = curses.A_REVERSE | curses.A_BOLD
         if curses.has_colors():
             attrs |= curses.color_pair(1)
-        screen.addstr(2, 0, "loading...", attrs)
-        screen.move(0, 0)
+        screen.addstr(1, 0, f"completed search in {self.elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
+        screen.addstr(0, 0, "".ljust(max_x))
+        if loading:
+            screen.addstr(0, max_x-1, "-")
+        screen.addstr(0, 0, self.pattern.get_text(), attrs)
+        self.header_yx = screen.getyx()
         screen.refresh()
 
-    def render(self, screen):
+    def render_search_results(self, screen):
         max_y, max_x = screen.getmaxyx()
         screen.erase()
         y = 2
@@ -428,12 +433,8 @@ class CursesUi:
                     pass
             y += 1
         t2 = time.perf_counter()
-        elapsed = int((t2-t1)*1000)
-        screen.addstr(1, 0, f"completed search in {elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
-        attrs = curses.A_REVERSE | curses.A_BOLD
-        if curses.has_colors():
-            attrs |= curses.color_pair(1)
-        screen.addstr(0, 0, self.pattern.get_text(), attrs)
+        self.elapsed = int((t2-t1)*1000)
+        screen.move(*self.header_yx)
         screen.refresh()
 
 class FilterCommand:

commit 8b3af64cab0e26ddff5a130b9f43ef81576a7c48
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 22:51:50 2025 +0200

    Read input incrementally

diff --git a/rlselect2.py b/rlselect2.py
index 3d7e39b..ad42d2e 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -34,7 +34,8 @@ class Data:
 
     def __init__(self, stream):
         self.stream = stream
-        self.data = None
+        self.data = bytearray()
+        self.data_end = 0
 
     def get_bytes(self, start, end):
         return self.data[start:end]
@@ -43,7 +44,7 @@ class Data:
     def test_matches(data, pattern):
         data = Data.from_stream(io.BytesIO(data))
         for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
-            print([chunk.get_bytes() for chunk in match.yield_chunks()])
+            print([bytes(chunk.get_bytes()) for chunk in match.yield_chunks()])
 
     def yield_lines_matching(self, pattern):
         """
@@ -90,23 +91,20 @@ class Data:
 
         >>> Data.test_matches(b"hello there", b"RL")
         """
-        if self.data is None:
-            self.data = self.stream.read()
         res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
         if res:
             first_re = res[0]
         else:
             first_re = re.compile(b"")
         line_start = 0
-        data_end = len(self.data)
-        while line_start < data_end:
-            match = first_re.search(self.data, pos=line_start, endpos=data_end)
+        while line_start < self.data_end or self.read_chunk():
+            match = first_re.search(self.data, pos=line_start, endpos=self.data_end)
             if match:
                 initial_match = match.start()
                 line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
-                line_end = self.data.find(NEWLINE, initial_match, data_end)
+                line_end = self.data.find(NEWLINE, initial_match, self.data_end)
                 if line_end == -1:
-                    line_end = data_end
+                    line_end = self.data_end
                 regions = []
                 for compiled_re in res:
                     matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
@@ -118,7 +116,22 @@ class Data:
                     yield Match(self, Region(line_start, line_end), regions)
                 line_start = line_end + len(NEWLINE)
             else:
-                line_start = data_end
+                line_start = self.data_end
+
+    def read_chunk(self):
+        chunk = self.stream.read(2**12)
+        if chunk:
+            self.data += chunk
+            newline_index = chunk.rfind(NEWLINE)
+            if newline_index != -1:
+                self.data_end = len(self.data) - len(chunk) + newline_index + 1
+            return True
+        else:
+            if self.data_end == len(self.data):
+                return False
+            else:
+                self.data_end = len(self.data)
+                return True
 
 class Match:
 

commit 4b4c1286fdadffc9ac4a721767e1417a431d1719
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 22:37:10 2025 +0200

    Add end position to searches

diff --git a/rlselect2.py b/rlselect2.py
index 9310777..3d7e39b 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -100,11 +100,11 @@ class Data:
         line_start = 0
         data_end = len(self.data)
         while line_start < data_end:
-            match = first_re.search(self.data, pos=line_start)
+            match = first_re.search(self.data, pos=line_start, endpos=data_end)
             if match:
                 initial_match = match.start()
                 line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
-                line_end = self.data.find(NEWLINE, initial_match)
+                line_end = self.data.find(NEWLINE, initial_match, data_end)
                 if line_end == -1:
                     line_end = data_end
                 regions = []

commit dc78c4752c3ac40a0ada1615e6be6a831af9c113
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 22:32:49 2025 +0200

    Match and Chunk know the data they come from

diff --git a/rlselect2.py b/rlselect2.py
index 23b5ec2..9310777 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -43,7 +43,7 @@ class Data:
     def test_matches(data, pattern):
         data = Data.from_stream(io.BytesIO(data))
         for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
-            print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
+            print([chunk.get_bytes() for chunk in match.yield_chunks()])
 
     def yield_lines_matching(self, pattern):
         """
@@ -115,42 +115,43 @@ class Data:
                     else:
                         regions.extend([Region(*x.span()) for x in matches])
                 else:
-                    yield Match(Region(line_start, line_end), regions)
+                    yield Match(self, Region(line_start, line_end), regions)
                 line_start = line_end + len(NEWLINE)
             else:
                 line_start = data_end
 
 class Match:
 
-    def __init__(self, whole, regions):
+    def __init__(self, data, whole, regions):
+        self.data = data
         self.whole = whole
         self.regions = regions
 
-    def get_bytes(self, data):
-        return data.get_bytes(self.whole.start, self.whole.end)
+    def get_bytes(self):
+        return self.data.get_bytes(self.whole.start, self.whole.end)
 
     def yield_chunks(self):
         """
-        >>> for chunk in Match(Region(2, 10), [Region(3, 4)]).yield_chunks():
+        >>> for chunk in Match(None, Region(2, 10), [Region(3, 4)]).yield_chunks():
         ...     print(chunk)
         Chunk(2, 3, 0)
         Chunk(3, 4, 1)
         Chunk(4, 10, 0)
 
-        >>> for chunk in Match(Region(5, 10), [Region(5, 10)]).yield_chunks():
+        >>> for chunk in Match(None, Region(5, 10), [Region(5, 10)]).yield_chunks():
         ...     print(chunk)
         Chunk(5, 10, 1)
 
-        >>> for chunk in Match(Region(5, 10), [Region(5, 9)]).yield_chunks():
+        >>> for chunk in Match(None, Region(5, 10), [Region(5, 9)]).yield_chunks():
         ...     print(chunk)
         Chunk(5, 9, 1)
         Chunk(9, 10, 0)
 
-        >>> for chunk in Match(Region(5, 10), []).yield_chunks():
+        >>> for chunk in Match(None, Region(5, 10), []).yield_chunks():
         ...     print(chunk)
         Chunk(5, 10, 0)
 
-        >>> for chunk in Match(Region(5, 5), []).yield_chunks():
+        >>> for chunk in Match(None, Region(5, 5), []).yield_chunks():
         ...     print(chunk)
         """
         match_counts = defaultdict(lambda: 0)
@@ -167,14 +168,15 @@ class Match:
                     match_count = next_match_count
                 else:
                     if next_match_count != match_count:
-                        yield Chunk(start, index, match_count)
+                        yield Chunk(self.data, start, index, match_count)
                         start = index
                         match_count = next_match_count
-            yield Chunk(start, index+1, match_count)
+            yield Chunk(self.data, start, index+1, match_count)
 
 class Chunk:
 
-    def __init__(self, start, end, match_count):
+    def __init__(self, data, start, end, match_count):
+        self.data = data
         self.start = start
         self.end = end
         self.match_count = match_count
@@ -185,11 +187,11 @@ class Chunk:
     def is_multi_match(self):
         return self.match_count > 1
 
-    def get_text(self, data, encoding):
-        return self.get_bytes(data).decode(encoding, "replace")
+    def get_text(self, encoding):
+        return self.get_bytes().decode(encoding, "replace")
 
-    def get_bytes(self, data):
-        return data.get_bytes(self.start, self.end)
+    def get_bytes(self):
+        return self.data.get_bytes(self.start, self.end)
 
     def __repr__(self):
         return f"Chunk({self.start!r}, {self.end!r}, {self.match_count!r})"
@@ -348,7 +350,7 @@ class CursesUi:
         if self.selection is None:
             return self.pattern.get_bytes()
         else:
-            return self.selection.get_bytes(self.data)
+            return self.selection.get_bytes()
 
     def loading_render(self, screen):
         max_y, max_x = screen.getmaxyx()
@@ -380,7 +382,7 @@ class CursesUi:
             for chunk in match.yield_chunks():
                 if x == max_x:
                     break
-                text = chunk.get_text(self.data, screen.encoding)[:max_x-x]
+                text = chunk.get_text(screen.encoding)[:max_x-x]
                 attrs = 0
                 if is_selected:
                     if curses.has_colors():
@@ -527,7 +529,7 @@ if __name__ == "__main__":
                         sys.stdout.buffer.write(ESCAPE_MULTI_MATCH)
                     else:
                         sys.stdout.buffer.write(ESCAPE_MATCH)
-                sys.stdout.buffer.write(chunk.get_bytes(data))
+                sys.stdout.buffer.write(chunk.get_bytes())
                 if chunk.is_match():
                     sys.stdout.buffer.write(ESCAPE_RESET)
             sys.stdout.buffer.write(NEWLINE)

commit e078a8e240ef094e4d9e5bdd3061642f25553c96
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Tue Oct 14 22:26:22 2025 +0200

    Remove todo

diff --git a/rlselect2.py b/rlselect2.py
index 172226d..23b5ec2 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -9,10 +9,6 @@ Read the input only once. Don't mangle it. Etc.
 
 Don't do anything that the user has not asked for. As a design principle. Skip
 unnecessary work.
-
-* How fast to create a bytearray which is split+unique?
-    * In one go?
-    * In multiple passes?
 """
 
 from collections import defaultdict

commit b1d9e9cef8a1c7336768837d09d3b459348dee44
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 20:52:12 2025 +0200

    Better drawing of loading screen

diff --git a/rlselect2.py b/rlselect2.py
index 7a789a9..172226d 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -319,6 +319,7 @@ class CursesUi:
             curses.init_pair(2, curses.COLOR_WHITE, curses.COLOR_GREEN)
         self.pattern = Pattern.empty(encoding=screen.encoding)
         self.selection_index = None
+        self.loading_render(screen)
         while True:
             self.render(screen)
             c = screen.get_wch()
@@ -353,6 +354,16 @@ class CursesUi:
         else:
             return self.selection.get_bytes(self.data)
 
+    def loading_render(self, screen):
+        max_y, max_x = screen.getmaxyx()
+        screen.addstr(1, 0, f"".rjust(max_x)[:max_x], curses.A_REVERSE)
+        attrs = curses.A_REVERSE | curses.A_BOLD
+        if curses.has_colors():
+            attrs |= curses.color_pair(1)
+        screen.addstr(2, 0, "loading...", attrs)
+        screen.move(0, 0)
+        screen.refresh()
+
     def render(self, screen):
         max_y, max_x = screen.getmaxyx()
         screen.erase()

commit 59df602be6545b6c9dde65cf256b1be9773ab52a
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 20:47:42 2025 +0200

    Refactor towards incremental loading of data

diff --git a/rlselect2.py b/rlselect2.py
index 77769b6..7a789a9 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -38,12 +38,7 @@ class Data:
 
     def __init__(self, stream):
         self.stream = stream
-        self.data = b"loading..."
-        self.is_read = False
-
-    def read(self):
-        self.data = self.stream.read()
-        self.is_read = True
+        self.data = None
 
     def get_bytes(self, start, end):
         return self.data[start:end]
@@ -51,7 +46,6 @@ class Data:
     @staticmethod
     def test_matches(data, pattern):
         data = Data.from_stream(io.BytesIO(data))
-        data.read()
         for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
             print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
 
@@ -100,6 +94,8 @@ class Data:
 
         >>> Data.test_matches(b"hello there", b"RL")
         """
+        if self.data is None:
+            self.data = self.stream.read()
         res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
         if res:
             first_re = res[0]
@@ -325,9 +321,6 @@ class CursesUi:
         self.selection_index = None
         while True:
             self.render(screen)
-            if not self.data.is_read:
-                self.data.read()
-                continue
             c = screen.get_wch()
             if c == CTRL_W:
                 self.pattern.remove_last_word()
@@ -520,7 +513,6 @@ if __name__ == "__main__":
         ESCAPE_MULTI_MATCH = b"\033[43m"
         ESCAPE_RESET = b"\033[0m"
         data = Data.from_stdin()
-        data.read()
         for match in data.yield_lines_matching(Pattern.from_argv()):
             for chunk in match.yield_chunks():
                 if chunk.is_match():

2025-10-13 20:10 Rickard pushed to rlselect2

commit 7b9ee66243dfbffd1eeeed0bf81599aa9a4d8df3
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 19:40:20 2025 +0200

    Rely on variable instead of len

diff --git a/rlselect2.py b/rlselect2.py
index 0f4e920..77769b6 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -106,14 +106,15 @@ class Data:
         else:
             first_re = re.compile(b"")
         line_start = 0
-        while line_start < len(self.data):
+        data_end = len(self.data)
+        while line_start < data_end:
             match = first_re.search(self.data, pos=line_start)
             if match:
                 initial_match = match.start()
                 line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
                 line_end = self.data.find(NEWLINE, initial_match)
                 if line_end == -1:
-                    line_end = len(self.data)
+                    line_end = data_end
                 regions = []
                 for compiled_re in res:
                     matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
@@ -125,7 +126,7 @@ class Data:
                     yield Match(Region(line_start, line_end), regions)
                 line_start = line_end + len(NEWLINE)
             else:
-                line_start = len(self.data)
+                line_start = data_end
 
 class Match:
 

commit d25f0aef2e1d66ce72e4e488a8be64c003f729e7
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 19:37:51 2025 +0200

    Replace break with more intuitive code

diff --git a/rlselect2.py b/rlselect2.py
index 081075f..0f4e920 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -125,7 +125,7 @@ class Data:
                     yield Match(Region(line_start, line_end), regions)
                 line_start = line_end + len(NEWLINE)
             else:
-                break
+                line_start = len(self.data)
 
 class Match:
 

commit 66672de43ba5b4d2dd059f2f310c7e8ebc3ed6f2
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 19:29:00 2025 +0200

    Simplify search by always having a first_re

diff --git a/rlselect2.py b/rlselect2.py
index 2002402..081075f 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -95,21 +95,21 @@ class Data:
 
         >>> Data.test_matches(b"x TimelineCanvas", b"time")
         [b'x ', b'Time', b'lineCanvas']
+
+        No match:
+
+        >>> Data.test_matches(b"hello there", b"RL")
         """
         res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
+        if res:
+            first_re = res[0]
+        else:
+            first_re = re.compile(b"")
         line_start = 0
         while line_start < len(self.data):
-            if res:
-                match = res[0].search(self.data, pos=line_start)
-                if match:
-                    initial_match = match.start()
-                else:
-                    initial_match = None
-            else:
-                initial_match = line_start
-            if initial_match is None:
-                break
-            else:
+            match = first_re.search(self.data, pos=line_start)
+            if match:
+                initial_match = match.start()
                 line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
                 line_end = self.data.find(NEWLINE, initial_match)
                 if line_end == -1:
@@ -124,6 +124,8 @@ class Data:
                 else:
                     yield Match(Region(line_start, line_end), regions)
                 line_start = line_end + len(NEWLINE)
+            else:
+                break
 
 class Match:
 

commit 789a704e9ac4754804d8f000614d472d5807d8a1
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 19:21:36 2025 +0200

    More idiomatic get of start

diff --git a/rlselect2.py b/rlselect2.py
index 039c585..2002402 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -102,7 +102,7 @@ class Data:
             if res:
                 match = res[0].search(self.data, pos=line_start)
                 if match:
-                    initial_match = match.span()[0]
+                    initial_match = match.start()
                 else:
                     initial_match = None
             else:

commit 55e467f24da08bd70974a877641b562fc0e76ee0
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 19:10:47 2025 +0200

    Get rid of incorrect percent

diff --git a/rlselect2.py b/rlselect2.py
index f0a4741..039c585 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -98,7 +98,6 @@ class Data:
         """
         res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
         line_start = 0
-        self.percent = 0
         while line_start < len(self.data):
             if res:
                 match = res[0].search(self.data, pos=line_start)
@@ -125,7 +124,6 @@ class Data:
                 else:
                     yield Match(Region(line_start, line_end), regions)
                 line_start = line_end + len(NEWLINE)
-                self.percent = int(100*(line_start/len(self.data)))
 
 class Match:
 
@@ -413,7 +411,7 @@ class CursesUi:
             y += 1
         t2 = time.perf_counter()
         elapsed = int((t2-t1)*1000)
-        screen.addstr(1, 0, f"searched {self.data.percent}% of input in {elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
+        screen.addstr(1, 0, f"completed search in {elapsed}ms ".rjust(max_x)[:max_x], curses.A_REVERSE)
         attrs = curses.A_REVERSE | curses.A_BOLD
         if curses.has_colors():
             attrs |= curses.color_pair(1)

commit 059fe2ce13e793353eb6a634c07501122d6e486c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Mon Oct 13 07:35:30 2025 +0200

    Start UI before reading stdin for faster perceived startup time

diff --git a/rlselect2.py b/rlselect2.py
index 082d5da..f0a4741 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -30,17 +30,28 @@ class Data:
 
     @classmethod
     def from_stdin(cls):
-        return Data(sys.stdin.buffer)
+        return cls.from_stream(sys.stdin.buffer)
+
+    @classmethod
+    def from_stream(cls, stream):
+        return cls(stream)
 
     def __init__(self, stream):
-        self.data = stream.read()
+        self.stream = stream
+        self.data = b"loading..."
+        self.is_read = False
+
+    def read(self):
+        self.data = self.stream.read()
+        self.is_read = True
 
     def get_bytes(self, start, end):
         return self.data[start:end]
 
     @staticmethod
     def test_matches(data, pattern):
-        data = Data(io.BytesIO(data))
+        data = Data.from_stream(io.BytesIO(data))
+        data.read()
         for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
             print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
 
@@ -277,17 +288,22 @@ class Pattern:
 class CursesUi:
 
     def run(self):
-        self.data = Data.from_stdin()
-        with self.redirect_terminal():
-            action, fail, selection = curses.wrapper(self.run_wrapper)
+        original_stdin = os.fdopen(os.dup(sys.stdin.fileno()), "rb")
+        original_stdout = os.fdopen(os.dup(sys.stdout.fileno()), "wb")
+        self.data = Data.from_stream(original_stdin)
+        terminal_stdin = open("/dev/tty", "rb")
+        terminal_stdout = open("/dev/tty", "wb")
+        os.dup2(terminal_stdin.fileno(), sys.stdin.fileno())
+        os.dup2(terminal_stdout.fileno(), sys.stdout.fileno())
+        action, fail, selection = curses.wrapper(self.run_wrapper)
         if "--action" in sys.argv[1:]:
-            sys.stdout.buffer.write(action)
-            sys.stdout.buffer.write(NEWLINE)
+            original_stdout.write(action)
+            original_stdout.write(NEWLINE)
         if fail:
             sys.exit(1)
         else:
-            sys.stdout.buffer.write(selection)
-            sys.stdout.buffer.write(NEWLINE)
+            original_stdout.write(selection)
+            original_stdout.write(NEWLINE)
 
     def run_wrapper(self, screen):
         CTRL_N = u"\u000E"
@@ -308,6 +324,9 @@ class CursesUi:
         self.selection_index = None
         while True:
             self.render(screen)
+            if not self.data.is_read:
+                self.data.read()
+                continue
             c = screen.get_wch()
             if c == CTRL_W:
                 self.pattern.remove_last_word()
@@ -401,22 +420,6 @@ class CursesUi:
         screen.addstr(0, 0, self.pattern.get_text(), attrs)
         screen.refresh()
 
-    @contextlib.contextmanager
-    def redirect_terminal(self):
-        stdin_fileno = sys.stdin.fileno()
-        stdout_fileno = sys.stdout.fileno()
-        process_stdin = os.dup(sys.stdin.fileno())
-        process_stdout = os.dup(sys.stdout.fileno())
-        try:
-            terminal_stdin = open("/dev/tty", "rb")
-            terminal_stdout = open("/dev/tty", "wb")
-            os.dup2(terminal_stdin.fileno(), stdin_fileno)
-            os.dup2(terminal_stdout.fileno(), stdout_fileno)
-            yield
-        finally:
-            os.dup2(process_stdin, stdin_fileno)
-            os.dup2(process_stdout, stdout_fileno)
-
 class FilterCommand:
 
     """
@@ -516,6 +519,7 @@ if __name__ == "__main__":
         ESCAPE_MULTI_MATCH = b"\033[43m"
         ESCAPE_RESET = b"\033[0m"
         data = Data.from_stdin()
+        data.read()
         for match in data.yield_lines_matching(Pattern.from_argv()):
             for chunk in match.yield_chunks():
                 if chunk.is_match():

2025-10-12 09:11 Rickard pushed to rlselect2

commit 467bac5c9f78b8c8149b4d5035838c251b9fecd6
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Sun Oct 12 09:11:08 2025 +0200

    Do complete processing experiment

diff --git a/experiments/pyperftest.py b/experiments/pyperftest.py
index 2f545f3..8b23b92 100755
--- a/experiments/pyperftest.py
+++ b/experiments/pyperftest.py
@@ -26,6 +26,12 @@ with timeit("load stdin"):
     buffer = sys.stdin.buffer.read()
     print(f"buffer size = {len(buffer):,}")
 
+with timeit("complete processing"):
+    lines = buffer.split(b"\n")
+    unique_dict = dict.fromkeys(lines)
+    unique = b"\n".join(unique_dict)
+    print(f"unique size = {len(unique):,}")
+
 #chunk_size = 2**12
 #with timeit(f"load incrementally {chunk_size}"):
 #    buffer = bytearray()

commit 1bf7caa4de156d2fda3ec7119db534da98584d71
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Sun Oct 12 08:50:18 2025 +0200

    Do another load timing experiment

diff --git a/experiments/pyperftest.py b/experiments/pyperftest.py
index 2cb41f4..2f545f3 100755
--- a/experiments/pyperftest.py
+++ b/experiments/pyperftest.py
@@ -26,6 +26,18 @@ with timeit("load stdin"):
     buffer = sys.stdin.buffer.read()
     print(f"buffer size = {len(buffer):,}")
 
+#chunk_size = 2**12
+#with timeit(f"load incrementally {chunk_size}"):
+#    buffer = bytearray()
+#    while True:
+#        chunk = sys.stdin.buffer.read(chunk_size)
+#        if chunk:
+#            buffer += chunk
+#            pass
+#        else:
+#            break
+#    print(f"buffer size = {len(buffer):,}")
+
 header("Find")
 
 with timeit("find"):

commit 8edf5e1b100f336046e33d4ebb880575f58e3381
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Sat Oct 11 22:51:44 2025 +0200

    Change structure of search code

diff --git a/rlselect2.py b/rlselect2.py
index daebded..082d5da 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -91,26 +91,30 @@ class Data:
         while line_start < len(self.data):
             if res:
                 match = res[0].search(self.data, pos=line_start)
-                if not match:
-                    break
-                initial_match = match.span()[0]
+                if match:
+                    initial_match = match.span()[0]
+                else:
+                    initial_match = None
             else:
                 initial_match = line_start
-            line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
-            line_end = self.data.find(NEWLINE, initial_match)
-            if line_end == -1:
-                line_end = len(self.data)
-            regions = []
-            for compiled_re in res:
-                matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
-                if not matches:
-                    break
-                else:
-                    regions.extend([Region(*x.span()) for x in matches])
+            if initial_match is None:
+                break
             else:
-                yield Match(Region(line_start, line_end), regions)
-            line_start = line_end + len(NEWLINE)
-            self.percent = int(100*(line_start/len(self.data)))
+                line_start = self.data.rfind(NEWLINE, 0, initial_match) + 1
+                line_end = self.data.find(NEWLINE, initial_match)
+                if line_end == -1:
+                    line_end = len(self.data)
+                regions = []
+                for compiled_re in res:
+                    matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
+                    if not matches:
+                        break
+                    else:
+                        regions.extend([Region(*x.span()) for x in matches])
+                else:
+                    yield Match(Region(line_start, line_end), regions)
+                line_start = line_end + len(NEWLINE)
+                self.percent = int(100*(line_start/len(self.data)))
 
 class Match:
 

commit eb2953ac7aad79221c349fffddf8de4595c9f105
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Sat Oct 11 21:22:52 2025 +0200

    Data takes a stream

diff --git a/rlselect2.py b/rlselect2.py
index db4fd30..daebded 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -17,6 +17,7 @@ unnecessary work.
 
 from collections import defaultdict
 import contextlib
+import io
 import os
 import re
 import subprocess
@@ -29,17 +30,17 @@ class Data:
 
     @classmethod
     def from_stdin(cls):
-        return Data(sys.stdin.buffer.read())
+        return Data(sys.stdin.buffer)
 
-    def __init__(self, data):
-        self.data = data
+    def __init__(self, stream):
+        self.data = stream.read()
 
     def get_bytes(self, start, end):
         return self.data[start:end]
 
     @staticmethod
     def test_matches(data, pattern):
-        data = Data(data)
+        data = Data(io.BytesIO(data))
         for match in data.yield_lines_matching(Pattern.from_text(pattern.decode("ascii"), encoding="ascii")):
             print([chunk.get_bytes(data) for chunk in match.yield_chunks()])
 
@@ -415,14 +416,12 @@ class CursesUi:
 class FilterCommand:
 
     """
-    >>> import io
     >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
     >>> o = io.BytesIO()
     >>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
     >>> o.getvalue()
     b'one\\ntwo\\nthree\\n'
 
-    >>> import io
     >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree\\n")
     >>> o = io.BytesIO()
     >>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
@@ -461,7 +460,6 @@ class FilterCommand:
 class ReverseFilterCommand:
 
     """
-    >>> import io
     >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
     >>> o = io.BytesIO()
     >>> ReverseFilterCommand(in_stream=i, out_stream=o).run()

2025-10-11 08:48 Rickard pushed to rlselect2

commit af9d2fafe670d6712a390cd15228d53ba45156fd
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Fri Oct 10 18:22:42 2025 +0200

    Extract Filter and ReverseFilter

diff --git a/rlselect2.py b/rlselect2.py
index 5968018..db4fd30 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -412,6 +412,77 @@ class CursesUi:
             os.dup2(process_stdin, stdin_fileno)
             os.dup2(process_stdout, stdout_fileno)
 
+class FilterCommand:
+
+    """
+    >>> import io
+    >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
+    >>> o = io.BytesIO()
+    >>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
+    >>> o.getvalue()
+    b'one\\ntwo\\nthree\\n'
+
+    >>> import io
+    >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree\\n")
+    >>> o = io.BytesIO()
+    >>> FilterCommand(in_stream=i, out_stream=o, buffer_size=1).run()
+    >>> o.getvalue()
+    b'one\\ntwo\\nthree\\n'
+    """
+
+    def __init__(self, in_stream, out_stream, buffer_size):
+        self.in_stream = in_stream
+        self.out_stream = out_stream
+        self.buffer_size = buffer_size
+
+    def run(self):
+        buffer = bytearray(self.buffer_size)
+        tail = b""
+        seen = set()
+        while True:
+            bytes_read = self.in_stream.readinto(buffer)
+            if bytes_read > 0:
+                splits = (tail+buffer[0:bytes_read]).split(NEWLINE)
+                tail = splits.pop(-1)
+                unique = []
+                for line in splits:
+                    if line not in seen:
+                        seen.add(line)
+                        unique.append(line)
+                if unique:
+                    self.out_stream.write(NEWLINE.join(unique))
+                    self.out_stream.write(NEWLINE)
+            else:
+                if tail:
+                    self.out_stream.write(tail)
+                    self.out_stream.write(NEWLINE)
+                break
+
+class ReverseFilterCommand:
+
+    """
+    >>> import io
+    >>> i = io.BytesIO(b"one\\ntwo\\ntwo\\nthree")
+    >>> o = io.BytesIO()
+    >>> ReverseFilterCommand(in_stream=i, out_stream=o).run()
+    >>> o.getvalue()
+    b'three\\ntwo\\none\\n'
+    """
+
+    def __init__(self, in_stream, out_stream):
+        self.in_stream = in_stream
+        self.out_stream = out_stream
+
+    def run(self):
+        buffer = self.in_stream.read()
+        lines = buffer.split(NEWLINE)
+        seen = set()
+        for line in lines[::-1]:
+            if line not in seen:
+                seen.add(line)
+                self.out_stream.write(line)
+                self.out_stream.write(NEWLINE)
+
 if __name__ == "__main__":
     if sys.argv[1:] == ["--selftest"]:
         import doctest
@@ -425,11 +496,16 @@ if __name__ == "__main__":
         if not unittest.TextTestRunner().run(suite).wasSuccessful():
             sys.exit(1)
     elif sys.argv[1:] == ["--filter"]:
-        buffer = sys.stdin.buffer.read()
-        lines = buffer.split(NEWLINE)
-        unique_lines = list(dict.fromkeys(lines))
-        output = NEWLINE.join(unique_lines)
-        sys.stdout.buffer.write(output)
+        FilterCommand(
+            in_stream=sys.stdin.buffer,
+            out_stream=sys.stdout.buffer,
+            buffer_size=2**12,
+        ).run()
+    elif sys.argv[1:] == ["--reverse-filter"]:
+        ReverseFilterCommand(
+            in_stream=sys.stdin.buffer,
+            out_stream=sys.stdout.buffer,
+        ).run()
     elif sys.argv[1:] == ["--curses"]:
         import curses
         CursesUi().run()

commit f0d7e71f60f34a303ce683fe8f85230e9ff0c19e
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Thu Oct 9 21:53:53 2025 +0200

    Add experiments/consumeincrementally.py

diff --git a/experiments/consumeincrementally.py b/experiments/consumeincrementally.py
new file mode 100755
index 0000000..67035e2
--- /dev/null
+++ b/experiments/consumeincrementally.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import sys
+
+buffer = bytearray(10)
+print(buffer)
+
+while True:
+    bytes_read = sys.stdin.buffer.readinto(buffer)
+    if bytes_read > 0:
+        print("")
+        print("read", bytes_read)
+        print(buffer)
+        print(buffer[0:bytes_read])
+    else:
+        break

2025-10-08 22:45 Rickard pushed to rlselect2

commit feea9d09d121398636dd13e820984f9cd2d5a6e0
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 22:45:51 2025 +0200

    Fix --grep parameters

diff --git a/rlselect2.py b/rlselect2.py
index c2782c7..5968018 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -205,7 +205,7 @@ class Pattern:
     @classmethod
     def from_argv(cls):
         return cls.from_text(
-            " ".join(sys.argv[1:]),
+            " ".join(sys.argv[2:]),
             encoding=sys.getfilesystemencoding()
         )
 
@@ -433,7 +433,7 @@ if __name__ == "__main__":
     elif sys.argv[1:] == ["--curses"]:
         import curses
         CursesUi().run()
-    elif sys.argv[1:] == ["--grep"]:
+    elif sys.argv[1:2] == ["--grep"]:
         ESCAPE_MATCH = b"\033[30;43m"
         ESCAPE_MULTI_MATCH = b"\033[43m"
         ESCAPE_RESET = b"\033[0m"

2025-10-08 21:58 Rickard pushed to rlselect2

commit 37ed8adb7016e4125f2daf94cfe68911c23fbfc6
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:52:57 2025 +0200

    Fix tests after behavior change and fix one test failure which was a bug

diff --git a/rlselect2.py b/rlselect2.py
index ad48cc0..c2782c7 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -67,11 +67,16 @@ class Data:
 
         >>> Data.test_matches(b"foo\\n"*5, b"")
         [b'foo']
+        [b'foo']
+        [b'foo']
+        [b'foo']
+        [b'foo']
 
         Empty lines:
 
         >>> Data.test_matches(b"foo\\n\\nbar", b"")
         [b'foo']
+        []
         [b'bar']
 
         Mixed search test:
@@ -100,7 +105,7 @@ class Data:
                 if not matches:
                     break
                 else:
-                    regions.extend([Region(*(y+line_start for y in x.span())) for x in matches])
+                    regions.extend([Region(*x.span()) for x in matches])
             else:
                 yield Match(Region(line_start, line_end), regions)
             line_start = line_end + len(NEWLINE)

commit 4e7357861e71286ef494f8e5b6a8886b9d19f755
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:49:45 2025 +0200

    Skip duplicate handling in ui (it is done in --filter pre-processor)

diff --git a/rlselect2.py b/rlselect2.py
index d9ff350..ad48cc0 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -33,10 +33,6 @@ class Data:
 
     def __init__(self, data):
         self.data = data
-        self.duplicates = set()
-
-    def find(self, term, start, end):
-        return self.data.find(term, start, end)
 
     def get_bytes(self, start, end):
         return self.data[start:end]
@@ -84,7 +80,6 @@ class Data:
         [b'x ', b'Time', b'lineCanvas']
         """
         res = [re.compile(re_pattern) for re_pattern in pattern.to_res()]
-        seen = set()
         line_start = 0
         self.percent = 0
         while line_start < len(self.data):
@@ -99,21 +94,15 @@ class Data:
             line_end = self.data.find(NEWLINE, initial_match)
             if line_end == -1:
                 line_end = len(self.data)
-            if line_end > line_start and line_start not in self.duplicates:
-                line = self.data[line_start:line_end]
-                if line in seen:
-                    self.duplicates.add(line_start)
+            regions = []
+            for compiled_re in res:
+                matches = list(compiled_re.finditer(self.data, pos=line_start, endpos=line_end))
+                if not matches:
+                    break
                 else:
-                    seen.add(line)
-                    regions = []
-                    for compiled_re in res:
-                        matches = list(compiled_re.finditer(line))
-                        if not matches:
-                            break
-                        else:
-                            regions.extend([Region(*(y+line_start for y in x.span())) for x in matches])
-                    else:
-                        yield Match(Region(line_start, line_end), regions)
+                    regions.extend([Region(*(y+line_start for y in x.span())) for x in matches])
+            else:
+                yield Match(Region(line_start, line_end), regions)
             line_start = line_end + len(NEWLINE)
             self.percent = int(100*(line_start/len(self.data)))
 

commit 8ac9bbc3f26553bb7fe959d9cdf7f0666b31276c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:39:09 2025 +0200

    Use pipe mechanism to handle filtering

diff --git a/rlselect2.py b/rlselect2.py
index 7276353..d9ff350 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -19,6 +19,7 @@ from collections import defaultdict
 import contextlib
 import os
 import re
+import subprocess
 import sys
 import time
 
@@ -438,7 +439,7 @@ if __name__ == "__main__":
     elif sys.argv[1:] == ["--curses"]:
         import curses
         CursesUi().run()
-    else:
+    elif sys.argv[1:] == ["--grep"]:
         ESCAPE_MATCH = b"\033[30;43m"
         ESCAPE_MULTI_MATCH = b"\033[43m"
         ESCAPE_RESET = b"\033[0m"
@@ -454,3 +455,14 @@ if __name__ == "__main__":
                 if chunk.is_match():
                     sys.stdout.buffer.write(ESCAPE_RESET)
             sys.stdout.buffer.write(NEWLINE)
+    else:
+        filter_process = subprocess.Popen(
+            [sys.executable, sys.argv[0], "--filter"],
+            stdout=subprocess.PIPE
+        )
+        ui_process = subprocess.Popen(
+            [sys.executable, sys.argv[0], "--curses"],
+            stdin=filter_process.stdout
+        )
+        filter_process.stdout.close()
+        ui_process.communicate()

2025-10-08 21:34 Rickard pushed to rlselect2

commit a339eaed74da7f090dc75eb566a3560d7d12f615
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:34:32 2025 +0200

    Use constant

diff --git a/rlselect2.py b/rlselect2.py
index 58bf111..7276353 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -431,9 +431,9 @@ if __name__ == "__main__":
             sys.exit(1)
     elif sys.argv[1:] == ["--filter"]:
         buffer = sys.stdin.buffer.read()
-        lines = buffer.split(b"\n")
+        lines = buffer.split(NEWLINE)
         unique_lines = list(dict.fromkeys(lines))
-        output = b"\n".join(unique_lines)
+        output = NEWLINE.join(unique_lines)
         sys.stdout.buffer.write(output)
     elif sys.argv[1:] == ["--curses"]:
         import curses

commit 75cc7903763369fbe717a14bb5e721b672760656
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:33:27 2025 +0200

    Add filter program

diff --git a/rlselect2.py b/rlselect2.py
index da1f27f..58bf111 100755
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -429,6 +429,12 @@ if __name__ == "__main__":
         ))
         if not unittest.TextTestRunner().run(suite).wasSuccessful():
             sys.exit(1)
+    elif sys.argv[1:] == ["--filter"]:
+        buffer = sys.stdin.buffer.read()
+        lines = buffer.split(b"\n")
+        unique_lines = list(dict.fromkeys(lines))
+        output = b"\n".join(unique_lines)
+        sys.stdout.buffer.write(output)
     elif sys.argv[1:] == ["--curses"]:
         import curses
         CursesUi().run()

2025-10-08 21:26 Rickard pushed to rlselect2

commit f1d0cf2814f0ef71347113776fa0dc2167e1bbc5
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:20:22 2025 +0200

    Rename

diff --git a/resources/consumetest.py b/experiments/consumetest.py
similarity index 100%
rename from resources/consumetest.py
rename to experiments/consumetest.py
diff --git a/resources/producetest.py b/experiments/producetest.py
similarity index 100%
rename from resources/producetest.py
rename to experiments/producetest.py
diff --git a/resources/pyperftest.py b/experiments/pyperftest.py
similarity index 100%
rename from resources/pyperftest.py
rename to experiments/pyperftest.py

commit bdc8948e4712bb4a63d1b6d5e79e788416b91636
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:19:39 2025 +0200

    Make executable

diff --git a/rlselect2.py b/rlselect2.py
old mode 100644
new mode 100755
index 677b6ea..da1f27f
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 """
 Inspired by Casey Muratori.
 

commit 99bfe0bc57b2193975a93d42d92fa9c37ad5fc19
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:18:44 2025 +0200

    Remove no longer used perf script

diff --git a/perf-summary.sh b/perf-summary.sh
deleted file mode 100755
index d5c8ec7..0000000
--- a/perf-summary.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-
-./perf.sh "$@" 2>&1 | grep ^real

commit 4a150a95b26d78a6597e75d66e7557c01360af98
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:17:03 2025 +0200

    Add example inputs

diff --git a/.gitignore b/.gitignore
index bee8a64..5a78843 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 __pycache__
+/inputs/1gb.bin
diff --git a/inputs/latin1.txt b/inputs/latin1.txt
new file mode 100644
index 0000000..c4fb1f6
--- /dev/null
+++ b/inputs/latin1.txt
@@ -0,0 +1 @@
+Jag är latin1!
diff --git a/inputs/utf8.txt b/inputs/utf8.txt
new file mode 100644
index 0000000..f72d74b
--- /dev/null
+++ b/inputs/utf8.txt
@@ -0,0 +1,4 @@
+Jag är ÄR utf-8!
+
+emojis: 😃 🤣 😈  end
+emojis:-cc-xx-xx- end

commit d444374d1a4e35cc6d88db3ff6d10ada3433194c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:15:12 2025 +0200

    Add tool

diff --git a/tools/print1gb.py b/tools/print1gb.py
new file mode 100755
index 0000000..5ff7f37
--- /dev/null
+++ b/tools/print1gb.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import random
+import sys
+
+left = 1024*1024*1024
+
+while left:
+    buffer = b""
+    for x in range(random.randrange(min(left, 256))):
+        while True:
+            y = random.randrange(13, 256).to_bytes()
+            if y != b"\n":
+                break
+        buffer += y
+    left -= len(buffer)
+    sys.stdout.buffer.write(buffer)
+    left -= 1
+    sys.stdout.buffer.write(b"\n")

commit 04c3d179d9011a7a99dfbe44bef7c3ac83008441
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:14:06 2025 +0200

    Move pyperftest.py

diff --git a/pyperftest.py b/resources/pyperftest.py
similarity index 100%
rename from pyperftest.py
rename to resources/pyperftest.py

commit 8da2aac385630c366b1f0108144ef1ffec4d7264
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:09:35 2025 +0200

    Add produce test

diff --git a/consumetest.py b/resources/consumetest.py
similarity index 83%
rename from consumetest.py
rename to resources/consumetest.py
index 081f078..49311e0 100755
--- a/consumetest.py
+++ b/resources/consumetest.py
@@ -11,8 +11,11 @@ https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigaby
 import sys
 import time
 
+lines = []
 for index, line in enumerate(sys.stdin.buffer):
     print(line)
+    lines.append(line)
     if index > 2:
-        time.sleep(60)
+        time.sleep(5)
+        print(lines)
         sys.exit(0)
diff --git a/resources/producetest.py b/resources/producetest.py
new file mode 100755
index 0000000..f67882d
--- /dev/null
+++ b/resources/producetest.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+"""
+`foo | bar`
+    * What happens if bar doesn't consume all as quickly as possible?
+    * What happens if bar exits before consuming all?
+
+https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigabytes-of-data-to-a-pipe
+
+https://docs.python.org/3/library/signal.html#note-on-sigpipe
+"""
+
+import sys
+import os
+
+try:
+    for n in range(1000):
+        line = f"line {n}\n"
+        sys.stderr.write("log: "+line)
+        sys.stdout.write(line)
+        n += 1
+    sys.stdout.flush()
+except BrokenPipeError:
+    devnull = os.open(os.devnull, os.O_WRONLY)
+    os.dup2(devnull, sys.stdout.fileno())
+    sys.stderr.write("broken pipe\n")

2025-10-08 21:20 Rickard pushed to rlselect2

commit f1d0cf2814f0ef71347113776fa0dc2167e1bbc5
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:20:22 2025 +0200

    Rename

diff --git a/resources/consumetest.py b/experiments/consumetest.py
similarity index 100%
rename from resources/consumetest.py
rename to experiments/consumetest.py
diff --git a/resources/producetest.py b/experiments/producetest.py
similarity index 100%
rename from resources/producetest.py
rename to experiments/producetest.py
diff --git a/resources/pyperftest.py b/experiments/pyperftest.py
similarity index 100%
rename from resources/pyperftest.py
rename to experiments/pyperftest.py

commit bdc8948e4712bb4a63d1b6d5e79e788416b91636
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:19:39 2025 +0200

    Make executable

diff --git a/rlselect2.py b/rlselect2.py
old mode 100644
new mode 100755
index 677b6ea..da1f27f
--- a/rlselect2.py
+++ b/rlselect2.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 """
 Inspired by Casey Muratori.
 

commit 99bfe0bc57b2193975a93d42d92fa9c37ad5fc19
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:18:44 2025 +0200

    Remove no longer used perf script

diff --git a/perf-summary.sh b/perf-summary.sh
deleted file mode 100755
index d5c8ec7..0000000
--- a/perf-summary.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-
-./perf.sh "$@" 2>&1 | grep ^real

commit 4a150a95b26d78a6597e75d66e7557c01360af98
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:17:03 2025 +0200

    Add example inputs

diff --git a/.gitignore b/.gitignore
index bee8a64..5a78843 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 __pycache__
+/inputs/1gb.bin
diff --git a/inputs/latin1.txt b/inputs/latin1.txt
new file mode 100644
index 0000000..c4fb1f6
--- /dev/null
+++ b/inputs/latin1.txt
@@ -0,0 +1 @@
+Jag är latin1!
diff --git a/inputs/utf8.txt b/inputs/utf8.txt
new file mode 100644
index 0000000..f72d74b
--- /dev/null
+++ b/inputs/utf8.txt
@@ -0,0 +1,4 @@
+Jag är ÄR utf-8!
+
+emojis: 😃 🤣 😈  end
+emojis:-cc-xx-xx- end

commit d444374d1a4e35cc6d88db3ff6d10ada3433194c
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:15:12 2025 +0200

    Add tool

diff --git a/tools/print1gb.py b/tools/print1gb.py
new file mode 100755
index 0000000..5ff7f37
--- /dev/null
+++ b/tools/print1gb.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import random
+import sys
+
+left = 1024*1024*1024
+
+while left:
+    buffer = b""
+    for x in range(random.randrange(min(left, 256))):
+        while True:
+            y = random.randrange(13, 256).to_bytes()
+            if y != b"\n":
+                break
+        buffer += y
+    left -= len(buffer)
+    sys.stdout.buffer.write(buffer)
+    left -= 1
+    sys.stdout.buffer.write(b"\n")

commit 04c3d179d9011a7a99dfbe44bef7c3ac83008441
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:14:06 2025 +0200

    Move pyperftest.py

diff --git a/pyperftest.py b/resources/pyperftest.py
similarity index 100%
rename from pyperftest.py
rename to resources/pyperftest.py

commit 8da2aac385630c366b1f0108144ef1ffec4d7264
Author: Rickard Lindberg <rickard@rickardlindberg.me>
Date:   Wed Oct 8 21:09:35 2025 +0200

    Add produce test

diff --git a/consumetest.py b/resources/consumetest.py
similarity index 83%
rename from consumetest.py
rename to resources/consumetest.py
index 081f078..49311e0 100755
--- a/consumetest.py
+++ b/resources/consumetest.py
@@ -11,8 +11,11 @@ https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigaby
 import sys
 import time
 
+lines = []
 for index, line in enumerate(sys.stdin.buffer):
     print(line)
+    lines.append(line)
     if index > 2:
-        time.sleep(60)
+        time.sleep(5)
+        print(lines)
         sys.exit(0)
diff --git a/resources/producetest.py b/resources/producetest.py
new file mode 100755
index 0000000..f67882d
--- /dev/null
+++ b/resources/producetest.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+"""
+`foo | bar`
+    * What happens if bar doesn't consume all as quickly as possible?
+    * What happens if bar exits before consuming all?
+
+https://unix.stackexchange.com/questions/626975/what-happens-when-writing-gigabytes-of-data-to-a-pipe
+
+https://docs.python.org/3/library/signal.html#note-on-sigpipe
+"""
+
+import sys
+import os
+
+try:
+    for n in range(1000):
+        line = f"line {n}\n"
+        sys.stderr.write("log: "+line)
+        sys.stdout.write(line)
+        n += 1
+    sys.stdout.flush()
+except BrokenPipeError:
+    devnull = os.open(os.devnull, os.O_WRONLY)
+    os.dup2(devnull, sys.stdout.fileno())
+    sys.stderr.write("broken pipe\n")