The stock XCompose file which ships with most distros isn't exhaustive and uses a number of layout-specific keysyms intended to be easy to memorize (rather than globally accessible). To circumvent this problem we ship our own set of simplified compose definitions.
master
parent
013f1e55e1
commit
e24de53d56
21 changed files with 69495 additions and 671 deletions
@ -0,0 +1,2 @@ |
||||
data/* -diff |
||||
src/unicode.c -diff |
||||
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@ -1,55 +0,0 @@ |
||||
#!/usr/bin/env python3
"""Generate a C header of glyph aliases from the system XCompose table.

Every line of the en_US.UTF-8 Compose file of the form
``<keysym> <keysym> ... : "glyph"`` is turned into a keyd ``macro(...)``
expression, and the resulting glyph -> macro table is printed to stdout as
a C array of ``struct alias``.

Lines that are not compose definitions, and sequences that use a keysym
with no keyd equivalent, are skipped.
"""

import re
import sys
import subprocess

# Names reported by `keyd -l` map to themselves; the explicit entries below
# translate the X keysym names used by the Compose file into keyd names.
keys = subprocess.check_output(['keyd', '-l']).decode('utf8').split('\n')
keymap = {k: k for k in keys}
keymap.update({
    "quotedbl": "\"",
    "apostrophe": "'",
    "Multi_key": "compose",
    "exclam": "!",
    "question": "?",
    "asciicircum": "^",
})


def create_macro(keys):
    """Return the keyd macro expression for a sequence of X keysym names.

    Raises:
        KeyError: if any keysym in *keys* has no entry in ``keymap``.
    """
    return f"macro({' '.join(keymap[k] for k in keys)})"


# Compiled once; raw strings avoid the invalid-escape warning for `\s`.
COMPOSE_LINE = re.compile(r'(\s*<.*?>+)\s*:\s*"(.*?)".*')
KEYSYM = re.compile(r'<(.*?)>')

macros = {}
with open('/usr/share/X11/locale/en_US.UTF-8/Compose', encoding='utf-8') as compose:
    for line in compose:
        match = COMPOSE_LINE.match(line)
        if match is None:
            # Comment, include directive or blank line.
            continue

        sequence, glyph = match.groups()
        try:
            # A later definition for the same glyph replaces an earlier one.
            macros[glyph] = create_macro(KEYSYM.findall(sequence))
        except KeyError:
            # The sequence uses a keysym keyd has no name for.
            continue

print('''/* GENERATED BY "%s" DO NOT EDIT BY HAND */

#ifndef ALIASES_H
#define ALIASES_H
struct alias {
const char *name;
const char *def;
};

static struct alias aliases[] = {
''' % sys.argv[0])

for glyph, macro in macros.items():
    # Only the macro text is escaped; it may contain a literal '"' key.
    macro = macro.replace('"', '\\"')
    print('\t{"%s", "%s"},' % (glyph, macro))

print('''
};

const size_t nr_aliases = sizeof(aliases)/sizeof(aliases[0]);

#endif
''')
||||
@ -0,0 +1,55 @@ |
||||
#!/usr/bin/env python3
"""Generate data/keyd.compose and src/unicode.c from data/unicode.txt.

Every non-ASCII, UTF-8 encodable codepoint listed in the Unicode data file
is assigned its position in a lookup table.  The compose file maps
``<Linefeed>`` followed by that position, written as five decimal digits,
to the character itself; the C file contains the same table together with
a function that returns the position of a given codepoint.
"""

import sys

# Original source: https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt
UNICODE_DATA_PATH = 'data/unicode.txt'
COMPOSE_PATH = 'data/keyd.compose'
UNICODE_C_PATH = 'src/unicode.c'

# Indices are rendered with '%05d', so at most this many fit in 5 digits.
MAX_ENTRIES = 100000


def parse_codepoints(lines):
    """Return the usable codepoints found in *lines*, in input order.

    Each line is expected to start with a hexadecimal codepoint followed by
    ';'.  Lines that do not parse, codepoints that cannot be encoded as
    UTF-8 (surrogates, out-of-range values) and ASCII codepoints (< 128)
    are skipped.
    """
    codes = []
    for line in lines:
        try:
            code = int(line.split(';')[0], 16)
            # Ensure the character is encodable (surrogates are not).
            # UnicodeEncodeError is a subclass of ValueError.
            chr(code).encode('utf8')
        except (ValueError, OverflowError):
            continue

        if code >= 128:
            codes.append(code)
    return codes


def render_compose(codes):
    """Return the XCompose text mapping each table index to its character.

    Raises:
        ValueError: if there are too many codepoints for every index to fit
            in five decimal digits, which would make sequences ambiguous.
    """
    if len(codes) > MAX_ENTRIES:
        raise ValueError(
            f'{len(codes)} codepoints do not fit in 5 decimal digits')

    entries = []
    for n, code in enumerate(codes):
        digits = ' '.join(f'<{c}>' for c in '%05d' % n)
        entries.append(f'<Linefeed> {digits} : "{chr(code)}"\n')
    return ''.join(entries)


def render_unicode_c(codes, generator):
    """Return the C source holding the codepoint table and its lookup.

    *generator* is the script name recorded in the header comment.
    """
    # OPT: We could condense this and shave off lookup time by using an
    # offset table to capitalize on codepoint contiguity, but 35k is small
    # enough to warrant keeping the entire thing in memory.
    table = ','.join(map(str, codes))
    return '\n'.join([
        f'/* GENERATED BY {generator}, DO NOT MODIFY BY HAND. */',
        '',
        '#include <stdint.h>',
        '#include <stdlib.h>',
        '',
        f'uint32_t unicode_table[] = {{ {table} }};',
        '',
        'int lookup_xcompose_code(uint32_t codepoint) {',
        '\tsize_t i = 0;',
        '',
        '\tfor(i = 0; i < sizeof(unicode_table)/sizeof(unicode_table[0]); i++) {',
        '\t\tif (unicode_table[i] == codepoint)',
        '\t\t\treturn i;',
        '\t}',
        '',
        '\treturn -1;',
        '}',
        '',
    ])


def main():
    """Read the Unicode data file and write both generated files."""
    with open(UNICODE_DATA_PATH, encoding='utf-8') as src:
        codes = parse_codepoints(src)

    # Generate the compose file.  The encoding is explicit because every
    # entry contains a non-ASCII character.
    with open(COMPOSE_PATH, 'w', encoding='utf-8') as out:
        out.write(render_compose(codes))

    # Generate the corresponding src/unicode.c
    with open(UNICODE_C_PATH, 'w', encoding='utf-8') as out:
        out.write(render_unicode_c(codes, sys.argv[0]))


if __name__ == '__main__':
    main()
||||
File diff suppressed because one or more lines are too long
@ -0,0 +1,63 @@ |
||||
/*
 * keyd - A key remapping daemon.
 *
 * © 2019 Raheman Vaiya (see also: LICENSE).
 */
#ifndef UNICODE_H
#define UNICODE_H

/* Needed for uint32_t so that this header can be included on its own. */
#include <stdint.h>

/*
 * Overview
 *
 * Unicode input is accomplished using one of several 'input methods' or
 * 'IMEs'. The X input method (xim) is the name of the default input method
 * which ships with X, and currently seems to be the most widely supported one.
 * An emerging competitor called 'ibus' exists, but seems to be less
 * ubiquitous, a notable advantage being that it allows codepoints to be input
 * directly by their hex values (C-S-u).
 *
 * xim, by contrast, works by requiring the user to explicitly specify a
 * mapping for each codepoint of interest in an XCompose(5) file, which maps a
 * sequence of keysyms (usually beginning with a dedicated 'compose' key
 * (<Multi_key>)) into a valid utf8 output string.
 *
 * Unfortunately xim doesn't provide a mechanism by which arbitrary unicode
 * points can be input, so we have to construct an XCompose file containing
 * explicit mappings between each sequence and the desired utf8 output.
 *
 * ~/.XCompose Constraints:
 *
 * To compound matters, every program/toolkit seems to have its own XCompose
 * parser and consequently only supports a subset of the full spec. The
 * following real-world constraints have been arrived at empirically:
 *
 * 1. Each sequence should be less than 6 characters since some programs (e.g.
 *    chrome) seem to have a maximum sequence length.
 *
 * 2. No sequence should be a subset of another sequence since some programs
 *    don't handle this properly (e.g. kitty).
 *
 * 3. Sequences should confine themselves to keysyms available on all layouts
 *    (e.g. no a-f (hex)).
 *
 * Approach
 *
 * In order to satisfy the above constraints, we create an XCompose file
 * mapping each codepoint's index in a lookup table to the desired utf8
 * sequence. The use of a table index instead of the codepoint value ensures
 * all codepoints consist of a maximum of 5 decimal digits (since there are
 * <35k of them). Each index is left-padded with zeros to 5 characters to
 * avoid the subset issue.
 *
 * Finally, we use linefeed as our compose prefix so the user doesn't have to
 * faff about with XkbOptions. This technically introduces the possibility of a
 * conflict, but I haven't found any evidence which suggests that Linefeed is
 * anything other than a vestigial keypendage from a more glorious era.
 *
 * </end_verbiage>
 */

/*
 * Return the index of `codepoint` in the generated lookup table (the number
 * typed as five decimal digits after the linefeed prefix), or -1 if the
 * codepoint is not in the table.
 */
int lookup_xcompose_code(uint32_t codepoint);

#endif
||||
@ -0,0 +1,25 @@ |
||||
3 down |
||||
\ down |
||||
\ up |
||||
3 up |
||||
|
||||
control down |
||||
control up |
||||
1 down |
||||
1 up |
||||
linefeed down |
||||
linefeed up |
||||
3 down |
||||
3 up |
||||
2 down |
||||
2 up |
||||
3 down |
||||
3 up |
||||
3 down |
||||
3 up |
||||
4 down |
||||
4 up |
||||
2 down |
||||
2 up |
||||
control down |
||||
control up |
||||
@ -0,0 +1,15 @@ |
||||
\ down |
||||
\ up |
||||
|
||||
linefeed down |
||||
linefeed up |
||||
3 down |
||||
3 up |
||||
2 down |
||||
2 up |
||||
3 down |
||||
3 up |
||||
3 down |
||||
3 up |
||||
4 down |
||||
4 up |
||||
Loading…
Reference in new issue