unicode_linebreak/shared.rs
/// Line breaking class assigned to a Unicode code point.
///
/// The enum is fieldless and represented as a `u8`. The explicit discriminants
/// below spell out exactly the values that declaration order would assign
/// implicitly (0 through 42), so the numbering cannot drift silently if the
/// variants are ever reordered.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BreakClass {
    // ---- Non-tailorable classes ----
    /// `BK`: forces a line break after the character.
    Mandatory = 0,
    /// `CR`: forces a line break after the character, except between CR and LF.
    CarriageReturn = 1,
    /// `LF`: forces a line break after the character.
    LineFeed = 2,
    /// `CM`: no line break is permitted between this character and the one before it.
    CombiningMark = 3,
    /// `NL`: forces a line break after the character.
    NextLine = 4,
    /// `SG`: surrogates, which do not occur in well-formed text.
    Surrogate = 5,
    /// `WJ`: line breaks are prohibited both before and after.
    WordJoiner = 6,
    /// `ZW`: supplies a break opportunity.
    ZeroWidthSpace = 7,
    /// `GL`: line breaks are prohibited both before and after.
    NonBreakingGlue = 8,
    /// `SP`: enables indirect line breaks.
    Space = 9,
    /// `ZWJ`: line breaks are prohibited inside joiner sequences.
    ZeroWidthJoiner = 10,

    // ---- Break opportunities ----
    /// `B2`: supplies a break opportunity on both sides of the character.
    BeforeAndAfter = 11,
    /// `BA`: generally supplies a break opportunity after the character.
    After = 12,
    /// `BB`: generally supplies a break opportunity before the character.
    Before = 13,
    /// `HY`: supplies a break opportunity after the character, except in numeric context.
    Hyphen = 14,
    /// `CB`: supplies a break opportunity that is contingent on additional information.
    Contingent = 15,

    // ---- Characters prohibiting certain breaks ----
    /// `CL`: line breaks are prohibited before the character.
    ClosePunctuation = 16,
    /// `CP`: line breaks are prohibited before the character.
    CloseParenthesis = 17,
    /// `EX`: line breaks are prohibited before the character.
    Exclamation = 18,
    /// `IN`: only indirect line breaks are allowed between pairs.
    Inseparable = 19,
    /// `NS`: only indirect line breaks are allowed before the character.
    NonStarter = 20,
    /// `OP`: line breaks are prohibited after the character.
    OpenPunctuation = 21,
    /// `QU`: behaves as both an opening and a closing character.
    Quotation = 22,

    // ---- Numeric context ----
    /// `IS`: prevents breaks after any character and before numeric ones.
    InfixSeparator = 23,
    /// `NU`: forms numeric expressions for line breaking purposes.
    Numeric = 24,
    /// `PO`: no break when following a numeric expression.
    Postfix = 25,
    /// `PR`: no break in front of a numeric expression.
    Prefix = 26,
    /// `SY`: prevents a break before, and allows a break after.
    Symbol = 27,

    // ---- Other characters ----
    /// `AI`: acts like `AL` when the resolved EAW is N; otherwise acts as `ID`.
    Ambiguous = 28,
    /// `AL`: alphabetic characters, or symbols used together with alphabetic characters.
    Alphabetic = 29,
    /// `CJ`: treated as `NS` or `ID` for strict or normal breaking.
    ConditionalJapaneseStarter = 30,
    /// `EB`: no break from a following Emoji Modifier.
    EmojiBase = 31,
    /// `EM`: no break from a preceding Emoji Base.
    EmojiModifier = 32,
    /// `H2`: forms Korean syllable blocks.
    HangulLvSyllable = 33,
    /// `H3`: forms Korean syllable blocks.
    HangulLvtSyllable = 34,
    /// `HL`: no break around a following hyphen; otherwise acts as Alphabetic.
    HebrewLetter = 35,
    /// `ID`: breaks before or after, except in some numeric context.
    Ideographic = 36,
    /// `JL`: forms Korean syllable blocks.
    HangulLJamo = 37,
    /// `JV`: forms Korean syllable blocks.
    HangulVJamo = 38,
    /// `JT`: forms Korean syllable blocks.
    HangulTJamo = 39,
    /// `RI`: pairs are kept together; a pair breaks before and after other classes.
    RegionalIndicator = 40,
    /// `SA`: supplies a break opportunity contingent on additional,
    /// language-specific context analysis.
    ComplexContext = 41,
    /// `XX`: line breaking behavior is as yet unknown, or the code position is unassigned.
    Unknown = 42,
}
97
98use BreakClass::{
99 After as BA, Alphabetic as AL, Ambiguous as AI, Before as BB, BeforeAndAfter as B2,
100 CarriageReturn as CR, CloseParenthesis as CP, ClosePunctuation as CL, CombiningMark as CM,
101 ComplexContext as SA, ConditionalJapaneseStarter as CJ, Contingent as CB, EmojiBase as EB,
102 EmojiModifier as EM, Exclamation as EX, HangulLJamo as JL, HangulLvSyllable as H2,
103 HangulLvtSyllable as H3, HangulTJamo as JT, HangulVJamo as JV, HebrewLetter as HL,
104 Hyphen as HY, Ideographic as ID, InfixSeparator as IS, Inseparable as IN, LineFeed as LF,
105 Mandatory as BK, NextLine as NL, NonBreakingGlue as GL, NonStarter as NS, Numeric as NU,
106 OpenPunctuation as OP, Postfix as PO, Prefix as PR, Quotation as QU, RegionalIndicator as RI,
107 Space as SP, Surrogate as SG, Symbol as SY, Unknown as XX, WordJoiner as WJ,
108 ZeroWidthJoiner as ZWJ, ZeroWidthSpace as ZW,
109};
110
/// Exclusive upper bound for code points in the Basic Multilingual Plane (BMP),
/// i.e. `0x10000`.
const BMP_LIMIT: u32 = 1 << 16;

/// Shift size used when computing a BMP block start.
const BMP_SHIFT: u32 = 6;
/// Shift size used when computing an index-3 table offset.
const SHIFT_3: u32 = 4;
/// Shift size used when computing an index-2 table offset (five bits above `SHIFT_3`).
const SHIFT_2: u32 = SHIFT_3 + 5;
/// Shift size used when computing an index-1 table offset (five bits above `SHIFT_2`).
const SHIFT_1: u32 = SHIFT_2 + 5;

/// Length of a BMP data block: `2^BMP_SHIFT` = 64.
const BMP_DATA_BLOCK_LENGTH: u32 = 1_u32 << BMP_SHIFT;
/// Length of a small data block: `2^SHIFT_3` = 16.
const SMALL_DATA_BLOCK_LENGTH: u32 = 1_u32 << SHIFT_3;
/// Length of an index-3 block: `2^(SHIFT_2 - SHIFT_3)` = 32.
const INDEX_3_BLOCK_LENGTH: u32 = 1_u32 << (SHIFT_2 - SHIFT_3);
/// Length of an index-2 block: `2^(SHIFT_1 - SHIFT_2)` = 32.
const INDEX_2_BLOCK_LENGTH: u32 = 1_u32 << (SHIFT_1 - SHIFT_2);
127
/// Flag in bit 6 (`0x40`). Going by the name it marks a mandatory break; the code
/// that sets and tests it is not in this part of the file.
const MANDATORY_BREAK_BIT: u8 = 0b0100_0000;
/// Flag in bit 7 (`0x80`). Going by the name it marks an allowed break; the code
/// that sets and tests it is not in this part of the file.
const ALLOWED_BREAK_BIT: u8 = 0b1000_0000;

/// Pseudo-class value 44, the second value past the last `BreakClass`
/// discriminant (`Unknown` = 42).
// NOTE(review): presumably "start of text" as written in UAX #14; the lowercase
// name looks deliberate, hence the lint exemption. Confirm against the users of
// this constant.
#[allow(non_upper_case_globals)]
const sot: u8 = 44;
/// Pseudo-class value 43, the first value past the last `BreakClass`
/// discriminant (`Unknown` = 42).
// NOTE(review): presumably "end of text" as written in UAX #14; the lowercase
// name looks deliberate, hence the lint exemption. Confirm against the users of
// this constant.
#[allow(non_upper_case_globals)]
const eot: u8 = 43;