summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .cargo_vcs_info.json 5
-rw-r--r-- .github/workflows/ci.yml 31
-rw-r--r-- .gitignore 2
-rw-r--r-- Cargo.toml 29
-rw-r--r-- Cargo.toml.orig 19
-rw-r--r-- LICENSE-APACHE 201
-rw-r--r-- LICENSE-MIT 25
-rw-r--r-- bors.toml 3
-rw-r--r-- rustfmt.toml 1
-rw-r--r-- src/expr.rs 610
-rw-r--r-- src/lib.rs 149
-rw-r--r-- src/literal.rs 361
-rw-r--r-- src/token.rs 44
-rw-r--r-- tests/clang.rs 339
-rw-r--r-- tests/input/chars.h 3
-rw-r--r-- tests/input/fail.h 9
-rw-r--r-- tests/input/floats.h 8
-rw-r--r-- tests/input/int_signed.h 3
-rw-r--r-- tests/input/int_unsigned.h 29
-rw-r--r-- tests/input/strings.h 17
-rw-r--r-- tests/input/test_llvm_bug_9069.h 4
21 files changed, 1892 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..72d29e1
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+ "git": {
+ "sha1": "c7ccdfbc37b508cfda1171ab4f89afaeb72e82f3"
+ }
+}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..8af3b70
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,31 @@
+name: CI
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+
+jobs:
+  build_and_test:
+    name: Build and Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Install LLVM and Clang
+        uses: KyleMayes/install-llvm-action@v1
+        with:
+          version: "11.0"
+          directory: ${{ runner.temp }}/llvm-11.0
+
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+
+      - uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --verbose --all
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a9d37c5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+target
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..4956001
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,29 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "cexpr"
+version = "0.6.0"
+authors = ["Jethro Beekman <jethro@jbeekman.nl>"]
+description = "A C expression parser and evaluator"
+documentation = "https://docs.rs/cexpr/"
+keywords = ["C", "expression", "parser"]
+license = "Apache-2.0/MIT"
+repository = "https://github.com/jethrogb/rust-cexpr"
+[dependencies.nom]
+version = "7"
+features = ["std"]
+default-features = false
+[dev-dependencies.clang-sys]
+version = ">= 0.13.0, < 0.29.0"
+[badges.travis-ci]
+repository = "jethrogb/rust-cexpr"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..6de1e89
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,19 @@
+[package]
+name = "cexpr"
+version = "0.6.0"
+edition = "2018"
+authors = ["Jethro Beekman <jethro@jbeekman.nl>"]
+license = "Apache-2.0/MIT"
+description = "A C expression parser and evaluator"
+documentation = "https://docs.rs/cexpr/"
+repository = "https://github.com/jethrogb/rust-cexpr"
+keywords = ["C","expression","parser"]
+
+[badges]
+travis-ci = { repository = "jethrogb/rust-cexpr" }
+
+[dependencies]
+nom = { version = "7", default-features = false, features = ["std"] }
+
+[dev-dependencies]
+clang-sys = ">= 0.13.0, < 0.29.0"
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..16fe87b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..ed958e7
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,25 @@
+(C) Copyright 2016 Jethro G. Beekman
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/bors.toml b/bors.toml
new file mode 100644
index 0000000..ca08e81
--- /dev/null
+++ b/bors.toml
@@ -0,0 +1,3 @@
+status = [
+ "continuous-integration/travis-ci/push",
+]
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..32a9786
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1 @@
+edition = "2018"
diff --git a/src/expr.rs b/src/expr.rs
new file mode 100644
index 0000000..7f7e458
--- /dev/null
+++ b/src/expr.rs
@@ -0,0 +1,610 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Evaluating C expressions from tokens.
+//!
+//! Numerical operators are supported. All numerical values are treated as
+//! `i64` or `f64`. Type casting is not supported. `i64` are converted to
+//! `f64` when used in conjunction with a `f64`. Right shifts are always
+//! arithmetic shifts.
+//!
+//! The `sizeof` operator is not supported.
+//!
+//! String concatenation is supported, but width prefixes are ignored; all
+//! strings are treated as narrow strings.
+//!
+//! Use the `IdentifierParser` to substitute identifiers found in expressions.
+
+use std::collections::HashMap;
+use std::num::Wrapping;
+use std::ops::{
+ AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign,
+ ShrAssign, SubAssign,
+};
+
+use crate::literal::{self, CChar};
+use crate::token::{Kind as TokenKind, Token};
+use crate::ToCexprResult;
+use nom::branch::alt;
+use nom::combinator::{complete, map, map_opt};
+use nom::multi::{fold_many0, many0, separated_list0};
+use nom::sequence::{delimited, pair, preceded};
+use nom::*;
+
+/// Expression parser/evaluator that supports identifiers.
+#[derive(Debug)]
+pub struct IdentifierParser<'ident> {
+ identifiers: &'ident HashMap<Vec<u8>, EvalResult>,
+}
+#[derive(Copy, Clone)]
+struct PRef<'a>(&'a IdentifierParser<'a>);
+
+/// A shorthand for the type of cexpr expression evaluation results.
+pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>;
+
+/// The result of parsing a literal or evaluating an expression.
+#[derive(Debug, Clone, PartialEq)]
+#[allow(missing_docs)]
+pub enum EvalResult {
+ Int(Wrapping<i64>),
+ Float(f64),
+ Char(CChar),
+ Str(Vec<u8>),
+ Invalid,
+}
+
+macro_rules! result_opt (
+ (fn $n:ident: $e:ident -> $t:ty) => (
+ #[allow(dead_code)]
+ #[allow(clippy::wrong_self_convention)]
+ fn $n(self) -> Option<$t> {
+ if let EvalResult::$e(v) = self {
+ Some(v)
+ } else {
+ None
+ }
+ }
+ );
+);
+
+impl EvalResult {
+ result_opt!(fn as_int: Int -> Wrapping<i64>);
+ result_opt!(fn as_float: Float -> f64);
+ result_opt!(fn as_char: Char -> CChar);
+ result_opt!(fn as_str: Str -> Vec<u8>);
+
+ #[allow(clippy::wrong_self_convention)]
+ fn as_numeric(self) -> Option<EvalResult> {
+ match self {
+ EvalResult::Int(_) | EvalResult::Float(_) => Some(self),
+ _ => None,
+ }
+ }
+}
+
+impl From<Vec<u8>> for EvalResult {
+ fn from(s: Vec<u8>) -> EvalResult {
+ EvalResult::Str(s)
+ }
+}
+
+// ===========================================
+// ============= Clang tokens ================
+// ===========================================
+
+macro_rules! exact_token (
+ ($k:ident, $c:expr) => ({
+ move |input: &[Token]| {
+ if input.is_empty() {
+ let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::new($c.len())));
+ res
+ } else {
+ if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c {
+ Ok((&input[1..], &input[0].raw[..]))
+ } else {
+ Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into()))
+ }
+ }
+ }
+ });
+);
+
+fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> {
+    // Consume exactly one `Identifier` token and yield its raw bytes together
+    // with the remaining tokens. Empty input reports that one more token is
+    // needed; a token of any other kind is a `TypedToken` error.
+    match input.split_first() {
+        None => Err(nom::Err::Incomplete(Needed::new(1))),
+        Some((tok, rest)) if tok.kind == TokenKind::Identifier => Ok((rest, &tok.raw[..])),
+        Some(_) => Err(crate::nom::Err::Error(
+            (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
+        )),
+    }
+}
+
+fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
+ exact_token!(Punctuation, c.as_bytes())
+}
+
+fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
+ move |input| {
+ if input.is_empty() {
+ let min = c
+ .iter()
+ .map(|opt| opt.len())
+ .min()
+ .expect("at least one option");
+ Err(crate::nom::Err::Incomplete(Needed::new(min)))
+ } else if input[0].kind == TokenKind::Punctuation
+ && c.iter().any(|opt| opt.as_bytes() == &input[0].raw[..])
+ {
+ Ok((&input[1..], &input[0].raw[..]))
+ } else {
+ Err(crate::nom::Err::Error(
+ (
+ input,
+ crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c),
+ )
+ .into(),
+ ))
+ }
+ }
+}
+
+// ==================================================
+// ============= Numeric expressions ================
+// ==================================================
+
+impl<'a> AddAssign<&'a EvalResult> for EvalResult {
+ fn add_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a + b),
+ (&Float(a), &Int(b)) => Float(a + (b.0 as f64)),
+ (&Int(a), &Float(b)) => Float(a.0 as f64 + b),
+ (&Float(a), &Float(b)) => Float(a + b),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> BitAndAssign<&'a EvalResult> for EvalResult {
+ fn bitand_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a & b),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> BitOrAssign<&'a EvalResult> for EvalResult {
+ fn bitor_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a | b),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> BitXorAssign<&'a EvalResult> for EvalResult {
+ fn bitxor_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a ^ b),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> DivAssign<&'a EvalResult> for EvalResult {
+    fn div_assign(&mut self, rhs: &'a EvalResult) {
+        use self::EvalResult::*;
+        *self = match (&*self, rhs) {
+            (&Int(a), &Int(b)) if b.0 != 0 => Int(a / b), // int / 0 would panic; it falls to `Invalid`
+            (&Float(a), &Int(b)) => Float(a / (b.0 as f64)),
+            (&Int(a), &Float(b)) => Float(a.0 as f64 / b),
+            (&Float(a), &Float(b)) => Float(a / b),
+            _ => Invalid,
+        };
+    }
+}
+impl<'a> MulAssign<&'a EvalResult> for EvalResult {
+ fn mul_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a * b),
+ (&Float(a), &Int(b)) => Float(a * (b.0 as f64)),
+ (&Int(a), &Float(b)) => Float(a.0 as f64 * b),
+ (&Float(a), &Float(b)) => Float(a * b),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> RemAssign<&'a EvalResult> for EvalResult {
+    fn rem_assign(&mut self, rhs: &'a EvalResult) {
+        use self::EvalResult::*;
+        *self = match (&*self, rhs) {
+            (&Int(a), &Int(b)) if b.0 != 0 => Int(a % b), // int % 0 would panic; it falls to `Invalid`
+            (&Float(a), &Int(b)) => Float(a % (b.0 as f64)),
+            (&Int(a), &Float(b)) => Float(a.0 as f64 % b),
+            (&Float(a), &Float(b)) => Float(a % b),
+            _ => Invalid,
+        };
+    }
+}
+impl<'a> ShlAssign<&'a EvalResult> for EvalResult {
+ fn shl_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a << (b.0 as usize)),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> ShrAssign<&'a EvalResult> for EvalResult {
+ fn shr_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a >> (b.0 as usize)),
+ _ => Invalid,
+ };
+ }
+}
+impl<'a> SubAssign<&'a EvalResult> for EvalResult {
+ fn sub_assign(&mut self, rhs: &'a EvalResult) {
+ use self::EvalResult::*;
+ *self = match (&*self, rhs) {
+ (&Int(a), &Int(b)) => Int(a - b),
+ (&Float(a), &Int(b)) => Float(a - (b.0 as f64)),
+ (&Int(a), &Float(b)) => Float(a.0 as f64 - b),
+ (&Float(a), &Float(b)) => Float(a - b),
+ _ => Invalid,
+ };
+ }
+}
+
+fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> {
+ use self::EvalResult::*;
+ assert_eq!(input.0.len(), 1);
+ match (input.0[0], input.1) {
+ (b'+', i) => Some(i),
+ (b'-', Int(i)) => Some(Int(Wrapping(i.0.wrapping_neg()))), // impl Neg for Wrapping not until rust 1.10...
+ (b'-', Float(i)) => Some(Float(-i)),
+ (b'-', _) => unreachable!("non-numeric unary op"),
+ (b'~', Int(i)) => Some(Int(!i)),
+ (b'~', Float(_)) => None,
+ (b'~', _) => unreachable!("non-numeric unary op"),
+ _ => unreachable!("invalid unary op"),
+ }
+}
+
+fn numeric<I: Clone, E: nom::error::ParseError<I>, F>(
+ f: F,
+) -> impl FnMut(I) -> nom::IResult<I, EvalResult, E>
+where
+ F: FnMut(I) -> nom::IResult<I, EvalResult, E>,
+{
+ nom::combinator::map_opt(f, EvalResult::as_numeric)
+}
+
+impl<'a> PRef<'a> {
+ fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ alt((
+ delimited(p("("), |i| self.numeric_expr(i), p(")")),
+ numeric(|i| self.literal(i)),
+ numeric(|i| self.identifier(i)),
+ map_opt(
+ pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)),
+ unary_op,
+ ),
+ ))(input)
+ }
+
+ fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ let (input, acc) = self.unary(input)?;
+ fold_many0(
+ pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| {
+ self.unary(i)
+ }),
+ move || acc.clone(),
+ |mut acc, (op, val): (&[u8], EvalResult)| {
+ match op[0] as char {
+ '*' => acc *= &val,
+ '/' => acc /= &val,
+ '%' => acc %= &val,
+ _ => unreachable!(),
+ };
+ acc
+ },
+ )(input)
+ }
+
+ fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ let (input, acc) = self.mul_div_rem(input)?;
+ fold_many0(
+ pair(complete(one_of_punctuation(&["+", "-"][..])), |i| {
+ self.mul_div_rem(i)
+ }),
+ move || acc.clone(),
+ |mut acc, (op, val): (&[u8], EvalResult)| {
+ match op[0] as char {
+ '+' => acc += &val,
+ '-' => acc -= &val,
+ _ => unreachable!(),
+ };
+ acc
+ },
+ )(input)
+ }
+
+ fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ let (input, acc) = self.add_sub(input)?;
+ numeric(fold_many0(
+ pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| {
+ self.add_sub(i)
+ }),
+ move || acc.clone(),
+ |mut acc, (op, val): (&[u8], EvalResult)| {
+ match op {
+ b"<<" => acc <<= &val,
+ b">>" => acc >>= &val,
+ _ => unreachable!(),
+ };
+ acc
+ },
+ ))(input)
+ }
+
+ fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ let (input, acc) = self.shl_shr(input)?;
+ numeric(fold_many0(
+ preceded(complete(p("&")), |i| self.shl_shr(i)),
+ move || acc.clone(),
+ |mut acc, val: EvalResult| {
+ acc &= &val;
+ acc
+ },
+ ))(input)
+ }
+
+ fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ let (input, acc) = self.and(input)?;
+ numeric(fold_many0(
+ preceded(complete(p("^")), |i| self.and(i)),
+ move || acc.clone(),
+ |mut acc, val: EvalResult| {
+ acc ^= &val;
+ acc
+ },
+ ))(input)
+ }
+
+ fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ let (input, acc) = self.xor(input)?;
+ numeric(fold_many0(
+ preceded(complete(p("|")), |i| self.xor(i)),
+ move || acc.clone(),
+ |mut acc, val: EvalResult| {
+ acc |= &val;
+ acc
+ },
+ ))(input)
+ }
+
+ #[inline(always)]
+ fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ self.or(input)
+ }
+}
+
+// =======================================================
+// ============= Literals and identifiers ================
+// =======================================================
+
+impl<'a> PRef<'a> {
+ fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ match input.split_first() {
+ None => Err(Err::Incomplete(Needed::new(1))),
+ Some((
+ &Token {
+ kind: TokenKind::Identifier,
+ ref raw,
+ },
+ rest,
+ )) => {
+ if let Some(r) = self.identifiers.get(&raw[..]) {
+ Ok((rest, r.clone()))
+ } else {
+ Err(Err::Error(
+ (input, crate::ErrorKind::UnknownIdentifier).into(),
+ ))
+ }
+ }
+ Some(_) => Err(Err::Error(
+ (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
+ )),
+ }
+ }
+
+ fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ match input.split_first() {
+ None => Err(Err::Incomplete(Needed::new(1))),
+ Some((
+ &Token {
+ kind: TokenKind::Literal,
+ ref raw,
+ },
+ rest,
+ )) => match literal::parse(raw) {
+ Ok((_, result)) => Ok((rest, result)),
+ _ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())),
+ },
+ Some(_) => Err(Err::Error(
+ (input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(),
+ )),
+ }
+ }
+
+ fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> {
+ alt((
+ map_opt(|i| self.literal(i), EvalResult::as_str),
+ map_opt(|i| self.identifier(i), EvalResult::as_str),
+ ))(input)
+ .to_cexpr_result()
+ }
+
+ // "string1" "string2" etc...
+ fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ map(
+ pair(|i| self.string(i), many0(complete(|i| self.string(i)))),
+ |(first, v)| {
+ Vec::into_iter(v)
+ .fold(first, |mut s, elem| {
+ Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem));
+ s
+ })
+ .into()
+ },
+ )(input)
+ .to_cexpr_result()
+ }
+
+ fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
+ alt((
+ |i| self.numeric_expr(i),
+ delimited(p("("), |i| self.expr(i), p(")")),
+ |i| self.concat_str(i),
+ |i| self.literal(i),
+ |i| self.identifier(i),
+ ))(input)
+ .to_cexpr_result()
+ }
+
+ fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
+ pair(identifier_token, |i| self.expr(i))(input)
+ }
+}
+
+impl<'a> ::std::ops::Deref for PRef<'a> {
+ type Target = IdentifierParser<'a>;
+ fn deref(&self) -> &IdentifierParser<'a> {
+ self.0
+ }
+}
+
+impl<'ident> IdentifierParser<'ident> {
+ fn as_ref(&self) -> PRef<'_> {
+ PRef(self)
+ }
+
+ /// Create a new `IdentifierParser` with a set of known identifiers. When
+ /// a known identifier is encountered during parsing, it is substituted
+ /// for the value specified.
+ pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> {
+ IdentifierParser { identifiers }
+ }
+
+ /// Parse and evaluate an expression of a list of tokens.
+ ///
+ /// Returns an error if the input is not a valid expression or if the token
+ /// stream contains comments, keywords or unknown identifiers.
+ pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> {
+ self.as_ref().expr(input)
+ }
+
+ /// Parse and evaluate a macro definition from a list of tokens.
+ ///
+ /// Returns the identifier for the macro and its replacement evaluated as an
+ /// expression. The input should not include `#define`.
+ ///
+ /// Returns an error if the replacement is not a valid expression, if called
+ /// on most function-like macros, or if the token stream contains comments,
+ /// keywords or unknown identifiers.
+ ///
+ /// N.B. This is intended to fail on function-like macros, but if the
+ /// macro takes a single argument, the argument name is defined as an
+ /// identifier, and the macro otherwise parses as an expression, it will
+ /// return a result even on function-like macros.
+ ///
+ /// ```c
+ /// // will evaluate into IDENTIFIER
+ /// #define DELETE(IDENTIFIER)
+ /// // will evaluate into IDENTIFIER-3
+ /// #define NEGATIVE_THREE(IDENTIFIER) -3
+ /// ```
+ pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> {
+ crate::assert_full_parse(self.as_ref().macro_definition(input))
+ }
+}
+
+/// Parse and evaluate an expression of a list of tokens.
+///
+/// Returns an error if the input is not a valid expression or if the token
+/// stream contains comments, keywords or identifiers.
+pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> {
+ IdentifierParser::new(&HashMap::new()).expr(input)
+}
+
+/// Parse and evaluate a macro definition from a list of tokens.
+///
+/// Returns the identifier for the macro and its replacement evaluated as an
+/// expression. The input should not include `#define`.
+///
+/// Returns an error if the replacement is not a valid expression, if called
+/// on a function-like macro, or if the token stream contains comments,
+/// keywords or identifiers.
+pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
+ IdentifierParser::new(&HashMap::new()).macro_definition(input)
+}
+
+/// Parse a functional macro declaration from a list of tokens.
+///
+/// Returns the identifier for the macro and the argument list (in order). The
+/// input should not include `#define`. The actual definition is not parsed and
+/// may be obtained from the unparsed data returned.
+///
+/// Returns an error if the input is not a functional macro or if the token
+/// stream contains comments.
+///
+/// # Example
+/// ```
+/// use cexpr::expr::{IdentifierParser, EvalResult, fn_macro_declaration};
+/// use cexpr::assert_full_parse;
+/// use cexpr::token::Kind::*;
+/// use cexpr::token::Token;
+///
+/// // #define SUFFIX(arg) arg "suffix"
+/// let tokens = vec![
+/// (Identifier, &b"SUFFIX"[..]).into(),
+/// (Punctuation, &b"("[..]).into(),
+/// (Identifier, &b"arg"[..]).into(),
+/// (Punctuation, &b")"[..]).into(),
+/// (Identifier, &b"arg"[..]).into(),
+/// (Literal, &br#""suffix""#[..]).into(),
+/// ];
+///
+/// // Try to parse the functional part
+/// let (expr, (ident, args)) = fn_macro_declaration(&tokens).unwrap();
+/// assert_eq!(ident, b"SUFFIX");
+///
+/// // Create dummy arguments
+/// let idents = args.into_iter().map(|arg|
+/// (arg.to_owned(), EvalResult::Str(b"test".to_vec()))
+/// ).collect();
+///
+/// // Evaluate the macro
+/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap();
+/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec()));
+/// ```
+pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> {
+ pair(
+ identifier_token,
+ delimited(
+ p("("),
+ separated_list0(p(","), identifier_token),
+ p(")"),
+ ),
+ )(input)
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..5170f97
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,149 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! A C expression parser and evaluator.
+//!
+//! This crate provides methods for parsing and evaluating simple C expressions. In general, the
+//! crate can handle most arithmetic expressions that would appear in macros or the definition of
+//! constants, as well as string and character constants.
+//!
+//! The main entry point for single literals is [`literal::parse`], which parses a byte string and
+//! returns its evaluated value. Token streams are handled by the parsers in [`expr`].
+#![warn(rust_2018_idioms)]
+#![warn(missing_docs)]
+#![allow(deprecated)]
+
+pub mod nom {
+ //! nom's result types, re-exported.
+ // NOTE: at the crate root the path `nom::...` resolves to this module; the
+ // external crate is written `::nom` where it has to be named explicitly.
+ pub use nom::{error::ErrorKind, error::Error, Err, IResult, Needed};
+}
+pub mod expr;
+pub mod literal;
+pub mod token;
+
+/// Parsing errors specific to C parsing.
+///
+/// Plain nom error codes are wrapped in [`ErrorKind::Parser`] (see the `From` impl below).
+#[derive(Debug)]
+pub enum ErrorKind {
+ /// Expected the specified token (its kind and exact bytes).
+ ExactToken(token::Kind, &'static [u8]),
+ /// Expected one of the specified tokens (their kind and the accepted spellings).
+ ExactTokens(token::Kind, &'static [&'static str]),
+ /// Expected a token of the specified kind.
+ TypedToken(token::Kind),
+ /// An unknown identifier was encountered.
+ UnknownIdentifier,
+ /// An invalid literal was encountered.
+ ///
+ /// When encountered, this generally means a bug exists in the data that
+ /// was passed in or the parsing logic.
+ InvalidLiteral,
+ /// A full parse was requested, but data was left over after parsing finished.
+ Partial,
+ /// An error occurred in an underlying nom parser.
+ Parser(nom::ErrorKind),
+}
+
+impl From<nom::ErrorKind> for ErrorKind {
+    /// Wraps a plain nom error code in [`ErrorKind::Parser`].
+    fn from(kind: nom::ErrorKind) -> Self {
+        Self::Parser(kind)
+    }
+}
+
+impl From<u32> for ErrorKind {
+ /// Maps any numeric code to [`ErrorKind::InvalidLiteral`]; the value itself is discarded.
+ // NOTE(review): presumably kept for callers that produce numeric custom
+ // error codes -- confirm before removing.
+ fn from(_: u32) -> Self {
+ ErrorKind::InvalidLiteral
+ }
+}
+
+/// Parsing errors specific to C parsing.
+///
+/// This is a superset of `(I, nom::ErrorKind)` that includes the additional errors specified by
+/// [`ErrorKind`]. It implements nom's `ParseError` trait and can be built from nom's own
+/// `Error` type as well as from `(input, kind)` tuples.
+#[derive(Debug)]
+pub struct Error<I> {
+ /// The remainder of the input stream at the time of the error.
+ pub input: I,
+ /// The error that occurred.
+ pub error: ErrorKind,
+}
+
+impl<I> From<(I, nom::ErrorKind)> for Error<I> {
+    /// Builds an error from an `(input, nom error code)` pair; the code is wrapped in
+    /// [`ErrorKind::Parser`].
+    fn from((input, kind): (I, nom::ErrorKind)) -> Self {
+        Self {
+            input,
+            error: ErrorKind::Parser(kind),
+        }
+    }
+}
+
+impl<I> From<(I, ErrorKind)> for Error<I> {
+    /// Builds an error from an `(input, error kind)` pair.
+    fn from((input, error): (I, ErrorKind)) -> Self {
+        Self { input, error }
+    }
+}
+
+impl<I> From<::nom::error::Error<I>> for Error<I> {
+    /// Converts nom's default error type, keeping its input position and wrapping its code
+    /// in [`ErrorKind::Parser`].
+    fn from(e: ::nom::error::Error<I>) -> Self {
+        let ::nom::error::Error { input, code } = e;
+        Self {
+            input,
+            error: ErrorKind::Parser(code),
+        }
+    }
+}
+
+impl<I> ::nom::error::ParseError<I> for Error<I> {
+    /// Wraps a nom error code raised at `input`.
+    fn from_error_kind(input: I, kind: nom::ErrorKind) -> Self {
+        Self {
+            error: ErrorKind::Parser(kind),
+            input,
+        }
+    }
+
+    /// Returns `other` unchanged; the additional `(input, kind)` context is dropped.
+    fn append(_input: I, _kind: nom::ErrorKind, other: Self) -> Self {
+        other
+    }
+}
+
+// in lieu of https://github.com/Geal/nom/issues/1010
+/// Converts a nom result with any compatible error type into one that uses [`Error`].
+trait ToCexprResult<I, O> {
+    fn to_cexpr_result(self) -> nom::IResult<I, O, Error<I>>;
+}
+impl<I, O, E> ToCexprResult<I, O> for nom::IResult<I, O, E>
+where
+    Error<I>: From<E>,
+{
+    fn to_cexpr_result(self) -> nom::IResult<I, O, Error<I>> {
+        // Successes pass through untouched; only the error payload is converted, and the
+        // `Incomplete` / `Error` / `Failure` classification is preserved.
+        self.map_err(|err| match err {
+            nom::Err::Incomplete(needed) => nom::Err::Incomplete(needed),
+            nom::Err::Error(inner) => nom::Err::Error(inner.into()),
+            nom::Err::Failure(inner) => nom::Err::Failure(inner.into()),
+        })
+    }
+}
+
+/// Require that a successful parse consumed its entire input.
+///
+/// Errors in `result` are converted into this crate's [`Error`] type and otherwise passed
+/// through unchanged. If `result` is a success but unparsed input remains, an error of kind
+/// [`ErrorKind::Partial`] carrying the remaining input is returned instead.
+pub fn assert_full_parse<'i, I: 'i, O, E>(
+    result: nom::IResult<&'i [I], O, E>,
+) -> nom::IResult<&'i [I], O, Error<&'i [I]>>
+where
+    Error<&'i [I]>: From<E>,
+{
+    let (rem, output) = result.to_cexpr_result()?;
+    if !rem.is_empty() {
+        return Err(nom::Err::Error((rem, ErrorKind::Partial).into()));
+    }
+    Ok((rem, output))
+}
diff --git a/src/literal.rs b/src/literal.rs
new file mode 100644
index 0000000..68e85c7
--- /dev/null
+++ b/src/literal.rs
@@ -0,0 +1,361 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Parsing C literals from byte slices.
+//!
+//! This will parse a representation of a C literal into a Rust type.
+//!
+//! # characters
+//! Character literals are stored into the `CChar` type, which can hold values
+//! that are not valid Unicode code points. ASCII characters are represented as
+//! `char`, literal bytes with the high byte set are converted into the raw
+//! representation. Escape sequences are supported. If hex and octal escapes
+//! map to an ASCII character, that is used, otherwise, the raw encoding is
+//! used, including for values over 255. Unicode escapes are checked for
+//! validity and mapped to `char`. Character sequences are not supported. Width
+//! prefixes are ignored.
+//!
+//! # strings
+//! Strings are interpreted as byte vectors. Escape sequences are supported. If
+//! hex and octal escapes map onto multi-byte characters, they are truncated to
+//! one 8-bit character. Unicode escapes are converted into their UTF-8
+//! encoding. Width prefixes are ignored.
+//!
+//! # integers
+//! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
+//! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
+//! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
+//! sign suffixes are ignored. Sign prefixes are not supported.
+//!
+//! # real numbers
+//! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
+//! not supported in the significand. Hexadecimal floating points are not
+//! supported.
+
+use std::char;
+use std::str::{self, FromStr};
+
+use nom::branch::alt;
+use nom::bytes::complete::is_not;
+use nom::bytes::complete::tag;
+use nom::character::complete::{char, one_of};
+use nom::combinator::{complete, map, map_opt, opt, recognize};
+use nom::multi::{fold_many0, many0, many1, many_m_n};
+use nom::sequence::{delimited, pair, preceded, terminated, tuple};
+use nom::*;
+
+use crate::expr::EvalResult;
+use crate::ToCexprResult;
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+/// Representation of a C character
+///
+/// The conversions in this module produce `Char` for values up to `0x7f` and
+/// for valid Unicode escapes, and `Raw` for every other numeric value.
+pub enum CChar {
+ /// A character that can be represented as a `char`
+ Char(char),
+ /// Any other character (8-bit characters, unicode surrogates, etc.)
+ Raw(u64),
+}
+
+impl From<u8> for CChar {
+    /// ASCII bytes become [`CChar::Char`]; bytes with the high bit set are kept as
+    /// [`CChar::Raw`].
+    fn from(i: u8) -> CChar {
+        if i.is_ascii() {
+            CChar::Char(i as char)
+        } else {
+            CChar::Raw(u64::from(i))
+        }
+    }
+}
+
+// A non-allocating version of this would be nice...
+//
+// Implemented as `From` rather than `Into`: the standard blanket impl still
+// provides `Into<Vec<u8>> for CChar`, so existing `c.into()` callers keep
+// working, and `Vec::<u8>::from(c)` becomes available as well.
+impl From<CChar> for Vec<u8> {
+    /// Converts the character into the bytes it contributes to a string literal.
+    ///
+    /// * `CChar::Char` is encoded as UTF-8 (1 to 4 bytes).
+    /// * `CChar::Raw` is truncated to its low 8 bits and yields exactly one byte.
+    fn from(c: CChar) -> Vec<u8> {
+        match c {
+            CChar::Char(ch) => {
+                // Encode into a stack buffer; no intermediate `String` is needed.
+                let mut utf8 = [0u8; 4];
+                ch.encode_utf8(&mut utf8).as_bytes().to_vec()
+            }
+            // Deliberate truncation: strings are sequences of 8-bit characters.
+            CChar::Raw(raw) => vec![raw as u8],
+        }
+    }
+}
+
+/// Wraps `f` so that it only succeeds when it consumes its entire input.
+///
+/// Errors from `f` are returned unchanged. A success that leaves input behind is turned
+/// into an error of kind `Complete` positioned at the unconsumed remainder.
+pub fn full<I: Clone, O, F>(
+    f: F,
+) -> impl Fn(I) -> nom::IResult<I, O>
+where
+    I: nom::InputLength,
+    F: Fn(I) -> nom::IResult<I, O>,
+{
+    move |input| {
+        let (rest, output) = f(input)?;
+        if rest.input_len() != 0 {
+            return Err(nom::Err::Error(nom::error::Error::new(
+                rest,
+                nom::error::ErrorKind::Complete,
+            )));
+        }
+        Ok((rest, output))
+    }
+}
+
+// =================================
+// ======== matching digits ========
+// =================================
+
+// Expands to a parser function that consumes exactly one byte if it matches
+// one of the given patterns and returns that byte.
+//
+// * A non-matching byte yields a recoverable `OneOf` error.
+// * Empty input yields a recoverable `Eof` error. Literals are always parsed
+//   from complete tokens, so running out of input is an ordinary mismatch.
+//   Reporting `Incomplete` here would escape through `opt`/`alt`/`many_m_n`
+//   at the call sites that are not wrapped in `complete`, and would surface
+//   from `parse` instead of a normal parse error.
+macro_rules! byte {
+    ($($p: pat)|* ) => {{
+        fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> {
+            match i.split_first() {
+                $(Some((&c @ $p,rest)))|* => Ok((rest,c)),
+                Some(_) => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::OneOf))),
+                None => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Eof))),
+            }
+        }
+
+        parser
+    }}
+}
+
+/// Matches a single binary digit (`0` or `1`) and returns the matched byte.
+fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> {
+ byte!(b'0'..=b'1')(i)
+}
+
+/// Matches a single octal digit (`0`-`7`) and returns the matched byte.
+fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> {
+ byte!(b'0'..=b'7')(i)
+}
+
+/// Matches a single decimal digit (`0`-`9`) and returns the matched byte.
+fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
+ byte!(b'0'..=b'9')(i)
+}
+
+/// Matches a single hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`) and returns the matched byte.
+fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
+ byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')(i)
+}
+
+// ========================================
+// ======== characters and strings ========
+// ========================================
+
+/// Translates the letter of a simple escape sequence (the `n` in `\n`) into the character
+/// it denotes.
+///
+/// Panics on any letter other than `a`, `b`, `f`, `n`, `r`, `t` or `v`.
+fn escape2char(c: char) -> CChar {
+    let decoded = match c {
+        'a' => '\x07',
+        'b' => '\x08',
+        't' => '\t',
+        'n' => '\n',
+        'v' => '\x0b',
+        'f' => '\x0c',
+        'r' => '\r',
+        _ => unreachable!("invalid escape {}", c),
+    };
+    CChar::Char(decoded)
+}
+
+/// Interprets the digits of an octal or hexadecimal escape.
+///
+/// Returns `None` if `n` is not a valid number in the given `radix` or does not fit in a
+/// `u64`. Values up to `0x7f` become `CChar::Char`; larger values are kept as `CChar::Raw`.
+fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
+    let digits = str::from_utf8(&n).ok()?;
+    let value = u64::from_str_radix(digits, radix).ok()?;
+    Some(if value <= 0x7f {
+        CChar::Char(value as u8 as char)
+    } else {
+        CChar::Raw(value)
+    })
+}
+
+/// Interprets the hexadecimal digits of a `\u` / `\U` escape as a Unicode scalar value.
+///
+/// Returns `None` if the digits do not parse or do not name a valid `char`.
+fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
+    let digits = str::from_utf8(&n).ok()?;
+    let code_point = u32::from_str_radix(digits, 16).ok()?;
+    char::from_u32(code_point).map(CChar::Char)
+}
+
+/// Parses one backslash escape sequence and returns the character it denotes.
+///
+/// The alternatives are tried in the order listed; the first one that matches wins.
+fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
+ preceded(
+ char('\\'),
+ alt((
+ // \' \" \? \\ stand for the escaped character itself
+ map(one_of(r#"'"?\"#), CChar::Char),
+ // single-letter control escapes
+ map(one_of("abfnrtv"), escape2char),
+ // octal escape: one to three octal digits
+ map_opt(many_m_n(1, 3, octal), |v| c_raw_escape(v, 8)),
+ // hex escape: `x` followed by one or more hex digits
+ map_opt(preceded(char('x'), many1(hexadecimal)), |v| {
+ c_raw_escape(v, 16)
+ }),
+ // \uXXXX: exactly four hex digits
+ map_opt(
+ preceded(char('u'), many_m_n(4, 4, hexadecimal)),
+ c_unicode_escape,
+ ),
+ // \UXXXXXXXX: exactly eight hex digits
+ map_opt(
+ preceded(char('U'), many_m_n(8, 8, hexadecimal)),
+ c_unicode_escape,
+ ),
+ )),
+ )(i)
+}
+
+/// Matches a character/string width prefix: `u8`, `u`, `U` or `L`.
+///
+/// `u8` is listed before `u` so that the longer prefix is tried first.
+fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> {
+ alt((tag("u8"), tag("u"), tag("U"), tag("L")))(i)
+}
+
+/// Parses a character literal: an optional width prefix, then a single
+/// character or escape sequence between single quotes.
+fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
+ delimited(
+ // the width prefix is matched and discarded
+ terminated(opt(c_width_prefix), char('\'')),
+ alt((
+ escaped_char,
+ // any single byte except the backslash, which always starts an escape
+ map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from),
+ )),
+ char('\''),
+ )(i)
+}
+
+/// Parses a string literal (optionally width-prefixed) into its bytes.
+fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> {
+ delimited(
+ // opening quote, with or without a width prefix in front of it
+ alt((preceded(c_width_prefix, char('"')), char('"'))),
+ // the body is a sequence of fragments appended to one byte vector
+ fold_many0(
+ alt((
+ // an escape sequence, converted to the byte(s) it stands for
+ map(escaped_char, |c: CChar| c.into()),
+ // a run of bytes containing neither a backslash nor a double quote
+ map(is_not([b'\\', b'"']), |c: &[u8]| c.into()),
+ )),
+ Vec::new,
+ |mut v: Vec<u8>, res: Vec<u8>| {
+ v.extend_from_slice(&res);
+ v
+ },
+ ),
+ char('"'),
+ )(i)
+}
+
+// ================================
+// ======== parse integers ========
+// ================================
+
+/// Converts a run of digit bytes into a `u64` using the given `radix`.
+///
+/// Returns `None` if the bytes are not valid digits for `radix` or the value does not fit
+/// in a `u64`.
+fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
+    let digits = str::from_utf8(&n).ok()?;
+    u64::from_str_radix(digits, radix).ok()
+}
+
+/// Consumes a leading run of `u`, `U`, `l` and `L` bytes (integer suffix letters).
+///
+/// The run may be empty.
+fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {
+ // split at the first byte that is not one of the suffix letters
+ let r = input.split_at_position(|c| c != b'u' && c != b'U' && c != b'l' && c != b'L');
+ match r {
+ // `Incomplete` is turned into a match of the whole input with an empty remainder
+ Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)),
+ res => res,
+ }
+}
+
+/// Parses an integer literal (hexadecimal, binary, octal or decimal) with an
+/// optional run of `u`/`l` suffix letters.
+///
+/// The digits are read as `u64` and then cast to `i64`, so values above
+/// `i64::MAX` wrap to negative numbers. Values that do not fit in `u64` fail.
+fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> {
+ map(
+ terminated(
+ // prefixed forms are tried before plain decimal
+ alt((
+ map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |v| {
+ c_int_radix(v, 16)
+ }),
+ map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |v| {
+ c_int_radix(v, 16)
+ }),
+ map_opt(preceded(tag("0b"), many1(complete(binary))), |v| {
+ c_int_radix(v, 2)
+ }),
+ map_opt(preceded(tag("0B"), many1(complete(binary))), |v| {
+ c_int_radix(v, 2)
+ }),
+ // a leading `0` followed by at least one octal digit
+ map_opt(preceded(char('0'), many1(complete(octal))), |v| {
+ c_int_radix(v, 8)
+ }),
+ map_opt(many1(complete(decimal)), |v| c_int_radix(v, 10)),
+ // final alternative: always fails with a `Fix` error
+ |input| Err(crate::nom::Err::Error(nom::error::Error::new(input, crate::nom::ErrorKind::Fix))),
+ )),
+ // suffix letters are consumed and ignored
+ opt(take_ul),
+ ),
+ // reinterpret the unsigned value as signed
+ |i| i as i64,
+ )(i)
+}
+
+// ==============================
+// ======== parse floats ========
+// ==============================
+
+/// Matches a floating-point width suffix: one of `f`, `l`, `F`, `L`.
+fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> {
+ nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i)
+}
+
+/// Matches a decimal exponent: `e` or `E`, an optional sign, then one or more
+/// decimal digits. Returns the sign byte (if any) and the digit bytes.
+fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> {
+ preceded(
+ byte!(b'e' | b'E'),
+ pair(opt(byte!(b'-' | b'+')), many1(complete(decimal))),
+ )(i)
+}
+
+/// Parses a decimal floating-point literal into an `f64`.
+///
+/// Each alternative recognizes the numeric text and then an optional width
+/// suffix. The suffix sits outside `recognize`, so only the numeric text is
+/// handed to `f64::from_str`. The first alternative that matches wins.
+fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> {
+ map_opt(
+ alt((
+ // digits `.` [digits], e.g. `0.` or `2.0`
+ terminated(
+ recognize(tuple((
+ many1(complete(decimal)),
+ byte!(b'.'),
+ many0(complete(decimal)),
+ ))),
+ opt(float_width),
+ ),
+ // [digits] `.` digits, e.g. `.1`
+ terminated(
+ recognize(tuple((
+ many0(complete(decimal)),
+ byte!(b'.'),
+ many1(complete(decimal)),
+ ))),
+ opt(float_width),
+ ),
+ // [digits] [`.`] digits exponent
+ terminated(
+ recognize(tuple((
+ many0(complete(decimal)),
+ opt(byte!(b'.')),
+ many1(complete(decimal)),
+ float_exp,
+ ))),
+ opt(float_width),
+ ),
+ // digits [`.`] [digits] exponent, e.g. `1e3`
+ terminated(
+ recognize(tuple((
+ many1(complete(decimal)),
+ opt(byte!(b'.')),
+ many0(complete(decimal)),
+ float_exp,
+ ))),
+ opt(float_width),
+ ),
+ // digits followed by a mandatory width suffix, e.g. `1f`
+ terminated(recognize(many1(complete(decimal))), float_width),
+ )),
+ |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()),
+ )(i)
+}
+
+// ================================
+// ======== main interface ========
+// ================================
+
+/// Parses `input` as exactly one literal.
+///
+/// The literal kinds are tried in the order character, integer, float,
+/// string. Each must consume the whole input (`full`) to be accepted.
+fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
+ alt((
+ map(full(c_char), EvalResult::Char),
+ map(full(c_int), |i| EvalResult::Int(::std::num::Wrapping(i))),
+ map(full(c_float), EvalResult::Float),
+ map(full(c_string), EvalResult::Str),
+ ))(input)
+ // convert nom's error type into this crate's error type
+ .to_cexpr_result()
+}
+
+/// Parse a C literal.
+///
+/// The input must contain exactly the representation of a single literal
+/// token, and in particular no whitespace or sign prefixes.
+///
+/// On success the literal is returned as an `EvalResult` (`Char`, `Int`,
+/// `Float` or `Str`). Failures are reported using `crate::Error`.
+pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
+ crate::assert_full_parse(one_literal(input))
+}
diff --git a/src/token.rs b/src/token.rs
new file mode 100644
index 0000000..dbc5949
--- /dev/null
+++ b/src/token.rs
@@ -0,0 +1,44 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Representation of a C token
+//!
+//! This is designed to map onto a libclang CXToken.
+
+/// The category of a [`Token`].
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[allow(missing_docs)]
+pub enum Kind {
+ Punctuation,
+ Keyword,
+ Identifier,
+ Literal,
+ // Tokens of this kind are what `remove_comments` strips.
+ Comment,
+}
+
+/// A single token in a C expression.
+///
+/// The token owns its bytes. It can be built from a `(Kind, &[u8])` pair with
+/// `From`/`Into`, which copies the slice.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Token {
+ /// The type of this token.
+ pub kind: Kind,
+ /// The bytes that make up the token.
+ pub raw: Box<[u8]>,
+}
+
+impl<'a> From<(Kind, &'a [u8])> for Token {
+    /// Builds a token of the given kind, copying `value` into an owned buffer.
+    fn from((kind, value): (Kind, &'a [u8])) -> Token {
+        let raw: Box<[u8]> = Box::from(value);
+        Token { kind, raw }
+    }
+}
+
+/// Drops every token of kind [`Kind::Comment`] from `v` in place.
+///
+/// The relative order of the remaining tokens is preserved. The same vector is returned so
+/// that calls can be chained.
+pub fn remove_comments(v: &mut Vec<Token>) -> &mut Vec<Token> {
+    v.retain(|token| !matches!(token.kind, Kind::Comment));
+    v
+}
diff --git a/tests/clang.rs b/tests/clang.rs
new file mode 100644
index 0000000..b2484f0
--- /dev/null
+++ b/tests/clang.rs
@@ -0,0 +1,339 @@
+// (C) Copyright 2016 Jethro G. Beekman
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+extern crate cexpr;
+extern crate clang_sys;
+
+use std::collections::HashMap;
+use std::io::Write;
+use std::str::{self, FromStr};
+use std::{char, ffi, mem, ptr, slice};
+
+use cexpr::assert_full_parse;
+use cexpr::expr::{fn_macro_declaration, EvalResult, IdentifierParser};
+use cexpr::literal::CChar;
+use cexpr::token::Token;
+use clang_sys::*;
+
+// main testing routine
+//
+// The expected result of every macro is encoded in its name, which has the
+// form `<Type>_<value>` or `Fn_<Type>_<value>` for function-like macros:
+// * `Str`: the value is the expected string; a `U` is followed by six hex
+//   digits naming a code point that is inserted in place.
+// * `Int`: a decimal number where `n` stands for `-` and `_` is ignored.
+// * `Float`: a decimal number where `n` stands for `-` and `p` for `.`.
+// * `CharRaw` / `CharChar`: the decimal value of the expected character.
+// * anything else: the expected result is `Invalid`; a parse error also
+//   counts as a pass for such a definition.
+//
+// Returns `true` if the evaluated definition matches the expectation. On a
+// match the value is also recorded in `idents` so that later definitions can
+// refer to it.
+fn test_definition(
+ ident: Vec<u8>,
+ tokens: &[Token],
+ idents: &mut HashMap<Vec<u8>, EvalResult>,
+) -> bool {
+ // Decode an `Int` value: `n` is a minus sign, underscores are dropped.
+ fn bytes_to_int(value: &[u8]) -> Option<EvalResult> {
+ str::from_utf8(value)
+ .ok()
+ .map(|s| s.replace("n", "-"))
+ .map(|s| s.replace("_", ""))
+ .and_then(|v| i64::from_str(&v).ok())
+ .map(::std::num::Wrapping)
+ .map(Int)
+ }
+
+ use cexpr::expr::EvalResult::*;
+
+ let display_name = String::from_utf8_lossy(&ident).into_owned();
+
+ let functional;
+ // `test` is the expected value decoded from the macro name.
+ let test = {
+ // Split name such as Str_test_string into (Str,test_string)
+ let pos = ident
+ .iter()
+ .position(|c| *c == b'_')
+ .expect(&format!("Invalid definition in testcase: {}", display_name));
+ let mut expected = &ident[..pos];
+ let mut value = &ident[(pos + 1)..];
+
+ functional = expected == b"Fn";
+
+ if functional {
+ // Strip the `Fn_` prefix and split the remainder the same way.
+ let ident = value;
+ let pos = ident
+ .iter()
+ .position(|c| *c == b'_')
+ .expect(&format!("Invalid definition in testcase: {}", display_name));
+ expected = &ident[..pos];
+ value = &ident[(pos + 1)..];
+ }
+
+ if expected == b"Str" {
+ // Every piece after a `U` starts with six hex digits of a code point.
+ let mut splits = value.split(|c| *c == b'U');
+ let mut s = Vec::with_capacity(value.len());
+ s.extend_from_slice(splits.next().unwrap());
+ for split in splits {
+ let (chr, rest) = split.split_at(6);
+ let chr = u32::from_str_radix(str::from_utf8(chr).unwrap(), 16).unwrap();
+ write!(s, "{}", char::from_u32(chr).unwrap()).unwrap();
+ s.extend_from_slice(rest);
+ }
+ Some(Str(s))
+ } else if expected == b"Int" {
+ bytes_to_int(value)
+ } else if expected == b"Float" {
+ str::from_utf8(value)
+ .ok()
+ .map(|s| s.replace("n", "-").replace("p", "."))
+ .and_then(|v| f64::from_str(&v).ok())
+ .map(Float)
+ } else if expected == b"CharRaw" {
+ str::from_utf8(value)
+ .ok()
+ .and_then(|v| u64::from_str(v).ok())
+ .map(CChar::Raw)
+ .map(Char)
+ } else if expected == b"CharChar" {
+ str::from_utf8(value)
+ .ok()
+ .and_then(|v| u32::from_str(v).ok())
+ .and_then(char::from_u32)
+ .map(CChar::Char)
+ .map(Char)
+ } else {
+ // Unknown type prefix (e.g. `FAIL`): expect an invalid result.
+ Some(Invalid)
+ }
+ .expect(&format!("Invalid definition in testcase: {}", display_name))
+ };
+
+ let result = if functional {
+ let mut fnidents;
+ let expr_tokens;
+ match fn_macro_declaration(&tokens) {
+ Ok((rest, (_, args))) => {
+ fnidents = idents.clone();
+ expr_tokens = rest;
+ // Bind every argument to a value derived from its own name.
+ for arg in args {
+ let val = match test {
+ Int(_) => bytes_to_int(&arg),
+ Str(_) => Some(Str(arg.to_owned())),
+ _ => unimplemented!(),
+ }
+ .expect(&format!(
+ "Invalid argument in functional macro testcase: {}",
+ display_name
+ ));
+ fnidents.insert(arg.to_owned(), val);
+ }
+ }
+ e => {
+ println!(
+ "Failed test for {}, unable to parse functional macro declaration: {:?}",
+ display_name, e
+ );
+ return false;
+ }
+ }
+ // The macro body must be consumed completely.
+ assert_full_parse(IdentifierParser::new(&fnidents).expr(&expr_tokens))
+ } else {
+ IdentifierParser::new(idents)
+ .macro_definition(&tokens)
+ .map(|(i, (_, val))| (i, val))
+ };
+
+ match result {
+ Ok((_, val)) => {
+ if val == test {
+ // Remember the value for later definitions; names must be unique.
+ if let Some(_) = idents.insert(ident, val) {
+ panic!("Duplicate definition for testcase: {}", display_name);
+ }
+ true
+ } else {
+ println!(
+ "Failed test for {}, expected {:?}, got {:?}",
+ display_name, test, val
+ );
+ false
+ }
+ }
+ e => {
+ // A parse error is a pass only when an invalid result was expected.
+ if test == Invalid {
+ true
+ } else {
+ println!(
+ "Failed test for {}, expected {:?}, got {:?}",
+ display_name, test, e
+ );
+ false
+ }
+ }
+ }
+}
+
+// support code for the clang lexer
+
+/// Copies the bytes of a libclang string (without the trailing NUL) into a `Vec` and then
+/// disposes of the libclang string.
+unsafe fn clang_str_to_vec(s: CXString) -> Vec<u8> {
+    let bytes = ffi::CStr::from_ptr(clang_getCString(s)).to_bytes().to_vec();
+    clang_disposeString(s);
+    bytes
+}
+
+/// Converts a libclang token into a cexpr `Token`, copying its spelling.
+///
+/// Panics if libclang reports a token kind other than the five handled below.
+#[allow(non_upper_case_globals)]
+unsafe fn token_clang_to_cexpr(tu: CXTranslationUnit, orig: &CXToken) -> Token {
+ Token {
+ kind: match clang_getTokenKind(*orig) {
+ CXToken_Comment => cexpr::token::Kind::Comment,
+ CXToken_Identifier => cexpr::token::Kind::Identifier,
+ CXToken_Keyword => cexpr::token::Kind::Keyword,
+ CXToken_Literal => cexpr::token::Kind::Literal,
+ CXToken_Punctuation => cexpr::token::Kind::Punctuation,
+ _ => panic!("invalid token kind: {:?}", *orig),
+ },
+ raw: clang_str_to_vec(clang_getTokenSpelling(tu, *orig)).into_boxed_slice(),
+ }
+}
+
+/// C-ABI trampoline passed to `clang_visitChildren`.
+///
+/// `closure` must be a pointer to a live `F`, as set up by `visit_children`;
+/// it is cast back and invoked with the two cursors.
+extern "C" fn visit_children_thunk<F>(
+ cur: CXCursor,
+ parent: CXCursor,
+ closure: CXClientData,
+) -> CXChildVisitResult
+where
+ F: FnMut(CXCursor, CXCursor) -> CXChildVisitResult,
+{
+ unsafe { (&mut *(closure as *mut F))(cur, parent) }
+}
+
+/// Calls `f` through `clang_visitChildren` for the children of `cursor`.
+///
+/// The closure is handed to libclang as an opaque client-data pointer and
+/// called back via `visit_children_thunk::<F>`.
+unsafe fn visit_children<F>(cursor: CXCursor, mut f: F)
+where
+ F: FnMut(CXCursor, CXCursor) -> CXChildVisitResult,
+{
+ clang_visitChildren(
+ cursor,
+ visit_children_thunk::<F> as _,
+ // `f` lives on this stack frame for the duration of the call
+ &mut f as *mut F as CXClientData,
+ );
+}
+
+/// Returns `true` if the start of `r` lies in the main file, is not inside a system header
+/// and has a file associated with its spelling location.
+unsafe fn location_in_scope(r: CXSourceRange) -> bool {
+    let start = clang_getRangeStart(r);
+
+    // Only the file is of interest; line, column and offset are not requested.
+    let mut file = ptr::null_mut();
+    clang_getSpellingLocation(
+        start,
+        &mut file,
+        ptr::null_mut(),
+        ptr::null_mut(),
+        ptr::null_mut(),
+    );
+
+    if clang_Location_isFromMainFile(start) == 0 {
+        return false;
+    }
+    if clang_Location_isInSystemHeader(start) != 0 {
+        return false;
+    }
+    !file.is_null()
+}
+
+/// Parses `file` with libclang and calls `visitor` once for every macro definition found in
+/// the main file, passing the macro name and its tokens with comment tokens removed.
+///
+/// tokenize_range_adjust can be used to work around LLVM bug 9069
+/// https://bugs.llvm.org//show_bug.cgi?id=9069
+/// When it is `true`, the end of each macro's source range is moved back by one before
+/// tokenizing.
+///
+/// Panics if the file cannot be parsed.
+fn file_visit_macros<F: FnMut(Vec<u8>, Vec<Token>)>(
+    file: &str,
+    tokenize_range_adjust: bool,
+    mut visitor: F,
+) {
+    unsafe {
+        // The index is kept in the outer scope so that it can be disposed of once the
+        // translation unit created within it has been released.
+        let index = clang_createIndex(true as _, false as _);
+        let tu = {
+            let cfile = ffi::CString::new(file).unwrap();
+            let mut tu = mem::MaybeUninit::uninit();
+            assert!(
+                clang_parseTranslationUnit2(
+                    index,
+                    cfile.as_ptr(),
+                    [b"-std=c11\0".as_ptr() as *const ::std::os::raw::c_char].as_ptr(),
+                    1,
+                    ptr::null_mut(),
+                    0,
+                    CXTranslationUnit_DetailedPreprocessingRecord,
+                    // Pass the raw out-pointer: no reference to uninitialized memory is made.
+                    tu.as_mut_ptr()
+                ) == CXError_Success,
+                "Failure reading test case {}",
+                file
+            );
+            tu.assume_init()
+        };
+        visit_children(clang_getTranslationUnitCursor(tu), |cur, _parent| {
+            if cur.kind == CXCursor_MacroDefinition {
+                let mut range = clang_getCursorExtent(cur);
+                if !location_in_scope(range) {
+                    return CXChildVisit_Continue;
+                }
+                range.end_int_data -= if tokenize_range_adjust { 1 } else { 0 };
+                let mut token_ptr = ptr::null_mut();
+                let mut num = 0;
+                clang_tokenize(tu, range, &mut token_ptr, &mut num);
+                if token_ptr != ptr::null_mut() {
+                    let tokens = slice::from_raw_parts(token_ptr, num as usize);
+                    let tokens: Vec<_> = tokens
+                        .iter()
+                        .filter_map(|t| {
+                            if clang_getTokenKind(*t) != CXToken_Comment {
+                                Some(token_clang_to_cexpr(tu, t))
+                            } else {
+                                None
+                            }
+                        })
+                        .collect();
+                    // The converted tokens own their bytes, so the libclang buffer can go.
+                    clang_disposeTokens(tu, token_ptr, num);
+                    visitor(clang_str_to_vec(clang_getCursorSpelling(cur)), tokens)
+                }
+            }
+            CXChildVisit_Continue
+        });
+        // Release the translation unit first, then the index that owns it.
+        clang_disposeTranslationUnit(tu);
+        clang_disposeIndex(index);
+    };
+}
+
+/// Runs `test_definition` on every macro in `file` and reports whether all of them passed.
+///
+/// Every definition is checked even after a failure, so all failures get printed.
+fn test_file(file: &str) -> bool {
+    let mut idents = HashMap::new();
+    let mut all_succeeded = true;
+    let adjust_range = fix_bug_9069();
+    file_visit_macros(file, adjust_range, |ident, tokens| {
+        let passed = test_definition(ident, &tokens, &mut idents);
+        all_succeeded = all_succeeded && passed;
+    });
+    all_succeeded
+}
+
+/// Reports whether the token-range workaround for LLVM bug 9069 is needed.
+///
+/// The check runs once; its result is cached and returned on later calls.
+fn fix_bug_9069() -> bool {
+ // Tokenizes the two definitions of `A` in the probe header without the
+ // workaround and reports whether their token lists differ.
+ fn check_bug_9069() -> bool {
+ let mut token_sets = vec![];
+ file_visit_macros(
+ "tests/input/test_llvm_bug_9069.h",
+ false,
+ |ident, tokens| {
+ assert_eq!(&ident, b"A");
+ token_sets.push(tokens);
+ },
+ );
+ assert_eq!(token_sets.len(), 2);
+ token_sets[0] != token_sets[1]
+ }
+
+ use std::sync::atomic::{AtomicBool, Ordering};
+ use std::sync::Once;
+
+ static CHECK_FIX: Once = Once::new();
+ static FIX: AtomicBool = AtomicBool::new(false);
+
+ // `call_once` runs the probe at most once and stores its answer in `FIX`.
+ CHECK_FIX.call_once(|| FIX.store(check_bug_9069(), Ordering::SeqCst));
+
+ FIX.load(Ordering::SeqCst)
+}
+
+// Generates a `#[test]` function named `$f` that runs `test_file` on
+// `tests/input/$f.h` and asserts that every definition in it passed.
+macro_rules! test_file {
+ ($f:ident) => {
+ #[test]
+ fn $f() {
+ assert!(
+ test_file(concat!("tests/input/", stringify!($f), ".h")),
+ "test_file"
+ )
+ }
+ };
+}
+
+// One test per input header in tests/input.
+test_file!(floats);
+test_file!(chars);
+test_file!(strings);
+test_file!(int_signed);
+test_file!(int_unsigned);
+test_file!(fail);
diff --git a/tests/input/chars.h b/tests/input/chars.h
new file mode 100644
index 0000000..45351d3
--- /dev/null
+++ b/tests/input/chars.h
@@ -0,0 +1,3 @@
+#define CharChar_65 'A'
+#define CharChar_127849 '\U0001f369' // 🍩
+#define CharRaw_255 U'\xff'
diff --git a/tests/input/fail.h b/tests/input/fail.h
new file mode 100644
index 0000000..fd416bc
--- /dev/null
+++ b/tests/input/fail.h
@@ -0,0 +1,9 @@
+#define FAIL_function_like(x) 3
+#define FAIL_empty
+#define FAIL_invalid_for_radix 0b2
+#define FAIL_shift_by_float 3<<1f
+#define FAIL_unknown_identifier UNKNOWN
+#define Int_0 0
+#define Str_str "str"
+#define FAIL_concat_integer "test" Str_str Int_0
+#define FAIL_too_large_int 18446744073709551616
diff --git a/tests/input/floats.h b/tests/input/floats.h
new file mode 100644
index 0000000..61942cf
--- /dev/null
+++ b/tests/input/floats.h
@@ -0,0 +1,8 @@
+#define Float_0 0.
+#define Float_1 1f
+#define Float_p1 .1
+#define Float_2 2.0
+#define Float_1000 1e3
+#define Float_2000 2e+3
+#define Float_p001 1e-3
+#define Float_80 10.0*(1<<3)
diff --git a/tests/input/int_signed.h b/tests/input/int_signed.h
new file mode 100644
index 0000000..65854a6
--- /dev/null
+++ b/tests/input/int_signed.h
@@ -0,0 +1,3 @@
+#define Int_n3 -(-(-3))
+#define Int_n5 -3-2
+#define Int_n9223372036854775808 -9223372036854775808
diff --git a/tests/input/int_unsigned.h b/tests/input/int_unsigned.h
new file mode 100644
index 0000000..6663dda
--- /dev/null
+++ b/tests/input/int_unsigned.h
@@ -0,0 +1,29 @@
+#define Int_456 456
+#define Int_0 0
+#define Int_1 0b1
+#define Int_2 0x2
+#define Int_3 3L
+#define Int_4 0X4
+#define Int_5 0B101
+#define Int_63 077
+#define Int_123 123
+#define Int_124 124u
+#define Int_125 125uL
+#define Int_126 126LuL
+#define Int_16 (((1)<<4ULL))/*comment*/
+#define Int_13 1|8^6&2<<1
+
+#define Int_47 32|15
+#define Int_38 (32|15)^9
+#define Int_6 ((32|15)^9)&7
+#define Int_12 (((32|15)^9)&7)<<1
+#define Int_17 ((((32|15)^9)&7)<<1)+5
+#define Int_15 (((((32|15)^9)&7)<<1)+5)-2
+#define Int_60 ((((((32|15)^9)&7)<<1)+5)-2)*4
+#define Int_30 (((((((32|15)^9)&7)<<1)+5)-2)*4)/2
+#define Int_39 32|15^9&7<<1+5-2*4/2
+
+#define Int_n1 18446744073709551615 /*2^64-1*/
+#define Int_n9223372036854775808 9223372036854775808
+
+#define Fn_Int_9(_3) _3*3
diff --git a/tests/input/strings.h b/tests/input/strings.h
new file mode 100644
index 0000000..d01d409
--- /dev/null
+++ b/tests/input/strings.h
@@ -0,0 +1,17 @@
+#define Str_ ""
+#define Str_str "str"
+#define Str_unicode u"unicode"
+#define Str_long L"long"
+#define Str_concat u"con" L"cat"
+#define Str_concat_parens ("concat" U"_parens")
+#define Str_concat_identifier (Str_concat L"_identifier")
+#define Str_hex_escape_all "\x68\x65\x78\x5f\x65\x73\x63\x61\x70\x65\x5f\x61\x6c\x6c"
+#define Str_hex_escape_hex "h\x65x_\x65s\x63\x61p\x65_h\x65x"
+#define Str_quote_U000022_escape "quote_\"_escape"
+#define Str_Fly_away_in_my_space_U01F680_You_no_need_put_U01F4B5_in_my_pocket \
+ u8"Fly_away_in_my_space_🚀_You_no_need_put_💵_in_my_pocket"
+#define Fn_Str_no_args() "no_args"
+#define Fn_Str_no_args_concat() "no_args_" Str_concat
+#define Fn_Str_prepend_arg(arg) "prepend_" arg
+#define Fn_Str_two_args(two, args) two "_" args
+#define Fn_Str_three_args(three, _, args) three _ args
diff --git a/tests/input/test_llvm_bug_9069.h b/tests/input/test_llvm_bug_9069.h
new file mode 100644
index 0000000..a92374e
--- /dev/null
+++ b/tests/input/test_llvm_bug_9069.h
@@ -0,0 +1,4 @@
+// The following two definitions should yield the same list of tokens.
+// If https://bugs.llvm.org//show_bug.cgi?id=9069 is not fixed, they don't.
+#define A 1
+#define A 1