diff options
-rw-r--r-- | .cargo_vcs_info.json | 6 | ||||
-rw-r--r-- | Cargo.toml | 26 | ||||
-rw-r--r-- | Cargo.toml.orig | 15 | ||||
-rw-r--r-- | LICENSE-APACHE | 201 | ||||
-rw-r--r-- | LICENSE-MIT | 25 | ||||
-rw-r--r-- | src/lib.rs | 416 |
6 files changed, 689 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..aec1933 --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "359bc90a4f07224f79cc79c45dc873d44bcd6f14" + }, + "path_in_vcs": "form_urlencoded" +}
\ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..037361d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,26 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +rust-version = "1.51" +name = "form_urlencoded" +version = "1.1.0" +authors = ["The rust-url developers"] +description = "Parser and serializer for the application/x-www-form-urlencoded syntax, as used by HTML forms." +license = "MIT OR Apache-2.0" +repository = "https://github.com/servo/rust-url" + +[lib] +test = false + +[dependencies.percent-encoding] +version = "2.2.0" diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..bdbb5ce --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,15 @@ +[package] +name = "form_urlencoded" +version = "1.1.0" +authors = ["The rust-url developers"] +description = "Parser and serializer for the application/x-www-form-urlencoded syntax, as used by HTML forms." +repository = "https://github.com/servo/rust-url" +license = "MIT OR Apache-2.0" +edition = "2018" +rust-version = "1.51" + +[lib] +test = false + +[dependencies] +percent-encoding = { version = "2.2.0", path = "../percent_encoding" } diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..24de6b4 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2013-2016 The rust-url developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..477594b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,416 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax]( +//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded), +//! as used by HTML forms. +//! +//! Converts between a string (such as an URL’s query string) +//! and a sequence of (name, value) pairs. + +use percent_encoding::{percent_decode, percent_encode_byte}; +use std::borrow::{Borrow, Cow}; +use std::str; + +/// Convert a byte string in the `application/x-www-form-urlencoded` syntax +/// into a iterator of (name, value) pairs. +/// +/// Use `parse(input.as_bytes())` to parse a `&str` string. +/// +/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be +/// converted to `[("#first", "%try%")]`. +#[inline] +pub fn parse(input: &[u8]) -> Parse<'_> { + Parse { input } +} +/// The return type of `parse()`. +#[derive(Copy, Clone)] +pub struct Parse<'a> { + input: &'a [u8], +} + +impl<'a> Iterator for Parse<'a> { + type Item = (Cow<'a, str>, Cow<'a, str>); + + fn next(&mut self) -> Option<Self::Item> { + loop { + if self.input.is_empty() { + return None; + } + let mut split2 = self.input.splitn(2, |&b| b == b'&'); + let sequence = split2.next().unwrap(); + self.input = split2.next().unwrap_or(&[][..]); + if sequence.is_empty() { + continue; + } + let mut split2 = sequence.splitn(2, |&b| b == b'='); + let name = split2.next().unwrap(); + let value = split2.next().unwrap_or(&[][..]); + return Some((decode(name), decode(value))); + } + } +} + +fn decode(input: &[u8]) -> Cow<'_, str> { + let replaced = replace_plus(input); + decode_utf8_lossy(match percent_decode(&replaced).into() { + Cow::Owned(vec) => Cow::Owned(vec), + Cow::Borrowed(_) => replaced, + }) +} + +/// Replace b'+' with b' ' +fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> { + match input.iter().position(|&b| b == b'+') { + None => Cow::Borrowed(input), + Some(first_position) => { + let mut replaced = input.to_owned(); + replaced[first_position] = b' '; + for byte in &mut replaced[first_position + 1..] { + if *byte == b'+' { + *byte = b' '; + } + } + Cow::Owned(replaced) + } + } +} + +impl<'a> Parse<'a> { + /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`. + pub fn into_owned(self) -> ParseIntoOwned<'a> { + ParseIntoOwned { inner: self } + } +} + +/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`. +pub struct ParseIntoOwned<'a> { + inner: Parse<'a>, +} + +impl<'a> Iterator for ParseIntoOwned<'a> { + type Item = (String, String); + + fn next(&mut self) -> Option<Self::Item> { + self.inner + .next() + .map(|(k, v)| (k.into_owned(), v.into_owned())) + } +} + +/// The [`application/x-www-form-urlencoded` byte serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// +/// Return an iterator of `&str` slices. +pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> { + ByteSerialize { bytes: input } +} + +/// Return value of `byte_serialize()`. +#[derive(Debug)] +pub struct ByteSerialize<'a> { + bytes: &'a [u8], +} + +fn byte_serialized_unchanged(byte: u8) -> bool { + matches!(byte, b'*' | b'-' | b'.' | b'0' ..= b'9' | b'A' ..= b'Z' | b'_' | b'a' ..= b'z') +} + +impl<'a> Iterator for ByteSerialize<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if let Some((&first, tail)) = self.bytes.split_first() { + if !byte_serialized_unchanged(first) { + self.bytes = tail; + return Some(if first == b' ' { + "+" + } else { + percent_encode_byte(first) + }); + } + let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); + let (unchanged_slice, remaining) = match position { + // 1 for first_byte + i unchanged in tail + Some(i) => self.bytes.split_at(1 + i), + None => (self.bytes, &[][..]), + }; + self.bytes = remaining; + // This unsafe is appropriate because we have already checked these + // bytes in byte_serialized_unchanged, which checks for a subset + // of UTF-8. So we know these bytes are valid UTF-8, and doing + // another UTF-8 check would be wasteful. + Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + if self.bytes.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.bytes.len())) + } + } +} + +/// The [`application/x-www-form-urlencoded` serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-serializer). +pub struct Serializer<'a, T: Target> { + target: Option<T>, + start_position: usize, + encoding: EncodingOverride<'a>, +} + +pub trait Target { + fn as_mut_string(&mut self) -> &mut String; + fn finish(self) -> Self::Finished; + type Finished; +} + +impl Target for String { + fn as_mut_string(&mut self) -> &mut String { + self + } + fn finish(self) -> Self { + self + } + type Finished = Self; +} + +impl<'a> Target for &'a mut String { + fn as_mut_string(&mut self) -> &mut String { + &mut **self + } + fn finish(self) -> Self { + self + } + type Finished = Self; +} + +impl<'a, T: Target> Serializer<'a, T> { + /// Create a new `application/x-www-form-urlencoded` serializer for the given target. + /// + /// If the target is non-empty, + /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. + pub fn new(target: T) -> Self { + Self::for_suffix(target, 0) + } + + /// Create a new `application/x-www-form-urlencoded` serializer + /// for a suffix of the given target. + /// + /// If that suffix is non-empty, + /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. + pub fn for_suffix(mut target: T, start_position: usize) -> Self { + if target.as_mut_string().len() < start_position { + panic!( + "invalid length {} for target of length {}", + start_position, + target.as_mut_string().len() + ); + } + + Serializer { + target: Some(target), + start_position, + encoding: None, + } + } + + /// Remove any existing name/value pair. + /// + /// Panics if called after `.finish()`. + pub fn clear(&mut self) -> &mut Self { + string(&mut self.target).truncate(self.start_position); + self + } + + /// Set the character encoding to be used for names and values before percent-encoding. + pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self { + self.encoding = new; + self + } + + /// Serialize and append a name/value pair. + /// + /// Panics if called after `.finish()`. + pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { + append_pair( + string(&mut self.target), + self.start_position, + self.encoding, + name, + value, + ); + self + } + + /// Serialize and append a name of parameter without any value. + /// + /// Panics if called after `.finish()`. + pub fn append_key_only(&mut self, name: &str) -> &mut Self { + append_key_only( + string(&mut self.target), + self.start_position, + self.encoding, + name, + ); + self + } + + /// Serialize and append a number of name/value pairs. + /// + /// This simply calls `append_pair` repeatedly. + /// This can be more convenient, so the user doesn’t need to introduce a block + /// to limit the scope of `Serializer`’s borrow of its string. + /// + /// Panics if called after `.finish()`. + pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef<str>, + V: AsRef<str>, + { + { + let string = string(&mut self.target); + for pair in iter { + let &(ref k, ref v) = pair.borrow(); + append_pair( + string, + self.start_position, + self.encoding, + k.as_ref(), + v.as_ref(), + ); + } + } + self + } + + /// Serialize and append a number of names without values. + /// + /// This simply calls `append_key_only` repeatedly. + /// This can be more convenient, so the user doesn’t need to introduce a block + /// to limit the scope of `Serializer`’s borrow of its string. + /// + /// Panics if called after `.finish()`. + pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self + where + I: IntoIterator, + I::Item: Borrow<K>, + K: AsRef<str>, + { + { + let string = string(&mut self.target); + for key in iter { + let k = key.borrow().as_ref(); + append_key_only(string, self.start_position, self.encoding, k); + } + } + self + } + + /// If this serializer was constructed with a string, take and return that string. + /// + /// ```rust + /// use form_urlencoded; + /// let encoded: String = form_urlencoded::Serializer::new(String::new()) + /// .append_pair("foo", "bar & baz") + /// .append_pair("saison", "Été+hiver") + /// .finish(); + /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver"); + /// ``` + /// + /// Panics if called more than once. + pub fn finish(&mut self) -> T::Finished { + self.target + .take() + .expect("url::form_urlencoded::Serializer double finish") + .finish() + } +} + +fn append_separator_if_needed(string: &mut String, start_position: usize) { + if string.len() > start_position { + string.push('&') + } +} + +fn string<T: Target>(target: &mut Option<T>) -> &mut String { + target + .as_mut() + .expect("url::form_urlencoded::Serializer finished") + .as_mut_string() +} + +fn append_pair( + string: &mut String, + start_position: usize, + encoding: EncodingOverride<'_>, + name: &str, + value: &str, +) { + append_separator_if_needed(string, start_position); + append_encoded(name, string, encoding); + string.push('='); + append_encoded(value, string, encoding); +} + +fn append_key_only( + string: &mut String, + start_position: usize, + encoding: EncodingOverride, + name: &str, +) { + append_separator_if_needed(string, start_position); + append_encoded(name, string, encoding); +} + +fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) { + string.extend(byte_serialize(&encode(encoding, s))) +} + +pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> { + if let Some(o) = encoding_override { + return o(input); + } + input.as_bytes().into() +} + +pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { + // Note: This function is duplicated in `percent_encoding/lib.rs`. + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => { + // If from_utf8_lossy returns a Cow::Borrowed, then we can + // be sure our original bytes were valid UTF-8. This is because + // if the bytes were invalid UTF-8 from_utf8_lossy would have + // to allocate a new owned string to back the Cow so it could + // replace invalid bytes with a placeholder. + + // First we do a debug_assert to confirm our description above. + let raw_utf8: *const [u8] = utf8.as_bytes(); + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + + // Given we know the original input bytes are valid UTF-8, + // and we have ownership of those bytes, we re-use them and + // return a Cow::Owned here. + Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }) + } + Cow::Owned(s) => Cow::Owned(s), + } + } + } +} + +pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>; |