PLYwoot
Header-only C++17 library for parsing and writing PLY files
Loading...
Searching...
No Matches
header_scanner.hpp
Go to the documentation of this file.
1/*
2 This file is part of PLYwoot, a header-only PLY parser.
3
4 Copyright (C) 2023-2026, Ton van den Heuvel
5
6 PLYwoot is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20#ifndef PLYWOOT_HEADER_SCANNER_HPP
21#define PLYWOOT_HEADER_SCANNER_HPP
22
24
25#include "std.hpp"
26
27#include <cstdint>
28#include <cstring>
29#include <istream>
30#include <string>
31#include <string_view>
32
33namespace plywoot {
34
37{
41 HeaderScannerException(const std::string &message) : Exception("scanner error: " + message) {}
42};
43
51
52}
53
54namespace plywoot::detail {
55
/// Keyword that terminates the header section of a PLY file. The header
/// scanner compares complete input lines against this text to detect the end
/// of the header.
///
/// Declared as an `inline` variable so that all translation units including
/// this header share a single definition instead of each getting its own
/// internal-linkage copy.
inline constexpr char endHeaderToken[] = "end_header";
57
/// Reads a single line from `is` into `s` using `std::getline`, and removes a
/// trailing carriage return (`'\r'`) from the line that was read, if present.
/// This way, lines terminated by "\r\n" and lines terminated by "\n" yield the
/// same text.
///
/// \param is input stream to read a line from
/// \param s string that receives the text of the line
/// \return a reference to `is`, so that the stream state can be tested
///     directly by the caller
inline std::istream &getline_without_linefeed(std::istream &is, std::string &s)
{
  std::getline(is, s);

  // Only touch the result in case the read succeeded.
  if (!is) { return is; }

  const bool endsWithCarriageReturn = !s.empty() && s.back() == '\r';
  if (endsWithCarriageReturn) { s.pop_back(); }

  return is;
}
63
// clang-format off
/// Lookup table that maps every possible byte value to whether that byte ends
/// a token in the header scanner. The table is indexed by the byte converted
/// to `unsigned char`.
///
/// The following bytes are token delimiters:
///   -   9: horizontal tab
///   -  10: line feed
///   -  13: carriage return
///   -  32: space
///   - 255: the value that results from storing `EOF` in a `char` on
///          platforms where `EOF` equals -1; the header scanner appends that
///          byte to its buffer as an end-of-input marker
///
/// Declared as an `inline` variable so that all translation units including
/// this header share a single copy of the table.
inline constexpr bool isTokenDelimiter[256] = {
  // 0     1      2      3      4      5      6      7      8      9      10     11     12     13     14     15
  false, false, false, false, false, false, false, false, false, true,  true,  false, false, true,  false, false, //   0 -  15
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, //  16 -  31
  true,  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, //  32 -  47
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, //  48 -  63
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, //  64 -  79
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, //  80 -  95
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, //  96 - 111
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 112 - 127
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 128 - 143
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 144 - 159
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 160 - 175
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 176 - 191
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 192 - 207
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 208 - 223
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 224 - 239
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true,  // 240 - 255
};
// clang-format on
94
97class HeaderScanner
98{
99public:
102 HeaderScanner(std::istream &is)
103 {
104 if (!is) { throw InvalidInputStream{}; }
105
106 std::string line;
107 while (bool(detail::getline_without_linefeed(is, line)) && line != endHeaderToken)
108 {
109 buffer_.append(line);
110 buffer_.push_back('\n');
111 }
112
113 if (line == endHeaderToken)
114 {
115 buffer_.append(line);
116 buffer_.push_back('\n');
117 }
118 else { buffer_.push_back(EOF); }
119
120 // Initialize the read head to the start of the buffered data.
121 c_ = buffer_.data();
122 }
123
125 enum class Token {
126 Unknown = 0,
127 Ascii,
128 BinaryBigEndian,
129 BinaryLittleEndian,
130 Char,
131 Comment,
132 Double,
133 Element,
134 EndHeader,
135 Eof,
136 Float,
137 FloatingPointNumber,
138 Format,
139 Identifier,
140 Int,
141 List,
142 MagicNumber,
143 Number,
144 Property,
145 Short,
146 UChar,
147 UInt,
148 UShort,
149 };
150
152 Token nextToken() noexcept
153 {
154 // Ignore all whitespace, read upto the first non-whitespace character.
155 const char *last = buffer_.data() + buffer_.size();
156 while (c_ < last && 0 <= *c_ && *c_ <= 0x20)
157 {
158 if (*c_ == '\n') ++line_;
159 ++c_;
160 }
161
162 // Read an identifier. After an identifier is read, the read head is
163 // positioned at the start of the next token or whitespace.
164 const char *tokenStart = c_;
165 while (!isTokenDelimiter[(unsigned char)*c_]) { c_++; }
166 tokenString_ = std::string_view(tokenStart, c_ - tokenStart);
167
168 // In case the identifier is one of the reserved keywords, handle it as
169 // such. Use first character for quick comparison.
170 switch (tokenString_.front())
171 {
172 case 'a':
173 token_ = !tokenString_.compare("ascii") ? Token::Ascii : Token::Identifier;
174 break;
175 case 'b':
176 if (!tokenString_.compare("binary_big_endian"))
177 token_ = Token::BinaryBigEndian;
178 else if (!tokenString_.compare("binary_little_endian"))
179 token_ = Token::BinaryLittleEndian;
180 else
181 token_ = Token::Identifier;
182 break;
183 case 'c':
184 if (!tokenString_.compare("char")) { token_ = Token::Char; }
185 else if (!tokenString_.compare("comment"))
186 {
187 token_ = Token::Comment;
188 readComment();
189 }
190 else { token_ = Token::Identifier; }
191 break;
192 case 'd':
193 token_ = (!tokenString_.compare("double") ? Token::Double : Token::Identifier);
194 break;
195 case 'e':
196 if (!tokenString_.compare("element"))
197 token_ = Token::Element;
198 else if (!tokenString_.compare("end_header"))
199 token_ = Token::EndHeader;
200 else
201 token_ = Token::Identifier;
202 break;
203 case 'f':
204 if (!tokenString_.compare("format"))
205 token_ = Token::Format;
206 else if (!tokenString_.compare("float") || !tokenString_.compare("float32"))
207 token_ = Token::Float;
208 else if (!tokenString_.compare("float64"))
209 token_ = Token::Double;
210 else
211 token_ = Token::Identifier;
212 break;
213 case 'l':
214 token_ = (!tokenString_.compare("list") ? Token::List : Token::Identifier);
215 break;
216 case 'i':
217 if (!tokenString_.compare("int") || !tokenString_.compare("int32"))
218 token_ = Token::Int;
219 else if (!tokenString_.compare("int8"))
220 token_ = Token::Char;
221 else if (!tokenString_.compare("int16"))
222 token_ = Token::Short;
223 else
224 token_ = Token::Identifier;
225 break;
226 case 'p':
227 if (!tokenString_.compare("ply"))
228 token_ = Token::MagicNumber;
229 else if (!tokenString_.compare("property"))
230 token_ = Token::Property;
231 else
232 token_ = Token::Identifier;
233 break;
234 case 's':
235 token_ = (!tokenString_.compare("short") ? Token::Short : Token::Identifier);
236 break;
237 case 'u':
238 if (!tokenString_.compare("uint8") || !tokenString_.compare("uchar"))
239 token_ = Token::UChar;
240 else if (!tokenString_.compare("uint16") || !tokenString_.compare("ushort"))
241 token_ = Token::UShort;
242 else if (!tokenString_.compare("uint32") || !tokenString_.compare("uint"))
243 token_ = Token::UInt;
244 else
245 token_ = Token::Identifier;
246 break;
247 case '-':
248 case '+':
249 case '.':
250 case '0':
251 case '1':
252 case '2':
253 case '3':
254 case '4':
255 case '5':
256 case '6':
257 case '7':
258 case '8':
259 case '9':
260 // TODO(ton): scientific notation for floating point numbers?
261 token_ = (tokenString_.find('.') != std::string::npos) ? Token::FloatingPointNumber : Token::Number;
262 break;
263 case EOF:
264 token_ = Token::Eof;
265 break;
266 default:
267 token_ = Token::Identifier;
268 break;
269 }
270
271 return token_;
272 }
273
275 static constexpr bool isKeyword(Token token)
276 {
277 switch (token)
278 {
279 case Token::Ascii:
280 case Token::BinaryBigEndian:
281 case Token::BinaryLittleEndian:
282 case Token::Char:
283 case Token::Double:
284 case Token::Element:
285 case Token::EndHeader:
286 case Token::Float:
287 case Token::Format:
288 case Token::Int:
289 case Token::List:
290 case Token::Property:
291 case Token::Short:
292 case Token::UChar:
293 case Token::UInt:
294 case Token::UShort:
295 return true;
296 default:
297 break;
298 }
299
300 return false;
301 }
302
306 Comment comment() const { return {line_, tokenString()}; }
307
309 Token token() const noexcept { return token_; }
310
312 std::size_t tokenNumber() const noexcept
313 {
314 return static_cast<std::size_t>(std::strtoull(tokenString_.data(), nullptr, 10));
315 }
316
318 std::string tokenString() const noexcept { return std::string(tokenString_.data(), tokenString_.size()); }
319
320private:
323 void readComment()
324 {
325 // Skip spaces and tabs and the first non-whitespace character.
326 const char *end = buffer_.data() + buffer_.size();
327 while (c_ < end && (*c_ == ' ' || *c_ == '\t')) { ++c_; }
328
329 const std::size_t remainingBytes = buffer_.size() - (c_ - buffer_.data());
330 const char *last = static_cast<const char *>(::memchr(c_, '\n', remainingBytes));
331 if (last != nullptr)
332 {
333 tokenString_ = std::string_view(c_, last - c_);
334 c_ = last;
335 }
336 }
337
339 std::string buffer_;
345 const char *c_{buffer_.data()};
346
348 Token token_{Token::Unknown};
350 std::string_view tokenString_;
352 std::uint32_t line_{0};
353};
354
355}
356
357#endif
Base class for all exceptions thrown by PLYwoot.
constexpr bool isTokenDelimiter[256]
Base class for all header scanner exceptions.
HeaderScannerException(const std::string &message)
InvalidInputStream()
Constructs an invalid input stream exception.