{"version":1,"pages":[{"id":"-LyG_wqTiu8B8CJyx6Xr","title":"A Beginner's Guide to Clean Data","pathname":"/beginners-guide-to-clean-data","siteSpaceId":"sitesp_QTp8E","description":"Practical advice to spot and avoid data quality problems. - Benjamin Greve"},{"id":"-LyGaTF19SEsUzZMcTrq","title":"Foreword","pathname":"/beginners-guide-to-clean-data/introduction-1/foreword","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Introduction"}]},{"id":"-LyGbOCvUedaOd3IJmQq","title":"The value of data","pathname":"/beginners-guide-to-clean-data/introduction-1/the-value-of-data","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Introduction"}]},{"id":"-LyGbQxPituWM9VaC99b","title":"The intangible nature of data","pathname":"/beginners-guide-to-clean-data/introduction-1/the-intangible-nature-of-data","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Introduction"}]},{"id":"-LyGbsOKEvmMivokYU8J","title":"Missing values","pathname":"/beginners-guide-to-clean-data/missing-data-1/missing-values","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Missing data"}]},{"id":"-LyGcPG2WVl-JLIRQYOm","title":"Missing value patterns","pathname":"/beginners-guide-to-clean-data/missing-data-1/missing-value-patterns","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Missing data"}]},{"id":"-LyGcRvXHJgz7yAsO6mX","title":"Missing value representations","pathname":"/beginners-guide-to-clean-data/missing-data-1/missing-value-representations","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Missing data"}]},{"id":"-LyGcV7NsePIoT69aLqZ","title":"Missing observations","pathname":"/beginners-guide-to-clean-data/missing-data-1/missing-observations","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Missing data"}]},{"id":"-LyGcY0UQCOEnmzLVp7g","title":"Truncated exports","pathname":"/beginners-guide-to-clean-data/missing-data-1/truncated-exports","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Missing data"}]},{"id":"-LyGc_sG-Rn-F_32VJMy","title":"Handling missing values","pathname":"/beginners-guide-to-clean-data/missing-data-1/handling-missing-values","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Missing data"}]},{"id":"-LyGby5OnuflzjrT3z9E","title":"Unexpected values","pathname":"/beginners-guide-to-clean-data/data-range-problems/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data range problems"}]},{"id":"-LyGg6IEVts7einhnYiT","title":"Outliers","pathname":"/beginners-guide-to-clean-data/data-range-problems/outliers","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data range problems"}]},{"id":"-LyGg8V5KwjF8dgyDWhT","title":"Freak cases","pathname":"/beginners-guide-to-clean-data/data-range-problems/freak-cases","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data range problems"}]},{"id":"-LyGc-i84NBKPz3DqPnz","title":"CSV basics","pathname":"/beginners-guide-to-clean-data/common-csv-problems/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgDd6l6bw9Ih-uEvm","title":"Quotation characters","pathname":"/beginners-guide-to-clean-data/common-csv-problems/quotation-characters","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgHT9CRdjSjHB17LU","title":"Line breaks in text fields","pathname":"/beginners-guide-to-clean-data/common-csv-problems/line-breaks-in-text-fields","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgI8JIx2tvSRWtPFT","title":"Missing or insufficient headers","pathname":"/beginners-guide-to-clean-data/common-csv-problems/missing-or-insufficient-headers","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgJn-mtencJSHUc6f","title":"Trailing line breaks","pathname":"/beginners-guide-to-clean-data/common-csv-problems/trailing-line-breaks","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgJgQMJmsrKqLoF1U","title":"Data export and import","pathname":"/beginners-guide-to-clean-data/common-csv-problems/data-export-and-import","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgIXQd_BEf8jkihNg","title":"Column type violations","pathname":"/beginners-guide-to-clean-data/common-csv-problems/column-type-violations","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGgZFfvjWo4Ljd3hvX","title":"Guidelines for working with CSV files","pathname":"/beginners-guide-to-clean-data/common-csv-problems/guidelines-for-working-with-csv-files","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Common CSV problems"}]},{"id":"-LyGc3CNnhY3gPyJMI-N","title":"Text mining basics","pathname":"/beginners-guide-to-clean-data/text-mining-problems/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Text mining problems"}]},{"id":"-LyGghbXZvH52S5NOeNV","title":"Encoding in your data and IDE","pathname":"/beginners-guide-to-clean-data/text-mining-problems/encoding-in-your-data-and-ide","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Text mining problems"}]},{"id":"-LyGgi5fD0QqGOFOiqvn","title":"Special characters","pathname":"/beginners-guide-to-clean-data/text-mining-problems/special-characters","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Text mining problems"}]},{"id":"-LyGgibLgvUpnY_5ZybB","title":"Character entities","pathname":"/beginners-guide-to-clean-data/text-mining-problems/character-entities","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Text mining problems"}]},{"id":"-LyGghDGbYAgVnK_gyLa","title":"Lookalike characters","pathname":"/beginners-guide-to-clean-data/text-mining-problems/lookalike-characters","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Text mining problems"}]},{"id":"-LyGgfjyFPVItmV0RKn6","title":"Dummy words","pathname":"/beginners-guide-to-clean-data/text-mining-problems/dummy-words","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Text mining problems"}]},{"id":"-LyGc6fwKYyqiqOilGSa","title":"Inconsistent timestamp formats","pathname":"/beginners-guide-to-clean-data/type-and-format-related-problems/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Type- and format-related problems"}]},{"id":"-LyGgwcJzFo--WHOoE3i","title":"Whitespace-padded strings","pathname":"/beginners-guide-to-clean-data/type-and-format-related-problems/whitespace-padded-strings","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Type- and format-related problems"}]},{"id":"-LyGgw1JtpRuNw0vDuOb","title":"Binary data","pathname":"/beginners-guide-to-clean-data/type-and-format-related-problems/binary-data","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Type- and format-related problems"}]},{"id":"-LyGgvu8HgQAC0I9CsuP","title":"Semi-structured log files","pathname":"/beginners-guide-to-clean-data/type-and-format-related-problems/semi-structured-log-files","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Type- and format-related problems"}]},{"id":"-LyGgvaJoVADzXtQ9VHE","title":"Proprietary data formats","pathname":"/beginners-guide-to-clean-data/type-and-format-related-problems/proprietary-data-formats","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Type- and format-related problems"}]},{"id":"-LyGgvOWbLLFkDvAhCTS","title":"Spreadsheets","pathname":"/beginners-guide-to-clean-data/type-and-format-related-problems/spreadsheets","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Type- and format-related problems"}]},{"id":"-LyGcAivlM5OZI6wiESH","title":"Numeric overflow","pathname":"/beginners-guide-to-clean-data/database-related-problems/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhHk-rYUNK7KAuVQb","title":"Duplicate rows","pathname":"/beginners-guide-to-clean-data/database-related-problems/duplicate-rows","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhHdfY2PFg0K_SBGm","title":"Table joins","pathname":"/beginners-guide-to-clean-data/database-related-problems/table-joins","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhHY-PNNdjL-hvVyi","title":"Huge enterprise databases","pathname":"/beginners-guide-to-clean-data/database-related-problems/huge-enterprise-databases","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhHQu5b5Ja-9dNhlR","title":"Case sensitivity","pathname":"/beginners-guide-to-clean-data/database-related-problems/case-sensitivity","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhHBD59Vw58jUJl1q","title":"Separating DDL and DML statements","pathname":"/beginners-guide-to-clean-data/database-related-problems/separating-ddl-and-dml-statements","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhHJ3T-T-CM1lj37z","title":"Database performance considerations","pathname":"/beginners-guide-to-clean-data/database-related-problems/database-performance-considerations","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhH2xqluH1zQOL0nN","title":"Naming tables and columns","pathname":"/beginners-guide-to-clean-data/database-related-problems/naming-tables-and-columns","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhGu9RmIwGFSQPGMK","title":"Poorly written SQL","pathname":"/beginners-guide-to-clean-data/database-related-problems/poorly-written-sql","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGhGlb7wjEKWZ2O652","title":"Large monolithic SQL scripts","pathname":"/beginners-guide-to-clean-data/database-related-problems/large-monolithic-sql-scripts","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-MTtycS_4lDDxQEuf68g","title":"SQL orchestration","pathname":"/beginners-guide-to-clean-data/database-related-problems/sql-orchestration","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Database-related problems"}]},{"id":"-LyGcDkzFYSJc3Htizes","title":"No single point of truth","pathname":"/beginners-guide-to-clean-data/data-inconsistency/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data inconsistency"}]},{"id":"-LyGhkF9Ov7hVfvAXJEq","title":"Non-matching aggregated data","pathname":"/beginners-guide-to-clean-data/data-inconsistency/non-matching-aggregated-data","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data inconsistency"}]},{"id":"-LyGhk1VUZiQLtPTKI32","title":"Internal inconsistency","pathname":"/beginners-guide-to-clean-data/data-inconsistency/internal-inconsistency","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data inconsistency"}]},{"id":"-LyGcGyV_XkCFtMFnV0W","title":"Business concepts","pathname":"/beginners-guide-to-clean-data/data-modeling/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGi0frHsnA6RxT_rff","title":"Handling complexity","pathname":"/beginners-guide-to-clean-data/data-modeling/handling-complexity","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGi3Eum_ztM0kLF4zH","title":"Interfaces","pathname":"/beginners-guide-to-clean-data/data-modeling/interfaces","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGi5TYDBfs_42zmgN3","title":"Generalized data models","pathname":"/beginners-guide-to-clean-data/data-modeling/generalized-data-models","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGi89LdSnK2TCZiRrZ","title":"Reproducibility","pathname":"/beginners-guide-to-clean-data/data-modeling/reproducibility","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGiBq-Ulr-ePQkqcGm","title":"Feature stores and feature engines","pathname":"/beginners-guide-to-clean-data/data-modeling/feature-stores-and-feature-engines","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGiBQem4N3tFjHODu9","title":"Thinking pragmatic","pathname":"/beginners-guide-to-clean-data/data-modeling/thinking-pragmatic","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Data modeling"}]},{"id":"-LyGcJ88lPRA90ARnbUR","title":"Automated testing","pathname":"/beginners-guide-to-clean-data/monitoring-and-testing/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Monitoring and testing"}]},{"id":"-LyGiKwToz79777oR0Tv","title":"Measuring database load","pathname":"/beginners-guide-to-clean-data/monitoring-and-testing/measuring-database-load","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Monitoring and testing"}]},{"id":"-LyGcKysZQR46Q6nhNSy","title":"Checklist for new data","pathname":"/beginners-guide-to-clean-data/bonus-content/untitled","siteSpaceId":"sitesp_QTp8E","description":"","breadcrumbs":[{"label":"Bonus content"}]}]}