{"id":161764,"date":"2010-08-01T00:00:00","date_gmt":"2010-08-01T00:00:00","guid":{"rendered":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/msr-research-item\/a-comparison-of-unsupervised-methods-for-part-of-speech-tagging-in-chinese\/"},"modified":"2018-10-16T19:58:18","modified_gmt":"2018-10-17T02:58:18","slug":"a-comparison-of-unsupervised-methods-for-part-of-speech-tagging-in-chinese","status":"publish","type":"msr-research-item","link":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/publication\/a-comparison-of-unsupervised-methods-for-part-of-speech-tagging-in-chinese\/","title":{"rendered":"A Comparison of Unsupervised Methods for Part-of-Speech Tagging in Chinese"},"content":{"rendered":"<p>We conduct a series of Part-of-Speech (POS) Tagging experiments using Expectation Maximization (EM), Variational Bayes (VB) and Gibbs Sampling (GS) against the Chinese Penn Treebank.\u00a0 We want to first establish a baseline for unsupervised POS tagging in Chinese, which will facilitate future research in this area.\u00a0 Secondly, by comparing and analyzing the results between Chinese and English, we highlight some of the strengths and weaknesses of each of the algorithms in POS tagging task and attempt to explain the differences based on some preliminary linguistics analysis.\u00a0 Comparing to English, we find that all algorithms perform rather poorly in Chinese in 1-to-1 accuracy result but are more competitive in many-to-1 accuracy.\u00a0 We attribute one possible explanation of this to the algorithms\u2019 inability to correctly produce tags that match the desired tag count distribution.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>We conduct a series of Part-of-Speech (POS) Tagging experiments using Expectation Maximization (EM), Variational Bayes (VB) and Gibbs Sampling (GS) against the Chinese Penn Treebank.\u00a0 We want to first establish a baseline for unsupervised POS tagging in Chinese, which will facilitate future research in this area.\u00a0 Secondly, by comparing and analyzing the results between Chinese [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"Coling 2010: Posters","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"135\u2013143","msr_page_range_start":"135","msr_page_range_end":"143","msr_series":"","msr_volume":"","msr_copyright":"","msr_conference_name":"Coling 2010 Organizing Committee","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"Fei Xia","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":"","msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2010-08-01","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"http:\/\/www.aclweb.org\/anthology\/C10-2016.pdf","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":2010,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":true,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":0,"footnotes":""},"msr-research-highlight":[],"research-area":[13561],"msr-publication-type":[193716],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[],"msr-field-of-study":[],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-161764","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-algorithms","msr-locale-en_us"],"msr_publishername":"","msr_edition":"Coling 2010: Posters","msr_affiliation":"","msr_published_date":"2010-08-01","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"135\u2013143","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"http:\/\/www.aclweb.org\/anthology\/C10-2016.pdf","msr_doi":"","msr_publication_uploader":[{"type":"url","title":"http:\/\/www.aclweb.org\/anthology\/C10-2016.pdf","viewUrl":false,"id":false,"label_id":0}],"msr_related_uploader":"","msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[{"id":0,"url":"http:\/\/www.aclweb.org\/anthology\/C10-2016.pdf"}],"msr-author-ordering":[{"type":"user_nicename","value":"alcheng","user_id":30922,"rest_url":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=alcheng"},{"type":"text","value":"Fei Xia","user_id":0,"rest_url":false},{"type":"user_nicename","value":"jfgao","user_id":32246,"rest_url":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=jfgao"}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[],"msr_project":[],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"inproceedings","related_content":[],"_links":{"self":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/161764","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":1,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/161764\/revisions"}],"predecessor-version":[{"id":403058,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/161764\/revisions\/403058"}],"wp:attachment":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/media?parent=161764"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=161764"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=161764"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=161764"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=161764"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=161764"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=161764"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=161764"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=161764"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=161764"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=161764"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=161764"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=161764"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}